Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set sw=2 ts=2 et tw=79: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | #include "nsIAtom.h" |
michael@0 | 8 | #include "nsParser.h" |
michael@0 | 9 | #include "nsString.h" |
michael@0 | 10 | #include "nsCRT.h" |
michael@0 | 11 | #include "nsScanner.h" |
michael@0 | 12 | #include "plstr.h" |
michael@0 | 13 | #include "nsIStringStream.h" |
michael@0 | 14 | #include "nsIChannel.h" |
michael@0 | 15 | #include "nsICachingChannel.h" |
michael@0 | 16 | #include "nsICacheEntryDescriptor.h" |
michael@0 | 17 | #include "nsIInputStream.h" |
michael@0 | 18 | #include "CNavDTD.h" |
michael@0 | 19 | #include "prenv.h" |
michael@0 | 20 | #include "prlock.h" |
michael@0 | 21 | #include "prcvar.h" |
michael@0 | 22 | #include "nsParserCIID.h" |
michael@0 | 23 | #include "nsReadableUtils.h" |
michael@0 | 24 | #include "nsCOMPtr.h" |
michael@0 | 25 | #include "nsExpatDriver.h" |
michael@0 | 26 | #include "nsIServiceManager.h" |
michael@0 | 27 | #include "nsICategoryManager.h" |
michael@0 | 28 | #include "nsISupportsPrimitives.h" |
michael@0 | 29 | #include "nsIFragmentContentSink.h" |
michael@0 | 30 | #include "nsStreamUtils.h" |
michael@0 | 31 | #include "nsHTMLTokenizer.h" |
michael@0 | 32 | #include "nsNetUtil.h" |
michael@0 | 33 | #include "nsScriptLoader.h" |
michael@0 | 34 | #include "nsDataHashtable.h" |
michael@0 | 35 | #include "nsXPCOMCIDInternal.h" |
michael@0 | 36 | #include "nsMimeTypes.h" |
michael@0 | 37 | #include "mozilla/CondVar.h" |
michael@0 | 38 | #include "mozilla/Mutex.h" |
michael@0 | 39 | #include "nsParserConstants.h" |
michael@0 | 40 | #include "nsCharsetSource.h" |
michael@0 | 41 | #include "nsContentUtils.h" |
michael@0 | 42 | #include "nsThreadUtils.h" |
michael@0 | 43 | #include "nsIHTMLContentSink.h" |
michael@0 | 44 | |
michael@0 | 45 | #include "mozilla/dom/EncodingUtils.h" |
michael@0 | 46 | |
michael@0 | 47 | using namespace mozilla; |
michael@0 | 48 | using mozilla::dom::EncodingUtils; |
michael@0 | 49 | |
// Bit values kept in nsParser::mFlags.  Initialize() starts every parser with
// PARSER_ENABLED, OBSERVERS_ENABLED and CAN_TOKENIZE set; PostContinueEvent()
// sets PENDING_CONTINUE_EVENT once a continue event has been dispatched.
#define NS_PARSER_FLAG_PARSER_ENABLED         0x00000002
#define NS_PARSER_FLAG_OBSERVERS_ENABLED      0x00000004
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
#define NS_PARSER_FLAG_FLUSH_TOKENS           0x00000020
#define NS_PARSER_FLAG_CAN_TOKENIZE           0x00000040
michael@0 | 55 | |
michael@0 | 56 | //-------------- Begin ParseContinue Event Definition ------------------------ |
michael@0 | 57 | /* |
michael@0 | 58 | The parser can be explicitly interrupted by passing a return value of |
michael@0 | 59 | NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause |
michael@0 | 60 | the parser to stop processing and allow the application to return to the event |
michael@0 | 61 | loop. The data which was left at the time of interruption will be processed |
michael@0 | 62 | the next time OnDataAvailable is called. If the parser has received its final |
michael@0 | 63 | chunk of data then OnDataAvailable will no longer be called by the networking |
michael@0 | 64 | module, so the parser will schedule a nsParserContinueEvent which will call |
michael@0 | 65 | the parser to process the remaining data after returning to the event loop. |
michael@0 | 66 | If the parser is interrupted while processing the remaining data it will |
michael@0 | 67 | schedule another ParseContinueEvent. The processing of data followed by |
michael@0 | 68 | scheduling of the continue events will proceed until either: |
michael@0 | 69 | |
michael@0 | 70 | 1) All of the remaining data can be processed without interrupting |
michael@0 | 71 | 2) The parser has been cancelled. |
michael@0 | 72 | |
michael@0 | 73 | |
michael@0 | 74 | This capability is currently used in CNavDTD and nsHTMLContentSink. The |
michael@0 | 75 | nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be |
michael@0 | 76 | processed and when each token is processed. The nsHTML content sink records |
michael@0 | 77 | the time when the chunk has started processing and will return |
michael@0 | 78 | NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a |
michael@0 | 79 | threshold called max tokenizing processing time. This allows the content sink |
michael@0 | 80 | to limit how much data is processed in a single chunk which in turn gates how |
michael@0 | 81 | much time is spent away from the event loop. Processing smaller chunks of data |
michael@0 | 82 | also reduces the time spent in subsequent reflows. |
michael@0 | 83 | |
michael@0 | 84 | This capability is most apparent when loading large documents. If the maximum |
michael@0 | 85 | token processing time is set small enough the application will remain |
michael@0 | 86 | responsive during document load. |
michael@0 | 87 | |
michael@0 | 88 | A side-effect of this capability is that document load is not complete when |
michael@0 | 89 | the last chunk of data is passed to OnDataAvailable since the parser may have |
michael@0 | 90 | been interrupted when the last chunk of data arrived. The document is complete |
michael@0 | 91 | when all of the document has been tokenized and there aren't any pending |
michael@0 | 92 | nsParserContinueEvents. This can cause problems if the application assumes |
michael@0 | 93 | that it can monitor the load requests to determine when the document load has |
michael@0 | 94 | been completed. This is what happens in Mozilla. The document is considered |
michael@0 | 95 | completely loaded when all of the load requests have been satisfied. To delay |
michael@0 | 96 | the document load until all of the parsing has been completed the |
michael@0 | 97 | nsHTMLContentSink adds a dummy parser load request which is not removed until |
michael@0 | 98 | the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call |
michael@0 | 99 | DidBuildModel until the final chunk of data has been passed to the parser |
michael@0 | 100 | through the OnDataAvailable and there aren't any pending |
michael@0 | 101 | nsParserContinueEvents. |
michael@0 | 102 | |
michael@0 | 103 | Currently the parser ignores requests to be interrupted during the |
michael@0 | 104 | processing of script. This is because a document.write followed by JavaScript |
michael@0 | 105 | calls to manipulate the DOM may fail if the parser was interrupted during the |
michael@0 | 106 | document.write. |
michael@0 | 107 | |
michael@0 | 108 | For more details @see bugzilla bug 76722 |
michael@0 | 109 | */ |
michael@0 | 110 | |
michael@0 | 111 | |
michael@0 | 112 | class nsParserContinueEvent : public nsRunnable |
michael@0 | 113 | { |
michael@0 | 114 | public: |
michael@0 | 115 | nsRefPtr<nsParser> mParser; |
michael@0 | 116 | |
michael@0 | 117 | nsParserContinueEvent(nsParser* aParser) |
michael@0 | 118 | : mParser(aParser) |
michael@0 | 119 | {} |
michael@0 | 120 | |
michael@0 | 121 | NS_IMETHOD Run() |
michael@0 | 122 | { |
michael@0 | 123 | mParser->HandleParserContinueEvent(this); |
michael@0 | 124 | return NS_OK; |
michael@0 | 125 | } |
michael@0 | 126 | }; |
michael@0 | 127 | |
michael@0 | 128 | //-------------- End ParseContinue Event Definition ------------------------ |
michael@0 | 129 | |
michael@0 | 130 | /** |
michael@0 | 131 | * default constructor |
michael@0 | 132 | */ |
michael@0 | 133 | nsParser::nsParser() |
michael@0 | 134 | { |
michael@0 | 135 | Initialize(true); |
michael@0 | 136 | } |
michael@0 | 137 | |
michael@0 | 138 | nsParser::~nsParser() |
michael@0 | 139 | { |
michael@0 | 140 | Cleanup(); |
michael@0 | 141 | } |
michael@0 | 142 | |
michael@0 | 143 | void |
michael@0 | 144 | nsParser::Initialize(bool aConstructor) |
michael@0 | 145 | { |
michael@0 | 146 | if (aConstructor) { |
michael@0 | 147 | // Raw pointer |
michael@0 | 148 | mParserContext = 0; |
michael@0 | 149 | } |
michael@0 | 150 | else { |
michael@0 | 151 | // nsCOMPtrs |
michael@0 | 152 | mObserver = nullptr; |
michael@0 | 153 | mUnusedInput.Truncate(); |
michael@0 | 154 | } |
michael@0 | 155 | |
michael@0 | 156 | mContinueEvent = nullptr; |
michael@0 | 157 | mCharsetSource = kCharsetUninitialized; |
michael@0 | 158 | mCharset.AssignLiteral("ISO-8859-1"); |
michael@0 | 159 | mInternalState = NS_OK; |
michael@0 | 160 | mStreamStatus = NS_OK; |
michael@0 | 161 | mCommand = eViewNormal; |
michael@0 | 162 | mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | |
michael@0 | 163 | NS_PARSER_FLAG_PARSER_ENABLED | |
michael@0 | 164 | NS_PARSER_FLAG_CAN_TOKENIZE; |
michael@0 | 165 | |
michael@0 | 166 | mProcessingNetworkData = false; |
michael@0 | 167 | mIsAboutBlank = false; |
michael@0 | 168 | } |
michael@0 | 169 | |
michael@0 | 170 | void |
michael@0 | 171 | nsParser::Cleanup() |
michael@0 | 172 | { |
michael@0 | 173 | #ifdef DEBUG |
michael@0 | 174 | if (mParserContext && mParserContext->mPrevContext) { |
michael@0 | 175 | NS_WARNING("Extra parser contexts still on the parser stack"); |
michael@0 | 176 | } |
michael@0 | 177 | #endif |
michael@0 | 178 | |
michael@0 | 179 | while (mParserContext) { |
michael@0 | 180 | CParserContext *pc = mParserContext->mPrevContext; |
michael@0 | 181 | delete mParserContext; |
michael@0 | 182 | mParserContext = pc; |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | // It should not be possible for this flag to be set when we are getting |
michael@0 | 186 | // destroyed since this flag implies a pending nsParserContinueEvent, which |
michael@0 | 187 | // has an owning reference to |this|. |
michael@0 | 188 | NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad"); |
michael@0 | 189 | } |
michael@0 | 190 | |
michael@0 | 191 | NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) |
michael@0 | 192 | |
michael@0 | 193 | NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) |
michael@0 | 194 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD) |
michael@0 | 195 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink) |
michael@0 | 196 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) |
michael@0 | 197 | NS_IMPL_CYCLE_COLLECTION_UNLINK_END |
michael@0 | 198 | |
michael@0 | 199 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) |
michael@0 | 200 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD) |
michael@0 | 201 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink) |
michael@0 | 202 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) |
michael@0 | 203 | CParserContext *pc = tmp->mParserContext; |
michael@0 | 204 | while (pc) { |
michael@0 | 205 | cb.NoteXPCOMChild(pc->mTokenizer); |
michael@0 | 206 | pc = pc->mPrevContext; |
michael@0 | 207 | } |
michael@0 | 208 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END |
michael@0 | 209 | |
michael@0 | 210 | NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser) |
michael@0 | 211 | NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser) |
michael@0 | 212 | NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) |
michael@0 | 213 | NS_INTERFACE_MAP_ENTRY(nsIStreamListener) |
michael@0 | 214 | NS_INTERFACE_MAP_ENTRY(nsIParser) |
michael@0 | 215 | NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) |
michael@0 | 216 | NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) |
michael@0 | 217 | NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser) |
michael@0 | 218 | NS_INTERFACE_MAP_END |
michael@0 | 219 | |
michael@0 | 220 | // The parser continue event is posted only if |
michael@0 | 221 | // all of the data to parse has been passed to ::OnDataAvailable |
michael@0 | 222 | // and the parser has been interrupted by the content sink |
michael@0 | 223 | // because the processing of tokens took too long. |
michael@0 | 224 | |
michael@0 | 225 | nsresult |
michael@0 | 226 | nsParser::PostContinueEvent() |
michael@0 | 227 | { |
michael@0 | 228 | if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) { |
michael@0 | 229 | // If this flag isn't set, then there shouldn't be a live continue event! |
michael@0 | 230 | NS_ASSERTION(!mContinueEvent, "bad"); |
michael@0 | 231 | |
michael@0 | 232 | // This creates a reference cycle between this and the event that is |
michael@0 | 233 | // broken when the event fires. |
michael@0 | 234 | nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this); |
michael@0 | 235 | if (NS_FAILED(NS_DispatchToCurrentThread(event))) { |
michael@0 | 236 | NS_WARNING("failed to dispatch parser continuation event"); |
michael@0 | 237 | } else { |
michael@0 | 238 | mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; |
michael@0 | 239 | mContinueEvent = event; |
michael@0 | 240 | } |
michael@0 | 241 | } |
michael@0 | 242 | return NS_OK; |
michael@0 | 243 | } |
michael@0 | 244 | |
michael@0 | 245 | NS_IMETHODIMP_(void) |
michael@0 | 246 | nsParser::GetCommand(nsCString& aCommand) |
michael@0 | 247 | { |
michael@0 | 248 | aCommand = mCommandStr; |
michael@0 | 249 | } |
michael@0 | 250 | |
michael@0 | 251 | /** |
michael@0 | 252 | * Call this method once you've created a parser, and want to instruct it |
michael@0 | 253 | * about the command which caused the parser to be constructed. For example, |
michael@0 | 254 | * this allows us to select a DTD which can do, say, view-source. |
michael@0 | 255 | * |
michael@0 | 256 | * @param aCommand the command string to set |
michael@0 | 257 | */ |
michael@0 | 258 | NS_IMETHODIMP_(void) |
michael@0 | 259 | nsParser::SetCommand(const char* aCommand) |
michael@0 | 260 | { |
michael@0 | 261 | mCommandStr.Assign(aCommand); |
michael@0 | 262 | if (mCommandStr.Equals("view-source")) { |
michael@0 | 263 | mCommand = eViewSource; |
michael@0 | 264 | } else if (mCommandStr.Equals("view-fragment")) { |
michael@0 | 265 | mCommand = eViewFragment; |
michael@0 | 266 | } else { |
michael@0 | 267 | mCommand = eViewNormal; |
michael@0 | 268 | } |
michael@0 | 269 | } |
michael@0 | 270 | |
michael@0 | 271 | /** |
michael@0 | 272 | * Call this method once you've created a parser, and want to instruct it |
michael@0 | 273 | * about the command which caused the parser to be constructed. For example, |
michael@0 | 274 | * this allows us to select a DTD which can do, say, view-source. |
michael@0 | 275 | * |
michael@0 | 276 | * @param aParserCommand the command to set |
michael@0 | 277 | */ |
michael@0 | 278 | NS_IMETHODIMP_(void) |
michael@0 | 279 | nsParser::SetCommand(eParserCommands aParserCommand) |
michael@0 | 280 | { |
michael@0 | 281 | mCommand = aParserCommand; |
michael@0 | 282 | } |
michael@0 | 283 | |
michael@0 | 284 | /** |
michael@0 | 285 | * Call this method once you've created a parser, and want to instruct it |
michael@0 | 286 | * about what charset to load |
michael@0 | 287 | * |
michael@0 | 288 | * @param aCharset- the charset of a document |
michael@0 | 289 | * @param aCharsetSource- the source of the charset |
michael@0 | 290 | */ |
michael@0 | 291 | NS_IMETHODIMP_(void) |
michael@0 | 292 | nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource) |
michael@0 | 293 | { |
michael@0 | 294 | mCharset = aCharset; |
michael@0 | 295 | mCharsetSource = aCharsetSource; |
michael@0 | 296 | if (mParserContext && mParserContext->mScanner) { |
michael@0 | 297 | mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource); |
michael@0 | 298 | } |
michael@0 | 299 | } |
michael@0 | 300 | |
michael@0 | 301 | void |
michael@0 | 302 | nsParser::SetSinkCharset(nsACString& aCharset) |
michael@0 | 303 | { |
michael@0 | 304 | if (mSink) { |
michael@0 | 305 | mSink->SetDocumentCharset(aCharset); |
michael@0 | 306 | } |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | /** |
michael@0 | 310 | * This method gets called in order to set the content |
michael@0 | 311 | * sink for this parser to dump nodes to. |
michael@0 | 312 | * |
michael@0 | 313 | * @param nsIContentSink interface for node receiver |
michael@0 | 314 | */ |
michael@0 | 315 | NS_IMETHODIMP_(void) |
michael@0 | 316 | nsParser::SetContentSink(nsIContentSink* aSink) |
michael@0 | 317 | { |
michael@0 | 318 | NS_PRECONDITION(aSink, "sink cannot be null!"); |
michael@0 | 319 | mSink = aSink; |
michael@0 | 320 | |
michael@0 | 321 | if (mSink) { |
michael@0 | 322 | mSink->SetParser(this); |
michael@0 | 323 | nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink); |
michael@0 | 324 | if (htmlSink) { |
michael@0 | 325 | mIsAboutBlank = true; |
michael@0 | 326 | } |
michael@0 | 327 | } |
michael@0 | 328 | } |
michael@0 | 329 | |
michael@0 | 330 | /** |
michael@0 | 331 | * retrieve the sink set into the parser |
michael@0 | 332 | * @return current sink |
michael@0 | 333 | */ |
michael@0 | 334 | NS_IMETHODIMP_(nsIContentSink*) |
michael@0 | 335 | nsParser::GetContentSink() |
michael@0 | 336 | { |
michael@0 | 337 | return mSink; |
michael@0 | 338 | } |
michael@0 | 339 | |
michael@0 | 340 | /** |
michael@0 | 341 | * Determine what DTD mode (and thus what layout nsCompatibility mode) |
michael@0 | 342 | * to use for this document based on the first chunk of data received |
michael@0 | 343 | * from the network (each parsercontext can have its own mode). (No, |
michael@0 | 344 | * this is not an optimal solution -- we really don't need to know until |
michael@0 | 345 | * after we've received the DOCTYPE, and this could easily be part of |
michael@0 | 346 | * the regular parsing process if the parser were designed in a way that |
michael@0 | 347 | * made such modifications easy.) |
michael@0 | 348 | */ |
michael@0 | 349 | |
michael@0 | 350 | // Parse the PS production in the SGML spec (excluding the part dealing |
michael@0 | 351 | // with entity references) starting at theIndex into theBuffer, and |
michael@0 | 352 | // return the first index after the end of the production. |
michael@0 | 353 | static int32_t |
michael@0 | 354 | ParsePS(const nsString& aBuffer, int32_t aIndex) |
michael@0 | 355 | { |
michael@0 | 356 | for (;;) { |
michael@0 | 357 | char16_t ch = aBuffer.CharAt(aIndex); |
michael@0 | 358 | if ((ch == char16_t(' ')) || (ch == char16_t('\t')) || |
michael@0 | 359 | (ch == char16_t('\n')) || (ch == char16_t('\r'))) { |
michael@0 | 360 | ++aIndex; |
michael@0 | 361 | } else if (ch == char16_t('-')) { |
michael@0 | 362 | int32_t tmpIndex; |
michael@0 | 363 | if (aBuffer.CharAt(aIndex+1) == char16_t('-') && |
michael@0 | 364 | kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) { |
michael@0 | 365 | aIndex = tmpIndex + 2; |
michael@0 | 366 | } else { |
michael@0 | 367 | return aIndex; |
michael@0 | 368 | } |
michael@0 | 369 | } else { |
michael@0 | 370 | return aIndex; |
michael@0 | 371 | } |
michael@0 | 372 | } |
michael@0 | 373 | } |
michael@0 | 374 | |
michael@0 | 375 | #define PARSE_DTD_HAVE_DOCTYPE (1<<0) |
michael@0 | 376 | #define PARSE_DTD_HAVE_PUBLIC_ID (1<<1) |
michael@0 | 377 | #define PARSE_DTD_HAVE_SYSTEM_ID (1<<2) |
michael@0 | 378 | #define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3) |
michael@0 | 379 | |
michael@0 | 380 | // return true on success (includes not present), false on failure |
michael@0 | 381 | static bool |
michael@0 | 382 | ParseDocTypeDecl(const nsString &aBuffer, |
michael@0 | 383 | int32_t *aResultFlags, |
michael@0 | 384 | nsString &aPublicID, |
michael@0 | 385 | nsString &aSystemID) |
michael@0 | 386 | { |
michael@0 | 387 | bool haveDoctype = false; |
michael@0 | 388 | *aResultFlags = 0; |
michael@0 | 389 | |
michael@0 | 390 | // Skip through any comments and processing instructions |
michael@0 | 391 | // The PI-skipping is a bit of a hack. |
michael@0 | 392 | int32_t theIndex = 0; |
michael@0 | 393 | do { |
michael@0 | 394 | theIndex = aBuffer.FindChar('<', theIndex); |
michael@0 | 395 | if (theIndex == kNotFound) break; |
michael@0 | 396 | char16_t nextChar = aBuffer.CharAt(theIndex+1); |
michael@0 | 397 | if (nextChar == char16_t('!')) { |
michael@0 | 398 | int32_t tmpIndex = theIndex + 2; |
michael@0 | 399 | if (kNotFound != |
michael@0 | 400 | (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) { |
michael@0 | 401 | haveDoctype = true; |
michael@0 | 402 | theIndex += 7; // skip "DOCTYPE" |
michael@0 | 403 | break; |
michael@0 | 404 | } |
michael@0 | 405 | theIndex = ParsePS(aBuffer, tmpIndex); |
michael@0 | 406 | theIndex = aBuffer.FindChar('>', theIndex); |
michael@0 | 407 | } else if (nextChar == char16_t('?')) { |
michael@0 | 408 | theIndex = aBuffer.FindChar('>', theIndex); |
michael@0 | 409 | } else { |
michael@0 | 410 | break; |
michael@0 | 411 | } |
michael@0 | 412 | } while (theIndex != kNotFound); |
michael@0 | 413 | |
michael@0 | 414 | if (!haveDoctype) |
michael@0 | 415 | return true; |
michael@0 | 416 | *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE; |
michael@0 | 417 | |
michael@0 | 418 | theIndex = ParsePS(aBuffer, theIndex); |
michael@0 | 419 | theIndex = aBuffer.Find("HTML", true, theIndex, 0); |
michael@0 | 420 | if (kNotFound == theIndex) |
michael@0 | 421 | return false; |
michael@0 | 422 | theIndex = ParsePS(aBuffer, theIndex+4); |
michael@0 | 423 | int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0); |
michael@0 | 424 | |
michael@0 | 425 | if (kNotFound != tmpIndex) { |
michael@0 | 426 | theIndex = ParsePS(aBuffer, tmpIndex+6); |
michael@0 | 427 | |
michael@0 | 428 | // We get here only if we've read <!DOCTYPE HTML PUBLIC |
michael@0 | 429 | // (not case sensitive) possibly with comments within. |
michael@0 | 430 | |
michael@0 | 431 | // Now find the beginning and end of the public identifier |
michael@0 | 432 | // and the system identifier (if present). |
michael@0 | 433 | |
michael@0 | 434 | char16_t lit = aBuffer.CharAt(theIndex); |
michael@0 | 435 | if ((lit != char16_t('\"')) && (lit != char16_t('\''))) |
michael@0 | 436 | return false; |
michael@0 | 437 | |
michael@0 | 438 | // Start is the first character, excluding the quote, and End is |
michael@0 | 439 | // the final quote, so there are (end-start) characters. |
michael@0 | 440 | |
michael@0 | 441 | int32_t PublicIDStart = theIndex + 1; |
michael@0 | 442 | int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart); |
michael@0 | 443 | if (kNotFound == PublicIDEnd) |
michael@0 | 444 | return false; |
michael@0 | 445 | theIndex = ParsePS(aBuffer, PublicIDEnd + 1); |
michael@0 | 446 | char16_t next = aBuffer.CharAt(theIndex); |
michael@0 | 447 | if (next == char16_t('>')) { |
michael@0 | 448 | // There was a public identifier, but no system |
michael@0 | 449 | // identifier, |
michael@0 | 450 | // so do nothing. |
michael@0 | 451 | // This is needed to avoid the else at the end, and it's |
michael@0 | 452 | // also the most common case. |
michael@0 | 453 | } else if ((next == char16_t('\"')) || |
michael@0 | 454 | (next == char16_t('\''))) { |
michael@0 | 455 | // We found a system identifier. |
michael@0 | 456 | *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; |
michael@0 | 457 | int32_t SystemIDStart = theIndex + 1; |
michael@0 | 458 | int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); |
michael@0 | 459 | if (kNotFound == SystemIDEnd) |
michael@0 | 460 | return false; |
michael@0 | 461 | aSystemID = |
michael@0 | 462 | Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); |
michael@0 | 463 | } else if (next == char16_t('[')) { |
michael@0 | 464 | // We found an internal subset. |
michael@0 | 465 | *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; |
michael@0 | 466 | } else { |
michael@0 | 467 | // Something's wrong. |
michael@0 | 468 | return false; |
michael@0 | 469 | } |
michael@0 | 470 | |
michael@0 | 471 | // Since a public ID is a minimum literal, we must trim |
michael@0 | 472 | // and collapse whitespace |
michael@0 | 473 | aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart); |
michael@0 | 474 | aPublicID.CompressWhitespace(true, true); |
michael@0 | 475 | *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID; |
michael@0 | 476 | } else { |
michael@0 | 477 | tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0); |
michael@0 | 478 | if (kNotFound != tmpIndex) { |
michael@0 | 479 | // DOCTYPES with system ID but no Public ID |
michael@0 | 480 | *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; |
michael@0 | 481 | |
michael@0 | 482 | theIndex = ParsePS(aBuffer, tmpIndex+6); |
michael@0 | 483 | char16_t next = aBuffer.CharAt(theIndex); |
michael@0 | 484 | if (next != char16_t('\"') && next != char16_t('\'')) |
michael@0 | 485 | return false; |
michael@0 | 486 | |
michael@0 | 487 | int32_t SystemIDStart = theIndex + 1; |
michael@0 | 488 | int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); |
michael@0 | 489 | |
michael@0 | 490 | if (kNotFound == SystemIDEnd) |
michael@0 | 491 | return false; |
michael@0 | 492 | aSystemID = |
michael@0 | 493 | Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); |
michael@0 | 494 | theIndex = ParsePS(aBuffer, SystemIDEnd + 1); |
michael@0 | 495 | } |
michael@0 | 496 | |
michael@0 | 497 | char16_t nextChar = aBuffer.CharAt(theIndex); |
michael@0 | 498 | if (nextChar == char16_t('[')) |
michael@0 | 499 | *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; |
michael@0 | 500 | else if (nextChar != char16_t('>')) |
michael@0 | 501 | return false; |
michael@0 | 502 | } |
michael@0 | 503 | return true; |
michael@0 | 504 | } |
michael@0 | 505 | |
// One entry of the public-identifier lookup table: a public ID plus the
// mode to use depending on whether a system ID accompanied it.
struct PubIDInfo
{
  // Public IDs that should trigger strict mode are not listed in the table,
  // since we want all future public IDs to trigger strict mode as well.
  enum eMode {
    eQuirks,          // always quirks mode, unless there's an internal subset
    eAlmostStandards, // eCompatibility_AlmostStandards
    eFullStandards    // eCompatibility_FullStandards
  };

  const char* name;       // the public identifier
  eMode mode_if_no_sysid; // mode when no system ID is present
  eMode mode_if_sysid;    // mode when a system ID is present
};
michael@0 | 523 | |
// Number of elements in a true array (not a pointer).  The argument is
// parenthesized before subscripting so that an expression argument is
// indexed as a whole.
#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof((array_)[0]))
michael@0 | 525 | |
michael@0 | 526 | // These must be in nsCRT::strcmp order so binary-search can be used. |
michael@0 | 527 | // This is verified, |#ifdef DEBUG|, below. |
michael@0 | 528 | |
michael@0 | 529 | // Even though public identifiers should be case sensitive, we will do |
michael@0 | 530 | // all comparisons after converting to lower case in order to do |
michael@0 | 531 | // case-insensitive comparison since there are a number of existing web |
michael@0 | 532 | // sites that use the incorrect case. Therefore all of the public |
michael@0 | 533 | // identifiers below are in lower case (with the correct case following, |
michael@0 | 534 | // in comments). The case is verified, |#ifdef DEBUG|, below. |
michael@0 | 535 | static const PubIDInfo kPublicIDs[] = { |
michael@0 | 536 | {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 537 | {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 538 | {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 539 | {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 540 | {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 541 | {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 542 | {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 543 | {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 544 | {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 545 | {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 546 | {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 547 | {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 548 | {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 549 | {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 550 | {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 551 | {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 552 | {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 553 | {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 554 | {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 555 | {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 556 | {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 557 | {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 558 | {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 559 | {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 560 | {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 561 | {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 562 | {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 563 | {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 564 | {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 565 | {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 566 | {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 567 | {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 568 | {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 569 | {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 570 | {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 571 | {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 572 | {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 573 | {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 574 | {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 575 | {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 576 | {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 577 | {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 578 | {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 579 | {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 580 | {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 581 | {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 582 | {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 583 | {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 584 | {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 585 | {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 586 | {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 587 | {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 588 | {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 589 | {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 590 | {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 591 | {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 592 | {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 593 | {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 594 | {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 595 | {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 596 | {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 597 | {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 598 | {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, |
michael@0 | 599 | {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, |
michael@0 | 600 | {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 601 | {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 602 | {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 603 | {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, |
michael@0 | 604 | {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, |
michael@0 | 605 | {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 606 | {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 607 | {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 608 | {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 609 | {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 610 | {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 611 | {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
michael@0 | 612 | }; |
michael@0 | 613 | |
#ifdef DEBUG
/**
 * Debug-only sanity check of the kPublicIDs table. The checks run on the
 * first call only; later calls return immediately.
 *
 * DetermineHTMLParseMode() binary-searches kPublicIDs with nsCRT::strcmp
 * using a lower-cased public ID, so the table must be in strictly ascending
 * order and every name must already be lower case.
 */
static void
VerifyPublicIDs()
{
  static bool gVerified = false;
  if (gVerified) {
    return;
  }
  gVerified = true;

  // Neighbouring entries must be strictly ascending (this also rejects
  // duplicates). "i + 1 <" rather than "i < N - 1" keeps the bound valid
  // even for an empty table.
  for (uint32_t i = 0; i + 1 < ELEMENTS_OF(kPublicIDs); ++i) {
    if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i + 1].name) >= 0) {
      // Name the offending pair before asserting, so the detail is not lost
      // when assertions are configured to stop the process.
      printf("Doctypes %s and %s out of order.\n",
             kPublicIDs[i].name, kPublicIDs[i + 1].name);
      NS_NOTREACHED("doctypes out of order");
    }
  }

  // Every entry must equal its own lower-cased form.
  for (uint32_t i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
    nsAutoCString lcPubID(kPublicIDs[i].name);
    ToLowerCase(lcPubID);
    if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
      printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
      NS_NOTREACHED("doctype not lower case");
    }
  }
}
#endif
michael@0 | 640 | |
michael@0 | 641 | static void |
michael@0 | 642 | DetermineHTMLParseMode(const nsString& aBuffer, |
michael@0 | 643 | nsDTDMode& aParseMode, |
michael@0 | 644 | eParserDocType& aDocType) |
michael@0 | 645 | { |
michael@0 | 646 | #ifdef DEBUG |
michael@0 | 647 | VerifyPublicIDs(); |
michael@0 | 648 | #endif |
michael@0 | 649 | int32_t resultFlags; |
michael@0 | 650 | nsAutoString publicIDUCS2, sysIDUCS2; |
michael@0 | 651 | if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) { |
michael@0 | 652 | if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) { |
michael@0 | 653 | // no DOCTYPE |
michael@0 | 654 | aParseMode = eDTDMode_quirks; |
michael@0 | 655 | aDocType = eHTML_Quirks; |
michael@0 | 656 | } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) || |
michael@0 | 657 | !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) { |
michael@0 | 658 | // A doctype with an internal subset is always full_standards. |
michael@0 | 659 | // A doctype without a public ID is always full_standards. |
michael@0 | 660 | aDocType = eHTML_Strict; |
michael@0 | 661 | aParseMode = eDTDMode_full_standards; |
michael@0 | 662 | |
michael@0 | 663 | // Special hack for IBM's custom DOCTYPE. |
michael@0 | 664 | if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) && |
michael@0 | 665 | sysIDUCS2 == NS_LITERAL_STRING( |
michael@0 | 666 | "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { |
michael@0 | 667 | aParseMode = eDTDMode_quirks; |
michael@0 | 668 | aDocType = eHTML_Quirks; |
michael@0 | 669 | } |
michael@0 | 670 | |
michael@0 | 671 | } else { |
michael@0 | 672 | // We have to check our list of public IDs to see what to do. |
michael@0 | 673 | // Yes, we want UCS2 to ASCII lossy conversion. |
michael@0 | 674 | nsAutoCString publicID; |
michael@0 | 675 | publicID.AssignWithConversion(publicIDUCS2); |
michael@0 | 676 | |
michael@0 | 677 | // See comment above definition of kPublicIDs about case |
michael@0 | 678 | // sensitivity. |
michael@0 | 679 | ToLowerCase(publicID); |
michael@0 | 680 | |
michael@0 | 681 | // Binary search to see if we can find the correct public ID |
michael@0 | 682 | // These must be signed since maximum can go below zero and we'll |
michael@0 | 683 | // crash if it's unsigned. |
michael@0 | 684 | int32_t minimum = 0; |
michael@0 | 685 | int32_t maximum = ELEMENTS_OF(kPublicIDs) - 1; |
michael@0 | 686 | int32_t index; |
michael@0 | 687 | for (;;) { |
michael@0 | 688 | index = (minimum + maximum) / 2; |
michael@0 | 689 | int32_t comparison = |
michael@0 | 690 | nsCRT::strcmp(publicID.get(), kPublicIDs[index].name); |
michael@0 | 691 | if (comparison == 0) |
michael@0 | 692 | break; |
michael@0 | 693 | if (comparison < 0) |
michael@0 | 694 | maximum = index - 1; |
michael@0 | 695 | else |
michael@0 | 696 | minimum = index + 1; |
michael@0 | 697 | |
michael@0 | 698 | if (maximum < minimum) { |
michael@0 | 699 | // The DOCTYPE is not in our list, so it must be full_standards. |
michael@0 | 700 | aParseMode = eDTDMode_full_standards; |
michael@0 | 701 | aDocType = eHTML_Strict; |
michael@0 | 702 | return; |
michael@0 | 703 | } |
michael@0 | 704 | } |
michael@0 | 705 | |
michael@0 | 706 | switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) |
michael@0 | 707 | ? kPublicIDs[index].mode_if_sysid |
michael@0 | 708 | : kPublicIDs[index].mode_if_no_sysid) |
michael@0 | 709 | { |
michael@0 | 710 | case PubIDInfo::eQuirks: |
michael@0 | 711 | aParseMode = eDTDMode_quirks; |
michael@0 | 712 | aDocType = eHTML_Quirks; |
michael@0 | 713 | break; |
michael@0 | 714 | case PubIDInfo::eAlmostStandards: |
michael@0 | 715 | aParseMode = eDTDMode_almost_standards; |
michael@0 | 716 | aDocType = eHTML_Strict; |
michael@0 | 717 | break; |
michael@0 | 718 | case PubIDInfo::eFullStandards: |
michael@0 | 719 | aParseMode = eDTDMode_full_standards; |
michael@0 | 720 | aDocType = eHTML_Strict; |
michael@0 | 721 | break; |
michael@0 | 722 | default: |
michael@0 | 723 | NS_NOTREACHED("no other cases!"); |
michael@0 | 724 | } |
michael@0 | 725 | } |
michael@0 | 726 | } else { |
michael@0 | 727 | // badly formed DOCTYPE -> quirks |
michael@0 | 728 | aParseMode = eDTDMode_quirks; |
michael@0 | 729 | aDocType = eHTML_Quirks; |
michael@0 | 730 | } |
michael@0 | 731 | } |
michael@0 | 732 | |
michael@0 | 733 | static void |
michael@0 | 734 | DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode, |
michael@0 | 735 | eParserDocType& aDocType, const nsACString& aMimeType) |
michael@0 | 736 | { |
michael@0 | 737 | if (aMimeType.EqualsLiteral(TEXT_HTML)) { |
michael@0 | 738 | DetermineHTMLParseMode(aBuffer, aParseMode, aDocType); |
michael@0 | 739 | } else if (nsContentUtils::IsPlainTextType(aMimeType)) { |
michael@0 | 740 | aDocType = ePlainText; |
michael@0 | 741 | aParseMode = eDTDMode_quirks; |
michael@0 | 742 | } else { // Some form of XML |
michael@0 | 743 | aDocType = eXML; |
michael@0 | 744 | aParseMode = eDTDMode_full_standards; |
michael@0 | 745 | } |
michael@0 | 746 | } |
michael@0 | 747 | |
michael@0 | 748 | static nsIDTD* |
michael@0 | 749 | FindSuitableDTD(CParserContext& aParserContext) |
michael@0 | 750 | { |
michael@0 | 751 | // We always find a DTD. |
michael@0 | 752 | aParserContext.mAutoDetectStatus = ePrimaryDetect; |
michael@0 | 753 | |
michael@0 | 754 | // Quick check for view source. |
michael@0 | 755 | NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource, |
michael@0 | 756 | "The old parser is not supposed to be used for View Source anymore."); |
michael@0 | 757 | |
michael@0 | 758 | // Now see if we're parsing HTML (which, as far as we're concerned, simply |
michael@0 | 759 | // means "not XML"). |
michael@0 | 760 | if (aParserContext.mDocType != eXML) { |
michael@0 | 761 | return new CNavDTD(); |
michael@0 | 762 | } |
michael@0 | 763 | |
michael@0 | 764 | // If we're here, then we'd better be parsing XML. |
michael@0 | 765 | NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?"); |
michael@0 | 766 | return new nsExpatDriver(); |
michael@0 | 767 | } |
michael@0 | 768 | |
michael@0 | 769 | NS_IMETHODIMP |
michael@0 | 770 | nsParser::CancelParsingEvents() |
michael@0 | 771 | { |
michael@0 | 772 | if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { |
michael@0 | 773 | NS_ASSERTION(mContinueEvent, "mContinueEvent is null"); |
michael@0 | 774 | // Revoke the pending continue parsing event |
michael@0 | 775 | mContinueEvent = nullptr; |
michael@0 | 776 | mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; |
michael@0 | 777 | } |
michael@0 | 778 | return NS_OK; |
michael@0 | 779 | } |
michael@0 | 780 | |
michael@0 | 781 | //////////////////////////////////////////////////////////////////////// |
michael@0 | 782 | |
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order. Stores the value
 * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
 * (which could be success or failure).
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * Implementation note: RV first receives EXPR2's result and is overwritten
 * with EXPR1's result only when EXPR2 succeeded, so a failure of EXPR2 always
 * wins. This is the same rule as the hand-written
 * "NS_FAILED(sinkResult) ? sinkResult : rv" in nsParser::WillBuildModel.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
  nsresult RV##__temp = EXPR1;                                                \
  RV = EXPR2;                                                                 \
  if (NS_SUCCEEDED(RV)) {                                                     \
    RV = RV##__temp;                                                          \
  }                                                                           \
}
michael@0 | 834 | |
michael@0 | 835 | /** |
michael@0 | 836 | * This gets called just prior to the model actually |
michael@0 | 837 | * being constructed. It's important to make this the |
michael@0 | 838 | * last thing that happens right before parsing, so we |
michael@0 | 839 | * can delay until the last moment the resolution of |
michael@0 | 840 | * which DTD to use (unless of course we're assigned one). |
michael@0 | 841 | */ |
michael@0 | 842 | nsresult |
michael@0 | 843 | nsParser::WillBuildModel(nsString& aFilename) |
michael@0 | 844 | { |
michael@0 | 845 | if (!mParserContext) |
michael@0 | 846 | return kInvalidParserContext; |
michael@0 | 847 | |
michael@0 | 848 | if (eUnknownDetect != mParserContext->mAutoDetectStatus) |
michael@0 | 849 | return NS_OK; |
michael@0 | 850 | |
michael@0 | 851 | if (eDTDMode_unknown == mParserContext->mDTDMode || |
michael@0 | 852 | eDTDMode_autodetect == mParserContext->mDTDMode) { |
michael@0 | 853 | char16_t buf[1025]; |
michael@0 | 854 | nsFixedString theBuffer(buf, 1024, 0); |
michael@0 | 855 | |
michael@0 | 856 | // Grab 1024 characters, starting at the first non-whitespace |
michael@0 | 857 | // character, to look for the doctype in. |
michael@0 | 858 | mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition()); |
michael@0 | 859 | DetermineParseMode(theBuffer, mParserContext->mDTDMode, |
michael@0 | 860 | mParserContext->mDocType, mParserContext->mMimeType); |
michael@0 | 861 | } |
michael@0 | 862 | |
michael@0 | 863 | NS_ASSERTION(!mDTD || !mParserContext->mPrevContext, |
michael@0 | 864 | "Clobbering DTD for non-root parser context!"); |
michael@0 | 865 | mDTD = FindSuitableDTD(*mParserContext); |
michael@0 | 866 | NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 867 | |
michael@0 | 868 | nsITokenizer* tokenizer; |
michael@0 | 869 | nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer); |
michael@0 | 870 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 871 | |
michael@0 | 872 | rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink); |
michael@0 | 873 | nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode()); |
michael@0 | 874 | // nsIDTD::WillBuildModel used to be responsible for calling |
michael@0 | 875 | // nsIContentSink::WillBuildModel, but that obligation isn't expressible |
michael@0 | 876 | // in the nsIDTD interface itself, so it's sounder and simpler to give that |
michael@0 | 877 | // responsibility back to the parser. The former behavior of the DTD was to |
michael@0 | 878 | // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns |
michael@0 | 879 | // failure we should use sinkResult instead of rv, to preserve the old error |
michael@0 | 880 | // handling behavior of the DTD: |
michael@0 | 881 | return NS_FAILED(sinkResult) ? sinkResult : rv; |
michael@0 | 882 | } |
michael@0 | 883 | |
michael@0 | 884 | /** |
michael@0 | 885 | * This gets called when the parser is done with its input. |
michael@0 | 886 | * Note that the parser may have been called recursively, so we |
michael@0 | 887 | * have to check for a prev. context before closing out the DTD/sink. |
michael@0 | 888 | */ |
michael@0 | 889 | nsresult |
michael@0 | 890 | nsParser::DidBuildModel(nsresult anErrorCode) |
michael@0 | 891 | { |
michael@0 | 892 | nsresult result = anErrorCode; |
michael@0 | 893 | |
michael@0 | 894 | if (IsComplete()) { |
michael@0 | 895 | if (mParserContext && !mParserContext->mPrevContext) { |
michael@0 | 896 | // Let sink know if we're about to end load because we've been terminated. |
michael@0 | 897 | // In that case we don't want it to run deferred scripts. |
michael@0 | 898 | bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING; |
michael@0 | 899 | if (mDTD && mSink) { |
michael@0 | 900 | nsresult dtdResult = mDTD->DidBuildModel(anErrorCode), |
michael@0 | 901 | sinkResult = mSink->DidBuildModel(terminated); |
michael@0 | 902 | // nsIDTD::DidBuildModel used to be responsible for calling |
michael@0 | 903 | // nsIContentSink::DidBuildModel, but that obligation isn't expressible |
michael@0 | 904 | // in the nsIDTD interface itself, so it's sounder and simpler to give |
michael@0 | 905 | // that responsibility back to the parser. The former behavior of the |
michael@0 | 906 | // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the |
michael@0 | 907 | // sink returns failure we should use sinkResult instead of dtdResult, |
michael@0 | 908 | // to preserve the old error handling behavior of the DTD: |
michael@0 | 909 | result = NS_FAILED(sinkResult) ? sinkResult : dtdResult; |
michael@0 | 910 | } |
michael@0 | 911 | |
michael@0 | 912 | //Ref. to bug 61462. |
michael@0 | 913 | mParserContext->mRequest = 0; |
michael@0 | 914 | } |
michael@0 | 915 | } |
michael@0 | 916 | |
michael@0 | 917 | return result; |
michael@0 | 918 | } |
michael@0 | 919 | |
michael@0 | 920 | /** |
michael@0 | 921 | * This method adds a new parser context to the list, |
michael@0 | 922 | * pushing the current one to the next position. |
michael@0 | 923 | * |
michael@0 | 924 | * @param ptr to new context |
michael@0 | 925 | */ |
michael@0 | 926 | void |
michael@0 | 927 | nsParser::PushContext(CParserContext& aContext) |
michael@0 | 928 | { |
michael@0 | 929 | NS_ASSERTION(aContext.mPrevContext == mParserContext, |
michael@0 | 930 | "Trying to push a context whose previous context differs from " |
michael@0 | 931 | "the current parser context."); |
michael@0 | 932 | mParserContext = &aContext; |
michael@0 | 933 | } |
michael@0 | 934 | |
michael@0 | 935 | /** |
michael@0 | 936 | * This method pops the topmost context off the stack, |
michael@0 | 937 | * returning it to the user. The next context (if any) |
michael@0 | 938 | * becomes the current context. |
michael@0 | 939 | * @update gess7/22/98 |
michael@0 | 940 | * @return prev. context |
michael@0 | 941 | */ |
michael@0 | 942 | CParserContext* |
michael@0 | 943 | nsParser::PopContext() |
michael@0 | 944 | { |
michael@0 | 945 | CParserContext* oldContext = mParserContext; |
michael@0 | 946 | if (oldContext) { |
michael@0 | 947 | mParserContext = oldContext->mPrevContext; |
michael@0 | 948 | if (mParserContext) { |
michael@0 | 949 | // If the old context was blocked, propagate the blocked state |
michael@0 | 950 | // back to the new one. Also, propagate the stream listener state |
michael@0 | 951 | // but don't override onStop state to guarantee the call to DidBuildModel(). |
michael@0 | 952 | if (mParserContext->mStreamListenerState != eOnStop) { |
michael@0 | 953 | mParserContext->mStreamListenerState = oldContext->mStreamListenerState; |
michael@0 | 954 | } |
michael@0 | 955 | } |
michael@0 | 956 | } |
michael@0 | 957 | return oldContext; |
michael@0 | 958 | } |
michael@0 | 959 | |
michael@0 | 960 | /** |
michael@0 | 961 | * Call this when you want control whether or not the parser will parse |
michael@0 | 962 | * and tokenize input (TRUE), or whether it just caches input to be |
michael@0 | 963 | * parsed later (FALSE). |
michael@0 | 964 | * |
michael@0 | 965 | * @param aState determines whether we parse/tokenize or just cache. |
michael@0 | 966 | * @return current state |
michael@0 | 967 | */ |
michael@0 | 968 | void |
michael@0 | 969 | nsParser::SetUnusedInput(nsString& aBuffer) |
michael@0 | 970 | { |
michael@0 | 971 | mUnusedInput = aBuffer; |
michael@0 | 972 | } |
michael@0 | 973 | |
michael@0 | 974 | /** |
michael@0 | 975 | * Call this when you want to *force* the parser to terminate the |
michael@0 | 976 | * parsing process altogether. This is binary -- so once you terminate |
michael@0 | 977 | * you can't resume without restarting altogether. |
michael@0 | 978 | */ |
michael@0 | 979 | NS_IMETHODIMP |
michael@0 | 980 | nsParser::Terminate(void) |
michael@0 | 981 | { |
michael@0 | 982 | // We should only call DidBuildModel once, so don't do anything if this is |
michael@0 | 983 | // the second time that Terminate has been called. |
michael@0 | 984 | if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { |
michael@0 | 985 | return NS_OK; |
michael@0 | 986 | } |
michael@0 | 987 | |
michael@0 | 988 | nsresult result = NS_OK; |
michael@0 | 989 | // XXX - [ until we figure out a way to break parser-sink circularity ] |
michael@0 | 990 | // Hack - Hold a reference until we are completely done... |
michael@0 | 991 | nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
michael@0 | 992 | mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; |
michael@0 | 993 | |
michael@0 | 994 | // CancelParsingEvents must be called to avoid leaking the nsParser object |
michael@0 | 995 | // @see bug 108049 |
michael@0 | 996 | // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents |
michael@0 | 997 | // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note: |
michael@0 | 998 | // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag. |
michael@0 | 999 | CancelParsingEvents(); |
michael@0 | 1000 | |
michael@0 | 1001 | // If we got interrupted in the middle of a document.write, then we might |
michael@0 | 1002 | // have more than one parser context on our parsercontext stack. This has |
michael@0 | 1003 | // the effect of making DidBuildModel a no-op, meaning that we never call |
michael@0 | 1004 | // our sink's DidBuildModel and break the reference cycle, causing a leak. |
michael@0 | 1005 | // Since we're getting terminated, we manually clean up our context stack. |
michael@0 | 1006 | while (mParserContext && mParserContext->mPrevContext) { |
michael@0 | 1007 | CParserContext *prev = mParserContext->mPrevContext; |
michael@0 | 1008 | delete mParserContext; |
michael@0 | 1009 | mParserContext = prev; |
michael@0 | 1010 | } |
michael@0 | 1011 | |
michael@0 | 1012 | if (mDTD) { |
michael@0 | 1013 | mDTD->Terminate(); |
michael@0 | 1014 | DidBuildModel(result); |
michael@0 | 1015 | } else if (mSink) { |
michael@0 | 1016 | // We have no parser context or no DTD yet (so we got terminated before we |
michael@0 | 1017 | // got any data). Manually break the reference cycle with the sink. |
michael@0 | 1018 | result = mSink->DidBuildModel(true); |
michael@0 | 1019 | NS_ENSURE_SUCCESS(result, result); |
michael@0 | 1020 | } |
michael@0 | 1021 | |
michael@0 | 1022 | return NS_OK; |
michael@0 | 1023 | } |
michael@0 | 1024 | |
michael@0 | 1025 | NS_IMETHODIMP |
michael@0 | 1026 | nsParser::ContinueInterruptedParsing() |
michael@0 | 1027 | { |
michael@0 | 1028 | // If there are scripts executing, then the content sink is jumping the gun |
michael@0 | 1029 | // (probably due to a synchronous XMLHttpRequest) and will re-enable us |
michael@0 | 1030 | // later, see bug 460706. |
michael@0 | 1031 | if (!IsOkToProcessNetworkData()) { |
michael@0 | 1032 | return NS_OK; |
michael@0 | 1033 | } |
michael@0 | 1034 | |
michael@0 | 1035 | // If the stream has already finished, there's a good chance |
michael@0 | 1036 | // that we might start closing things down when the parser |
michael@0 | 1037 | // is reenabled. To make sure that we're not deleted across |
michael@0 | 1038 | // the reenabling process, hold a reference to ourselves. |
michael@0 | 1039 | nsresult result=NS_OK; |
michael@0 | 1040 | nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
michael@0 | 1041 | nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); |
michael@0 | 1042 | |
michael@0 | 1043 | #ifdef DEBUG |
michael@0 | 1044 | if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { |
michael@0 | 1045 | NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser."); |
michael@0 | 1046 | } |
michael@0 | 1047 | #endif |
michael@0 | 1048 | |
michael@0 | 1049 | bool isFinalChunk = mParserContext && |
michael@0 | 1050 | mParserContext->mStreamListenerState == eOnStop; |
michael@0 | 1051 | |
michael@0 | 1052 | mProcessingNetworkData = true; |
michael@0 | 1053 | if (mSink) { |
michael@0 | 1054 | mSink->WillParse(); |
michael@0 | 1055 | } |
michael@0 | 1056 | result = ResumeParse(true, isFinalChunk); // Ref. bug 57999 |
michael@0 | 1057 | mProcessingNetworkData = false; |
michael@0 | 1058 | |
michael@0 | 1059 | if (result != NS_OK) { |
michael@0 | 1060 | result=mInternalState; |
michael@0 | 1061 | } |
michael@0 | 1062 | |
michael@0 | 1063 | return result; |
michael@0 | 1064 | } |
michael@0 | 1065 | |
michael@0 | 1066 | /** |
michael@0 | 1067 | * Stops parsing temporarily. That's it will prevent the |
michael@0 | 1068 | * parser from building up content model. |
michael@0 | 1069 | */ |
michael@0 | 1070 | NS_IMETHODIMP_(void) |
michael@0 | 1071 | nsParser::BlockParser() |
michael@0 | 1072 | { |
michael@0 | 1073 | mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED; |
michael@0 | 1074 | } |
michael@0 | 1075 | |
michael@0 | 1076 | /** |
michael@0 | 1077 | * Open up the parser for tokenization, building up content |
michael@0 | 1078 | * model..etc. However, this method does not resume parsing |
michael@0 | 1079 | * automatically. It's the callers' responsibility to restart |
michael@0 | 1080 | * the parsing engine. |
michael@0 | 1081 | */ |
michael@0 | 1082 | NS_IMETHODIMP_(void) |
michael@0 | 1083 | nsParser::UnblockParser() |
michael@0 | 1084 | { |
michael@0 | 1085 | if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { |
michael@0 | 1086 | mFlags |= NS_PARSER_FLAG_PARSER_ENABLED; |
michael@0 | 1087 | } else { |
michael@0 | 1088 | NS_WARNING("Trying to unblock an unblocked parser."); |
michael@0 | 1089 | } |
michael@0 | 1090 | } |
michael@0 | 1091 | |
michael@0 | 1092 | NS_IMETHODIMP_(void) |
michael@0 | 1093 | nsParser::ContinueInterruptedParsingAsync() |
michael@0 | 1094 | { |
michael@0 | 1095 | mSink->ContinueInterruptedParsingAsync(); |
michael@0 | 1096 | } |
michael@0 | 1097 | |
michael@0 | 1098 | /** |
michael@0 | 1099 | * Call this to query whether the parser is enabled or not. |
michael@0 | 1100 | */ |
michael@0 | 1101 | NS_IMETHODIMP_(bool) |
michael@0 | 1102 | nsParser::IsParserEnabled() |
michael@0 | 1103 | { |
michael@0 | 1104 | return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0; |
michael@0 | 1105 | } |
michael@0 | 1106 | |
michael@0 | 1107 | /** |
michael@0 | 1108 | * Call this to query whether the parser thinks it's done with parsing. |
michael@0 | 1109 | */ |
michael@0 | 1110 | NS_IMETHODIMP_(bool) |
michael@0 | 1111 | nsParser::IsComplete() |
michael@0 | 1112 | { |
michael@0 | 1113 | return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); |
michael@0 | 1114 | } |
michael@0 | 1115 | |
michael@0 | 1116 | |
michael@0 | 1117 | void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev) |
michael@0 | 1118 | { |
michael@0 | 1119 | // Ignore any revoked continue events... |
michael@0 | 1120 | if (mContinueEvent != ev) |
michael@0 | 1121 | return; |
michael@0 | 1122 | |
michael@0 | 1123 | mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; |
michael@0 | 1124 | mContinueEvent = nullptr; |
michael@0 | 1125 | |
michael@0 | 1126 | NS_ASSERTION(IsOkToProcessNetworkData(), |
michael@0 | 1127 | "Interrupted in the middle of a script?"); |
michael@0 | 1128 | ContinueInterruptedParsing(); |
michael@0 | 1129 | } |
michael@0 | 1130 | |
michael@0 | 1131 | bool |
michael@0 | 1132 | nsParser::IsInsertionPointDefined() |
michael@0 | 1133 | { |
michael@0 | 1134 | return false; |
michael@0 | 1135 | } |
michael@0 | 1136 | |
michael@0 | 1137 | /** No-op in this parser implementation. */ |
michael@0 | 1138 | void nsParser::BeginEvaluatingParserInsertedScript() |
michael@0 | 1139 | { |
michael@0 | 1140 | } |
michael@0 | 1141 | |
michael@0 | 1142 | /** No-op in this parser implementation. */ |
michael@0 | 1143 | void nsParser::EndEvaluatingParserInsertedScript() |
michael@0 | 1144 | { |
michael@0 | 1145 | } |
michael@0 | 1146 | |
michael@0 | 1147 | /** No-op in this parser implementation; aCommand is ignored. */ |
michael@0 | 1148 | void nsParser::MarkAsNotScriptCreated(const char* aCommand) |
michael@0 | 1149 | { |
michael@0 | 1150 | } |
michael@0 | 1151 | |
michael@0 | 1152 | /** Always returns false in this parser implementation. */ |
michael@0 | 1153 | bool nsParser::IsScriptCreated() |
michael@0 | 1154 | { |
michael@0 | 1155 |   return false; |
michael@0 | 1156 | } |
michael@0 | 1157 | |
michael@0 | 1158 | /** |
michael@0 | 1159 |  * Starts a URL-based parse: creates a scanner named after aURL's spec, wraps |
michael@0 | 1160 |  * it in a new multipart CParserContext of type eCTURL and pushes that context. |
michael@0 | 1161 |  * Parsing is push based, so the same scanner may be processed over many calls. |
michael@0 | 1162 |  * |
michael@0 | 1163 |  * @param aURL      document URL; null yields kBadURL |
michael@0 | 1164 |  * @param aListener stored in mObserver and passed to the new context |
michael@0 | 1165 |  * @param aKey      key passed to the new context |
michael@0 | 1166 |  * @param aMode     DTD mode recorded on the new context |
michael@0 | 1167 |  * @return NS_OK after the push; GetSpec()'s result when that is not NS_OK; |
michael@0 | 1168 |  *         NS_ERROR_HTMLPARSER_BADCONTEXT if the scanner or context is null |
michael@0 | 1169 |  */ |
michael@0 | 1170 | NS_IMETHODIMP |
michael@0 | 1171 | nsParser::Parse(nsIURI* aURL, nsIRequestObserver* aListener, void* aKey, |
michael@0 | 1172 |                 nsDTDMode aMode) |
michael@0 | 1173 | { |
michael@0 | 1174 |   NS_PRECONDITION(aURL, "Error: Null URL given"); |
michael@0 | 1175 | |
michael@0 | 1176 |   mObserver = aListener; |
michael@0 | 1177 |   if (!aURL) { |
michael@0 | 1178 |     return kBadURL; |
michael@0 | 1179 |   } |
michael@0 | 1180 | |
michael@0 | 1181 |   nsAutoCString urlSpec; |
michael@0 | 1182 |   nsresult specStatus = aURL->GetSpec(urlSpec); |
michael@0 | 1183 |   if (specStatus != NS_OK) { |
michael@0 | 1184 |     return specStatus; |
michael@0 | 1185 |   } |
michael@0 | 1186 | |
michael@0 | 1187 |   NS_ConvertUTF8toUTF16 scannerName(urlSpec); |
michael@0 | 1188 |   nsScanner* scanner = new nsScanner(scannerName, false); |
michael@0 | 1189 |   CParserContext* context = new CParserContext(mParserContext, scanner, aKey, |
michael@0 | 1190 |                                                mCommand, aListener); |
michael@0 | 1191 |   if (!context || !scanner) { |
michael@0 | 1192 |     mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT; |
michael@0 | 1193 |     return NS_ERROR_HTMLPARSER_BADCONTEXT; |
michael@0 | 1194 |   } |
michael@0 | 1195 |   context->mMultipart = true; |
michael@0 | 1196 |   context->mContextType = CParserContext::eCTURL; |
michael@0 | 1197 |   context->mDTDMode = aMode; |
michael@0 | 1198 |   PushContext(*context); |
michael@0 | 1199 |   return NS_OK; |
michael@0 | 1200 | } |
michael@0 | 1201 | |
michael@0 | 1202 | /** |
michael@0 | 1203 | * Used by XML fragment parsing below: appends aSourceBuffer to the scanner of the context keyed by aKey, pushing a new string-type context if none matches. |
michael@0 | 1204 | * @param aSourceBuffer contains a string-full of real content |
michael@0 | 1205 | * @param aKey key of the parser context to feed (aLastCall: true when this is the final chunk for that context) |
michael@0 | 1206 | */ |
michael@0 | 1207 | nsresult |
michael@0 | 1208 | nsParser::Parse(const nsAString& aSourceBuffer, |
michael@0 | 1209 | void* aKey, |
michael@0 | 1210 | bool aLastCall) |
michael@0 | 1211 | { |
michael@0 | 1212 | nsresult result = NS_OK; |
michael@0 | 1213 | |
michael@0 | 1214 | // Don't bother if we're never going to parse this. |
michael@0 | 1215 | if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { |
michael@0 | 1216 | return result; |
michael@0 | 1217 | } |
michael@0 | 1218 | |
michael@0 | 1219 | if (!aLastCall && aSourceBuffer.IsEmpty()) { |
michael@0 | 1220 | // Nothing is being passed to the parser so return |
michael@0 | 1221 | // immediately. mUnusedInput will get processed when |
michael@0 | 1222 | // some data is actually passed in. |
michael@0 | 1223 | // But if this is the last call, make sure to finish up |
michael@0 | 1224 | // stuff correctly. |
michael@0 | 1225 | return result; |
michael@0 | 1226 | } |
michael@0 | 1227 | |
michael@0 | 1228 | // Maintain a reference to ourselves so we don't go away |
michael@0 | 1229 | // till we're completely done. |
michael@0 | 1230 | nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
michael@0 | 1231 | |
michael@0 | 1232 | if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) { /* always true here: the early return above already handled !aLastCall with an empty buffer */ |
michael@0 | 1233 | // Note: The following code will always find the parser context associated |
michael@0 | 1234 | // with the given key, even if that context has been suspended (e.g., for |
michael@0 | 1235 | // another document.write call). This doesn't appear to be exactly what IE |
michael@0 | 1236 | // does in the case where this happens, but this makes more sense. |
michael@0 | 1237 | CParserContext* pc = mParserContext; |
michael@0 | 1238 | while (pc && pc->mKey != aKey) { |
michael@0 | 1239 | pc = pc->mPrevContext; |
michael@0 | 1240 | } |
michael@0 | 1241 | |
michael@0 | 1242 | if (!pc) { |
michael@0 | 1243 | // Only make a new context if we don't have one, OR if we do, but has a |
michael@0 | 1244 | // different context key. |
michael@0 | 1245 | nsScanner* theScanner = new nsScanner(mUnusedInput); |
michael@0 | 1246 | NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 1247 | |
michael@0 | 1248 | eAutoDetectResult theStatus = eUnknownDetect; |
michael@0 | 1249 | |
michael@0 | 1250 | if (mParserContext && |
michael@0 | 1251 | mParserContext->mMimeType.EqualsLiteral("application/xml")) { |
michael@0 | 1252 | // Ref. Bug 90379 |
michael@0 | 1253 | NS_ASSERTION(mDTD, "How come the DTD is null?"); |
michael@0 | 1254 | |
michael@0 | 1255 | if (mParserContext) { /* redundant: mParserContext was already checked by the enclosing condition */ |
michael@0 | 1256 | theStatus = mParserContext->mAutoDetectStatus; |
michael@0 | 1257 | // Added this to fix bug 32022. |
michael@0 | 1258 | } |
michael@0 | 1259 | } |
michael@0 | 1260 | |
michael@0 | 1261 | pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, |
michael@0 | 1262 | 0, theStatus, aLastCall); |
michael@0 | 1263 | NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 1264 | |
michael@0 | 1265 | PushContext(*pc); |
michael@0 | 1266 | |
michael@0 | 1267 | pc->mMultipart = !aLastCall; // By default |
michael@0 | 1268 | if (pc->mPrevContext) { |
michael@0 | 1269 | pc->mMultipart |= pc->mPrevContext->mMultipart; |
michael@0 | 1270 | } |
michael@0 | 1271 | |
michael@0 | 1272 | // Start fix bug 40143 |
michael@0 | 1273 | if (pc->mMultipart) { |
michael@0 | 1274 | pc->mStreamListenerState = eOnDataAvail; |
michael@0 | 1275 | if (pc->mScanner) { |
michael@0 | 1276 | pc->mScanner->SetIncremental(true); |
michael@0 | 1277 | } |
michael@0 | 1278 | } else { |
michael@0 | 1279 | pc->mStreamListenerState = eOnStop; |
michael@0 | 1280 | if (pc->mScanner) { |
michael@0 | 1281 | pc->mScanner->SetIncremental(false); |
michael@0 | 1282 | } |
michael@0 | 1283 | } |
michael@0 | 1284 | // end fix for 40143 |
michael@0 | 1285 | |
michael@0 | 1286 | pc->mContextType=CParserContext::eCTString; |
michael@0 | 1287 | pc->SetMimeType(NS_LITERAL_CSTRING("application/xml")); |
michael@0 | 1288 | pc->mDTDMode = eDTDMode_full_standards; |
michael@0 | 1289 | |
michael@0 | 1290 | mUnusedInput.Truncate(); |
michael@0 | 1291 | |
michael@0 | 1292 | pc->mScanner->Append(aSourceBuffer); |
michael@0 | 1293 | // Do not interrupt document.write() - bug 95487 |
michael@0 | 1294 | result = ResumeParse(false, false, false); |
michael@0 | 1295 | } else { |
michael@0 | 1296 | pc->mScanner->Append(aSourceBuffer); |
michael@0 | 1297 | if (!pc->mPrevContext) { |
michael@0 | 1298 | // Set stream listener state to eOnStop, on the final context - Fix 68160, |
michael@0 | 1299 | // to guarantee DidBuildModel() call - Fix 36148 |
michael@0 | 1300 | if (aLastCall) { |
michael@0 | 1301 | pc->mStreamListenerState = eOnStop; |
michael@0 | 1302 | pc->mScanner->SetIncremental(false); |
michael@0 | 1303 | } |
michael@0 | 1304 | |
michael@0 | 1305 | if (pc == mParserContext) { |
michael@0 | 1306 | // If pc is not mParserContext, then this call to ResumeParse would |
michael@0 | 1307 | // do the wrong thing and try to continue parsing using |
michael@0 | 1308 | // mParserContext. We need to wait to actually resume parsing on pc. |
michael@0 | 1309 | ResumeParse(false, false, false); /* NOTE(review): the result is discarded here, unlike in the new-context branch above -- confirm intended */ |
michael@0 | 1310 | } |
michael@0 | 1311 | } |
michael@0 | 1312 | } |
michael@0 | 1313 | } |
michael@0 | 1314 | |
michael@0 | 1315 | return result; |
michael@0 | 1316 | } |
michael@0 | 1317 | |
michael@0 | 1318 | /** |
michael@0 | 1319 |  * Parses aSourceBuffer as an XML fragment nested inside the elements of |
michael@0 | 1320 |  * aTagStack (innermost first): open tags, then the content, then end tags. |
michael@0 | 1321 |  * NS_PARSER_FLAG_OBSERVERS_ENABLED is cleared meanwhile and set again on exit. |
michael@0 | 1322 |  * @return the last Parse() result, or NS_ERROR_HTMLPARSER_STOPPARSING if mSink is null after the context was parsed. |
michael@0 | 1323 |  */ |
michael@0 | 1324 | NS_IMETHODIMP |
michael@0 | 1325 | nsParser::ParseFragment(const nsAString& aSourceBuffer, |
michael@0 | 1326 |                         nsTArray<nsString>& aTagStack) |
michael@0 | 1327 | { |
michael@0 | 1328 |   nsresult result = NS_OK; |
michael@0 | 1329 |   nsAutoString theContext; |
michael@0 | 1330 |   uint32_t theCount = aTagStack.Length(); |
michael@0 | 1331 |   uint32_t theIndex = 0; |
michael@0 | 1332 | |
michael@0 | 1333 |   // Disable observers for fragments |
michael@0 | 1334 |   mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED; |
michael@0 | 1335 | |
michael@0 | 1336 |   for (theIndex = 0; theIndex < theCount; theIndex++) { |
michael@0 | 1337 |     theContext.AppendLiteral("<"); |
michael@0 | 1338 |     theContext.Append(aTagStack[theCount - theIndex - 1]); |
michael@0 | 1339 |     theContext.AppendLiteral(">"); |
michael@0 | 1340 |   } |
michael@0 | 1341 | |
michael@0 | 1342 |   if (theCount == 0) { |
michael@0 | 1343 |     // Ensure that the buffer is not empty. Because none of the DTDs care |
michael@0 | 1344 |     // about leading whitespace, this doesn't change the result. |
michael@0 | 1345 |     theContext.AssignLiteral(" "); |
michael@0 | 1346 |   } |
michael@0 | 1347 | |
michael@0 | 1348 |   // First, parse the context to build up the DTD's tag stack. Note that we |
michael@0 | 1349 |   // pass false for the aLastCall parameter. |
michael@0 | 1350 |   result = Parse(theContext, &theContext, false); |
michael@0 | 1351 |   if (NS_FAILED(result)) { |
michael@0 | 1352 |     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; |
michael@0 | 1353 |     return result; |
michael@0 | 1354 |   } |
michael@0 | 1355 | |
michael@0 | 1356 |   if (!mSink) { |
michael@0 | 1357 |     // Parse must have failed in the XML case and so the sink was killed. |
michael@0 | 1358 |     // Re-enable observers here as well, like every other exit path does. |
michael@0 | 1359 |     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; |
michael@0 | 1360 |     return NS_ERROR_HTMLPARSER_STOPPARSING; |
michael@0 | 1361 |   } |
michael@0 | 1362 | |
michael@0 | 1363 |   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink); |
michael@0 | 1364 |   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink"); |
michael@0 | 1365 | |
michael@0 | 1366 |   fragSink->WillBuildContent(); |
michael@0 | 1367 |   // Now, parse the actual content. Note that this is the last call |
michael@0 | 1368 |   // for HTML content, but for XML, we will want to build and parse |
michael@0 | 1369 |   // the end tags. However, if tagStack is empty, it's the last call |
michael@0 | 1370 |   // for XML as well. |
michael@0 | 1371 |   if (theCount == 0) { |
michael@0 | 1372 |     result = Parse(aSourceBuffer, &theContext, true); |
michael@0 | 1373 |     fragSink->DidBuildContent(); |
michael@0 | 1374 |   } else { |
michael@0 | 1375 |     // Add an end tag chunk, so expat will read the whole source buffer, |
michael@0 | 1376 |     // and not worry about ']]' etc. |
michael@0 | 1377 |     result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"), &theContext, false); |
michael@0 | 1378 |     fragSink->DidBuildContent(); |
michael@0 | 1379 | |
michael@0 | 1380 |     if (NS_SUCCEEDED(result)) { |
michael@0 | 1381 |       nsAutoString endContext; |
michael@0 | 1382 |       for (theIndex = 0; theIndex < theCount; theIndex++) { |
michael@0 | 1383 |         // we already added an end tag chunk above |
michael@0 | 1384 |         if (theIndex > 0) { |
michael@0 | 1385 |           endContext.AppendLiteral("</"); |
michael@0 | 1386 |         } |
michael@0 | 1387 | |
michael@0 | 1388 |         nsString& thisTag = aTagStack[theIndex]; |
michael@0 | 1389 |         // was there an xmlns=? |
michael@0 | 1390 |         int32_t endOfTag = thisTag.FindChar(char16_t(' ')); |
michael@0 | 1391 |         if (endOfTag == -1) { |
michael@0 | 1392 |           endContext.Append(thisTag); |
michael@0 | 1393 |         } else { |
michael@0 | 1394 |           endContext.Append(Substring(thisTag,0,endOfTag)); |
michael@0 | 1395 |         } |
michael@0 | 1396 | |
michael@0 | 1397 |         endContext.AppendLiteral(">"); |
michael@0 | 1398 |       } |
michael@0 | 1399 | |
michael@0 | 1400 |       result = Parse(endContext, &theContext, true); |
michael@0 | 1401 |     } |
michael@0 | 1402 |   } |
michael@0 | 1403 | |
michael@0 | 1404 |   mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; |
michael@0 | 1405 | |
michael@0 | 1406 |   return result; |
michael@0 | 1407 | } |
michael@0 | 1408 | |
michael@0 | 1409 | /** |
michael@0 | 1410 | * This routine is called to cause the parser to continue parsing its |
michael@0 | 1411 | * underlying stream. This call allows the parse process to happen in |
michael@0 | 1412 | * chunks, such as when the content is push based, and we need to parse in |
michael@0 | 1413 | * pieces. |
michael@0 | 1414 | * |
michael@0 | 1415 | * An interesting change in how the parser gets used has led us to add extra |
michael@0 | 1416 | * processing to this method. The case occurs when the parser is blocked in |
michael@0 | 1417 | * one context, and gets a parse(string) call in another context. In this |
michael@0 | 1418 | * case, the parserContexts are linked. No problem. |
michael@0 | 1419 | * |
michael@0 | 1420 | * The problem is that Parse(string) assumes that it can proceed unabated, |
michael@0 | 1421 | * but if the parser is already blocked that assumption is false. So we |
michael@0 | 1422 | * needed to add a mechanism here to allow the parser to continue to process |
michael@0 | 1423 | * (the pop and free) contexts until 1) it gets blocked again; 2) it runs |
michael@0 | 1424 | * out of contexts. |
michael@0 | 1425 | * |
michael@0 | 1426 | * @param allowIteration : set to true if non-script resumption is requested |
michael@0 | 1427 | * @param aIsFinalChunk : tells us when the last chunk of data is provided. |
michael@0 | 1428 | * @param aCanInterrupt : not referenced in the body of this method. |
michael@0 | 1429 | * @return error code -- 0 if ok, non-zero if error. |
michael@0 | 1430 | */ |
michael@0 | 1431 | nsresult |
michael@0 | 1432 | nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk, |
michael@0 | 1433 | bool aCanInterrupt) |
michael@0 | 1434 | { |
michael@0 | 1435 | nsresult result = NS_OK; |
michael@0 | 1436 | |
michael@0 | 1437 | if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) && |
michael@0 | 1438 | mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { |
michael@0 | 1439 | |
michael@0 | 1440 | result = WillBuildModel(mParserContext->mScanner->GetFilename()); /* dereferences mParserContext and its scanner without a null check */ |
michael@0 | 1441 | if (NS_FAILED(result)) { |
michael@0 | 1442 | mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE; |
michael@0 | 1443 | return result; |
michael@0 | 1444 | } |
michael@0 | 1445 | |
michael@0 | 1446 | if (mDTD) { |
michael@0 | 1447 | mSink->WillResume(); |
michael@0 | 1448 | bool theIterationIsOk = true; |
michael@0 | 1449 | |
michael@0 | 1450 | while (result == NS_OK && theIterationIsOk) { |
michael@0 | 1451 | if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) { |
michael@0 | 1452 | // -- Ref: Bug# 22485 -- |
michael@0 | 1453 | // Insert the unused input into the source buffer |
michael@0 | 1454 | // as if it was read from the input stream. |
michael@0 | 1455 | // Adding UngetReadable() per vidur!! |
michael@0 | 1456 | mParserContext->mScanner->UngetReadable(mUnusedInput); |
michael@0 | 1457 | mUnusedInput.Truncate(0); |
michael@0 | 1458 | } |
michael@0 | 1459 | |
michael@0 | 1460 | // Only allow parsing to be interrupted in the subsequent call to |
michael@0 | 1461 | // build model. |
michael@0 | 1462 | nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) |
michael@0 | 1463 | ? Tokenize(aIsFinalChunk) |
michael@0 | 1464 | : NS_OK; |
michael@0 | 1465 | result = BuildModel(); |
michael@0 | 1466 | |
michael@0 | 1467 | if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) { |
michael@0 | 1468 | PostContinueEvent(); |
michael@0 | 1469 | } |
michael@0 | 1470 | |
michael@0 | 1471 | theIterationIsOk = theTokenizerResult != kEOF && |
michael@0 | 1472 | result != NS_ERROR_HTMLPARSER_INTERRUPTED; |
michael@0 | 1473 | |
michael@0 | 1474 | // Make sure not to stop parsing too early. Therefore, before shutting |
michael@0 | 1475 | // down the parser, it's important to check whether the input buffer |
michael@0 | 1476 | // has been scanned to completion (theTokenizerResult should be kEOF). |
michael@0 | 1477 | // kEOF -> End of buffer. |
michael@0 | 1478 | |
michael@0 | 1479 | // If we're told to block the parser, we disable all further parsing |
michael@0 | 1480 | // (and cache any data coming in) until the parser is re-enabled. |
michael@0 | 1481 | if (NS_ERROR_HTMLPARSER_BLOCK == result) { |
michael@0 | 1482 | mSink->WillInterrupt(); |
michael@0 | 1483 | if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) { |
michael@0 | 1484 | // If we were blocked by a recursive invocation, don't re-block. |
michael@0 | 1485 | BlockParser(); |
michael@0 | 1486 | } |
michael@0 | 1487 | return NS_OK; |
michael@0 | 1488 | } |
michael@0 | 1489 | if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { |
michael@0 | 1490 | // Note: Parser Terminate() calls DidBuildModel. |
michael@0 | 1491 | if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { |
michael@0 | 1492 | DidBuildModel(mStreamStatus); |
michael@0 | 1493 | mInternalState = result; |
michael@0 | 1494 | } |
michael@0 | 1495 | |
michael@0 | 1496 | return NS_OK; |
michael@0 | 1497 | } |
michael@0 | 1498 | if ((NS_OK == result && theTokenizerResult == kEOF) || |
michael@0 | 1499 | result == NS_ERROR_HTMLPARSER_INTERRUPTED) { |
michael@0 | 1500 | bool theContextIsStringBased = |
michael@0 | 1501 | CParserContext::eCTString == mParserContext->mContextType; |
michael@0 | 1502 | |
michael@0 | 1503 | if (mParserContext->mStreamListenerState == eOnStop || |
michael@0 | 1504 | !mParserContext->mMultipart || theContextIsStringBased) { |
michael@0 | 1505 | if (!mParserContext->mPrevContext) { |
michael@0 | 1506 | if (mParserContext->mStreamListenerState == eOnStop) { |
michael@0 | 1507 | DidBuildModel(mStreamStatus); |
michael@0 | 1508 | return NS_OK; |
michael@0 | 1509 | } |
michael@0 | 1510 | } else { |
michael@0 | 1511 | CParserContext* theContext = PopContext(); |
michael@0 | 1512 | if (theContext) { |
michael@0 | 1513 | theIterationIsOk = allowIteration && theContextIsStringBased; |
michael@0 | 1514 | if (theContext->mCopyUnused) { |
michael@0 | 1515 | theContext->mScanner->CopyUnusedData(mUnusedInput); |
michael@0 | 1516 | } |
michael@0 | 1517 | |
michael@0 | 1518 | delete theContext; |
michael@0 | 1519 | } |
michael@0 | 1520 | |
michael@0 | 1521 | result = mInternalState; |
michael@0 | 1522 | aIsFinalChunk = mParserContext && |
michael@0 | 1523 | mParserContext->mStreamListenerState == eOnStop; |
michael@0 | 1524 | // ...then intentionally fall through to mSink->WillInterrupt()... |
michael@0 | 1525 | } |
michael@0 | 1526 | } |
michael@0 | 1527 | } |
michael@0 | 1528 | |
michael@0 | 1529 | if (theTokenizerResult == kEOF || |
michael@0 | 1530 | result == NS_ERROR_HTMLPARSER_INTERRUPTED) { |
michael@0 | 1531 | result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; |
michael@0 | 1532 | mSink->WillInterrupt(); |
michael@0 | 1533 | } |
michael@0 | 1534 | } |
michael@0 | 1535 | } else { |
michael@0 | 1536 | mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD; |
michael@0 | 1537 | } |
michael@0 | 1538 | } |
michael@0 | 1539 | |
michael@0 | 1540 | return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; |
michael@0 | 1541 | } |
michael@0 | 1542 | |
michael@0 | 1543 | /** |
michael@0 | 1544 |  * Hands the tokens created in the tokenization phase to the DTD so that it |
michael@0 | 1545 |  * can make sense of them. |
michael@0 | 1546 |  * @return the DTD's BuildModel() result; the GetTokenizer() result when there |
michael@0 | 1547 |  *         is no DTD; NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in |
michael@0 | 1548 |  *         mInternalState) when GetTokenizer() fails |
michael@0 | 1549 |  */ |
michael@0 | 1550 | nsresult |
michael@0 | 1551 | nsParser::BuildModel() |
michael@0 | 1552 | { |
michael@0 | 1553 |   nsITokenizer* tokenizer = nullptr; |
michael@0 | 1554 |   nsresult rv = mParserContext |
michael@0 | 1555 |     ? mParserContext->GetTokenizer(mDTD, mSink, tokenizer) |
michael@0 | 1556 |     : NS_OK; |
michael@0 | 1557 |   if (NS_FAILED(rv)) { |
michael@0 | 1558 |     mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; |
michael@0 | 1559 |     return NS_ERROR_HTMLPARSER_BADTOKENIZER; |
michael@0 | 1560 |   } |
michael@0 | 1561 |   if (mDTD) { |
michael@0 | 1562 |     rv = mDTD->BuildModel(tokenizer, mSink); |
michael@0 | 1563 |   } |
michael@0 | 1564 |   return rv; |
michael@0 | 1565 | } |
michael@0 | 1566 | |
michael@0 | 1567 | /******************************************************************* |
michael@0 | 1568 | These methods are used to talk to the netlib system... |
michael@0 | 1569 | *******************************************************************/ |
michael@0 | 1570 | |
michael@0 | 1571 | /** |
michael@0 | 1572 |  * Stream-listener start notification. Forwards the call to mObserver (if |
michael@0 | 1573 |  * any), resets the current context's listener and auto-detect state, records |
michael@0 | 1574 |  * the request, drops the DTD and picks up the channel's content type. |
michael@0 | 1575 |  * @return always NS_OK; observer and content-type results are not propagated |
michael@0 | 1576 |  */ |
michael@0 | 1577 | nsresult |
michael@0 | 1578 | nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext) |
michael@0 | 1579 | { |
michael@0 | 1580 |   NS_PRECONDITION(eNone == mParserContext->mStreamListenerState, |
michael@0 | 1581 |                   "Parser's nsIStreamListener API was not setup " |
michael@0 | 1582 |                   "correctly in constructor."); |
michael@0 | 1583 |   if (mObserver) { |
michael@0 | 1584 |     mObserver->OnStartRequest(request, aContext); |
michael@0 | 1585 |   } |
michael@0 | 1586 |   mParserContext->mStreamListenerState = eOnStart; |
michael@0 | 1587 |   mParserContext->mAutoDetectStatus = eUnknownDetect; |
michael@0 | 1588 |   mParserContext->mRequest = request; |
michael@0 | 1589 | |
michael@0 | 1590 |   NS_ASSERTION(!mParserContext->mPrevContext, |
michael@0 | 1591 |                "Clobbering DTD for non-root parser context!"); |
michael@0 | 1592 |   mDTD = nullptr; |
michael@0 | 1593 | |
michael@0 | 1594 |   // The mime type is only updated when the request is a channel that reports one. |
michael@0 | 1595 |   nsAutoCString contentType; |
michael@0 | 1596 |   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request); |
michael@0 | 1597 |   if (channel && NS_SUCCEEDED(channel->GetContentType(contentType))) { |
michael@0 | 1598 |     mParserContext->SetMimeType(contentType); |
michael@0 | 1599 |   } |
michael@0 | 1600 |   return NS_OK; |
michael@0 | 1601 | } |
michael@0 | 1602 | |
michael@0 | 1603 | static bool |
michael@0 | 1604 | ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen, |
michael@0 | 1605 | nsCString& oCharset) |
michael@0 | 1606 | { |
michael@0 | 1607 | // Looks for an XML declaration at the start of aBytes (aLen bytes) and copies its quoted encoding value into oCharset; returns true iff oCharset ends up non-empty. |
michael@0 | 1608 | // This code is rather pointless to have. Might as well reuse expat as seen in nsHtml5StreamParser. -- hsivonen |
michael@0 | 1609 | oCharset.Truncate(); |
michael@0 | 1610 | if ((aLen >= 5) && |
michael@0 | 1611 | ('<' == aBytes[0]) && |
michael@0 | 1612 | ('?' == aBytes[1]) && |
michael@0 | 1613 | ('x' == aBytes[2]) && |
michael@0 | 1614 | ('m' == aBytes[3]) && |
michael@0 | 1615 | ('l' == aBytes[4])) { |
michael@0 | 1616 | int32_t i; |
michael@0 | 1617 | bool versionFound = false, encodingFound = false; |
michael@0 | 1618 | for (i = 6; i < aLen && !encodingFound; ++i) { |
michael@0 | 1619 | // end of XML declaration? |
michael@0 | 1620 | if ((((char*) aBytes)[i] == '?') && |
michael@0 | 1621 | ((i + 1) < aLen) && |
michael@0 | 1622 | (((char*) aBytes)[i + 1] == '>')) { |
michael@0 | 1623 | break; |
michael@0 | 1624 | } |
michael@0 | 1625 | // Version is required. |
michael@0 | 1626 | if (!versionFound) { |
michael@0 | 1627 | // Want to avoid string comparisons, hence looking for 'n' |
michael@0 | 1628 | // and only if found check the string leading to it. Not |
michael@0 | 1629 | // foolproof, but fast. |
michael@0 | 1630 | // The shortest string allowed before this is (strlen==13): |
michael@0 | 1631 | // <?xml version |
michael@0 | 1632 | if ((((char*) aBytes)[i] == 'n') && |
michael@0 | 1633 | (i >= 12) && |
michael@0 | 1634 | (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) { |
michael@0 | 1635 | // Fast forward through version |
michael@0 | 1636 | char q = 0; |
michael@0 | 1637 | for (++i; i < aLen; ++i) { |
michael@0 | 1638 | char qi = ((char*) aBytes)[i]; |
michael@0 | 1639 | if (qi == '\'' || qi == '"') { |
michael@0 | 1640 | if (q && q == qi) { |
michael@0 | 1641 | // ending quote |
michael@0 | 1642 | versionFound = true; |
michael@0 | 1643 | break; |
michael@0 | 1644 | } else { |
michael@0 | 1645 | // Starting quote |
michael@0 | 1646 | q = qi; |
michael@0 | 1647 | } |
michael@0 | 1648 | } |
michael@0 | 1649 | } |
michael@0 | 1650 | } |
michael@0 | 1651 | } else { |
michael@0 | 1652 | // encoding must follow version |
michael@0 | 1653 | // Want to avoid string comparisons, hence looking for 'g' |
michael@0 | 1654 | // and only if found check the string leading to it. Not |
michael@0 | 1655 | // foolproof, but fast. |
michael@0 | 1656 | // The shortest allowed string before this (strlen==26): |
michael@0 | 1657 | // <?xml version="1" encoding |
michael@0 | 1658 | if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp( |
michael@0 | 1659 | "encodin", (char*) (aBytes + i - 7), 7))) { |
michael@0 | 1660 | int32_t encStart = 0; |
michael@0 | 1661 | char q = 0; |
michael@0 | 1662 | for (++i; i < aLen; ++i) { |
michael@0 | 1663 | char qi = ((char*) aBytes)[i]; |
michael@0 | 1664 | if (qi == '\'' || qi == '"') { |
michael@0 | 1665 | if (q && q == qi) { |
michael@0 | 1666 | int32_t count = i - encStart; |
michael@0 | 1667 | // encoding value is invalid if it is UTF-16. NOTE(review): the compare covers only count chars, so a prefix of "UTF-16" is also rejected and longer labels (e.g. "UTF-16LE") pass -- confirm intended |
michael@0 | 1668 | if (count > 0 && PL_strncasecmp("UTF-16", |
michael@0 | 1669 | (char*) (aBytes + encStart), count)) { |
michael@0 | 1670 | oCharset.Assign((char*) (aBytes + encStart), count); |
michael@0 | 1671 | } |
michael@0 | 1672 | encodingFound = true; |
michael@0 | 1673 | break; |
michael@0 | 1674 | } else { |
michael@0 | 1675 | encStart = i + 1; |
michael@0 | 1676 | q = qi; |
michael@0 | 1677 | } |
michael@0 | 1678 | } |
michael@0 | 1679 | } |
michael@0 | 1680 | } |
michael@0 | 1681 | } // if (!versionFound) |
michael@0 | 1682 | } // for |
michael@0 | 1683 | } |
michael@0 | 1684 | return !oCharset.IsEmpty(); |
michael@0 | 1685 | } |
michael@0 | 1686 | |
michael@0 | 1687 | /** Advances aStart by one; returns the character there, or '\0' once aStart reaches aEnd. */ |
michael@0 | 1688 | inline const char |
michael@0 | 1689 | GetNextChar(nsACString::const_iterator& aStart, nsACString::const_iterator& aEnd) |
michael@0 | 1690 | { |
michael@0 | 1691 |   NS_ASSERTION(aStart != aEnd, "end of buffer"); |
michael@0 | 1692 |   return (++aStart == aEnd) ? '\0' : *aStart; |
michael@0 | 1693 | } |
michael@0 | 1694 | |
michael@0 | 1695 | /** |
michael@0 | 1696 |  * ReadSegments() writer that ignores the segment and reports it fully consumed. |
michael@0 | 1697 |  */ |
michael@0 | 1698 | static NS_METHOD |
michael@0 | 1699 | NoOpParserWriteFunc(nsIInputStream* in, void* closure, |
michael@0 | 1700 |                     const char* fromRawSegment, uint32_t toOffset, |
michael@0 | 1701 |                     uint32_t count, uint32_t *writeCount) |
michael@0 | 1702 | { |
michael@0 | 1703 |   *writeCount = count; |
michael@0 | 1704 |   return NS_OK; |
michael@0 | 1705 | } |
michael@0 | 1706 | |
michael@0 | 1707 | typedef struct { /* closure handed to ParserWriteFunc through ReadSegments() */ |
michael@0 | 1708 | bool mNeedCharsetCheck; /* true until ParserWriteFunc has run its charset check for this read */ |
michael@0 | 1709 | nsParser* mParser; /* parser whose document and sink charset ParserWriteFunc sets */ |
michael@0 | 1710 | nsScanner* mScanner; /* scanner the raw segment is appended to */ |
michael@0 | 1711 | nsIRequest* mRequest; /* request passed along to the scanner's Append() */ |
michael@0 | 1712 | } ParserWriteStruct; |
michael@0 | 1713 | |
michael@0 | 1714 | /* |
michael@0 | 1715 |  * Segment writer handed to nsIInputStream::ReadSegments(); it runs once for |
michael@0 | 1716 |  * each contiguous buffer of the stream, which avoids copying the data out |
michael@0 | 1717 |  * of the stream first. On the first segment of a read it re-evaluates the |
michael@0 | 1718 |  * document charset (BOM, then XML declaration); every segment is appended |
michael@0 | 1719 |  * to the scanner. |
michael@0 | 1720 |  * |
michael@0 | 1721 |  * @param closure        ParserWriteStruct with the parser, scanner, request |
michael@0 | 1722 |  * @param fromRawSegment bytes of the current segment |
michael@0 | 1723 |  * @param count          number of bytes in the segment |
michael@0 | 1724 |  * @param writeCount     set to count when the append succeeds |
michael@0 | 1725 |  * @return NS_ERROR_FAILURE for a null closure, otherwise the result of the |
michael@0 | 1726 |  *         scanner's Append() |
michael@0 | 1727 |  */ |
michael@0 | 1728 | static NS_METHOD |
michael@0 | 1729 | ParserWriteFunc(nsIInputStream* in, void* closure, const char* fromRawSegment, |
michael@0 | 1730 |                 uint32_t toOffset, uint32_t count, uint32_t *writeCount) |
michael@0 | 1731 | { |
michael@0 | 1732 |   ParserWriteStruct* state = static_cast<ParserWriteStruct*>(closure); |
michael@0 | 1733 |   if (!state) { |
michael@0 | 1734 |     return NS_ERROR_FAILURE; |
michael@0 | 1735 |   } |
michael@0 | 1736 | |
michael@0 | 1737 |   if (state->mNeedCharsetCheck) { |
michael@0 | 1738 |     state->mNeedCharsetCheck = false; |
michael@0 | 1739 | |
michael@0 | 1740 |     const unsigned char* bytes = |
michael@0 | 1741 |       reinterpret_cast<const unsigned char*>(fromRawSegment); |
michael@0 | 1742 |     int32_t charsetSource; |
michael@0 | 1743 |     nsAutoCString charset; |
michael@0 | 1744 |     nsAutoCString candidate; |
michael@0 | 1745 |     state->mParser->GetDocumentCharset(charset, charsetSource); |
michael@0 | 1746 | |
michael@0 | 1747 |     // This code was bogus when I found it. It expects the BOM or the XML |
michael@0 | 1748 |     // declaration to be entirely in the first network buffer. -- hsivonen |
michael@0 | 1749 |     if (nsContentUtils::CheckForBOM(bytes, count, candidate)) { |
michael@0 | 1750 |       // The decoder will swallow the BOM. The UTF-16 will re-sniff for |
michael@0 | 1751 |       // endianness. The value of charset is now either "UTF-8" or "UTF-16". |
michael@0 | 1752 |       charset.Assign(candidate); |
michael@0 | 1753 |       charsetSource = kCharsetFromByteOrderMark; |
michael@0 | 1754 |     } else if (charsetSource < kCharsetFromChannel) { |
michael@0 | 1755 |       nsAutoCString declared; |
michael@0 | 1756 |       if (ExtractCharsetFromXmlDeclaration(bytes, count, declared) && |
michael@0 | 1757 |           EncodingUtils::FindEncodingForLabel(declared, candidate)) { |
michael@0 | 1758 |         charset.Assign(candidate); |
michael@0 | 1759 |         charsetSource = kCharsetFromMetaTag; |
michael@0 | 1760 |       } |
michael@0 | 1761 |     } |
michael@0 | 1762 | |
michael@0 | 1763 |     state->mParser->SetDocumentCharset(charset, charsetSource); |
michael@0 | 1764 |     state->mParser->SetSinkCharset(charset); |
michael@0 | 1765 |   } |
michael@0 | 1766 | |
michael@0 | 1767 |   const nsresult appendStatus = |
michael@0 | 1768 |     state->mScanner->Append(fromRawSegment, count, state->mRequest); |
michael@0 | 1769 |   if (NS_SUCCEEDED(appendStatus)) { |
michael@0 | 1770 |     *writeCount = count; |
michael@0 | 1771 |   } |
michael@0 | 1772 | |
michael@0 | 1773 |   return appendStatus; |
michael@0 | 1774 | } |
michael@0 | 1775 | |
michael@0 | 1776 | /** |
michael@0 | 1777 |  * Stream-listener data notification. Reads up to aLength bytes from pIStream |
michael@0 | 1778 |  * into the scanner of the parser context that owns `request` and, once the |
michael@0 | 1779 |  * scanner holds non-whitespace data and network data may be processed, |
michael@0 | 1780 |  * resumes parsing. |
michael@0 | 1781 |  * @return NS_ERROR_UNEXPECTED when no context matches `request`; otherwise the |
michael@0 | 1782 |  *         ReadSegments() result or, if parsing was resumed, ResumeParse()'s |
michael@0 | 1783 |  */ |
michael@0 | 1784 | nsresult |
michael@0 | 1785 | nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext, |
michael@0 | 1786 |                           nsIInputStream *pIStream, uint64_t sourceOffset, |
michael@0 | 1787 |                           uint32_t aLength) |
michael@0 | 1788 | { |
michael@0 | 1789 |   NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState || |
michael@0 | 1790 |                    eOnDataAvail == mParserContext->mStreamListenerState), |
michael@0 | 1791 |             "Error: OnStartRequest() must be called before OnDataAvailable()"); |
michael@0 | 1792 |   NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream), |
michael@0 | 1793 |                   "Must have a buffered input stream"); |
michael@0 | 1794 | |
michael@0 | 1795 |   if (mIsAboutBlank) { |
michael@0 | 1796 |     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank"); |
michael@0 | 1797 |     // ... but if an extension tries to feed us data for about:blank in a |
michael@0 | 1798 |     // release build, silently ignore the data. |
michael@0 | 1799 |     uint32_t discarded; |
michael@0 | 1800 |     return pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength, |
michael@0 | 1801 |                                   &discarded); |
michael@0 | 1802 |   } |
michael@0 | 1803 | |
michael@0 | 1804 |   // Walk the context chain to the context whose mRequest is this request. |
michael@0 | 1805 |   CParserContext* owner = mParserContext; |
michael@0 | 1806 |   while (owner && owner->mRequest != request) { |
michael@0 | 1807 |     owner = owner->mPrevContext; |
michael@0 | 1808 |   } |
michael@0 | 1809 |   if (!owner) { |
michael@0 | 1810 |     return NS_ERROR_UNEXPECTED; |
michael@0 | 1811 |   } |
michael@0 | 1812 | |
michael@0 | 1813 |   owner->mStreamListenerState = eOnDataAvail; |
michael@0 | 1814 |   if (eInvalidDetect == owner->mAutoDetectStatus && owner->mScanner) { |
michael@0 | 1815 |     nsScannerIterator endOfData; |
michael@0 | 1816 |     owner->mScanner->EndReading(endOfData); |
michael@0 | 1817 |     owner->mScanner->SetPosition(endOfData, true); |
michael@0 | 1818 |   } |
michael@0 | 1819 | |
michael@0 | 1820 |   ParserWriteStruct writeState; |
michael@0 | 1821 |   writeState.mNeedCharsetCheck = true; |
michael@0 | 1822 |   writeState.mParser = this; |
michael@0 | 1823 |   writeState.mScanner = owner->mScanner; |
michael@0 | 1824 |   writeState.mRequest = request; |
michael@0 | 1825 | |
michael@0 | 1826 |   uint32_t bytesRead; |
michael@0 | 1827 |   nsresult rv = pIStream->ReadSegments(ParserWriteFunc, &writeState, aLength, |
michael@0 | 1828 |                                        &bytesRead); |
michael@0 | 1829 |   if (NS_FAILED(rv)) { |
michael@0 | 1830 |     return rv; |
michael@0 | 1831 |   } |
michael@0 | 1832 | |
michael@0 | 1833 |   // Don't bother to start parsing until we've seen some non-whitespace data |
michael@0 | 1834 |   if (!IsOkToProcessNetworkData() || |
michael@0 | 1835 |       owner->mScanner->FirstNonWhitespacePosition() < 0) { |
michael@0 | 1836 |     return rv; |
michael@0 | 1837 |   } |
michael@0 | 1838 | |
michael@0 | 1839 |   nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
michael@0 | 1840 |   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); |
michael@0 | 1841 |   mProcessingNetworkData = true; |
michael@0 | 1842 |   if (mSink) { |
michael@0 | 1843 |     mSink->WillParse(); |
michael@0 | 1844 |   } |
michael@0 | 1845 |   rv = ResumeParse(); |
michael@0 | 1846 |   mProcessingNetworkData = false; |
michael@0 | 1847 |   return rv; |
michael@0 | 1848 | } |
michael@0 | 1849 | |
michael@0 | 1850 | /** |
michael@0 | 1851 | * This is called by the networking library once the last block of data |
michael@0 | 1852 | * has been collected from the net. |
michael@0 | 1853 | */ |
michael@0 | 1854 | nsresult |
michael@0 | 1855 | nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext, |
michael@0 | 1856 | nsresult status) |
michael@0 | 1857 | { |
michael@0 | 1858 | nsresult rv = NS_OK; |
michael@0 | 1859 | |
michael@0 | 1860 | CParserContext *pc = mParserContext; |
michael@0 | 1861 | while (pc) { |
michael@0 | 1862 | if (pc->mRequest == request) { |
michael@0 | 1863 | pc->mStreamListenerState = eOnStop; |
michael@0 | 1864 | pc->mScanner->SetIncremental(false); |
michael@0 | 1865 | break; |
michael@0 | 1866 | } |
michael@0 | 1867 | |
michael@0 | 1868 | pc = pc->mPrevContext; |
michael@0 | 1869 | } |
michael@0 | 1870 | |
michael@0 | 1871 | mStreamStatus = status; |
michael@0 | 1872 | |
michael@0 | 1873 | if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) { |
michael@0 | 1874 | mProcessingNetworkData = true; |
michael@0 | 1875 | if (mSink) { |
michael@0 | 1876 | mSink->WillParse(); |
michael@0 | 1877 | } |
michael@0 | 1878 | rv = ResumeParse(true, true); |
michael@0 | 1879 | mProcessingNetworkData = false; |
michael@0 | 1880 | } |
michael@0 | 1881 | |
michael@0 | 1882 | // If the parser isn't enabled, we don't finish parsing till |
michael@0 | 1883 | // it is reenabled. |
michael@0 | 1884 | |
michael@0 | 1885 | |
michael@0 | 1886 | // XXX Should we wait to notify our observers as well if the |
michael@0 | 1887 | // parser isn't yet enabled? |
michael@0 | 1888 | if (mObserver) { |
michael@0 | 1889 | mObserver->OnStopRequest(request, aContext, status); |
michael@0 | 1890 | } |
michael@0 | 1891 | |
michael@0 | 1892 | return rv; |
michael@0 | 1893 | } |
michael@0 | 1894 | |
michael@0 | 1895 | |
michael@0 | 1896 | /******************************************************************* |
michael@0 | 1897 | Here come the tokenization methods... |
michael@0 | 1898 | *******************************************************************/ |
michael@0 | 1899 | |
michael@0 | 1900 | |
michael@0 | 1901 | /** |
michael@0 | 1902 | * Part of the code sandwich, this gets called right before |
michael@0 | 1903 | * the tokenization process begins. The main reason for |
michael@0 | 1904 | * this call is to allow the delegate to do initialization. |
michael@0 | 1905 | */ |
michael@0 | 1906 | bool |
michael@0 | 1907 | nsParser::WillTokenize(bool aIsFinalChunk) |
michael@0 | 1908 | { |
michael@0 | 1909 | if (!mParserContext) { |
michael@0 | 1910 | return true; |
michael@0 | 1911 | } |
michael@0 | 1912 | |
michael@0 | 1913 | nsITokenizer* theTokenizer; |
michael@0 | 1914 | nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); |
michael@0 | 1915 | NS_ENSURE_SUCCESS(result, false); |
michael@0 | 1916 | return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk)); |
michael@0 | 1917 | } |
michael@0 | 1918 | |
michael@0 | 1919 | |
michael@0 | 1920 | /** |
michael@0 | 1921 | * This is the primary control routine to consume tokens. |
michael@0 | 1922 | * It iteratively consumes tokens until an error occurs or |
michael@0 | 1923 | * you run out of data. |
michael@0 | 1924 | */ |
michael@0 | 1925 | nsresult nsParser::Tokenize(bool aIsFinalChunk) |
michael@0 | 1926 | { |
michael@0 | 1927 | nsITokenizer* theTokenizer; |
michael@0 | 1928 | |
michael@0 | 1929 | nsresult result = NS_ERROR_NOT_AVAILABLE; |
michael@0 | 1930 | if (mParserContext) { |
michael@0 | 1931 | result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); |
michael@0 | 1932 | } |
michael@0 | 1933 | |
michael@0 | 1934 | if (NS_SUCCEEDED(result)) { |
michael@0 | 1935 | bool flushTokens = false; |
michael@0 | 1936 | |
michael@0 | 1937 | bool killSink = false; |
michael@0 | 1938 | |
michael@0 | 1939 | WillTokenize(aIsFinalChunk); |
michael@0 | 1940 | while (NS_SUCCEEDED(result)) { |
michael@0 | 1941 | mParserContext->mScanner->Mark(); |
michael@0 | 1942 | result = theTokenizer->ConsumeToken(*mParserContext->mScanner, |
michael@0 | 1943 | flushTokens); |
michael@0 | 1944 | if (NS_FAILED(result)) { |
michael@0 | 1945 | mParserContext->mScanner->RewindToMark(); |
michael@0 | 1946 | if (kEOF == result){ |
michael@0 | 1947 | break; |
michael@0 | 1948 | } |
michael@0 | 1949 | if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { |
michael@0 | 1950 | killSink = true; |
michael@0 | 1951 | result = Terminate(); |
michael@0 | 1952 | break; |
michael@0 | 1953 | } |
michael@0 | 1954 | } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) { |
michael@0 | 1955 | // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931. |
michael@0 | 1956 | // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 -- |
michael@0 | 1957 | // Also remember to update the marked position. |
michael@0 | 1958 | mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS; |
michael@0 | 1959 | mParserContext->mScanner->Mark(); |
michael@0 | 1960 | break; |
michael@0 | 1961 | } |
michael@0 | 1962 | } |
michael@0 | 1963 | |
michael@0 | 1964 | if (killSink) { |
michael@0 | 1965 | mSink = nullptr; |
michael@0 | 1966 | } |
michael@0 | 1967 | } else { |
michael@0 | 1968 | result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; |
michael@0 | 1969 | } |
michael@0 | 1970 | |
michael@0 | 1971 | return result; |
michael@0 | 1972 | } |
michael@0 | 1973 | |
michael@0 | 1974 | /** |
michael@0 | 1975 | * Get the channel associated with this parser |
michael@0 | 1976 | * |
michael@0 | 1977 | * @param aChannel out param that will contain the result |
michael@0 | 1978 | * @return NS_OK if successful |
michael@0 | 1979 | */ |
michael@0 | 1980 | NS_IMETHODIMP |
michael@0 | 1981 | nsParser::GetChannel(nsIChannel** aChannel) |
michael@0 | 1982 | { |
michael@0 | 1983 | nsresult result = NS_ERROR_NOT_AVAILABLE; |
michael@0 | 1984 | if (mParserContext && mParserContext->mRequest) { |
michael@0 | 1985 | result = CallQueryInterface(mParserContext->mRequest, aChannel); |
michael@0 | 1986 | } |
michael@0 | 1987 | return result; |
michael@0 | 1988 | } |
michael@0 | 1989 | |
michael@0 | 1990 | /** |
michael@0 | 1991 | * Get the DTD associated with this parser |
michael@0 | 1992 | */ |
michael@0 | 1993 | NS_IMETHODIMP |
michael@0 | 1994 | nsParser::GetDTD(nsIDTD** aDTD) |
michael@0 | 1995 | { |
michael@0 | 1996 | if (mParserContext) { |
michael@0 | 1997 | NS_IF_ADDREF(*aDTD = mDTD); |
michael@0 | 1998 | } |
michael@0 | 1999 | |
michael@0 | 2000 | return NS_OK; |
michael@0 | 2001 | } |
michael@0 | 2002 | |
michael@0 | 2003 | /** |
michael@0 | 2004 | * Get this as nsIStreamListener |
michael@0 | 2005 | */ |
michael@0 | 2006 | nsIStreamListener* |
michael@0 | 2007 | nsParser::GetStreamListener() |
michael@0 | 2008 | { |
michael@0 | 2009 | return this; |
michael@0 | 2010 | } |