parser/htmlparser/src/nsParser.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set sw=2 ts=2 et tw=79: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "nsIAtom.h"
michael@0 8 #include "nsParser.h"
michael@0 9 #include "nsString.h"
michael@0 10 #include "nsCRT.h"
michael@0 11 #include "nsScanner.h"
michael@0 12 #include "plstr.h"
michael@0 13 #include "nsIStringStream.h"
michael@0 14 #include "nsIChannel.h"
michael@0 15 #include "nsICachingChannel.h"
michael@0 16 #include "nsICacheEntryDescriptor.h"
michael@0 17 #include "nsIInputStream.h"
michael@0 18 #include "CNavDTD.h"
michael@0 19 #include "prenv.h"
michael@0 20 #include "prlock.h"
michael@0 21 #include "prcvar.h"
michael@0 22 #include "nsParserCIID.h"
michael@0 23 #include "nsReadableUtils.h"
michael@0 24 #include "nsCOMPtr.h"
michael@0 25 #include "nsExpatDriver.h"
michael@0 26 #include "nsIServiceManager.h"
michael@0 27 #include "nsICategoryManager.h"
michael@0 28 #include "nsISupportsPrimitives.h"
michael@0 29 #include "nsIFragmentContentSink.h"
michael@0 30 #include "nsStreamUtils.h"
michael@0 31 #include "nsHTMLTokenizer.h"
michael@0 32 #include "nsNetUtil.h"
michael@0 33 #include "nsScriptLoader.h"
michael@0 34 #include "nsDataHashtable.h"
michael@0 35 #include "nsXPCOMCIDInternal.h"
michael@0 36 #include "nsMimeTypes.h"
michael@0 37 #include "mozilla/CondVar.h"
michael@0 38 #include "mozilla/Mutex.h"
michael@0 39 #include "nsParserConstants.h"
michael@0 40 #include "nsCharsetSource.h"
michael@0 41 #include "nsContentUtils.h"
michael@0 42 #include "nsThreadUtils.h"
michael@0 43 #include "nsIHTMLContentSink.h"
michael@0 44
michael@0 45 #include "mozilla/dom/EncodingUtils.h"
michael@0 46
michael@0 47 using namespace mozilla;
michael@0 48 using mozilla::dom::EncodingUtils;
michael@0 49
// Bit flags combined into nsParser::mFlags. Initialize() turns on
// PARSER_ENABLED, OBSERVERS_ENABLED and CAN_TOKENIZE by default.
michael@0 50 #define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
michael@0 51 #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
// Set by PostContinueEvent() once an nsParserContinueEvent has been
// successfully dispatched; Cleanup() asserts that it is clear.
michael@0 52 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
michael@0 53 #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
michael@0 54 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040
michael@0 55
michael@0 56 //-------------- Begin ParseContinue Event Definition ------------------------
michael@0 57 /*
michael@0 58 The parser can be explicitly interrupted by passing a return value of
michael@0 59 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
michael@0 60 the parser to stop processing and allow the application to return to the event
michael@0 61 loop. The data which was left at the time of interruption will be processed
michael@0 62 the next time OnDataAvailable is called. If the parser has received its final
michael@0 63 chunk of data then OnDataAvailable will no longer be called by the networking
michael@0 64 module, so the parser will schedule a nsParserContinueEvent which will call
michael@0 65 the parser to process the remaining data after returning to the event loop.
michael@0 66 If the parser is interrupted while processing the remaining data it will
michael@0 67 schedule another ParseContinueEvent. The processing of data followed by
michael@0 68 scheduling of the continue events will proceed until either:
michael@0 69
michael@0 70 1) All of the remaining data can be processed without interrupting
michael@0 71 2) The parser has been cancelled.
michael@0 72
michael@0 73
michael@0 74 This capability is currently used in CNavDTD and nsHTMLContentSink. The
michael@0 75 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
michael@0 76 processed and when each token is processed. The nsHTML content sink records
michael@0 77 the time when the chunk has started processing and will return
michael@0 78 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
michael@0 79 threshold called max tokenizing processing time. This allows the content sink
michael@0 80 to limit how much data is processed in a single chunk which in turn gates how
michael@0 81 much time is spent away from the event loop. Processing smaller chunks of data
michael@0 82 also reduces the time spent in subsequent reflows.
michael@0 83
michael@0 84 This capability is most apparent when loading large documents. If the maximum
michael@0 85 token processing time is set small enough the application will remain
michael@0 86 responsive during document load.
michael@0 87
michael@0 88 A side-effect of this capability is that document load is not complete when
michael@0 89 the last chunk of data is passed to OnDataAvailable since the parser may have
michael@0 90 been interrupted when the last chunk of data arrived. The document is complete
michael@0 91 when all of the document has been tokenized and there aren't any pending
michael@0 92 nsParserContinueEvents. This can cause problems if the application assumes
michael@0 93 that it can monitor the load requests to determine when the document load has
michael@0 94 been completed. This is what happens in Mozilla. The document is considered
michael@0 95 completely loaded when all of the load requests have been satisfied. To delay
michael@0 96 the document load until all of the parsing has been completed the
michael@0 97 nsHTMLContentSink adds a dummy parser load request which is not removed until
michael@0 98 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
michael@0 99 DidBuildModel until the final chunk of data has been passed to the parser
michael@0 100 through the OnDataAvailable and there aren't any pending
michael@0 101 nsParserContinueEvents.
michael@0 102
michael@0 103 Currently the parser ignores requests to be interrupted during the
michael@0 104 processing of script. This is because a document.write followed by JavaScript
michael@0 105 calls to manipulate the DOM may fail if the parser was interrupted during the
michael@0 106 document.write.
michael@0 107
michael@0 108 For more details @see bugzilla bug 76722
michael@0 109 */
michael@0 110
michael@0 111
michael@0 112 class nsParserContinueEvent : public nsRunnable
michael@0 113 {
michael@0 114 public:
michael@0 115 nsRefPtr<nsParser> mParser;
michael@0 116
michael@0 117 nsParserContinueEvent(nsParser* aParser)
michael@0 118 : mParser(aParser)
michael@0 119 {}
michael@0 120
michael@0 121 NS_IMETHOD Run()
michael@0 122 {
michael@0 123 mParser->HandleParserContinueEvent(this);
michael@0 124 return NS_OK;
michael@0 125 }
michael@0 126 };
michael@0 127
michael@0 128 //-------------- End ParseContinue Event Definition ------------------------
michael@0 129
michael@0 130 /**
michael@0 131 * Default constructor: all member setup is delegated to Initialize(true).
michael@0 132 */
michael@0 133 nsParser::nsParser()
michael@0 134 {
michael@0 135 Initialize(true);
michael@0 136 }
michael@0 137
// Destructor: tears down the parser-context stack through Cleanup().
michael@0 138 nsParser::~nsParser()
michael@0 139 {
michael@0 140 Cleanup();
michael@0 141 }
michael@0 142
michael@0 143 void
michael@0 144 nsParser::Initialize(bool aConstructor)
michael@0 145 {
michael@0 146 if (aConstructor) {
michael@0 147 // Raw pointer
michael@0 148 mParserContext = 0;
michael@0 149 }
michael@0 150 else {
michael@0 151 // nsCOMPtrs
michael@0 152 mObserver = nullptr;
michael@0 153 mUnusedInput.Truncate();
michael@0 154 }
michael@0 155
michael@0 156 mContinueEvent = nullptr;
michael@0 157 mCharsetSource = kCharsetUninitialized;
michael@0 158 mCharset.AssignLiteral("ISO-8859-1");
michael@0 159 mInternalState = NS_OK;
michael@0 160 mStreamStatus = NS_OK;
michael@0 161 mCommand = eViewNormal;
michael@0 162 mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
michael@0 163 NS_PARSER_FLAG_PARSER_ENABLED |
michael@0 164 NS_PARSER_FLAG_CAN_TOKENIZE;
michael@0 165
michael@0 166 mProcessingNetworkData = false;
michael@0 167 mIsAboutBlank = false;
michael@0 168 }
michael@0 169
michael@0 170 void
michael@0 171 nsParser::Cleanup()
michael@0 172 {
michael@0 173 #ifdef DEBUG
michael@0 174 if (mParserContext && mParserContext->mPrevContext) {
michael@0 175 NS_WARNING("Extra parser contexts still on the parser stack");
michael@0 176 }
michael@0 177 #endif
michael@0 178
michael@0 179 while (mParserContext) {
michael@0 180 CParserContext *pc = mParserContext->mPrevContext;
michael@0 181 delete mParserContext;
michael@0 182 mParserContext = pc;
michael@0 183 }
michael@0 184
michael@0 185 // It should not be possible for this flag to be set when we are getting
michael@0 186 // destroyed since this flag implies a pending nsParserContinueEvent, which
michael@0 187 // has an owning reference to |this|.
michael@0 188 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
michael@0 189 }
michael@0 190
// Cycle-collection and interface-map glue for nsParser.
michael@0 191 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
michael@0 192
// Unlink: the members released here are mDTD, mSink and mObserver.
michael@0 193 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
michael@0 194 NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
michael@0 195 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
michael@0 196 NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
michael@0 197 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
michael@0 198
// Traverse: report the same three members, then the tokenizer of every
// context on the parser-context stack (walked through mPrevContext).
michael@0 199 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
michael@0 200 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
michael@0 201 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
michael@0 202 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
michael@0 203 CParserContext *pc = tmp->mParserContext;
michael@0 204 while (pc) {
michael@0 205 cb.NoteXPCOMChild(pc->mTokenizer);
michael@0 206 pc = pc->mPrevContext;
michael@0 207 }
michael@0 208 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
michael@0 209
// Reference counting and the list of interfaces nsParser answers to;
// nsISupports is resolved through the nsIParser base.
michael@0 210 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
michael@0 211 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
michael@0 212 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
michael@0 213 NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
michael@0 214 NS_INTERFACE_MAP_ENTRY(nsIParser)
michael@0 215 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
michael@0 216 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
michael@0 217 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
michael@0 218 NS_INTERFACE_MAP_END
michael@0 219
michael@0 220 // The parser continue event is posted only if
michael@0 221 // all of the data to parse has been passed to ::OnDataAvailable
michael@0 222 // and the parser has been interrupted by the content sink
michael@0 223 // because the processing of tokens took too long.
michael@0 224
michael@0 225 nsresult
michael@0 226 nsParser::PostContinueEvent()
michael@0 227 {
michael@0 228 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
michael@0 229 // If this flag isn't set, then there shouldn't be a live continue event!
michael@0 230 NS_ASSERTION(!mContinueEvent, "bad");
michael@0 231
michael@0 232 // This creates a reference cycle between this and the event that is
michael@0 233 // broken when the event fires.
michael@0 234 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
michael@0 235 if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
michael@0 236 NS_WARNING("failed to dispatch parser continuation event");
michael@0 237 } else {
michael@0 238 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
michael@0 239 mContinueEvent = event;
michael@0 240 }
michael@0 241 }
michael@0 242 return NS_OK;
michael@0 243 }
michael@0 244
michael@0 245 NS_IMETHODIMP_(void)
michael@0 246 nsParser::GetCommand(nsCString& aCommand)
michael@0 247 {
michael@0 248 aCommand = mCommandStr;
michael@0 249 }
michael@0 250
michael@0 251 /**
michael@0 252 * Call this method once you've created a parser, and want to instruct it
michael@0 253 * about the command which caused the parser to be constructed. For example,
michael@0 254 * this allows us to select a DTD which can do, say, view-source.
michael@0 255 *
michael@0 256 * @param aCommand the command string to set
michael@0 257 */
michael@0 258 NS_IMETHODIMP_(void)
michael@0 259 nsParser::SetCommand(const char* aCommand)
michael@0 260 {
michael@0 261 mCommandStr.Assign(aCommand);
michael@0 262 if (mCommandStr.Equals("view-source")) {
michael@0 263 mCommand = eViewSource;
michael@0 264 } else if (mCommandStr.Equals("view-fragment")) {
michael@0 265 mCommand = eViewFragment;
michael@0 266 } else {
michael@0 267 mCommand = eViewNormal;
michael@0 268 }
michael@0 269 }
michael@0 270
michael@0 271 /**
michael@0 272 * Set the parser command directly from its enumerated form. Unlike the
michael@0 273 * string overload, this stores only mCommand and leaves mCommandStr
michael@0 274 * untouched.
michael@0 275 *
michael@0 276 * @param aParserCommand the command to set
michael@0 277 */
michael@0 278 NS_IMETHODIMP_(void)
michael@0 279 nsParser::SetCommand(eParserCommands aParserCommand)
michael@0 280 {
michael@0 281 mCommand = aParserCommand;
michael@0 282 }
michael@0 283
michael@0 284 /**
michael@0 285 * Call this method once you've created a parser, and want to instruct it
michael@0 286 * about what charset to load
michael@0 287 *
michael@0 288 * @param aCharset- the charset of a document
michael@0 289 * @param aCharsetSource- the source of the charset
michael@0 290 */
michael@0 291 NS_IMETHODIMP_(void)
michael@0 292 nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)
michael@0 293 {
michael@0 294 mCharset = aCharset;
michael@0 295 mCharsetSource = aCharsetSource;
michael@0 296 if (mParserContext && mParserContext->mScanner) {
michael@0 297 mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
michael@0 298 }
michael@0 299 }
michael@0 300
michael@0 301 void
michael@0 302 nsParser::SetSinkCharset(nsACString& aCharset)
michael@0 303 {
michael@0 304 if (mSink) {
michael@0 305 mSink->SetDocumentCharset(aCharset);
michael@0 306 }
michael@0 307 }
michael@0 308
michael@0 309 /**
michael@0 310 * This method gets called in order to set the content
michael@0 311 * sink for this parser to dump nodes to.
michael@0 312 *
michael@0 313 * @param nsIContentSink interface for node receiver
michael@0 314 */
michael@0 315 NS_IMETHODIMP_(void)
michael@0 316 nsParser::SetContentSink(nsIContentSink* aSink)
michael@0 317 {
michael@0 318 NS_PRECONDITION(aSink, "sink cannot be null!");
michael@0 319 mSink = aSink;
michael@0 320
michael@0 321 if (mSink) {
michael@0 322 mSink->SetParser(this);
michael@0 323 nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
michael@0 324 if (htmlSink) {
michael@0 325 mIsAboutBlank = true;
michael@0 326 }
michael@0 327 }
michael@0 328 }
michael@0 329
michael@0 330 /**
michael@0 331 * Retrieve the content sink currently stored in mSink.
michael@0 332 * @return the current sink, exactly as held by mSink (no checks are made)
michael@0 333 */
michael@0 334 NS_IMETHODIMP_(nsIContentSink*)
michael@0 335 nsParser::GetContentSink()
michael@0 336 {
michael@0 337 return mSink;
michael@0 338 }
michael@0 339
michael@0 340 /**
michael@0 341 * Determine what DTD mode (and thus what layout nsCompatibility mode)
michael@0 342 * to use for this document based on the first chunk of data received
michael@0 343 * from the network (each parsercontext can have its own mode). (No,
michael@0 344 * this is not an optimal solution -- we really don't need to know until
michael@0 345 * after we've received the DOCTYPE, and this could easily be part of
michael@0 346 * the regular parsing process if the parser were designed in a way that
michael@0 347 * made such modifications easy.)
michael@0 348 */
michael@0 349
michael@0 350 // Parse the PS production in the SGML spec (excluding the part dealing
michael@0 351 // with entity references) starting at theIndex into theBuffer, and
michael@0 352 // return the first index after the end of the production.
michael@0 353 static int32_t
michael@0 354 ParsePS(const nsString& aBuffer, int32_t aIndex)
michael@0 355 {
michael@0 356 for (;;) {
michael@0 357 char16_t ch = aBuffer.CharAt(aIndex);
michael@0 358 if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||
michael@0 359 (ch == char16_t('\n')) || (ch == char16_t('\r'))) {
michael@0 360 ++aIndex;
michael@0 361 } else if (ch == char16_t('-')) {
michael@0 362 int32_t tmpIndex;
michael@0 363 if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&
michael@0 364 kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
michael@0 365 aIndex = tmpIndex + 2;
michael@0 366 } else {
michael@0 367 return aIndex;
michael@0 368 }
michael@0 369 } else {
michael@0 370 return aIndex;
michael@0 371 }
michael@0 372 }
michael@0 373 }
michael@0 374
// Bits that ParseDocTypeDecl() ORs into its aResultFlags out-parameter to
// report which parts of a DOCTYPE declaration it found.
michael@0 375 #define PARSE_DTD_HAVE_DOCTYPE (1<<0)
michael@0 376 #define PARSE_DTD_HAVE_PUBLIC_ID (1<<1)
michael@0 377 #define PARSE_DTD_HAVE_SYSTEM_ID (1<<2)
michael@0 378 #define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3)
michael@0 379
michael@0 380 // return true on success (includes not present), false on failure
michael@0 381 static bool
michael@0 382 ParseDocTypeDecl(const nsString &aBuffer,
michael@0 383 int32_t *aResultFlags,
michael@0 384 nsString &aPublicID,
michael@0 385 nsString &aSystemID)
michael@0 386 {
michael@0 387 bool haveDoctype = false;
michael@0 388 *aResultFlags = 0;
michael@0 389
michael@0 390 // Skip through any comments and processing instructions
michael@0 391 // The PI-skipping is a bit of a hack.
michael@0 392 int32_t theIndex = 0;
michael@0 393 do {
michael@0 394 theIndex = aBuffer.FindChar('<', theIndex);
michael@0 395 if (theIndex == kNotFound) break;
michael@0 396 char16_t nextChar = aBuffer.CharAt(theIndex+1);
michael@0 397 if (nextChar == char16_t('!')) {
michael@0 398 int32_t tmpIndex = theIndex + 2;
michael@0 399 if (kNotFound !=
michael@0 400 (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
michael@0 401 haveDoctype = true;
michael@0 402 theIndex += 7; // skip "DOCTYPE"
michael@0 403 break;
michael@0 404 }
michael@0 405 theIndex = ParsePS(aBuffer, tmpIndex);
michael@0 406 theIndex = aBuffer.FindChar('>', theIndex);
michael@0 407 } else if (nextChar == char16_t('?')) {
michael@0 408 theIndex = aBuffer.FindChar('>', theIndex);
michael@0 409 } else {
michael@0 410 break;
michael@0 411 }
michael@0 412 } while (theIndex != kNotFound);
michael@0 413
michael@0 414 if (!haveDoctype)
michael@0 415 return true;
michael@0 416 *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
michael@0 417
michael@0 418 theIndex = ParsePS(aBuffer, theIndex);
michael@0 419 theIndex = aBuffer.Find("HTML", true, theIndex, 0);
michael@0 420 if (kNotFound == theIndex)
michael@0 421 return false;
michael@0 422 theIndex = ParsePS(aBuffer, theIndex+4);
michael@0 423 int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
michael@0 424
michael@0 425 if (kNotFound != tmpIndex) {
michael@0 426 theIndex = ParsePS(aBuffer, tmpIndex+6);
michael@0 427
michael@0 428 // We get here only if we've read <!DOCTYPE HTML PUBLIC
michael@0 429 // (not case sensitive) possibly with comments within.
michael@0 430
michael@0 431 // Now find the beginning and end of the public identifier
michael@0 432 // and the system identifier (if present).
michael@0 433
michael@0 434 char16_t lit = aBuffer.CharAt(theIndex);
michael@0 435 if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
michael@0 436 return false;
michael@0 437
michael@0 438 // Start is the first character, excluding the quote, and End is
michael@0 439 // the final quote, so there are (end-start) characters.
michael@0 440
michael@0 441 int32_t PublicIDStart = theIndex + 1;
michael@0 442 int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
michael@0 443 if (kNotFound == PublicIDEnd)
michael@0 444 return false;
michael@0 445 theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
michael@0 446 char16_t next = aBuffer.CharAt(theIndex);
michael@0 447 if (next == char16_t('>')) {
michael@0 448 // There was a public identifier, but no system
michael@0 449 // identifier,
michael@0 450 // so do nothing.
michael@0 451 // This is needed to avoid the else at the end, and it's
michael@0 452 // also the most common case.
michael@0 453 } else if ((next == char16_t('\"')) ||
michael@0 454 (next == char16_t('\''))) {
michael@0 455 // We found a system identifier.
michael@0 456 *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
michael@0 457 int32_t SystemIDStart = theIndex + 1;
michael@0 458 int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
michael@0 459 if (kNotFound == SystemIDEnd)
michael@0 460 return false;
michael@0 461 aSystemID =
michael@0 462 Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
michael@0 463 } else if (next == char16_t('[')) {
michael@0 464 // We found an internal subset.
michael@0 465 *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
michael@0 466 } else {
michael@0 467 // Something's wrong.
michael@0 468 return false;
michael@0 469 }
michael@0 470
michael@0 471 // Since a public ID is a minimum literal, we must trim
michael@0 472 // and collapse whitespace
michael@0 473 aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
michael@0 474 aPublicID.CompressWhitespace(true, true);
michael@0 475 *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
michael@0 476 } else {
michael@0 477 tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
michael@0 478 if (kNotFound != tmpIndex) {
michael@0 479 // DOCTYPES with system ID but no Public ID
michael@0 480 *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
michael@0 481
michael@0 482 theIndex = ParsePS(aBuffer, tmpIndex+6);
michael@0 483 char16_t next = aBuffer.CharAt(theIndex);
michael@0 484 if (next != char16_t('\"') && next != char16_t('\''))
michael@0 485 return false;
michael@0 486
michael@0 487 int32_t SystemIDStart = theIndex + 1;
michael@0 488 int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
michael@0 489
michael@0 490 if (kNotFound == SystemIDEnd)
michael@0 491 return false;
michael@0 492 aSystemID =
michael@0 493 Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
michael@0 494 theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
michael@0 495 }
michael@0 496
michael@0 497 char16_t nextChar = aBuffer.CharAt(theIndex);
michael@0 498 if (nextChar == char16_t('['))
michael@0 499 *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
michael@0 500 else if (nextChar != char16_t('>'))
michael@0 501 return false;
michael@0 502 }
michael@0 503 return true;
michael@0 504 }
michael@0 505
// One row of the table of known public identifiers (kPublicIDs) and the
// compatibility mode each one selects.
michael@0 506 struct PubIDInfo
michael@0 507 {
michael@0 508 enum eMode {
michael@0 509 eQuirks, /* always quirks mode, unless there's an internal subset */
michael@0 510 eAlmostStandards,/* eCompatibility_AlmostStandards */
michael@0 511 eFullStandards /* eCompatibility_FullStandards */
michael@0 512 /*
michael@0 513 * public IDs that should trigger strict mode are not listed
michael@0 514 * since we want all future public IDs to trigger strict mode as
michael@0 515 * well
michael@0 516 */
michael@0 517 };
michael@0 518
// Public identifier; the entries in kPublicIDs are all written in lower case.
michael@0 519 const char* name;
// NOTE(review): presumably the mode used when the DOCTYPE carries no system
// identifier, and the mode used when it does -- confirm against the lookup code.
michael@0 520 eMode mode_if_no_sysid;
michael@0 521 eMode mode_if_sysid;
michael@0 522 };
michael@0 523
// Element count of array_: its total size divided by the size of its first
// element.
michael@0 524 #define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))
michael@0 525
michael@0 526 // These must be in nsCRT::strcmp order so binary-search can be used.
michael@0 527 // This is verified, |#ifdef DEBUG|, below.
michael@0 528
michael@0 529 // Even though public identifiers should be case sensitive, we will do
michael@0 530 // all comparisons after converting to lower case in order to do
michael@0 531 // case-insensitive comparison since there are a number of existing web
michael@0 532 // sites that use the incorrect case. Therefore all of the public
michael@0 533 // identifiers below are in lower case (with the correct case following,
michael@0 534 // in comments). The case is verified, |#ifdef DEBUG|, below.
michael@0 535 static const PubIDInfo kPublicIDs[] = {
michael@0 536 {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 537 {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 538 {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 539 {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 540 {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 541 {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 542 {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 543 {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 544 {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 545 {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 546 {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 547 {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 548 {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 549 {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 550 {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 551 {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 552 {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 553 {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 554 {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 555 {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 556 {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 557 {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 558 {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 559 {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 560 {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 561 {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 562 {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 563 {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 564 {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 565 {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 566 {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 567 {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 568 {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 569 {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 570 {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 571 {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 572 {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 573 {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 574 {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 575 {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 576 {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 577 {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 578 {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 579 {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 580 {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 581 {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 582 {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 583 {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 584 {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 585 {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 586 {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 587 {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 588 {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 589 {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 590 {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 591 {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 592 {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 593 {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 594 {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 595 {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 596 {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 597 {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 598 {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
michael@0 599 {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
michael@0 600 {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 601 {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 602 {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 603 {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
michael@0 604 {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
michael@0 605 {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 606 {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 607 {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 608 {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 609 {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 610 {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 611 {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0 612 };
michael@0 613
#ifdef DEBUG
/**
 * Debug-only consistency check of the kPublicIDs table.
 *
 * The checks run on the first call only; later calls return immediately.
 * Two properties are verified, and every violation is reported with
 * NS_NOTREACHED followed by a printf naming the offending entry or entries:
 *  - each name compares strictly less (nsCRT::strcmp) than the name after
 *    it, which is what the binary search in DetermineHTMLParseMode needs;
 *  - each name is left unchanged by ToLowerCase.
 */
static void
VerifyPublicIDs()
{
  static bool gVerified = false;
  if (gVerified) {
    return;
  }
  gVerified = true;

  // Pass 1: adjacent entries must be in strictly ascending order.
  for (uint32_t cur = 0; cur < ELEMENTS_OF(kPublicIDs) - 1; ++cur) {
    const uint32_t next = cur + 1;
    if (nsCRT::strcmp(kPublicIDs[cur].name, kPublicIDs[next].name) < 0) {
      continue;
    }
    NS_NOTREACHED("doctypes out of order");
    printf("Doctypes %s and %s out of order.\n",
           kPublicIDs[cur].name, kPublicIDs[next].name);
  }

  // Pass 2: every entry must already be lower case.
  for (uint32_t cur = 0; cur < ELEMENTS_OF(kPublicIDs); ++cur) {
    nsAutoCString lowered(kPublicIDs[cur].name);
    ToLowerCase(lowered);
    if (nsCRT::strcmp(kPublicIDs[cur].name, lowered.get()) == 0) {
      continue;
    }
    NS_NOTREACHED("doctype not lower case");
    printf("Doctype %s not lower case.\n", kPublicIDs[cur].name);
  }
}
#endif
michael@0 640
michael@0 641 static void
michael@0 642 DetermineHTMLParseMode(const nsString& aBuffer,
michael@0 643 nsDTDMode& aParseMode,
michael@0 644 eParserDocType& aDocType)
michael@0 645 {
michael@0 646 #ifdef DEBUG
michael@0 647 VerifyPublicIDs();
michael@0 648 #endif
michael@0 649 int32_t resultFlags;
michael@0 650 nsAutoString publicIDUCS2, sysIDUCS2;
michael@0 651 if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {
michael@0 652 if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
michael@0 653 // no DOCTYPE
michael@0 654 aParseMode = eDTDMode_quirks;
michael@0 655 aDocType = eHTML_Quirks;
michael@0 656 } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||
michael@0 657 !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
michael@0 658 // A doctype with an internal subset is always full_standards.
michael@0 659 // A doctype without a public ID is always full_standards.
michael@0 660 aDocType = eHTML_Strict;
michael@0 661 aParseMode = eDTDMode_full_standards;
michael@0 662
michael@0 663 // Special hack for IBM's custom DOCTYPE.
michael@0 664 if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&
michael@0 665 sysIDUCS2 == NS_LITERAL_STRING(
michael@0 666 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
michael@0 667 aParseMode = eDTDMode_quirks;
michael@0 668 aDocType = eHTML_Quirks;
michael@0 669 }
michael@0 670
michael@0 671 } else {
michael@0 672 // We have to check our list of public IDs to see what to do.
michael@0 673 // Yes, we want UCS2 to ASCII lossy conversion.
michael@0 674 nsAutoCString publicID;
michael@0 675 publicID.AssignWithConversion(publicIDUCS2);
michael@0 676
michael@0 677 // See comment above definition of kPublicIDs about case
michael@0 678 // sensitivity.
michael@0 679 ToLowerCase(publicID);
michael@0 680
michael@0 681 // Binary search to see if we can find the correct public ID
michael@0 682 // These must be signed since maximum can go below zero and we'll
michael@0 683 // crash if it's unsigned.
michael@0 684 int32_t minimum = 0;
michael@0 685 int32_t maximum = ELEMENTS_OF(kPublicIDs) - 1;
michael@0 686 int32_t index;
michael@0 687 for (;;) {
michael@0 688 index = (minimum + maximum) / 2;
michael@0 689 int32_t comparison =
michael@0 690 nsCRT::strcmp(publicID.get(), kPublicIDs[index].name);
michael@0 691 if (comparison == 0)
michael@0 692 break;
michael@0 693 if (comparison < 0)
michael@0 694 maximum = index - 1;
michael@0 695 else
michael@0 696 minimum = index + 1;
michael@0 697
michael@0 698 if (maximum < minimum) {
michael@0 699 // The DOCTYPE is not in our list, so it must be full_standards.
michael@0 700 aParseMode = eDTDMode_full_standards;
michael@0 701 aDocType = eHTML_Strict;
michael@0 702 return;
michael@0 703 }
michael@0 704 }
michael@0 705
michael@0 706 switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)
michael@0 707 ? kPublicIDs[index].mode_if_sysid
michael@0 708 : kPublicIDs[index].mode_if_no_sysid)
michael@0 709 {
michael@0 710 case PubIDInfo::eQuirks:
michael@0 711 aParseMode = eDTDMode_quirks;
michael@0 712 aDocType = eHTML_Quirks;
michael@0 713 break;
michael@0 714 case PubIDInfo::eAlmostStandards:
michael@0 715 aParseMode = eDTDMode_almost_standards;
michael@0 716 aDocType = eHTML_Strict;
michael@0 717 break;
michael@0 718 case PubIDInfo::eFullStandards:
michael@0 719 aParseMode = eDTDMode_full_standards;
michael@0 720 aDocType = eHTML_Strict;
michael@0 721 break;
michael@0 722 default:
michael@0 723 NS_NOTREACHED("no other cases!");
michael@0 724 }
michael@0 725 }
michael@0 726 } else {
michael@0 727 // badly formed DOCTYPE -> quirks
michael@0 728 aParseMode = eDTDMode_quirks;
michael@0 729 aDocType = eHTML_Quirks;
michael@0 730 }
michael@0 731 }
michael@0 732
michael@0 733 static void
michael@0 734 DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
michael@0 735 eParserDocType& aDocType, const nsACString& aMimeType)
michael@0 736 {
michael@0 737 if (aMimeType.EqualsLiteral(TEXT_HTML)) {
michael@0 738 DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
michael@0 739 } else if (nsContentUtils::IsPlainTextType(aMimeType)) {
michael@0 740 aDocType = ePlainText;
michael@0 741 aParseMode = eDTDMode_quirks;
michael@0 742 } else { // Some form of XML
michael@0 743 aDocType = eXML;
michael@0 744 aParseMode = eDTDMode_full_standards;
michael@0 745 }
michael@0 746 }
michael@0 747
michael@0 748 static nsIDTD*
michael@0 749 FindSuitableDTD(CParserContext& aParserContext)
michael@0 750 {
michael@0 751 // We always find a DTD.
michael@0 752 aParserContext.mAutoDetectStatus = ePrimaryDetect;
michael@0 753
michael@0 754 // Quick check for view source.
michael@0 755 NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource,
michael@0 756 "The old parser is not supposed to be used for View Source anymore.");
michael@0 757
michael@0 758 // Now see if we're parsing HTML (which, as far as we're concerned, simply
michael@0 759 // means "not XML").
michael@0 760 if (aParserContext.mDocType != eXML) {
michael@0 761 return new CNavDTD();
michael@0 762 }
michael@0 763
michael@0 764 // If we're here, then we'd better be parsing XML.
michael@0 765 NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
michael@0 766 return new nsExpatDriver();
michael@0 767 }
michael@0 768
michael@0 769 NS_IMETHODIMP
michael@0 770 nsParser::CancelParsingEvents()
michael@0 771 {
michael@0 772 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
michael@0 773 NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
michael@0 774 // Revoke the pending continue parsing event
michael@0 775 mContinueEvent = nullptr;
michael@0 776 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
michael@0 777 }
michael@0 778 return NS_OK;
michael@0 779 }
michael@0 780
michael@0 781 ////////////////////////////////////////////////////////////////////////
michael@0 782
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the
 * value of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of
 * EXPR1 (which could be success or failure).
 *
 * RV must be an assignable nsresult lvalue named by a plain identifier: the
 * macro declares a scratch variable called RV##__temp inside its own brace
 * scope.
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, etc.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
  nsresult RV##__temp = (EXPR1);                                              \
  RV = (EXPR2);                                                               \
  if (!NS_FAILED(RV)) {                                                       \
    /* EXPR2 succeeded, so report whatever EXPR1 produced. */                 \
    RV = RV##__temp;                                                          \
  }                                                                           \
}
michael@0 834
michael@0 835 /**
michael@0 836 * This gets called just prior to the model actually
michael@0 837 * being constructed. It's important to make this the
michael@0 838 * last thing that happens right before parsing, so we
michael@0 839 * can delay until the last moment the resolution of
michael@0 840 * which DTD to use (unless of course we're assigned one).
michael@0 841 */
michael@0 842 nsresult
michael@0 843 nsParser::WillBuildModel(nsString& aFilename)
michael@0 844 {
michael@0 845 if (!mParserContext)
michael@0 846 return kInvalidParserContext;
michael@0 847
michael@0 848 if (eUnknownDetect != mParserContext->mAutoDetectStatus)
michael@0 849 return NS_OK;
michael@0 850
michael@0 851 if (eDTDMode_unknown == mParserContext->mDTDMode ||
michael@0 852 eDTDMode_autodetect == mParserContext->mDTDMode) {
michael@0 853 char16_t buf[1025];
michael@0 854 nsFixedString theBuffer(buf, 1024, 0);
michael@0 855
michael@0 856 // Grab 1024 characters, starting at the first non-whitespace
michael@0 857 // character, to look for the doctype in.
michael@0 858 mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
michael@0 859 DetermineParseMode(theBuffer, mParserContext->mDTDMode,
michael@0 860 mParserContext->mDocType, mParserContext->mMimeType);
michael@0 861 }
michael@0 862
michael@0 863 NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
michael@0 864 "Clobbering DTD for non-root parser context!");
michael@0 865 mDTD = FindSuitableDTD(*mParserContext);
michael@0 866 NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
michael@0 867
michael@0 868 nsITokenizer* tokenizer;
michael@0 869 nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
michael@0 870 NS_ENSURE_SUCCESS(rv, rv);
michael@0 871
michael@0 872 rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
michael@0 873 nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
michael@0 874 // nsIDTD::WillBuildModel used to be responsible for calling
michael@0 875 // nsIContentSink::WillBuildModel, but that obligation isn't expressible
michael@0 876 // in the nsIDTD interface itself, so it's sounder and simpler to give that
michael@0 877 // responsibility back to the parser. The former behavior of the DTD was to
michael@0 878 // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
michael@0 879 // failure we should use sinkResult instead of rv, to preserve the old error
michael@0 880 // handling behavior of the DTD:
michael@0 881 return NS_FAILED(sinkResult) ? sinkResult : rv;
michael@0 882 }
michael@0 883
michael@0 884 /**
michael@0 885 * This gets called when the parser is done with its input.
michael@0 886 * Note that the parser may have been called recursively, so we
michael@0 887 * have to check for a prev. context before closing out the DTD/sink.
michael@0 888 */
michael@0 889 nsresult
michael@0 890 nsParser::DidBuildModel(nsresult anErrorCode)
michael@0 891 {
michael@0 892 nsresult result = anErrorCode;
michael@0 893
michael@0 894 if (IsComplete()) {
michael@0 895 if (mParserContext && !mParserContext->mPrevContext) {
michael@0 896 // Let sink know if we're about to end load because we've been terminated.
michael@0 897 // In that case we don't want it to run deferred scripts.
michael@0 898 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
michael@0 899 if (mDTD && mSink) {
michael@0 900 nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
michael@0 901 sinkResult = mSink->DidBuildModel(terminated);
michael@0 902 // nsIDTD::DidBuildModel used to be responsible for calling
michael@0 903 // nsIContentSink::DidBuildModel, but that obligation isn't expressible
michael@0 904 // in the nsIDTD interface itself, so it's sounder and simpler to give
michael@0 905 // that responsibility back to the parser. The former behavior of the
michael@0 906 // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
michael@0 907 // sink returns failure we should use sinkResult instead of dtdResult,
michael@0 908 // to preserve the old error handling behavior of the DTD:
michael@0 909 result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
michael@0 910 }
michael@0 911
michael@0 912 //Ref. to bug 61462.
michael@0 913 mParserContext->mRequest = 0;
michael@0 914 }
michael@0 915 }
michael@0 916
michael@0 917 return result;
michael@0 918 }
michael@0 919
michael@0 920 /**
michael@0 921 * This method adds a new parser context to the list,
michael@0 922 * pushing the current one to the next position.
michael@0 923 *
michael@0 924 * @param ptr to new context
michael@0 925 */
michael@0 926 void
michael@0 927 nsParser::PushContext(CParserContext& aContext)
michael@0 928 {
michael@0 929 NS_ASSERTION(aContext.mPrevContext == mParserContext,
michael@0 930 "Trying to push a context whose previous context differs from "
michael@0 931 "the current parser context.");
michael@0 932 mParserContext = &aContext;
michael@0 933 }
michael@0 934
michael@0 935 /**
michael@0 936 * This method pops the topmost context off the stack,
michael@0 937 * returning it to the user. The next context (if any)
michael@0 938 * becomes the current context.
michael@0 939 * @update gess7/22/98
michael@0 940 * @return prev. context
michael@0 941 */
michael@0 942 CParserContext*
michael@0 943 nsParser::PopContext()
michael@0 944 {
michael@0 945 CParserContext* oldContext = mParserContext;
michael@0 946 if (oldContext) {
michael@0 947 mParserContext = oldContext->mPrevContext;
michael@0 948 if (mParserContext) {
michael@0 949 // If the old context was blocked, propagate the blocked state
michael@0 950 // back to the new one. Also, propagate the stream listener state
michael@0 951 // but don't override onStop state to guarantee the call to DidBuildModel().
michael@0 952 if (mParserContext->mStreamListenerState != eOnStop) {
michael@0 953 mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
michael@0 954 }
michael@0 955 }
michael@0 956 }
michael@0 957 return oldContext;
michael@0 958 }
michael@0 959
michael@0 960 /**
michael@0 961 * Call this when you want control whether or not the parser will parse
michael@0 962 * and tokenize input (TRUE), or whether it just caches input to be
michael@0 963 * parsed later (FALSE).
michael@0 964 *
michael@0 965 * @param aState determines whether we parse/tokenize or just cache.
michael@0 966 * @return current state
michael@0 967 */
michael@0 968 void
michael@0 969 nsParser::SetUnusedInput(nsString& aBuffer)
michael@0 970 {
michael@0 971 mUnusedInput = aBuffer;
michael@0 972 }
michael@0 973
michael@0 974 /**
michael@0 975 * Call this when you want to *force* the parser to terminate the
michael@0 976 * parsing process altogether. This is binary -- so once you terminate
michael@0 977 * you can't resume without restarting altogether.
michael@0 978 */
michael@0 979 NS_IMETHODIMP
michael@0 980 nsParser::Terminate(void)
michael@0 981 {
michael@0 982 // We should only call DidBuildModel once, so don't do anything if this is
michael@0 983 // the second time that Terminate has been called.
michael@0 984 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0 985 return NS_OK;
michael@0 986 }
michael@0 987
michael@0 988 nsresult result = NS_OK;
michael@0 989 // XXX - [ until we figure out a way to break parser-sink circularity ]
michael@0 990 // Hack - Hold a reference until we are completely done...
michael@0 991 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0 992 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
michael@0 993
michael@0 994 // CancelParsingEvents must be called to avoid leaking the nsParser object
michael@0 995 // @see bug 108049
michael@0 996 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
michael@0 997 // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
michael@0 998 // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.
michael@0 999 CancelParsingEvents();
michael@0 1000
michael@0 1001 // If we got interrupted in the middle of a document.write, then we might
michael@0 1002 // have more than one parser context on our parsercontext stack. This has
michael@0 1003 // the effect of making DidBuildModel a no-op, meaning that we never call
michael@0 1004 // our sink's DidBuildModel and break the reference cycle, causing a leak.
michael@0 1005 // Since we're getting terminated, we manually clean up our context stack.
michael@0 1006 while (mParserContext && mParserContext->mPrevContext) {
michael@0 1007 CParserContext *prev = mParserContext->mPrevContext;
michael@0 1008 delete mParserContext;
michael@0 1009 mParserContext = prev;
michael@0 1010 }
michael@0 1011
michael@0 1012 if (mDTD) {
michael@0 1013 mDTD->Terminate();
michael@0 1014 DidBuildModel(result);
michael@0 1015 } else if (mSink) {
michael@0 1016 // We have no parser context or no DTD yet (so we got terminated before we
michael@0 1017 // got any data). Manually break the reference cycle with the sink.
michael@0 1018 result = mSink->DidBuildModel(true);
michael@0 1019 NS_ENSURE_SUCCESS(result, result);
michael@0 1020 }
michael@0 1021
michael@0 1022 return NS_OK;
michael@0 1023 }
michael@0 1024
michael@0 1025 NS_IMETHODIMP
michael@0 1026 nsParser::ContinueInterruptedParsing()
michael@0 1027 {
michael@0 1028 // If there are scripts executing, then the content sink is jumping the gun
michael@0 1029 // (probably due to a synchronous XMLHttpRequest) and will re-enable us
michael@0 1030 // later, see bug 460706.
michael@0 1031 if (!IsOkToProcessNetworkData()) {
michael@0 1032 return NS_OK;
michael@0 1033 }
michael@0 1034
michael@0 1035 // If the stream has already finished, there's a good chance
michael@0 1036 // that we might start closing things down when the parser
michael@0 1037 // is reenabled. To make sure that we're not deleted across
michael@0 1038 // the reenabling process, hold a reference to ourselves.
michael@0 1039 nsresult result=NS_OK;
michael@0 1040 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0 1041 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
michael@0 1042
michael@0 1043 #ifdef DEBUG
michael@0 1044 if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
michael@0 1045 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
michael@0 1046 }
michael@0 1047 #endif
michael@0 1048
michael@0 1049 bool isFinalChunk = mParserContext &&
michael@0 1050 mParserContext->mStreamListenerState == eOnStop;
michael@0 1051
michael@0 1052 mProcessingNetworkData = true;
michael@0 1053 if (mSink) {
michael@0 1054 mSink->WillParse();
michael@0 1055 }
michael@0 1056 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
michael@0 1057 mProcessingNetworkData = false;
michael@0 1058
michael@0 1059 if (result != NS_OK) {
michael@0 1060 result=mInternalState;
michael@0 1061 }
michael@0 1062
michael@0 1063 return result;
michael@0 1064 }
michael@0 1065
michael@0 1066 /**
michael@0 1067 * Stops parsing temporarily. That's it will prevent the
michael@0 1068 * parser from building up content model.
michael@0 1069 */
michael@0 1070 NS_IMETHODIMP_(void)
michael@0 1071 nsParser::BlockParser()
michael@0 1072 {
michael@0 1073 mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
michael@0 1074 }
michael@0 1075
michael@0 1076 /**
michael@0 1077 * Open up the parser for tokenization, building up content
michael@0 1078 * model..etc. However, this method does not resume parsing
michael@0 1079 * automatically. It's the callers' responsibility to restart
michael@0 1080 * the parsing engine.
michael@0 1081 */
michael@0 1082 NS_IMETHODIMP_(void)
michael@0 1083 nsParser::UnblockParser()
michael@0 1084 {
michael@0 1085 if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
michael@0 1086 mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
michael@0 1087 } else {
michael@0 1088 NS_WARNING("Trying to unblock an unblocked parser.");
michael@0 1089 }
michael@0 1090 }
michael@0 1091
michael@0 1092 NS_IMETHODIMP_(void)
michael@0 1093 nsParser::ContinueInterruptedParsingAsync()
michael@0 1094 {
michael@0 1095 mSink->ContinueInterruptedParsingAsync();
michael@0 1096 }
michael@0 1097
michael@0 1098 /**
michael@0 1099 * Call this to query whether the parser is enabled or not.
michael@0 1100 */
michael@0 1101 NS_IMETHODIMP_(bool)
michael@0 1102 nsParser::IsParserEnabled()
michael@0 1103 {
michael@0 1104 return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;
michael@0 1105 }
michael@0 1106
michael@0 1107 /**
michael@0 1108 * Call this to query whether the parser thinks it's done with parsing.
michael@0 1109 */
michael@0 1110 NS_IMETHODIMP_(bool)
michael@0 1111 nsParser::IsComplete()
michael@0 1112 {
michael@0 1113 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
michael@0 1114 }
michael@0 1115
michael@0 1116
michael@0 1117 void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
michael@0 1118 {
michael@0 1119 // Ignore any revoked continue events...
michael@0 1120 if (mContinueEvent != ev)
michael@0 1121 return;
michael@0 1122
michael@0 1123 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
michael@0 1124 mContinueEvent = nullptr;
michael@0 1125
michael@0 1126 NS_ASSERTION(IsOkToProcessNetworkData(),
michael@0 1127 "Interrupted in the middle of a script?");
michael@0 1128 ContinueInterruptedParsing();
michael@0 1129 }
michael@0 1130
michael@0 1131 bool
michael@0 1132 nsParser::IsInsertionPointDefined()
michael@0 1133 {
michael@0 1134 return false;
michael@0 1135 }
michael@0 1136
michael@0 1137 void
michael@0 1138 nsParser::BeginEvaluatingParserInsertedScript()
michael@0 1139 {
michael@0 1140 }
michael@0 1141
michael@0 1142 void
michael@0 1143 nsParser::EndEvaluatingParserInsertedScript()
michael@0 1144 {
michael@0 1145 }
michael@0 1146
michael@0 1147 void
michael@0 1148 nsParser::MarkAsNotScriptCreated(const char* aCommand)
michael@0 1149 {
michael@0 1150 }
michael@0 1151
michael@0 1152 bool
michael@0 1153 nsParser::IsScriptCreated()
michael@0 1154 {
michael@0 1155 return false;
michael@0 1156 }
michael@0 1157
michael@0 1158 /**
michael@0 1159 * This is the main controlling routine in the parsing process.
michael@0 1160 * Note that it may get called multiple times for the same scanner,
michael@0 1161 * since this is a pushed based system, and all the tokens may
michael@0 1162 * not have been consumed by the scanner during a given invocation
michael@0 1163 * of this method.
michael@0 1164 */
michael@0 1165 NS_IMETHODIMP
michael@0 1166 nsParser::Parse(nsIURI* aURL,
michael@0 1167 nsIRequestObserver* aListener,
michael@0 1168 void* aKey,
michael@0 1169 nsDTDMode aMode)
michael@0 1170 {
michael@0 1171
michael@0 1172 NS_PRECONDITION(aURL, "Error: Null URL given");
michael@0 1173
michael@0 1174 nsresult result=kBadURL;
michael@0 1175 mObserver = aListener;
michael@0 1176
michael@0 1177 if (aURL) {
michael@0 1178 nsAutoCString spec;
michael@0 1179 nsresult rv = aURL->GetSpec(spec);
michael@0 1180 if (rv != NS_OK) {
michael@0 1181 return rv;
michael@0 1182 }
michael@0 1183 NS_ConvertUTF8toUTF16 theName(spec);
michael@0 1184
michael@0 1185 nsScanner* theScanner = new nsScanner(theName, false);
michael@0 1186 CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
michael@0 1187 mCommand, aListener);
michael@0 1188 if (pc && theScanner) {
michael@0 1189 pc->mMultipart = true;
michael@0 1190 pc->mContextType = CParserContext::eCTURL;
michael@0 1191 pc->mDTDMode = aMode;
michael@0 1192 PushContext(*pc);
michael@0 1193
michael@0 1194 result = NS_OK;
michael@0 1195 } else {
michael@0 1196 result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
michael@0 1197 }
michael@0 1198 }
michael@0 1199 return result;
michael@0 1200 }
michael@0 1201
michael@0 1202 /**
michael@0 1203 * Used by XML fragment parsing below.
michael@0 1204 *
michael@0 1205 * @param aSourceBuffer contains a string-full of real content
michael@0 1206 */
michael@0 1207 nsresult
michael@0 1208 nsParser::Parse(const nsAString& aSourceBuffer,
michael@0 1209 void* aKey,
michael@0 1210 bool aLastCall)
michael@0 1211 {
michael@0 1212 nsresult result = NS_OK;
michael@0 1213
michael@0 1214 // Don't bother if we're never going to parse this.
michael@0 1215 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0 1216 return result;
michael@0 1217 }
michael@0 1218
michael@0 1219 if (!aLastCall && aSourceBuffer.IsEmpty()) {
michael@0 1220 // Nothing is being passed to the parser so return
michael@0 1221 // immediately. mUnusedInput will get processed when
michael@0 1222 // some data is actually passed in.
michael@0 1223 // But if this is the last call, make sure to finish up
michael@0 1224 // stuff correctly.
michael@0 1225 return result;
michael@0 1226 }
michael@0 1227
michael@0 1228 // Maintain a reference to ourselves so we don't go away
michael@0 1229 // till we're completely done.
michael@0 1230 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0 1231
michael@0 1232 if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
michael@0 1233 // Note: The following code will always find the parser context associated
michael@0 1234 // with the given key, even if that context has been suspended (e.g., for
michael@0 1235 // another document.write call). This doesn't appear to be exactly what IE
michael@0 1236 // does in the case where this happens, but this makes more sense.
michael@0 1237 CParserContext* pc = mParserContext;
michael@0 1238 while (pc && pc->mKey != aKey) {
michael@0 1239 pc = pc->mPrevContext;
michael@0 1240 }
michael@0 1241
michael@0 1242 if (!pc) {
michael@0 1243 // Only make a new context if we don't have one, OR if we do, but has a
michael@0 1244 // different context key.
michael@0 1245 nsScanner* theScanner = new nsScanner(mUnusedInput);
michael@0 1246 NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
michael@0 1247
michael@0 1248 eAutoDetectResult theStatus = eUnknownDetect;
michael@0 1249
michael@0 1250 if (mParserContext &&
michael@0 1251 mParserContext->mMimeType.EqualsLiteral("application/xml")) {
michael@0 1252 // Ref. Bug 90379
michael@0 1253 NS_ASSERTION(mDTD, "How come the DTD is null?");
michael@0 1254
michael@0 1255 if (mParserContext) {
michael@0 1256 theStatus = mParserContext->mAutoDetectStatus;
michael@0 1257 // Added this to fix bug 32022.
michael@0 1258 }
michael@0 1259 }
michael@0 1260
michael@0 1261 pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
michael@0 1262 0, theStatus, aLastCall);
michael@0 1263 NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
michael@0 1264
michael@0 1265 PushContext(*pc);
michael@0 1266
michael@0 1267 pc->mMultipart = !aLastCall; // By default
michael@0 1268 if (pc->mPrevContext) {
michael@0 1269 pc->mMultipart |= pc->mPrevContext->mMultipart;
michael@0 1270 }
michael@0 1271
michael@0 1272 // Start fix bug 40143
michael@0 1273 if (pc->mMultipart) {
michael@0 1274 pc->mStreamListenerState = eOnDataAvail;
michael@0 1275 if (pc->mScanner) {
michael@0 1276 pc->mScanner->SetIncremental(true);
michael@0 1277 }
michael@0 1278 } else {
michael@0 1279 pc->mStreamListenerState = eOnStop;
michael@0 1280 if (pc->mScanner) {
michael@0 1281 pc->mScanner->SetIncremental(false);
michael@0 1282 }
michael@0 1283 }
michael@0 1284 // end fix for 40143
michael@0 1285
michael@0 1286 pc->mContextType=CParserContext::eCTString;
michael@0 1287 pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
michael@0 1288 pc->mDTDMode = eDTDMode_full_standards;
michael@0 1289
michael@0 1290 mUnusedInput.Truncate();
michael@0 1291
michael@0 1292 pc->mScanner->Append(aSourceBuffer);
michael@0 1293 // Do not interrupt document.write() - bug 95487
michael@0 1294 result = ResumeParse(false, false, false);
michael@0 1295 } else {
michael@0 1296 pc->mScanner->Append(aSourceBuffer);
michael@0 1297 if (!pc->mPrevContext) {
michael@0 1298 // Set stream listener state to eOnStop, on the final context - Fix 68160,
michael@0 1299 // to guarantee DidBuildModel() call - Fix 36148
michael@0 1300 if (aLastCall) {
michael@0 1301 pc->mStreamListenerState = eOnStop;
michael@0 1302 pc->mScanner->SetIncremental(false);
michael@0 1303 }
michael@0 1304
michael@0 1305 if (pc == mParserContext) {
michael@0 1306 // If pc is not mParserContext, then this call to ResumeParse would
michael@0 1307 // do the wrong thing and try to continue parsing using
michael@0 1308 // mParserContext. We need to wait to actually resume parsing on pc.
michael@0 1309 ResumeParse(false, false, false);
michael@0 1310 }
michael@0 1311 }
michael@0 1312 }
michael@0 1313 }
michael@0 1314
michael@0 1315 return result;
michael@0 1316 }
michael@0 1317
michael@0 1318 NS_IMETHODIMP
michael@0 1319 nsParser::ParseFragment(const nsAString& aSourceBuffer,
michael@0 1320 nsTArray<nsString>& aTagStack)
michael@0 1321 {
michael@0 1322 nsresult result = NS_OK;
michael@0 1323 nsAutoString theContext;
michael@0 1324 uint32_t theCount = aTagStack.Length();
michael@0 1325 uint32_t theIndex = 0;
michael@0 1326
michael@0 1327 // Disable observers for fragments
michael@0 1328 mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0 1329
michael@0 1330 for (theIndex = 0; theIndex < theCount; theIndex++) {
michael@0 1331 theContext.AppendLiteral("<");
michael@0 1332 theContext.Append(aTagStack[theCount - theIndex - 1]);
michael@0 1333 theContext.AppendLiteral(">");
michael@0 1334 }
michael@0 1335
michael@0 1336 if (theCount == 0) {
michael@0 1337 // Ensure that the buffer is not empty. Because none of the DTDs care
michael@0 1338 // about leading whitespace, this doesn't change the result.
michael@0 1339 theContext.AssignLiteral(" ");
michael@0 1340 }
michael@0 1341
michael@0 1342 // First, parse the context to build up the DTD's tag stack. Note that we
michael@0 1343 // pass false for the aLastCall parameter.
michael@0 1344 result = Parse(theContext,
michael@0 1345 (void*)&theContext,
michael@0 1346 false);
michael@0 1347 if (NS_FAILED(result)) {
michael@0 1348 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0 1349 return result;
michael@0 1350 }
michael@0 1351
michael@0 1352 if (!mSink) {
michael@0 1353 // Parse must have failed in the XML case and so the sink was killed.
michael@0 1354 return NS_ERROR_HTMLPARSER_STOPPARSING;
michael@0 1355 }
michael@0 1356
michael@0 1357 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
michael@0 1358 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
michael@0 1359
michael@0 1360 fragSink->WillBuildContent();
michael@0 1361 // Now, parse the actual content. Note that this is the last call
michael@0 1362 // for HTML content, but for XML, we will want to build and parse
michael@0 1363 // the end tags. However, if tagStack is empty, it's the last call
michael@0 1364 // for XML as well.
michael@0 1365 if (theCount == 0) {
michael@0 1366 result = Parse(aSourceBuffer,
michael@0 1367 &theContext,
michael@0 1368 true);
michael@0 1369 fragSink->DidBuildContent();
michael@0 1370 } else {
michael@0 1371 // Add an end tag chunk, so expat will read the whole source buffer,
michael@0 1372 // and not worry about ']]' etc.
michael@0 1373 result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
michael@0 1374 &theContext,
michael@0 1375 false);
michael@0 1376 fragSink->DidBuildContent();
michael@0 1377
michael@0 1378 if (NS_SUCCEEDED(result)) {
michael@0 1379 nsAutoString endContext;
michael@0 1380 for (theIndex = 0; theIndex < theCount; theIndex++) {
michael@0 1381 // we already added an end tag chunk above
michael@0 1382 if (theIndex > 0) {
michael@0 1383 endContext.AppendLiteral("</");
michael@0 1384 }
michael@0 1385
michael@0 1386 nsString& thisTag = aTagStack[theIndex];
michael@0 1387 // was there an xmlns=?
michael@0 1388 int32_t endOfTag = thisTag.FindChar(char16_t(' '));
michael@0 1389 if (endOfTag == -1) {
michael@0 1390 endContext.Append(thisTag);
michael@0 1391 } else {
michael@0 1392 endContext.Append(Substring(thisTag,0,endOfTag));
michael@0 1393 }
michael@0 1394
michael@0 1395 endContext.AppendLiteral(">");
michael@0 1396 }
michael@0 1397
michael@0 1398 result = Parse(endContext,
michael@0 1399 &theContext,
michael@0 1400 true);
michael@0 1401 }
michael@0 1402 }
michael@0 1403
michael@0 1404 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0 1405
michael@0 1406 return result;
michael@0 1407 }
michael@0 1408
michael@0 1409 /**
michael@0 1410 * This routine is called to cause the parser to continue parsing its
michael@0 1411 * underlying stream. This call allows the parse process to happen in
michael@0 1412 * chunks, such as when the content is push based, and we need to parse in
michael@0 1413 * pieces.
michael@0 1414 *
michael@0 1415 * An interesting change in how the parser gets used has led us to add extra
michael@0 1416 * processing to this method. The case occurs when the parser is blocked in
michael@0 1417 * one context, and gets a parse(string) call in another context. In this
michael@0 1418 * case, the parserContexts are linked. No problem.
michael@0 1419 *
michael@0 1420 * The problem is that Parse(string) assumes that it can proceed unabated,
michael@0 1421 * but if the parser is already blocked that assumption is false. So we
michael@0 1422 * needed to add a mechanism here to allow the parser to continue to process
michael@0 1423 * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
michael@0 1424 * out of contexts.
michael@0 1425 *
michael@0 1426 * @param allowIteration : set to true if non-script resumption is requested
michael@0 1427 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
michael@0 1428 * @param aCanInterrupt : accepted for callers; not referenced in this body.
michael@0 1429 * @return NS_OK when parsing is skipped, blocked, stopped or interrupted; otherwise the failing or final result code.
michael@0 1430 */
michael@0 1431 nsresult
michael@0 1432 nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
michael@0 1433 bool aCanInterrupt)
michael@0 1434 {
michael@0 1435 nsresult result = NS_OK;
michael@0 1436
michael@0 1437 if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
michael@0 1438 mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { // otherwise nothing is done and NS_OK is returned
michael@0 1439
michael@0 1440 result = WillBuildModel(mParserContext->mScanner->GetFilename());
michael@0 1441 if (NS_FAILED(result)) {
michael@0 1442 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE; // later passes skip Tokenize() while this flag is clear
michael@0 1443 return result;
michael@0 1444 }
michael@0 1445
michael@0 1446 if (mDTD) {
michael@0 1447 mSink->WillResume();
michael@0 1448 bool theIterationIsOk = true;
michael@0 1449
michael@0 1450 while (result == NS_OK && theIterationIsOk) {
michael@0 1451 if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
michael@0 1452 // -- Ref: Bug# 22485 --
michael@0 1453 // Insert the unused input into the source buffer
michael@0 1454 // as if it was read from the input stream.
michael@0 1455 // Adding UngetReadable() per vidur!!
michael@0 1456 mParserContext->mScanner->UngetReadable(mUnusedInput);
michael@0 1457 mUnusedInput.Truncate(0);
michael@0 1458 }
michael@0 1459
michael@0 1460 // Only allow parsing to be interrupted in the subsequent call to
michael@0 1461 // build model.
michael@0 1462 nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
michael@0 1463 ? Tokenize(aIsFinalChunk)
michael@0 1464 : NS_OK;
michael@0 1465 result = BuildModel();
michael@0 1466
michael@0 1467 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
michael@0 1468 PostContinueEvent(); // interrupted on the final chunk: request a continuation
michael@0 1469 }
michael@0 1470
michael@0 1471 theIterationIsOk = theTokenizerResult != kEOF &&
michael@0 1472 result != NS_ERROR_HTMLPARSER_INTERRUPTED;
michael@0 1473
michael@0 1474 // Make sure not to stop parsing too early. Therefore, before shutting
michael@0 1475 // down the parser, it's important to check whether the input buffer
michael@0 1476 // has been scanned to completion (theTokenizerResult should be kEOF).
michael@0 1477 // kEOF -> End of buffer.
michael@0 1478
michael@0 1479 // If we're told to block the parser, we disable all further parsing
michael@0 1480 // (and cache any data coming in) until the parser is re-enabled.
michael@0 1481 if (NS_ERROR_HTMLPARSER_BLOCK == result) {
michael@0 1482 mSink->WillInterrupt();
michael@0 1483 if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
michael@0 1484 // If we were blocked by a recursive invocation, don't re-block.
michael@0 1485 BlockParser();
michael@0 1486 }
michael@0 1487 return NS_OK; // blocking is reported to the caller as success
michael@0 1488 }
michael@0 1489 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
michael@0 1490 // Note: Parser Terminate() calls DidBuildModel.
michael@0 1491 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0 1492 DidBuildModel(mStreamStatus);
michael@0 1493 mInternalState = result;
michael@0 1494 }
michael@0 1495
michael@0 1496 return NS_OK; // a stop request is also reported as success
michael@0 1497 }
michael@0 1498 if ((NS_OK == result && theTokenizerResult == kEOF) ||
michael@0 1499 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
michael@0 1500 bool theContextIsStringBased =
michael@0 1501 CParserContext::eCTString == mParserContext->mContextType;
michael@0 1502
michael@0 1503 if (mParserContext->mStreamListenerState == eOnStop ||
michael@0 1504 !mParserContext->mMultipart || theContextIsStringBased) {
michael@0 1505 if (!mParserContext->mPrevContext) {
michael@0 1506 if (mParserContext->mStreamListenerState == eOnStop) { // root context has reached eOnStop: finish the model
michael@0 1507 DidBuildModel(mStreamStatus);
michael@0 1508 return NS_OK;
michael@0 1509 }
michael@0 1510 } else {
michael@0 1511 CParserContext* theContext = PopContext();
michael@0 1512 if (theContext) {
michael@0 1513 theIterationIsOk = allowIteration && theContextIsStringBased;
michael@0 1514 if (theContext->mCopyUnused) {
michael@0 1515 theContext->mScanner->CopyUnusedData(mUnusedInput); // picked up by UngetReadable() at the top of the next pass
michael@0 1516 }
michael@0 1517
michael@0 1518 delete theContext;
michael@0 1519 }
michael@0 1520
michael@0 1521 result = mInternalState;
michael@0 1522 aIsFinalChunk = mParserContext &&
michael@0 1523 mParserContext->mStreamListenerState == eOnStop;
michael@0 1524 // ...then intentionally fall through to mSink->WillInterrupt()...
michael@0 1525 }
michael@0 1526 }
michael@0 1527 }
michael@0 1528
michael@0 1529 if (theTokenizerResult == kEOF ||
michael@0 1530 result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
michael@0 1531 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
michael@0 1532 mSink->WillInterrupt();
michael@0 1533 }
michael@0 1534 }
michael@0 1535 } else {
michael@0 1536 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD; // no DTD available after WillBuildModel()
michael@0 1537 }
michael@0 1538 }
michael@0 1539
michael@0 1540 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
michael@0 1541 }
michael@0 1542
michael@0 1543 /**
michael@0 1544 * This is where we loop over the tokens created in the
michael@0 1545 * tokenization phase, and try to make sense out of them.
michael@0 1546 */
michael@0 1547 nsresult
michael@0 1548 nsParser::BuildModel()
michael@0 1549 {
michael@0 1550 nsITokenizer* theTokenizer = nullptr;
michael@0 1551
michael@0 1552 nsresult result = NS_OK;
michael@0 1553 if (mParserContext) {
michael@0 1554 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
michael@0 1555 }
michael@0 1556
michael@0 1557 if (NS_SUCCEEDED(result)) {
michael@0 1558 if (mDTD) {
michael@0 1559 result = mDTD->BuildModel(theTokenizer, mSink);
michael@0 1560 }
michael@0 1561 } else {
michael@0 1562 mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
michael@0 1563 }
michael@0 1564 return result;
michael@0 1565 }
michael@0 1566
michael@0 1567 /*******************************************************************
michael@0 1568 These methods are used to talk to the netlib system...
michael@0 1569 *******************************************************************/
michael@0 1570
michael@0 1571 nsresult
michael@0 1572 nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
michael@0 1573 {
michael@0 1574 NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
michael@0 1575 "Parser's nsIStreamListener API was not setup "
michael@0 1576 "correctly in constructor.");
michael@0 1577 if (mObserver) {
michael@0 1578 mObserver->OnStartRequest(request, aContext);
michael@0 1579 }
michael@0 1580 mParserContext->mStreamListenerState = eOnStart;
michael@0 1581 mParserContext->mAutoDetectStatus = eUnknownDetect;
michael@0 1582 mParserContext->mRequest = request;
michael@0 1583
michael@0 1584 NS_ASSERTION(!mParserContext->mPrevContext,
michael@0 1585 "Clobbering DTD for non-root parser context!");
michael@0 1586 mDTD = nullptr;
michael@0 1587
michael@0 1588 nsresult rv;
michael@0 1589 nsAutoCString contentType;
michael@0 1590 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
michael@0 1591 if (channel) {
michael@0 1592 rv = channel->GetContentType(contentType);
michael@0 1593 if (NS_SUCCEEDED(rv)) {
michael@0 1594 mParserContext->SetMimeType(contentType);
michael@0 1595 }
michael@0 1596 }
michael@0 1597
michael@0 1598 rv = NS_OK;
michael@0 1599
michael@0 1600 return rv;
michael@0 1601 }
michael@0 1602
michael@0 1603 static bool
michael@0 1604 ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
michael@0 1605 nsCString& oCharset)
michael@0 1606 {
michael@0 1607 // This code is rather pointless to have. Might as well reuse expat as
michael@0 1608 // seen in nsHtml5StreamParser. -- hsivonen
michael@0 1609 oCharset.Truncate();
michael@0 1610 if ((aLen >= 5) &&
michael@0 1611 ('<' == aBytes[0]) &&
michael@0 1612 ('?' == aBytes[1]) &&
michael@0 1613 ('x' == aBytes[2]) &&
michael@0 1614 ('m' == aBytes[3]) &&
michael@0 1615 ('l' == aBytes[4])) {
michael@0 1616 int32_t i;
michael@0 1617 bool versionFound = false, encodingFound = false;
michael@0 1618 for (i = 6; i < aLen && !encodingFound; ++i) {
michael@0 1619 // end of XML declaration?
michael@0 1620 if ((((char*) aBytes)[i] == '?') &&
michael@0 1621 ((i + 1) < aLen) &&
michael@0 1622 (((char*) aBytes)[i + 1] == '>')) {
michael@0 1623 break;
michael@0 1624 }
michael@0 1625 // Version is required.
michael@0 1626 if (!versionFound) {
michael@0 1627 // Want to avoid string comparisons, hence looking for 'n'
michael@0 1628 // and only if found check the string leading to it. Not
michael@0 1629 // foolproof, but fast.
michael@0 1630 // The shortest string allowed before this is (strlen==13):
michael@0 1631 // <?xml version
michael@0 1632 if ((((char*) aBytes)[i] == 'n') &&
michael@0 1633 (i >= 12) &&
michael@0 1634 (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) {
michael@0 1635 // Fast forward through version
michael@0 1636 char q = 0;
michael@0 1637 for (++i; i < aLen; ++i) {
michael@0 1638 char qi = ((char*) aBytes)[i];
michael@0 1639 if (qi == '\'' || qi == '"') {
michael@0 1640 if (q && q == qi) {
michael@0 1641 // ending quote
michael@0 1642 versionFound = true;
michael@0 1643 break;
michael@0 1644 } else {
michael@0 1645 // Starting quote
michael@0 1646 q = qi;
michael@0 1647 }
michael@0 1648 }
michael@0 1649 }
michael@0 1650 }
michael@0 1651 } else {
michael@0 1652 // encoding must follow version
michael@0 1653 // Want to avoid string comparisons, hence looking for 'g'
michael@0 1654 // and only if found check the string leading to it. Not
michael@0 1655 // foolproof, but fast.
michael@0 1656 // The shortest allowed string before this (strlen==26):
michael@0 1657 // <?xml version="1" encoding
michael@0 1658 if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp(
michael@0 1659 "encodin", (char*) (aBytes + i - 7), 7))) {
michael@0 1660 int32_t encStart = 0;
michael@0 1661 char q = 0;
michael@0 1662 for (++i; i < aLen; ++i) {
michael@0 1663 char qi = ((char*) aBytes)[i];
michael@0 1664 if (qi == '\'' || qi == '"') {
michael@0 1665 if (q && q == qi) {
michael@0 1666 int32_t count = i - encStart;
michael@0 1667 // encoding value is invalid if it is UTF-16
michael@0 1668 if (count > 0 && PL_strncasecmp("UTF-16",
michael@0 1669 (char*) (aBytes + encStart), count)) {
michael@0 1670 oCharset.Assign((char*) (aBytes + encStart), count);
michael@0 1671 }
michael@0 1672 encodingFound = true;
michael@0 1673 break;
michael@0 1674 } else {
michael@0 1675 encStart = i + 1;
michael@0 1676 q = qi;
michael@0 1677 }
michael@0 1678 }
michael@0 1679 }
michael@0 1680 }
michael@0 1681 } // if (!versionFound)
michael@0 1682 } // for
michael@0 1683 }
michael@0 1684 return !oCharset.IsEmpty();
michael@0 1685 }
michael@0 1686
michael@0 1687 inline const char
michael@0 1688 GetNextChar(nsACString::const_iterator& aStart,
michael@0 1689 nsACString::const_iterator& aEnd)
michael@0 1690 {
michael@0 1691 NS_ASSERTION(aStart != aEnd, "end of buffer");
michael@0 1692 return (++aStart != aEnd) ? *aStart : '\0';
michael@0 1693 }
michael@0 1694
michael@0 1695 static NS_METHOD
michael@0 1696 NoOpParserWriteFunc(nsIInputStream* in,
michael@0 1697 void* closure,
michael@0 1698 const char* fromRawSegment,
michael@0 1699 uint32_t toOffset,
michael@0 1700 uint32_t count,
michael@0 1701 uint32_t *writeCount)
michael@0 1702 {
michael@0 1703 *writeCount = count;
michael@0 1704 return NS_OK;
michael@0 1705 }
michael@0 1706
michael@0 1707 typedef struct {
michael@0 1708 bool mNeedCharsetCheck;
michael@0 1709 nsParser* mParser;
michael@0 1710 nsScanner* mScanner;
michael@0 1711 nsIRequest* mRequest;
michael@0 1712 } ParserWriteStruct;
michael@0 1713
michael@0 1714 /*
michael@0 1715 * This function is invoked as a result of a call to a stream's
michael@0 1716 * ReadSegments() method. It is called for each contiguous buffer
michael@0 1717 * of data in the underlying stream or pipe. Using ReadSegments
michael@0 1718 * allows us to avoid copying data to read out of the stream.
michael@0 1719 */
michael@0 1720 static NS_METHOD
michael@0 1721 ParserWriteFunc(nsIInputStream* in,
michael@0 1722 void* closure,
michael@0 1723 const char* fromRawSegment,
michael@0 1724 uint32_t toOffset,
michael@0 1725 uint32_t count,
michael@0 1726 uint32_t *writeCount)
michael@0 1727 {
michael@0 1728 nsresult result;
michael@0 1729 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
michael@0 1730 const unsigned char* buf =
michael@0 1731 reinterpret_cast<const unsigned char*> (fromRawSegment);
michael@0 1732 uint32_t theNumRead = count;
michael@0 1733
michael@0 1734 if (!pws) {
michael@0 1735 return NS_ERROR_FAILURE;
michael@0 1736 }
michael@0 1737
michael@0 1738 if (pws->mNeedCharsetCheck) {
michael@0 1739 pws->mNeedCharsetCheck = false;
michael@0 1740 int32_t source;
michael@0 1741 nsAutoCString preferred;
michael@0 1742 nsAutoCString maybePrefer;
michael@0 1743 pws->mParser->GetDocumentCharset(preferred, source);
michael@0 1744
michael@0 1745 // This code was bogus when I found it. It expects the BOM or the XML
michael@0 1746 // declaration to be entirely in the first network buffer. -- hsivonen
michael@0 1747 if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) {
michael@0 1748 // The decoder will swallow the BOM. The UTF-16 will re-sniff for
michael@0 1749 // endianness. The value of preferred is now either "UTF-8" or "UTF-16".
michael@0 1750 preferred.Assign(maybePrefer);
michael@0 1751 source = kCharsetFromByteOrderMark;
michael@0 1752 } else if (source < kCharsetFromChannel) {
michael@0 1753 nsAutoCString declCharset;
michael@0 1754
michael@0 1755 if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
michael@0 1756 if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) {
michael@0 1757 preferred.Assign(maybePrefer);
michael@0 1758 source = kCharsetFromMetaTag;
michael@0 1759 }
michael@0 1760 }
michael@0 1761 }
michael@0 1762
michael@0 1763 pws->mParser->SetDocumentCharset(preferred, source);
michael@0 1764 pws->mParser->SetSinkCharset(preferred);
michael@0 1765
michael@0 1766 }
michael@0 1767
michael@0 1768 result = pws->mScanner->Append(fromRawSegment, theNumRead, pws->mRequest);
michael@0 1769 if (NS_SUCCEEDED(result)) {
michael@0 1770 *writeCount = count;
michael@0 1771 }
michael@0 1772
michael@0 1773 return result;
michael@0 1774 }
michael@0 1775
michael@0 1776 nsresult
michael@0 1777 nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
michael@0 1778 nsIInputStream *pIStream, uint64_t sourceOffset,
michael@0 1779 uint32_t aLength)
michael@0 1780 {
michael@0 1781 NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
michael@0 1782 eOnDataAvail == mParserContext->mStreamListenerState),
michael@0 1783 "Error: OnStartRequest() must be called before OnDataAvailable()");
michael@0 1784 NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
michael@0 1785 "Must have a buffered input stream");
michael@0 1786
michael@0 1787 nsresult rv = NS_OK;
michael@0 1788
michael@0 1789 if (mIsAboutBlank) {
michael@0 1790 MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
michael@0 1791 // ... but if an extension tries to feed us data for about:blank in a
michael@0 1792 // release build, silently ignore the data.
michael@0 1793 uint32_t totalRead;
michael@0 1794 rv = pIStream->ReadSegments(NoOpParserWriteFunc,
michael@0 1795 nullptr,
michael@0 1796 aLength,
michael@0 1797 &totalRead);
michael@0 1798 return rv;
michael@0 1799 }
michael@0 1800
michael@0 1801 CParserContext *theContext = mParserContext;
michael@0 1802
michael@0 1803 while (theContext && theContext->mRequest != request) {
michael@0 1804 theContext = theContext->mPrevContext;
michael@0 1805 }
michael@0 1806
michael@0 1807 if (theContext) {
michael@0 1808 theContext->mStreamListenerState = eOnDataAvail;
michael@0 1809
michael@0 1810 if (eInvalidDetect == theContext->mAutoDetectStatus) {
michael@0 1811 if (theContext->mScanner) {
michael@0 1812 nsScannerIterator iter;
michael@0 1813 theContext->mScanner->EndReading(iter);
michael@0 1814 theContext->mScanner->SetPosition(iter, true);
michael@0 1815 }
michael@0 1816 }
michael@0 1817
michael@0 1818 uint32_t totalRead;
michael@0 1819 ParserWriteStruct pws;
michael@0 1820 pws.mNeedCharsetCheck = true;
michael@0 1821 pws.mParser = this;
michael@0 1822 pws.mScanner = theContext->mScanner;
michael@0 1823 pws.mRequest = request;
michael@0 1824
michael@0 1825 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
michael@0 1826 if (NS_FAILED(rv)) {
michael@0 1827 return rv;
michael@0 1828 }
michael@0 1829
michael@0 1830 // Don't bother to start parsing until we've seen some
michael@0 1831 // non-whitespace data
michael@0 1832 if (IsOkToProcessNetworkData() &&
michael@0 1833 theContext->mScanner->FirstNonWhitespacePosition() >= 0) {
michael@0 1834 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0 1835 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
michael@0 1836 mProcessingNetworkData = true;
michael@0 1837 if (mSink) {
michael@0 1838 mSink->WillParse();
michael@0 1839 }
michael@0 1840 rv = ResumeParse();
michael@0 1841 mProcessingNetworkData = false;
michael@0 1842 }
michael@0 1843 } else {
michael@0 1844 rv = NS_ERROR_UNEXPECTED;
michael@0 1845 }
michael@0 1846
michael@0 1847 return rv;
michael@0 1848 }
michael@0 1849
michael@0 1850 /**
michael@0 1851 * This is called by the networking library once the last block of data
michael@0 1852 * has been collected from the net.
michael@0 1853 */
michael@0 1854 nsresult
michael@0 1855 nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
michael@0 1856 nsresult status)
michael@0 1857 {
michael@0 1858 nsresult rv = NS_OK;
michael@0 1859
michael@0 1860 CParserContext *pc = mParserContext;
michael@0 1861 while (pc) {
michael@0 1862 if (pc->mRequest == request) {
michael@0 1863 pc->mStreamListenerState = eOnStop;
michael@0 1864 pc->mScanner->SetIncremental(false);
michael@0 1865 break;
michael@0 1866 }
michael@0 1867
michael@0 1868 pc = pc->mPrevContext;
michael@0 1869 }
michael@0 1870
michael@0 1871 mStreamStatus = status;
michael@0 1872
michael@0 1873 if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
michael@0 1874 mProcessingNetworkData = true;
michael@0 1875 if (mSink) {
michael@0 1876 mSink->WillParse();
michael@0 1877 }
michael@0 1878 rv = ResumeParse(true, true);
michael@0 1879 mProcessingNetworkData = false;
michael@0 1880 }
michael@0 1881
michael@0 1882 // If the parser isn't enabled, we don't finish parsing till
michael@0 1883 // it is reenabled.
michael@0 1884
michael@0 1885
michael@0 1886 // XXX Should we wait to notify our observers as well if the
michael@0 1887 // parser isn't yet enabled?
michael@0 1888 if (mObserver) {
michael@0 1889 mObserver->OnStopRequest(request, aContext, status);
michael@0 1890 }
michael@0 1891
michael@0 1892 return rv;
michael@0 1893 }
michael@0 1894
michael@0 1895
michael@0 1896 /*******************************************************************
michael@0 1897 Here come the tokenization methods...
michael@0 1898 *******************************************************************/
michael@0 1899
michael@0 1900
michael@0 1901 /**
michael@0 1902 * Part of the code sandwich, this gets called right before
michael@0 1903 * the tokenization process begins. The main reason for
michael@0 1904 * this call is to allow the delegate to do initialization.
michael@0 1905 */
michael@0 1906 bool
michael@0 1907 nsParser::WillTokenize(bool aIsFinalChunk)
michael@0 1908 {
michael@0 1909 if (!mParserContext) {
michael@0 1910 return true;
michael@0 1911 }
michael@0 1912
michael@0 1913 nsITokenizer* theTokenizer;
michael@0 1914 nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
michael@0 1915 NS_ENSURE_SUCCESS(result, false);
michael@0 1916 return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
michael@0 1917 }
michael@0 1918
michael@0 1919
michael@0 1920 /**
michael@0 1921 * This is the primary control routine to consume tokens.
michael@0 1922 * It iteratively consumes tokens until an error occurs or
michael@0 1923 * you run out of data.
michael@0 1924 */
michael@0 1925 nsresult nsParser::Tokenize(bool aIsFinalChunk)
michael@0 1926 {
michael@0 1927 nsITokenizer* theTokenizer;
michael@0 1928
michael@0 1929 nsresult result = NS_ERROR_NOT_AVAILABLE;
michael@0 1930 if (mParserContext) {
michael@0 1931 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
michael@0 1932 }
michael@0 1933
michael@0 1934 if (NS_SUCCEEDED(result)) {
michael@0 1935 bool flushTokens = false;
michael@0 1936
michael@0 1937 bool killSink = false;
michael@0 1938
michael@0 1939 WillTokenize(aIsFinalChunk);
michael@0 1940 while (NS_SUCCEEDED(result)) {
michael@0 1941 mParserContext->mScanner->Mark();
michael@0 1942 result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
michael@0 1943 flushTokens);
michael@0 1944 if (NS_FAILED(result)) {
michael@0 1945 mParserContext->mScanner->RewindToMark();
michael@0 1946 if (kEOF == result){
michael@0 1947 break;
michael@0 1948 }
michael@0 1949 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
michael@0 1950 killSink = true;
michael@0 1951 result = Terminate();
michael@0 1952 break;
michael@0 1953 }
michael@0 1954 } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
michael@0 1955 // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
michael@0 1956 // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
michael@0 1957 // Also remember to update the marked position.
michael@0 1958 mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
michael@0 1959 mParserContext->mScanner->Mark();
michael@0 1960 break;
michael@0 1961 }
michael@0 1962 }
michael@0 1963
michael@0 1964 if (killSink) {
michael@0 1965 mSink = nullptr;
michael@0 1966 }
michael@0 1967 } else {
michael@0 1968 result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
michael@0 1969 }
michael@0 1970
michael@0 1971 return result;
michael@0 1972 }
michael@0 1973
michael@0 1974 /**
michael@0 1975 * Get the channel associated with this parser
michael@0 1976 *
michael@0 1977 * @param aChannel out param that will contain the result
michael@0 1978 * @return NS_OK if successful
michael@0 1979 */
michael@0 1980 NS_IMETHODIMP
michael@0 1981 nsParser::GetChannel(nsIChannel** aChannel)
michael@0 1982 {
michael@0 1983 nsresult result = NS_ERROR_NOT_AVAILABLE;
michael@0 1984 if (mParserContext && mParserContext->mRequest) {
michael@0 1985 result = CallQueryInterface(mParserContext->mRequest, aChannel);
michael@0 1986 }
michael@0 1987 return result;
michael@0 1988 }
michael@0 1989
michael@0 1990 /**
michael@0 1991 * Get the DTD associated with this parser
michael@0 1992 */
michael@0 1993 NS_IMETHODIMP
michael@0 1994 nsParser::GetDTD(nsIDTD** aDTD)
michael@0 1995 {
michael@0 1996 if (mParserContext) {
michael@0 1997 NS_IF_ADDREF(*aDTD = mDTD);
michael@0 1998 }
michael@0 1999
michael@0 2000 return NS_OK;
michael@0 2001 }
michael@0 2002
michael@0 2003 /**
michael@0 2004 * Get this as nsIStreamListener
michael@0 2005 */
michael@0 2006 nsIStreamListener*
michael@0 2007 nsParser::GetStreamListener()
michael@0 2008 {
michael@0 2009 return this;
michael@0 2010 }

mercurial