1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/htmlparser/src/nsParser.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2010 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set sw=2 ts=2 et tw=79: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "nsIAtom.h" 1.11 +#include "nsParser.h" 1.12 +#include "nsString.h" 1.13 +#include "nsCRT.h" 1.14 +#include "nsScanner.h" 1.15 +#include "plstr.h" 1.16 +#include "nsIStringStream.h" 1.17 +#include "nsIChannel.h" 1.18 +#include "nsICachingChannel.h" 1.19 +#include "nsICacheEntryDescriptor.h" 1.20 +#include "nsIInputStream.h" 1.21 +#include "CNavDTD.h" 1.22 +#include "prenv.h" 1.23 +#include "prlock.h" 1.24 +#include "prcvar.h" 1.25 +#include "nsParserCIID.h" 1.26 +#include "nsReadableUtils.h" 1.27 +#include "nsCOMPtr.h" 1.28 +#include "nsExpatDriver.h" 1.29 +#include "nsIServiceManager.h" 1.30 +#include "nsICategoryManager.h" 1.31 +#include "nsISupportsPrimitives.h" 1.32 +#include "nsIFragmentContentSink.h" 1.33 +#include "nsStreamUtils.h" 1.34 +#include "nsHTMLTokenizer.h" 1.35 +#include "nsNetUtil.h" 1.36 +#include "nsScriptLoader.h" 1.37 +#include "nsDataHashtable.h" 1.38 +#include "nsXPCOMCIDInternal.h" 1.39 +#include "nsMimeTypes.h" 1.40 +#include "mozilla/CondVar.h" 1.41 +#include "mozilla/Mutex.h" 1.42 +#include "nsParserConstants.h" 1.43 +#include "nsCharsetSource.h" 1.44 +#include "nsContentUtils.h" 1.45 +#include "nsThreadUtils.h" 1.46 +#include "nsIHTMLContentSink.h" 1.47 + 1.48 +#include "mozilla/dom/EncodingUtils.h" 1.49 + 1.50 +using namespace mozilla; 1.51 +using mozilla::dom::EncodingUtils; 1.52 + 1.53 +#define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002 1.54 +#define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004 1.55 +#define 
NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008 1.56 +#define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020 1.57 +#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040 1.58 + 1.59 +//-------------- Begin ParseContinue Event Definition ------------------------ 1.60 +/* 1.61 +The parser can be explicitly interrupted by passing a return value of 1.62 +NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause 1.63 +the parser to stop processing and allow the application to return to the event 1.64 +loop. The data which was left at the time of interruption will be processed 1.65 +the next time OnDataAvailable is called. If the parser has received its final 1.66 +chunk of data then OnDataAvailable will no longer be called by the networking 1.67 +module, so the parser will schedule a nsParserContinueEvent which will call 1.68 +the parser to process the remaining data after returning to the event loop. 1.69 +If the parser is interrupted while processing the remaining data it will 1.70 +schedule another ParseContinueEvent. The processing of data followed by 1.71 +scheduling of the continue events will proceed until either: 1.72 + 1.73 + 1) All of the remaining data can be processed without interrupting 1.74 + 2) The parser has been cancelled. 1.75 + 1.76 + 1.77 +This capability is currently used in CNavDTD and nsHTMLContentSink. The 1.78 +nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be 1.79 +processed and when each token is processed. The nsHTML content sink records 1.80 +the time when the chunk has started processing and will return 1.81 +NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a 1.82 +threshold called max tokenizing processing time. This allows the content sink 1.83 +to limit how much data is processed in a single chunk which in turn gates how 1.84 +much time is spent away from the event loop. Processing smaller chunks of data 1.85 +also reduces the time spent in subsequent reflows. 

This capability is most apparent when loading large documents. If the maximum
token processing time is set small enough the application will remain
responsive during document load.

A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
been interrupted when the last chunk of data arrived. The document is complete
when all of the document has been tokenized and there aren't any pending
nsParserContinueEvents. This can cause problems if the application assumes
that it can monitor the load requests to determine when the document load has
been completed. This is what happens in Mozilla. The document is considered
completely loaded when all of the load requests have been satisfied. To delay
the document load until all of the parsing has been completed the
nsHTMLContentSink adds a dummy parser load request which is not removed until
the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
DidBuildModel until the final chunk of data has been passed to the parser
through the OnDataAvailable and there aren't any pending
nsParserContinueEvents.

Currently the parser ignores requests to be interrupted during the
processing of script.  This is because a document.write followed by JavaScript
calls to manipulate the DOM may fail if the parser was interrupted during the
document.write.
1.110 + 1.111 +For more details @see bugzilla bug 76722 1.112 +*/ 1.113 + 1.114 + 1.115 +class nsParserContinueEvent : public nsRunnable 1.116 +{ 1.117 +public: 1.118 + nsRefPtr<nsParser> mParser; 1.119 + 1.120 + nsParserContinueEvent(nsParser* aParser) 1.121 + : mParser(aParser) 1.122 + {} 1.123 + 1.124 + NS_IMETHOD Run() 1.125 + { 1.126 + mParser->HandleParserContinueEvent(this); 1.127 + return NS_OK; 1.128 + } 1.129 +}; 1.130 + 1.131 +//-------------- End ParseContinue Event Definition ------------------------ 1.132 + 1.133 +/** 1.134 + * default constructor 1.135 + */ 1.136 +nsParser::nsParser() 1.137 +{ 1.138 + Initialize(true); 1.139 +} 1.140 + 1.141 +nsParser::~nsParser() 1.142 +{ 1.143 + Cleanup(); 1.144 +} 1.145 + 1.146 +void 1.147 +nsParser::Initialize(bool aConstructor) 1.148 +{ 1.149 + if (aConstructor) { 1.150 + // Raw pointer 1.151 + mParserContext = 0; 1.152 + } 1.153 + else { 1.154 + // nsCOMPtrs 1.155 + mObserver = nullptr; 1.156 + mUnusedInput.Truncate(); 1.157 + } 1.158 + 1.159 + mContinueEvent = nullptr; 1.160 + mCharsetSource = kCharsetUninitialized; 1.161 + mCharset.AssignLiteral("ISO-8859-1"); 1.162 + mInternalState = NS_OK; 1.163 + mStreamStatus = NS_OK; 1.164 + mCommand = eViewNormal; 1.165 + mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | 1.166 + NS_PARSER_FLAG_PARSER_ENABLED | 1.167 + NS_PARSER_FLAG_CAN_TOKENIZE; 1.168 + 1.169 + mProcessingNetworkData = false; 1.170 + mIsAboutBlank = false; 1.171 +} 1.172 + 1.173 +void 1.174 +nsParser::Cleanup() 1.175 +{ 1.176 +#ifdef DEBUG 1.177 + if (mParserContext && mParserContext->mPrevContext) { 1.178 + NS_WARNING("Extra parser contexts still on the parser stack"); 1.179 + } 1.180 +#endif 1.181 + 1.182 + while (mParserContext) { 1.183 + CParserContext *pc = mParserContext->mPrevContext; 1.184 + delete mParserContext; 1.185 + mParserContext = pc; 1.186 + } 1.187 + 1.188 + // It should not be possible for this flag to be set when we are getting 1.189 + // destroyed since this flag implies a pending 
nsParserContinueEvent, which 1.190 + // has an owning reference to |this|. 1.191 + NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad"); 1.192 +} 1.193 + 1.194 +NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) 1.195 + 1.196 +NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) 1.197 + NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD) 1.198 + NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink) 1.199 + NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) 1.200 +NS_IMPL_CYCLE_COLLECTION_UNLINK_END 1.201 + 1.202 +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) 1.203 + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD) 1.204 + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink) 1.205 + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) 1.206 + CParserContext *pc = tmp->mParserContext; 1.207 + while (pc) { 1.208 + cb.NoteXPCOMChild(pc->mTokenizer); 1.209 + pc = pc->mPrevContext; 1.210 + } 1.211 +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END 1.212 + 1.213 +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser) 1.214 +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser) 1.215 +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) 1.216 + NS_INTERFACE_MAP_ENTRY(nsIStreamListener) 1.217 + NS_INTERFACE_MAP_ENTRY(nsIParser) 1.218 + NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) 1.219 + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) 1.220 + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser) 1.221 +NS_INTERFACE_MAP_END 1.222 + 1.223 +// The parser continue event is posted only if 1.224 +// all of the data to parse has been passed to ::OnDataAvailable 1.225 +// and the parser has been interrupted by the content sink 1.226 +// because the processing of tokens took too long. 1.227 + 1.228 +nsresult 1.229 +nsParser::PostContinueEvent() 1.230 +{ 1.231 + if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) { 1.232 + // If this flag isn't set, then there shouldn't be a live continue event! 1.233 + NS_ASSERTION(!mContinueEvent, "bad"); 1.234 + 1.235 + // This creates a reference cycle between this and the event that is 1.236 + // broken when the event fires. 
1.237 + nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this); 1.238 + if (NS_FAILED(NS_DispatchToCurrentThread(event))) { 1.239 + NS_WARNING("failed to dispatch parser continuation event"); 1.240 + } else { 1.241 + mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; 1.242 + mContinueEvent = event; 1.243 + } 1.244 + } 1.245 + return NS_OK; 1.246 +} 1.247 + 1.248 +NS_IMETHODIMP_(void) 1.249 +nsParser::GetCommand(nsCString& aCommand) 1.250 +{ 1.251 + aCommand = mCommandStr; 1.252 +} 1.253 + 1.254 +/** 1.255 + * Call this method once you've created a parser, and want to instruct it 1.256 + * about the command which caused the parser to be constructed. For example, 1.257 + * this allows us to select a DTD which can do, say, view-source. 1.258 + * 1.259 + * @param aCommand the command string to set 1.260 + */ 1.261 +NS_IMETHODIMP_(void) 1.262 +nsParser::SetCommand(const char* aCommand) 1.263 +{ 1.264 + mCommandStr.Assign(aCommand); 1.265 + if (mCommandStr.Equals("view-source")) { 1.266 + mCommand = eViewSource; 1.267 + } else if (mCommandStr.Equals("view-fragment")) { 1.268 + mCommand = eViewFragment; 1.269 + } else { 1.270 + mCommand = eViewNormal; 1.271 + } 1.272 +} 1.273 + 1.274 +/** 1.275 + * Call this method once you've created a parser, and want to instruct it 1.276 + * about the command which caused the parser to be constructed. For example, 1.277 + * this allows us to select a DTD which can do, say, view-source. 
1.278 + * 1.279 + * @param aParserCommand the command to set 1.280 + */ 1.281 +NS_IMETHODIMP_(void) 1.282 +nsParser::SetCommand(eParserCommands aParserCommand) 1.283 +{ 1.284 + mCommand = aParserCommand; 1.285 +} 1.286 + 1.287 +/** 1.288 + * Call this method once you've created a parser, and want to instruct it 1.289 + * about what charset to load 1.290 + * 1.291 + * @param aCharset- the charset of a document 1.292 + * @param aCharsetSource- the source of the charset 1.293 + */ 1.294 +NS_IMETHODIMP_(void) 1.295 +nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource) 1.296 +{ 1.297 + mCharset = aCharset; 1.298 + mCharsetSource = aCharsetSource; 1.299 + if (mParserContext && mParserContext->mScanner) { 1.300 + mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource); 1.301 + } 1.302 +} 1.303 + 1.304 +void 1.305 +nsParser::SetSinkCharset(nsACString& aCharset) 1.306 +{ 1.307 + if (mSink) { 1.308 + mSink->SetDocumentCharset(aCharset); 1.309 + } 1.310 +} 1.311 + 1.312 +/** 1.313 + * This method gets called in order to set the content 1.314 + * sink for this parser to dump nodes to. 
1.315 + * 1.316 + * @param nsIContentSink interface for node receiver 1.317 + */ 1.318 +NS_IMETHODIMP_(void) 1.319 +nsParser::SetContentSink(nsIContentSink* aSink) 1.320 +{ 1.321 + NS_PRECONDITION(aSink, "sink cannot be null!"); 1.322 + mSink = aSink; 1.323 + 1.324 + if (mSink) { 1.325 + mSink->SetParser(this); 1.326 + nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink); 1.327 + if (htmlSink) { 1.328 + mIsAboutBlank = true; 1.329 + } 1.330 + } 1.331 +} 1.332 + 1.333 +/** 1.334 + * retrieve the sink set into the parser 1.335 + * @return current sink 1.336 + */ 1.337 +NS_IMETHODIMP_(nsIContentSink*) 1.338 +nsParser::GetContentSink() 1.339 +{ 1.340 + return mSink; 1.341 +} 1.342 + 1.343 +/** 1.344 + * Determine what DTD mode (and thus what layout nsCompatibility mode) 1.345 + * to use for this document based on the first chunk of data received 1.346 + * from the network (each parsercontext can have its own mode). (No, 1.347 + * this is not an optimal solution -- we really don't need to know until 1.348 + * after we've received the DOCTYPE, and this could easily be part of 1.349 + * the regular parsing process if the parser were designed in a way that 1.350 + * made such modifications easy.) 1.351 + */ 1.352 + 1.353 +// Parse the PS production in the SGML spec (excluding the part dealing 1.354 +// with entity references) starting at theIndex into theBuffer, and 1.355 +// return the first index after the end of the production. 
// aBuffer is the text being scanned and aIndex the position to start from.
// Whitespace (space, tab, LF, CR) and terminated "--...--" comments are
// skipped; the index of the first character that is neither is returned.
static int32_t
ParsePS(const nsString& aBuffer, int32_t aIndex)
{
  for (;;) {
    char16_t ch = aBuffer.CharAt(aIndex);
    if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||
        (ch == char16_t('\n')) || (ch == char16_t('\r'))) {
      ++aIndex;
    } else if (ch == char16_t('-')) {
      int32_t tmpIndex;
      // Only a "--" that has a matching closing "--" is skipped; a lone
      // '-' or an unterminated comment ends the production here.
      if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&
          kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
        aIndex = tmpIndex + 2;
      } else {
        return aIndex;
      }
    } else {
      return aIndex;
    }
  }
}

// Bits reported through ParseDocTypeDecl's aResultFlags out-parameter.
#define PARSE_DTD_HAVE_DOCTYPE          (1<<0)
#define PARSE_DTD_HAVE_PUBLIC_ID        (1<<1)
#define PARSE_DTD_HAVE_SYSTEM_ID        (1<<2)
#define PARSE_DTD_HAVE_INTERNAL_SUBSET  (1<<3)

// return true on success (includes not present), false on failure
//
// aBuffer      - text to look for a DOCTYPE declaration in
// aResultFlags - out: reset to 0, then OR-ed with the PARSE_DTD_HAVE_* bits
//                for each part of the declaration that was found
// aPublicID    - out: public identifier with whitespace compressed; only
//                assigned when a PUBLIC identifier was parsed
// aSystemID    - out: system identifier; only assigned when one was parsed
static bool
ParseDocTypeDecl(const nsString &aBuffer,
                 int32_t *aResultFlags,
                 nsString &aPublicID,
                 nsString &aSystemID)
{
  bool haveDoctype = false;
  *aResultFlags = 0;

  // Skip through any comments and processing instructions
  // The PI-skipping is a bit of a hack.
  int32_t theIndex = 0;
  do {
    theIndex = aBuffer.FindChar('<', theIndex);
    if (theIndex == kNotFound) break;
    char16_t nextChar = aBuffer.CharAt(theIndex+1);
    if (nextChar == char16_t('!')) {
      int32_t tmpIndex = theIndex + 2;
      // NOTE(review): the trailing 0 count presumably restricts the match to
      // position tmpIndex itself -- confirm against nsString::Find.
      if (kNotFound !=
          (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
        haveDoctype = true;
        theIndex += 7; // skip "DOCTYPE"
        break;
      }
      // "<!" not followed by DOCTYPE: skip to the closing '>' of the
      // declaration and keep looking.
      theIndex = ParsePS(aBuffer, tmpIndex);
      theIndex = aBuffer.FindChar('>', theIndex);
    } else if (nextChar == char16_t('?')) {
      theIndex = aBuffer.FindChar('>', theIndex);
    } else {
      // Any other tag ends the search for a DOCTYPE.
      break;
    }
  } while (theIndex != kNotFound);

  if (!haveDoctype)
    return true;
  *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;

  theIndex = ParsePS(aBuffer, theIndex);
  theIndex = aBuffer.Find("HTML", true, theIndex, 0);
  if (kNotFound == theIndex)
    return false;
  theIndex = ParsePS(aBuffer, theIndex+4);
  int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);

  if (kNotFound != tmpIndex) {
    theIndex = ParsePS(aBuffer, tmpIndex+6);

    // We get here only if we've read <!DOCTYPE HTML PUBLIC
    // (not case sensitive) possibly with comments within.

    // Now find the beginning and end of the public identifier
    // and the system identifier (if present).

    char16_t lit = aBuffer.CharAt(theIndex);
    if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
      return false;

    // Start is the first character, excluding the quote, and End is
    // the final quote, so there are (end-start) characters.

    int32_t PublicIDStart = theIndex + 1;
    int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
    if (kNotFound == PublicIDEnd)
      return false;
    theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
    char16_t next = aBuffer.CharAt(theIndex);
    if (next == char16_t('>')) {
      // There was a public identifier, but no system
      // identifier,
      // so do nothing.
      // This is needed to avoid the else at the end, and it's
      // also the most common case.
    } else if ((next == char16_t('\"')) ||
               (next == char16_t('\''))) {
      // We found a system identifier.
      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
      int32_t SystemIDStart = theIndex + 1;
      int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
      if (kNotFound == SystemIDEnd)
        return false;
      aSystemID =
        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
    } else if (next == char16_t('[')) {
      // We found an internal subset.
      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
    } else {
      // Something's wrong.
      return false;
    }

    // Since a public ID is a minimum literal, we must trim
    // and collapse whitespace
    aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
    aPublicID.CompressWhitespace(true, true);
    *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
  } else {
    tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
    if (kNotFound != tmpIndex) {
      // DOCTYPES with system ID but no Public ID
      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;

      theIndex = ParsePS(aBuffer, tmpIndex+6);
      char16_t next = aBuffer.CharAt(theIndex);
      if (next != char16_t('\"') && next != char16_t('\''))
        return false;

      int32_t SystemIDStart = theIndex + 1;
      int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);

      if (kNotFound == SystemIDEnd)
        return false;
      aSystemID =
        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
      theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
    }

    // With neither PUBLIC nor a parsed SYSTEM identifier left to consume,
    // the declaration must continue with '[' (internal subset) or end
    // with '>'.
    char16_t nextChar = aBuffer.CharAt(theIndex);
    if (nextChar == char16_t('['))
      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
    else if (nextChar != char16_t('>'))
      return false;
  }
  return true;
}

// One row of the public-identifier lookup table: a public ID and the mode
// it selects with and without a system identifier.
struct PubIDInfo
{
  enum eMode {
    eQuirks,         /* always quirks mode, unless there's an internal subset */
    eAlmostStandards,/* eCompatibility_AlmostStandards */
    eFullStandards   /* eCompatibility_FullStandards */
      /*
       * public IDs that should trigger strict mode are not listed
       * since we want all future public IDs to trigger strict mode as
       * well
       */
  };

  const char* name;          // public identifier to match
  eMode mode_if_no_sysid;    // mode when the DOCTYPE has no system identifier
  eMode mode_if_sysid;       // mode when the DOCTYPE has a system identifier
};

// Number of elements in a statically sized array.
#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0]))

// These
// must be in nsCRT::strcmp order so binary-search can be used.
// This is verified, |#ifdef DEBUG|, below.

// Even though public identifiers should be case sensitive, we will do
// all comparisons after converting to lower case in order to do
// case-insensitive comparison since there are a number of existing web
// sites that use the incorrect case.  Therefore all of the public
// identifiers below are in lower case (with the correct case following,
// in comments).  The case is verified, |#ifdef DEBUG|, below.
static const PubIDInfo kPublicIDs[] = {
  {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
  {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
  {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
  {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
  {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
  {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
};

#ifdef DEBUG
// Debug-only, run-once sanity check of kPublicIDs: every name must sort
// strictly before its successor under nsCRT::strcmp, and every name must
// already be lower case. Violations are reported, not repaired.
static void
VerifyPublicIDs()
{
  static bool gVerified = false;
  if (!gVerified) {
    gVerified = true;
    uint32_t i;
    // Adjacent pairs: ">= 0" also rejects duplicate entries.
    for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) {
      if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) {
        NS_NOTREACHED("doctypes out of order");
        printf("Doctypes %s and %s out of order.\n",
               kPublicIDs[i].name, kPublicIDs[i+1].name);
      }
    }
    // Each name must equal its own lower-cased copy.
    for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) {
      nsAutoCString lcPubID(kPublicIDs[i].name);
      ToLowerCase(lcPubID);
      if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) {
        NS_NOTREACHED("doctype not lower case");
        printf("Doctype %s not lower case.\n", kPublicIDs[i].name);
      }
    }
  }
}
+#endif 1.643 + 1.644 +static void 1.645 +DetermineHTMLParseMode(const nsString& aBuffer, 1.646 + nsDTDMode& aParseMode, 1.647 + eParserDocType& aDocType) 1.648 +{ 1.649 +#ifdef DEBUG 1.650 + VerifyPublicIDs(); 1.651 +#endif 1.652 + int32_t resultFlags; 1.653 + nsAutoString publicIDUCS2, sysIDUCS2; 1.654 + if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) { 1.655 + if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) { 1.656 + // no DOCTYPE 1.657 + aParseMode = eDTDMode_quirks; 1.658 + aDocType = eHTML_Quirks; 1.659 + } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) || 1.660 + !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) { 1.661 + // A doctype with an internal subset is always full_standards. 1.662 + // A doctype without a public ID is always full_standards. 1.663 + aDocType = eHTML_Strict; 1.664 + aParseMode = eDTDMode_full_standards; 1.665 + 1.666 + // Special hack for IBM's custom DOCTYPE. 1.667 + if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) && 1.668 + sysIDUCS2 == NS_LITERAL_STRING( 1.669 + "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { 1.670 + aParseMode = eDTDMode_quirks; 1.671 + aDocType = eHTML_Quirks; 1.672 + } 1.673 + 1.674 + } else { 1.675 + // We have to check our list of public IDs to see what to do. 1.676 + // Yes, we want UCS2 to ASCII lossy conversion. 1.677 + nsAutoCString publicID; 1.678 + publicID.AssignWithConversion(publicIDUCS2); 1.679 + 1.680 + // See comment above definition of kPublicIDs about case 1.681 + // sensitivity. 1.682 + ToLowerCase(publicID); 1.683 + 1.684 + // Binary search to see if we can find the correct public ID 1.685 + // These must be signed since maximum can go below zero and we'll 1.686 + // crash if it's unsigned. 
1.687 + int32_t minimum = 0; 1.688 + int32_t maximum = ELEMENTS_OF(kPublicIDs) - 1; 1.689 + int32_t index; 1.690 + for (;;) { 1.691 + index = (minimum + maximum) / 2; 1.692 + int32_t comparison = 1.693 + nsCRT::strcmp(publicID.get(), kPublicIDs[index].name); 1.694 + if (comparison == 0) 1.695 + break; 1.696 + if (comparison < 0) 1.697 + maximum = index - 1; 1.698 + else 1.699 + minimum = index + 1; 1.700 + 1.701 + if (maximum < minimum) { 1.702 + // The DOCTYPE is not in our list, so it must be full_standards. 1.703 + aParseMode = eDTDMode_full_standards; 1.704 + aDocType = eHTML_Strict; 1.705 + return; 1.706 + } 1.707 + } 1.708 + 1.709 + switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) 1.710 + ? kPublicIDs[index].mode_if_sysid 1.711 + : kPublicIDs[index].mode_if_no_sysid) 1.712 + { 1.713 + case PubIDInfo::eQuirks: 1.714 + aParseMode = eDTDMode_quirks; 1.715 + aDocType = eHTML_Quirks; 1.716 + break; 1.717 + case PubIDInfo::eAlmostStandards: 1.718 + aParseMode = eDTDMode_almost_standards; 1.719 + aDocType = eHTML_Strict; 1.720 + break; 1.721 + case PubIDInfo::eFullStandards: 1.722 + aParseMode = eDTDMode_full_standards; 1.723 + aDocType = eHTML_Strict; 1.724 + break; 1.725 + default: 1.726 + NS_NOTREACHED("no other cases!"); 1.727 + } 1.728 + } 1.729 + } else { 1.730 + // badly formed DOCTYPE -> quirks 1.731 + aParseMode = eDTDMode_quirks; 1.732 + aDocType = eHTML_Quirks; 1.733 + } 1.734 +} 1.735 + 1.736 +static void 1.737 +DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode, 1.738 + eParserDocType& aDocType, const nsACString& aMimeType) 1.739 +{ 1.740 + if (aMimeType.EqualsLiteral(TEXT_HTML)) { 1.741 + DetermineHTMLParseMode(aBuffer, aParseMode, aDocType); 1.742 + } else if (nsContentUtils::IsPlainTextType(aMimeType)) { 1.743 + aDocType = ePlainText; 1.744 + aParseMode = eDTDMode_quirks; 1.745 + } else { // Some form of XML 1.746 + aDocType = eXML; 1.747 + aParseMode = eDTDMode_full_standards; 1.748 + } 1.749 +} 1.750 + 1.751 +static nsIDTD* 1.752 
+FindSuitableDTD(CParserContext& aParserContext) 1.753 +{ 1.754 + // We always find a DTD. 1.755 + aParserContext.mAutoDetectStatus = ePrimaryDetect; 1.756 + 1.757 + // Quick check for view source. 1.758 + NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource, 1.759 + "The old parser is not supposed to be used for View Source anymore."); 1.760 + 1.761 + // Now see if we're parsing HTML (which, as far as we're concerned, simply 1.762 + // means "not XML"). 1.763 + if (aParserContext.mDocType != eXML) { 1.764 + return new CNavDTD(); 1.765 + } 1.766 + 1.767 + // If we're here, then we'd better be parsing XML. 1.768 + NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?"); 1.769 + return new nsExpatDriver(); 1.770 +} 1.771 + 1.772 +NS_IMETHODIMP 1.773 +nsParser::CancelParsingEvents() 1.774 +{ 1.775 + if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { 1.776 + NS_ASSERTION(mContinueEvent, "mContinueEvent is null"); 1.777 + // Revoke the pending continue parsing event 1.778 + mContinueEvent = nullptr; 1.779 + mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; 1.780 + } 1.781 + return NS_OK; 1.782 +} 1.783 + 1.784 +//////////////////////////////////////////////////////////////////////// 1.785 + 1.786 +/** 1.787 + * Evalutes EXPR1 and EXPR2 exactly once each, in that order. Stores the value 1.788 + * of EXPR2 in RV is EXPR2 fails, otherwise RV contains the result of EXPR1 1.789 + * (which could be success or failure). 1.790 + * 1.791 + * To understand the motivation for this construct, consider these example 1.792 + * methods: 1.793 + * 1.794 + * nsresult nsSomething::DoThatThing(nsIWhatever* obj) { 1.795 + * nsresult rv = NS_OK; 1.796 + * ... 1.797 + * return obj->DoThatThing(); 1.798 + * NS_ENSURE_SUCCESS(rv, rv); 1.799 + * ... 
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order. Stores the
 * value of EXPR2 in RV if EXPR2 fails; otherwise RV contains the result of
 * EXPR1 (which could be success or failure).
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen. We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * RV must be a plain identifier naming an assignable nsresult: it is
 * token-pasted to name the temporary that holds EXPR1's result.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
  nsresult RV##__temp = EXPR1;                                                \
  RV = EXPR2;                                                                 \
  /* Keep EXPR2's code only when it failed; otherwise report EXPR1's. */      \
  if (NS_SUCCEEDED(RV)) {                                                     \
    RV = RV##__temp;                                                          \
  }                                                                           \
}
1.844 + */ 1.845 +nsresult 1.846 +nsParser::WillBuildModel(nsString& aFilename) 1.847 +{ 1.848 + if (!mParserContext) 1.849 + return kInvalidParserContext; 1.850 + 1.851 + if (eUnknownDetect != mParserContext->mAutoDetectStatus) 1.852 + return NS_OK; 1.853 + 1.854 + if (eDTDMode_unknown == mParserContext->mDTDMode || 1.855 + eDTDMode_autodetect == mParserContext->mDTDMode) { 1.856 + char16_t buf[1025]; 1.857 + nsFixedString theBuffer(buf, 1024, 0); 1.858 + 1.859 + // Grab 1024 characters, starting at the first non-whitespace 1.860 + // character, to look for the doctype in. 1.861 + mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition()); 1.862 + DetermineParseMode(theBuffer, mParserContext->mDTDMode, 1.863 + mParserContext->mDocType, mParserContext->mMimeType); 1.864 + } 1.865 + 1.866 + NS_ASSERTION(!mDTD || !mParserContext->mPrevContext, 1.867 + "Clobbering DTD for non-root parser context!"); 1.868 + mDTD = FindSuitableDTD(*mParserContext); 1.869 + NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY); 1.870 + 1.871 + nsITokenizer* tokenizer; 1.872 + nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer); 1.873 + NS_ENSURE_SUCCESS(rv, rv); 1.874 + 1.875 + rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink); 1.876 + nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode()); 1.877 + // nsIDTD::WillBuildModel used to be responsible for calling 1.878 + // nsIContentSink::WillBuildModel, but that obligation isn't expressible 1.879 + // in the nsIDTD interface itself, so it's sounder and simpler to give that 1.880 + // responsibility back to the parser. The former behavior of the DTD was to 1.881 + // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns 1.882 + // failure we should use sinkResult instead of rv, to preserve the old error 1.883 + // handling behavior of the DTD: 1.884 + return NS_FAILED(sinkResult) ? 
sinkResult : rv; 1.885 +} 1.886 + 1.887 +/** 1.888 + * This gets called when the parser is done with its input. 1.889 + * Note that the parser may have been called recursively, so we 1.890 + * have to check for a prev. context before closing out the DTD/sink. 1.891 + */ 1.892 +nsresult 1.893 +nsParser::DidBuildModel(nsresult anErrorCode) 1.894 +{ 1.895 + nsresult result = anErrorCode; 1.896 + 1.897 + if (IsComplete()) { 1.898 + if (mParserContext && !mParserContext->mPrevContext) { 1.899 + // Let sink know if we're about to end load because we've been terminated. 1.900 + // In that case we don't want it to run deferred scripts. 1.901 + bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING; 1.902 + if (mDTD && mSink) { 1.903 + nsresult dtdResult = mDTD->DidBuildModel(anErrorCode), 1.904 + sinkResult = mSink->DidBuildModel(terminated); 1.905 + // nsIDTD::DidBuildModel used to be responsible for calling 1.906 + // nsIContentSink::DidBuildModel, but that obligation isn't expressible 1.907 + // in the nsIDTD interface itself, so it's sounder and simpler to give 1.908 + // that responsibility back to the parser. The former behavior of the 1.909 + // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the 1.910 + // sink returns failure we should use sinkResult instead of dtdResult, 1.911 + // to preserve the old error handling behavior of the DTD: 1.912 + result = NS_FAILED(sinkResult) ? sinkResult : dtdResult; 1.913 + } 1.914 + 1.915 + //Ref. to bug 61462. 1.916 + mParserContext->mRequest = 0; 1.917 + } 1.918 + } 1.919 + 1.920 + return result; 1.921 +} 1.922 + 1.923 +/** 1.924 + * This method adds a new parser context to the list, 1.925 + * pushing the current one to the next position. 
1.926 + * 1.927 + * @param ptr to new context 1.928 + */ 1.929 +void 1.930 +nsParser::PushContext(CParserContext& aContext) 1.931 +{ 1.932 + NS_ASSERTION(aContext.mPrevContext == mParserContext, 1.933 + "Trying to push a context whose previous context differs from " 1.934 + "the current parser context."); 1.935 + mParserContext = &aContext; 1.936 +} 1.937 + 1.938 +/** 1.939 + * This method pops the topmost context off the stack, 1.940 + * returning it to the user. The next context (if any) 1.941 + * becomes the current context. 1.942 + * @update gess7/22/98 1.943 + * @return prev. context 1.944 + */ 1.945 +CParserContext* 1.946 +nsParser::PopContext() 1.947 +{ 1.948 + CParserContext* oldContext = mParserContext; 1.949 + if (oldContext) { 1.950 + mParserContext = oldContext->mPrevContext; 1.951 + if (mParserContext) { 1.952 + // If the old context was blocked, propagate the blocked state 1.953 + // back to the new one. Also, propagate the stream listener state 1.954 + // but don't override onStop state to guarantee the call to DidBuildModel(). 1.955 + if (mParserContext->mStreamListenerState != eOnStop) { 1.956 + mParserContext->mStreamListenerState = oldContext->mStreamListenerState; 1.957 + } 1.958 + } 1.959 + } 1.960 + return oldContext; 1.961 +} 1.962 + 1.963 +/** 1.964 + * Call this when you want control whether or not the parser will parse 1.965 + * and tokenize input (TRUE), or whether it just caches input to be 1.966 + * parsed later (FALSE). 1.967 + * 1.968 + * @param aState determines whether we parse/tokenize or just cache. 1.969 + * @return current state 1.970 + */ 1.971 +void 1.972 +nsParser::SetUnusedInput(nsString& aBuffer) 1.973 +{ 1.974 + mUnusedInput = aBuffer; 1.975 +} 1.976 + 1.977 +/** 1.978 + * Call this when you want to *force* the parser to terminate the 1.979 + * parsing process altogether. This is binary -- so once you terminate 1.980 + * you can't resume without restarting altogether. 
1.981 + */ 1.982 +NS_IMETHODIMP 1.983 +nsParser::Terminate(void) 1.984 +{ 1.985 + // We should only call DidBuildModel once, so don't do anything if this is 1.986 + // the second time that Terminate has been called. 1.987 + if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { 1.988 + return NS_OK; 1.989 + } 1.990 + 1.991 + nsresult result = NS_OK; 1.992 + // XXX - [ until we figure out a way to break parser-sink circularity ] 1.993 + // Hack - Hold a reference until we are completely done... 1.994 + nsCOMPtr<nsIParser> kungFuDeathGrip(this); 1.995 + mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; 1.996 + 1.997 + // CancelParsingEvents must be called to avoid leaking the nsParser object 1.998 + // @see bug 108049 1.999 + // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents 1.1000 + // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note: 1.1001 + // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag. 1.1002 + CancelParsingEvents(); 1.1003 + 1.1004 + // If we got interrupted in the middle of a document.write, then we might 1.1005 + // have more than one parser context on our parsercontext stack. This has 1.1006 + // the effect of making DidBuildModel a no-op, meaning that we never call 1.1007 + // our sink's DidBuildModel and break the reference cycle, causing a leak. 1.1008 + // Since we're getting terminated, we manually clean up our context stack. 1.1009 + while (mParserContext && mParserContext->mPrevContext) { 1.1010 + CParserContext *prev = mParserContext->mPrevContext; 1.1011 + delete mParserContext; 1.1012 + mParserContext = prev; 1.1013 + } 1.1014 + 1.1015 + if (mDTD) { 1.1016 + mDTD->Terminate(); 1.1017 + DidBuildModel(result); 1.1018 + } else if (mSink) { 1.1019 + // We have no parser context or no DTD yet (so we got terminated before we 1.1020 + // got any data). Manually break the reference cycle with the sink. 
1.1021 + result = mSink->DidBuildModel(true); 1.1022 + NS_ENSURE_SUCCESS(result, result); 1.1023 + } 1.1024 + 1.1025 + return NS_OK; 1.1026 +} 1.1027 + 1.1028 +NS_IMETHODIMP 1.1029 +nsParser::ContinueInterruptedParsing() 1.1030 +{ 1.1031 + // If there are scripts executing, then the content sink is jumping the gun 1.1032 + // (probably due to a synchronous XMLHttpRequest) and will re-enable us 1.1033 + // later, see bug 460706. 1.1034 + if (!IsOkToProcessNetworkData()) { 1.1035 + return NS_OK; 1.1036 + } 1.1037 + 1.1038 + // If the stream has already finished, there's a good chance 1.1039 + // that we might start closing things down when the parser 1.1040 + // is reenabled. To make sure that we're not deleted across 1.1041 + // the reenabling process, hold a reference to ourselves. 1.1042 + nsresult result=NS_OK; 1.1043 + nsCOMPtr<nsIParser> kungFuDeathGrip(this); 1.1044 + nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); 1.1045 + 1.1046 +#ifdef DEBUG 1.1047 + if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { 1.1048 + NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser."); 1.1049 + } 1.1050 +#endif 1.1051 + 1.1052 + bool isFinalChunk = mParserContext && 1.1053 + mParserContext->mStreamListenerState == eOnStop; 1.1054 + 1.1055 + mProcessingNetworkData = true; 1.1056 + if (mSink) { 1.1057 + mSink->WillParse(); 1.1058 + } 1.1059 + result = ResumeParse(true, isFinalChunk); // Ref. bug 57999 1.1060 + mProcessingNetworkData = false; 1.1061 + 1.1062 + if (result != NS_OK) { 1.1063 + result=mInternalState; 1.1064 + } 1.1065 + 1.1066 + return result; 1.1067 +} 1.1068 + 1.1069 +/** 1.1070 + * Stops parsing temporarily. That's it will prevent the 1.1071 + * parser from building up content model. 1.1072 + */ 1.1073 +NS_IMETHODIMP_(void) 1.1074 +nsParser::BlockParser() 1.1075 +{ 1.1076 + mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED; 1.1077 +} 1.1078 + 1.1079 +/** 1.1080 + * Open up the parser for tokenization, building up content 1.1081 + * model..etc. 
However, this method does not resume parsing 1.1082 + * automatically. It's the callers' responsibility to restart 1.1083 + * the parsing engine. 1.1084 + */ 1.1085 +NS_IMETHODIMP_(void) 1.1086 +nsParser::UnblockParser() 1.1087 +{ 1.1088 + if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { 1.1089 + mFlags |= NS_PARSER_FLAG_PARSER_ENABLED; 1.1090 + } else { 1.1091 + NS_WARNING("Trying to unblock an unblocked parser."); 1.1092 + } 1.1093 +} 1.1094 + 1.1095 +NS_IMETHODIMP_(void) 1.1096 +nsParser::ContinueInterruptedParsingAsync() 1.1097 +{ 1.1098 + mSink->ContinueInterruptedParsingAsync(); 1.1099 +} 1.1100 + 1.1101 +/** 1.1102 + * Call this to query whether the parser is enabled or not. 1.1103 + */ 1.1104 +NS_IMETHODIMP_(bool) 1.1105 +nsParser::IsParserEnabled() 1.1106 +{ 1.1107 + return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0; 1.1108 +} 1.1109 + 1.1110 +/** 1.1111 + * Call this to query whether the parser thinks it's done with parsing. 1.1112 + */ 1.1113 +NS_IMETHODIMP_(bool) 1.1114 +nsParser::IsComplete() 1.1115 +{ 1.1116 + return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); 1.1117 +} 1.1118 + 1.1119 + 1.1120 +void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev) 1.1121 +{ 1.1122 + // Ignore any revoked continue events... 
1.1123 + if (mContinueEvent != ev) 1.1124 + return; 1.1125 + 1.1126 + mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; 1.1127 + mContinueEvent = nullptr; 1.1128 + 1.1129 + NS_ASSERTION(IsOkToProcessNetworkData(), 1.1130 + "Interrupted in the middle of a script?"); 1.1131 + ContinueInterruptedParsing(); 1.1132 +} 1.1133 + 1.1134 +bool 1.1135 +nsParser::IsInsertionPointDefined() 1.1136 +{ 1.1137 + return false; 1.1138 +} 1.1139 + 1.1140 +void 1.1141 +nsParser::BeginEvaluatingParserInsertedScript() 1.1142 +{ 1.1143 +} 1.1144 + 1.1145 +void 1.1146 +nsParser::EndEvaluatingParserInsertedScript() 1.1147 +{ 1.1148 +} 1.1149 + 1.1150 +void 1.1151 +nsParser::MarkAsNotScriptCreated(const char* aCommand) 1.1152 +{ 1.1153 +} 1.1154 + 1.1155 +bool 1.1156 +nsParser::IsScriptCreated() 1.1157 +{ 1.1158 + return false; 1.1159 +} 1.1160 + 1.1161 +/** 1.1162 + * This is the main controlling routine in the parsing process. 1.1163 + * Note that it may get called multiple times for the same scanner, 1.1164 + * since this is a pushed based system, and all the tokens may 1.1165 + * not have been consumed by the scanner during a given invocation 1.1166 + * of this method. 
/**
 * Sets the parser up to parse the document at aURL: records the observer,
 * creates a scanner named after the URL spec and pushes a new URL-based
 * parser context. No parsing happens here.
 *
 * @param aURL      the document URL; must not be null
 * @param aListener stored in mObserver and handed to the new context
 * @param aKey      key stored on the new context
 * @param aMode     DTD mode assigned to the new context
 * @return NS_OK on success; kBadURL if aURL is null; the GetSpec() result if
 *         that is anything other than NS_OK;
 *         NS_ERROR_HTMLPARSER_BADCONTEXT if the scanner or context is null.
 */
NS_IMETHODIMP
nsParser::Parse(nsIURI* aURL,
                nsIRequestObserver* aListener,
                void* aKey,
                nsDTDMode aMode)
{

  NS_PRECONDITION(aURL, "Error: Null URL given");

  nsresult result=kBadURL;
  mObserver = aListener;

  if (aURL) {
    nsAutoCString spec;
    nsresult rv = aURL->GetSpec(spec);
    if (rv != NS_OK) {
      return rv;
    }
    NS_ConvertUTF8toUTF16 theName(spec);

    nsScanner* theScanner = new nsScanner(theName, false);
    CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
                                            mCommand, aListener);
    if (pc && theScanner) {
      pc->mMultipart = true;
      pc->mContextType = CParserContext::eCTURL;
      pc->mDTDMode = aMode;
      PushContext(*pc);

      result = NS_OK;
    } else {
      // NOTE(review): whichever of pc / theScanner is non-null is not freed
      // on this path.
      result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
    }
  }
  return result;
}

/**
 * Used by XML fragment parsing below.
 *
 * Appends aSourceBuffer to the scanner of the context identified by aKey.
 * If no context on the stack has that key, a new string-based
 * application/xml context is created and pushed, and parsing is resumed
 * immediately.
 *
 * @param aSourceBuffer contains a string-full of real content
 * @param aKey          identifies the parser context to feed
 * @param aLastCall     true when no more data will follow for this key
 * @return NS_OK, NS_ERROR_OUT_OF_MEMORY, or the result of ResumeParse() when
 *         a new context was created. The ResumeParse() result on the
 *         existing-context path is not propagated.
 */
nsresult
nsParser::Parse(const nsAString& aSourceBuffer,
                void* aKey,
                bool aLastCall)
{
  nsresult result = NS_OK;

  // Don't bother if we're never going to parse this.
  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
    return result;
  }

  if (!aLastCall && aSourceBuffer.IsEmpty()) {
    // Nothing is being passed to the parser so return
    // immediately. mUnusedInput will get processed when
    // some data is actually passed in.
    // But if this is the last call, make sure to finish up
    // stuff correctly.
    return result;
  }

  // Maintain a reference to ourselves so we don't go away
  // till we're completely done.
  nsCOMPtr<nsIParser> kungFuDeathGrip(this);

  if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
    // Note: The following code will always find the parser context associated
    // with the given key, even if that context has been suspended (e.g., for
    // another document.write call). This doesn't appear to be exactly what IE
    // does in the case where this happens, but this makes more sense.
    CParserContext* pc = mParserContext;
    while (pc && pc->mKey != aKey) {
      pc = pc->mPrevContext;
    }

    if (!pc) {
      // Only make a new context if we don't have one, OR if we do, but has a
      // different context key.
      nsScanner* theScanner = new nsScanner(mUnusedInput);
      NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);

      eAutoDetectResult theStatus = eUnknownDetect;

      // Inherit the detection status from an enclosing application/xml
      // context so the new context is not re-detected.
      if (mParserContext &&
          mParserContext->mMimeType.EqualsLiteral("application/xml")) {
        // Ref. Bug 90379
        NS_ASSERTION(mDTD, "How come the DTD is null?");

        // This inner check is redundant with the enclosing condition.
        if (mParserContext) {
          theStatus = mParserContext->mAutoDetectStatus;
          // Added this to fix bug 32022.
        }
      }

      pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
                              0, theStatus, aLastCall);
      NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);

      PushContext(*pc);

      // A context is multipart if more data is expected for it or if the
      // context below it is multipart.
      pc->mMultipart = !aLastCall; // By default
      if (pc->mPrevContext) {
        pc->mMultipart |= pc->mPrevContext->mMultipart;
      }

      // Start fix bug 40143
      if (pc->mMultipart) {
        pc->mStreamListenerState = eOnDataAvail;
        if (pc->mScanner) {
          pc->mScanner->SetIncremental(true);
        }
      } else {
        pc->mStreamListenerState = eOnStop;
        if (pc->mScanner) {
          pc->mScanner->SetIncremental(false);
        }
      }
      // end fix for 40143

      pc->mContextType=CParserContext::eCTString;
      pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
      pc->mDTDMode = eDTDMode_full_standards;

      // The scanner was seeded with mUnusedInput above, so the stash can be
      // emptied now.
      mUnusedInput.Truncate();

      pc->mScanner->Append(aSourceBuffer);
      // Do not interrupt document.write() - bug 95487
      result = ResumeParse(false, false, false);
    } else {
      pc->mScanner->Append(aSourceBuffer);
      if (!pc->mPrevContext) {
        // Set stream listener state to eOnStop, on the final context - Fix 68160,
        // to guarantee DidBuildModel() call - Fix 36148
        if (aLastCall) {
          pc->mStreamListenerState = eOnStop;
          pc->mScanner->SetIncremental(false);
        }

        if (pc == mParserContext) {
          // If pc is not mParserContext, then this call to ResumeParse would
          // do the wrong thing and try to continue parsing using
          // mParserContext. We need to wait to actually resume parsing on pc.
          // NOTE(review): the return value is discarded here, so `result`
          // stays NS_OK on this path -- confirm that is intended.
          ResumeParse(false, false, false);
        }
      }
    }
  }

  return result;
}
1.1368 + if (theCount == 0) { 1.1369 + result = Parse(aSourceBuffer, 1.1370 + &theContext, 1.1371 + true); 1.1372 + fragSink->DidBuildContent(); 1.1373 + } else { 1.1374 + // Add an end tag chunk, so expat will read the whole source buffer, 1.1375 + // and not worry about ']]' etc. 1.1376 + result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"), 1.1377 + &theContext, 1.1378 + false); 1.1379 + fragSink->DidBuildContent(); 1.1380 + 1.1381 + if (NS_SUCCEEDED(result)) { 1.1382 + nsAutoString endContext; 1.1383 + for (theIndex = 0; theIndex < theCount; theIndex++) { 1.1384 + // we already added an end tag chunk above 1.1385 + if (theIndex > 0) { 1.1386 + endContext.AppendLiteral("</"); 1.1387 + } 1.1388 + 1.1389 + nsString& thisTag = aTagStack[theIndex]; 1.1390 + // was there an xmlns=? 1.1391 + int32_t endOfTag = thisTag.FindChar(char16_t(' ')); 1.1392 + if (endOfTag == -1) { 1.1393 + endContext.Append(thisTag); 1.1394 + } else { 1.1395 + endContext.Append(Substring(thisTag,0,endOfTag)); 1.1396 + } 1.1397 + 1.1398 + endContext.AppendLiteral(">"); 1.1399 + } 1.1400 + 1.1401 + result = Parse(endContext, 1.1402 + &theContext, 1.1403 + true); 1.1404 + } 1.1405 + } 1.1406 + 1.1407 + mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; 1.1408 + 1.1409 + return result; 1.1410 +} 1.1411 + 1.1412 +/** 1.1413 + * This routine is called to cause the parser to continue parsing its 1.1414 + * underlying stream. This call allows the parse process to happen in 1.1415 + * chunks, such as when the content is push based, and we need to parse in 1.1416 + * pieces. 1.1417 + * 1.1418 + * An interesting change in how the parser gets used has led us to add extra 1.1419 + * processing to this method. The case occurs when the parser is blocked in 1.1420 + * one context, and gets a parse(string) call in another context. In this 1.1421 + * case, the parserContexts are linked. No problem. 
/**
 * This routine is called to cause the parser to continue parsing its
 * underlying stream.  This call allows the parse process to happen in
 * chunks, such as when the content is push based, and we need to parse in
 * pieces.
 *
 * An interesting change in how the parser gets used has led us to add extra
 * processing to this method.  The case occurs when the parser is blocked in
 * one context, and gets a parse(string) call in another context.  In this
 * case, the parserContexts are linked. No problem.
 *
 * The problem is that Parse(string) assumes that it can proceed unabated,
 * but if the parser is already blocked that assumption is false. So we
 * needed to add a mechanism here to allow the parser to continue to process
 * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
 * out of contexts.
 *
 * Does nothing (returns NS_OK) when the parser is blocked or has been told
 * to stop parsing.
 *
 * @param   allowIteration : set to true if non-script resumption is requested
 * @param   aIsFinalChunk : tells us when the last chunk of data is provided.
 * @param   aCanInterrupt : not read by this method's body.
 * @return  error code -- 0 if ok, non-zero if error. BLOCK, STOPPARSING and
 *          INTERRUPTED results from the model builder are reported as NS_OK.
 */
nsresult
nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                      bool aCanInterrupt)
{
  nsresult result = NS_OK;

  if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
      mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {

    // NOTE(review): mParserContext is dereferenced here before
    // WillBuildModel() gets the chance to reject a null context -- confirm
    // callers guarantee a context.
    result = WillBuildModel(mParserContext->mScanner->GetFilename());
    if (NS_FAILED(result)) {
      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
      return result;
    }

    if (mDTD) {
      mSink->WillResume();
      bool theIterationIsOk = true;

      // Each pass tokenizes and builds the model for the current context;
      // finished nested contexts are popped at the bottom of the loop.
      while (result == NS_OK && theIterationIsOk) {
        if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
          // -- Ref: Bug# 22485 --
          // Insert the unused input into the source buffer
          // as if it was read from the input stream.
          // Adding UngetReadable() per vidur!!
          mParserContext->mScanner->UngetReadable(mUnusedInput);
          mUnusedInput.Truncate(0);
        }

        // Only allow parsing to be interrupted in the subsequent call to
        // build model.
        nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
                                      ? Tokenize(aIsFinalChunk)
                                      : NS_OK;
        result = BuildModel();

        // On the final chunk no further data will arrive to drive parsing,
        // so an interruption has to be resumed through a continue event.
        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
          PostContinueEvent();
        }

        theIterationIsOk = theTokenizerResult != kEOF &&
                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;

        // Make sure not to stop parsing too early. Therefore, before shutting
        // down the parser, it's important to check whether the input buffer
        // has been scanned to completion (theTokenizerResult should be kEOF).
        // kEOF -> End of buffer.

        // If we're told to block the parser, we disable all further parsing
        // (and cache any data coming in) until the parser is re-enabled.
        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
          mSink->WillInterrupt();
          if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
            // If we were blocked by a recursive invocation, don't re-block.
            BlockParser();
          }
          return NS_OK;
        }
        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
          // Note: Parser Terminate() calls DidBuildModel.
          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
            DidBuildModel(mStreamStatus);
            mInternalState = result;
          }

          return NS_OK;
        }
        if ((NS_OK == result && theTokenizerResult == kEOF) ||
             result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          bool theContextIsStringBased =
            CParserContext::eCTString == mParserContext->mContextType;

          if (mParserContext->mStreamListenerState == eOnStop ||
              !mParserContext->mMultipart || theContextIsStringBased) {
            if (!mParserContext->mPrevContext) {
              // Root context: finish up only once the stream has stopped.
              if (mParserContext->mStreamListenerState == eOnStop) {
                DidBuildModel(mStreamStatus);
                return NS_OK;
              }
            } else {
              // Nested context is done: pop it, keep any input it did not
              // consume, and free it.
              CParserContext* theContext = PopContext();
              if (theContext) {
                theIterationIsOk = allowIteration && theContextIsStringBased;
                if (theContext->mCopyUnused) {
                  theContext->mScanner->CopyUnusedData(mUnusedInput);
                }

                delete theContext;
              }

              result = mInternalState;
              // Re-evaluate "final chunk" for the context that is now current.
              aIsFinalChunk = mParserContext &&
                              mParserContext->mStreamListenerState == eOnStop;
              // ...then intentionally fall through to mSink->WillInterrupt()...
            }
          }
        }

        if (theTokenizerResult == kEOF ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          // An interruption is not an error for the caller.
          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
          mSink->WillInterrupt();
        }
      }
    } else {
      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
    }
  }

  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}
1.1549 + */ 1.1550 +nsresult 1.1551 +nsParser::BuildModel() 1.1552 +{ 1.1553 + nsITokenizer* theTokenizer = nullptr; 1.1554 + 1.1555 + nsresult result = NS_OK; 1.1556 + if (mParserContext) { 1.1557 + result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); 1.1558 + } 1.1559 + 1.1560 + if (NS_SUCCEEDED(result)) { 1.1561 + if (mDTD) { 1.1562 + result = mDTD->BuildModel(theTokenizer, mSink); 1.1563 + } 1.1564 + } else { 1.1565 + mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER; 1.1566 + } 1.1567 + return result; 1.1568 +} 1.1569 + 1.1570 +/******************************************************************* 1.1571 + These methods are used to talk to the netlib system... 1.1572 + *******************************************************************/ 1.1573 + 1.1574 +nsresult 1.1575 +nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext) 1.1576 +{ 1.1577 + NS_PRECONDITION(eNone == mParserContext->mStreamListenerState, 1.1578 + "Parser's nsIStreamListener API was not setup " 1.1579 + "correctly in constructor."); 1.1580 + if (mObserver) { 1.1581 + mObserver->OnStartRequest(request, aContext); 1.1582 + } 1.1583 + mParserContext->mStreamListenerState = eOnStart; 1.1584 + mParserContext->mAutoDetectStatus = eUnknownDetect; 1.1585 + mParserContext->mRequest = request; 1.1586 + 1.1587 + NS_ASSERTION(!mParserContext->mPrevContext, 1.1588 + "Clobbering DTD for non-root parser context!"); 1.1589 + mDTD = nullptr; 1.1590 + 1.1591 + nsresult rv; 1.1592 + nsAutoCString contentType; 1.1593 + nsCOMPtr<nsIChannel> channel = do_QueryInterface(request); 1.1594 + if (channel) { 1.1595 + rv = channel->GetContentType(contentType); 1.1596 + if (NS_SUCCEEDED(rv)) { 1.1597 + mParserContext->SetMimeType(contentType); 1.1598 + } 1.1599 + } 1.1600 + 1.1601 + rv = NS_OK; 1.1602 + 1.1603 + return rv; 1.1604 +} 1.1605 + 1.1606 +static bool 1.1607 +ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen, 1.1608 + nsCString& oCharset) 1.1609 +{ 1.1610 
+ // This code is rather pointless to have. Might as well reuse expat as 1.1611 + // seen in nsHtml5StreamParser. -- hsivonen 1.1612 + oCharset.Truncate(); 1.1613 + if ((aLen >= 5) && 1.1614 + ('<' == aBytes[0]) && 1.1615 + ('?' == aBytes[1]) && 1.1616 + ('x' == aBytes[2]) && 1.1617 + ('m' == aBytes[3]) && 1.1618 + ('l' == aBytes[4])) { 1.1619 + int32_t i; 1.1620 + bool versionFound = false, encodingFound = false; 1.1621 + for (i = 6; i < aLen && !encodingFound; ++i) { 1.1622 + // end of XML declaration? 1.1623 + if ((((char*) aBytes)[i] == '?') && 1.1624 + ((i + 1) < aLen) && 1.1625 + (((char*) aBytes)[i + 1] == '>')) { 1.1626 + break; 1.1627 + } 1.1628 + // Version is required. 1.1629 + if (!versionFound) { 1.1630 + // Want to avoid string comparisons, hence looking for 'n' 1.1631 + // and only if found check the string leading to it. Not 1.1632 + // foolproof, but fast. 1.1633 + // The shortest string allowed before this is (strlen==13): 1.1634 + // <?xml version 1.1635 + if ((((char*) aBytes)[i] == 'n') && 1.1636 + (i >= 12) && 1.1637 + (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) { 1.1638 + // Fast forward through version 1.1639 + char q = 0; 1.1640 + for (++i; i < aLen; ++i) { 1.1641 + char qi = ((char*) aBytes)[i]; 1.1642 + if (qi == '\'' || qi == '"') { 1.1643 + if (q && q == qi) { 1.1644 + // ending quote 1.1645 + versionFound = true; 1.1646 + break; 1.1647 + } else { 1.1648 + // Starting quote 1.1649 + q = qi; 1.1650 + } 1.1651 + } 1.1652 + } 1.1653 + } 1.1654 + } else { 1.1655 + // encoding must follow version 1.1656 + // Want to avoid string comparisons, hence looking for 'g' 1.1657 + // and only if found check the string leading to it. Not 1.1658 + // foolproof, but fast. 
1.1659 + // The shortest allowed string before this (strlen==26): 1.1660 + // <?xml version="1" encoding 1.1661 + if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp( 1.1662 + "encodin", (char*) (aBytes + i - 7), 7))) { 1.1663 + int32_t encStart = 0; 1.1664 + char q = 0; 1.1665 + for (++i; i < aLen; ++i) { 1.1666 + char qi = ((char*) aBytes)[i]; 1.1667 + if (qi == '\'' || qi == '"') { 1.1668 + if (q && q == qi) { 1.1669 + int32_t count = i - encStart; 1.1670 + // encoding value is invalid if it is UTF-16 1.1671 + if (count > 0 && PL_strncasecmp("UTF-16", 1.1672 + (char*) (aBytes + encStart), count)) { 1.1673 + oCharset.Assign((char*) (aBytes + encStart), count); 1.1674 + } 1.1675 + encodingFound = true; 1.1676 + break; 1.1677 + } else { 1.1678 + encStart = i + 1; 1.1679 + q = qi; 1.1680 + } 1.1681 + } 1.1682 + } 1.1683 + } 1.1684 + } // if (!versionFound) 1.1685 + } // for 1.1686 + } 1.1687 + return !oCharset.IsEmpty(); 1.1688 +} 1.1689 + 1.1690 +inline const char 1.1691 +GetNextChar(nsACString::const_iterator& aStart, 1.1692 + nsACString::const_iterator& aEnd) 1.1693 +{ 1.1694 + NS_ASSERTION(aStart != aEnd, "end of buffer"); 1.1695 + return (++aStart != aEnd) ? *aStart : '\0'; 1.1696 +} 1.1697 + 1.1698 +static NS_METHOD 1.1699 +NoOpParserWriteFunc(nsIInputStream* in, 1.1700 + void* closure, 1.1701 + const char* fromRawSegment, 1.1702 + uint32_t toOffset, 1.1703 + uint32_t count, 1.1704 + uint32_t *writeCount) 1.1705 +{ 1.1706 + *writeCount = count; 1.1707 + return NS_OK; 1.1708 +} 1.1709 + 1.1710 +typedef struct { 1.1711 + bool mNeedCharsetCheck; 1.1712 + nsParser* mParser; 1.1713 + nsScanner* mScanner; 1.1714 + nsIRequest* mRequest; 1.1715 +} ParserWriteStruct; 1.1716 + 1.1717 +/* 1.1718 + * This function is invoked as a result of a call to a stream's 1.1719 + * ReadSegments() method. It is called for each contiguous buffer 1.1720 + * of data in the underlying stream or pipe. 
Using ReadSegments 1.1721 + * allows us to avoid copying data to read out of the stream. 1.1722 + */ 1.1723 +static NS_METHOD 1.1724 +ParserWriteFunc(nsIInputStream* in, 1.1725 + void* closure, 1.1726 + const char* fromRawSegment, 1.1727 + uint32_t toOffset, 1.1728 + uint32_t count, 1.1729 + uint32_t *writeCount) 1.1730 +{ 1.1731 + nsresult result; 1.1732 + ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure); 1.1733 + const unsigned char* buf = 1.1734 + reinterpret_cast<const unsigned char*> (fromRawSegment); 1.1735 + uint32_t theNumRead = count; 1.1736 + 1.1737 + if (!pws) { 1.1738 + return NS_ERROR_FAILURE; 1.1739 + } 1.1740 + 1.1741 + if (pws->mNeedCharsetCheck) { 1.1742 + pws->mNeedCharsetCheck = false; 1.1743 + int32_t source; 1.1744 + nsAutoCString preferred; 1.1745 + nsAutoCString maybePrefer; 1.1746 + pws->mParser->GetDocumentCharset(preferred, source); 1.1747 + 1.1748 + // This code was bogus when I found it. It expects the BOM or the XML 1.1749 + // declaration to be entirely in the first network buffer. -- hsivonen 1.1750 + if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) { 1.1751 + // The decoder will swallow the BOM. The UTF-16 will re-sniff for 1.1752 + // endianness. The value of preferred is now either "UTF-8" or "UTF-16". 
1.1753 + preferred.Assign(maybePrefer); 1.1754 + source = kCharsetFromByteOrderMark; 1.1755 + } else if (source < kCharsetFromChannel) { 1.1756 + nsAutoCString declCharset; 1.1757 + 1.1758 + if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) { 1.1759 + if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) { 1.1760 + preferred.Assign(maybePrefer); 1.1761 + source = kCharsetFromMetaTag; 1.1762 + } 1.1763 + } 1.1764 + } 1.1765 + 1.1766 + pws->mParser->SetDocumentCharset(preferred, source); 1.1767 + pws->mParser->SetSinkCharset(preferred); 1.1768 + 1.1769 + } 1.1770 + 1.1771 + result = pws->mScanner->Append(fromRawSegment, theNumRead, pws->mRequest); 1.1772 + if (NS_SUCCEEDED(result)) { 1.1773 + *writeCount = count; 1.1774 + } 1.1775 + 1.1776 + return result; 1.1777 +} 1.1778 + 1.1779 +nsresult 1.1780 +nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext, 1.1781 + nsIInputStream *pIStream, uint64_t sourceOffset, 1.1782 + uint32_t aLength) 1.1783 +{ 1.1784 + NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState || 1.1785 + eOnDataAvail == mParserContext->mStreamListenerState), 1.1786 + "Error: OnStartRequest() must be called before OnDataAvailable()"); 1.1787 + NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream), 1.1788 + "Must have a buffered input stream"); 1.1789 + 1.1790 + nsresult rv = NS_OK; 1.1791 + 1.1792 + if (mIsAboutBlank) { 1.1793 + MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank"); 1.1794 + // ... but if an extension tries to feed us data for about:blank in a 1.1795 + // release build, silently ignore the data. 
1.1796 + uint32_t totalRead; 1.1797 + rv = pIStream->ReadSegments(NoOpParserWriteFunc, 1.1798 + nullptr, 1.1799 + aLength, 1.1800 + &totalRead); 1.1801 + return rv; 1.1802 + } 1.1803 + 1.1804 + CParserContext *theContext = mParserContext; 1.1805 + 1.1806 + while (theContext && theContext->mRequest != request) { 1.1807 + theContext = theContext->mPrevContext; 1.1808 + } 1.1809 + 1.1810 + if (theContext) { 1.1811 + theContext->mStreamListenerState = eOnDataAvail; 1.1812 + 1.1813 + if (eInvalidDetect == theContext->mAutoDetectStatus) { 1.1814 + if (theContext->mScanner) { 1.1815 + nsScannerIterator iter; 1.1816 + theContext->mScanner->EndReading(iter); 1.1817 + theContext->mScanner->SetPosition(iter, true); 1.1818 + } 1.1819 + } 1.1820 + 1.1821 + uint32_t totalRead; 1.1822 + ParserWriteStruct pws; 1.1823 + pws.mNeedCharsetCheck = true; 1.1824 + pws.mParser = this; 1.1825 + pws.mScanner = theContext->mScanner; 1.1826 + pws.mRequest = request; 1.1827 + 1.1828 + rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead); 1.1829 + if (NS_FAILED(rv)) { 1.1830 + return rv; 1.1831 + } 1.1832 + 1.1833 + // Don't bother to start parsing until we've seen some 1.1834 + // non-whitespace data 1.1835 + if (IsOkToProcessNetworkData() && 1.1836 + theContext->mScanner->FirstNonWhitespacePosition() >= 0) { 1.1837 + nsCOMPtr<nsIParser> kungFuDeathGrip(this); 1.1838 + nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); 1.1839 + mProcessingNetworkData = true; 1.1840 + if (mSink) { 1.1841 + mSink->WillParse(); 1.1842 + } 1.1843 + rv = ResumeParse(); 1.1844 + mProcessingNetworkData = false; 1.1845 + } 1.1846 + } else { 1.1847 + rv = NS_ERROR_UNEXPECTED; 1.1848 + } 1.1849 + 1.1850 + return rv; 1.1851 +} 1.1852 + 1.1853 +/** 1.1854 + * This is called by the networking library once the last block of data 1.1855 + * has been collected from the net. 
1.1856 + */ 1.1857 +nsresult 1.1858 +nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext, 1.1859 + nsresult status) 1.1860 +{ 1.1861 + nsresult rv = NS_OK; 1.1862 + 1.1863 + CParserContext *pc = mParserContext; 1.1864 + while (pc) { 1.1865 + if (pc->mRequest == request) { 1.1866 + pc->mStreamListenerState = eOnStop; 1.1867 + pc->mScanner->SetIncremental(false); 1.1868 + break; 1.1869 + } 1.1870 + 1.1871 + pc = pc->mPrevContext; 1.1872 + } 1.1873 + 1.1874 + mStreamStatus = status; 1.1875 + 1.1876 + if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) { 1.1877 + mProcessingNetworkData = true; 1.1878 + if (mSink) { 1.1879 + mSink->WillParse(); 1.1880 + } 1.1881 + rv = ResumeParse(true, true); 1.1882 + mProcessingNetworkData = false; 1.1883 + } 1.1884 + 1.1885 + // If the parser isn't enabled, we don't finish parsing till 1.1886 + // it is reenabled. 1.1887 + 1.1888 + 1.1889 + // XXX Should we wait to notify our observers as well if the 1.1890 + // parser isn't yet enabled? 1.1891 + if (mObserver) { 1.1892 + mObserver->OnStopRequest(request, aContext, status); 1.1893 + } 1.1894 + 1.1895 + return rv; 1.1896 +} 1.1897 + 1.1898 + 1.1899 +/******************************************************************* 1.1900 + Here come the tokenization methods... 1.1901 + *******************************************************************/ 1.1902 + 1.1903 + 1.1904 +/** 1.1905 + * Part of the code sandwich, this gets called right before 1.1906 + * the tokenization process begins. The main reason for 1.1907 + * this call is to allow the delegate to do initialization. 
1.1908 + */ 1.1909 +bool 1.1910 +nsParser::WillTokenize(bool aIsFinalChunk) 1.1911 +{ 1.1912 + if (!mParserContext) { 1.1913 + return true; 1.1914 + } 1.1915 + 1.1916 + nsITokenizer* theTokenizer; 1.1917 + nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); 1.1918 + NS_ENSURE_SUCCESS(result, false); 1.1919 + return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk)); 1.1920 +} 1.1921 + 1.1922 + 1.1923 +/** 1.1924 + * This is the primary control routine to consume tokens. 1.1925 + * It iteratively consumes tokens until an error occurs or 1.1926 + * you run out of data. 1.1927 + */ 1.1928 +nsresult nsParser::Tokenize(bool aIsFinalChunk) 1.1929 +{ 1.1930 + nsITokenizer* theTokenizer; 1.1931 + 1.1932 + nsresult result = NS_ERROR_NOT_AVAILABLE; 1.1933 + if (mParserContext) { 1.1934 + result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); 1.1935 + } 1.1936 + 1.1937 + if (NS_SUCCEEDED(result)) { 1.1938 + bool flushTokens = false; 1.1939 + 1.1940 + bool killSink = false; 1.1941 + 1.1942 + WillTokenize(aIsFinalChunk); 1.1943 + while (NS_SUCCEEDED(result)) { 1.1944 + mParserContext->mScanner->Mark(); 1.1945 + result = theTokenizer->ConsumeToken(*mParserContext->mScanner, 1.1946 + flushTokens); 1.1947 + if (NS_FAILED(result)) { 1.1948 + mParserContext->mScanner->RewindToMark(); 1.1949 + if (kEOF == result){ 1.1950 + break; 1.1951 + } 1.1952 + if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { 1.1953 + killSink = true; 1.1954 + result = Terminate(); 1.1955 + break; 1.1956 + } 1.1957 + } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) { 1.1958 + // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931. 1.1959 + // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 -- 1.1960 + // Also remember to update the marked position. 
1.1961 + mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS; 1.1962 + mParserContext->mScanner->Mark(); 1.1963 + break; 1.1964 + } 1.1965 + } 1.1966 + 1.1967 + if (killSink) { 1.1968 + mSink = nullptr; 1.1969 + } 1.1970 + } else { 1.1971 + result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; 1.1972 + } 1.1973 + 1.1974 + return result; 1.1975 +} 1.1976 + 1.1977 +/** 1.1978 + * Get the channel associated with this parser 1.1979 + * 1.1980 + * @param aChannel out param that will contain the result 1.1981 + * @return NS_OK if successful 1.1982 + */ 1.1983 +NS_IMETHODIMP 1.1984 +nsParser::GetChannel(nsIChannel** aChannel) 1.1985 +{ 1.1986 + nsresult result = NS_ERROR_NOT_AVAILABLE; 1.1987 + if (mParserContext && mParserContext->mRequest) { 1.1988 + result = CallQueryInterface(mParserContext->mRequest, aChannel); 1.1989 + } 1.1990 + return result; 1.1991 +} 1.1992 + 1.1993 +/** 1.1994 + * Get the DTD associated with this parser 1.1995 + */ 1.1996 +NS_IMETHODIMP 1.1997 +nsParser::GetDTD(nsIDTD** aDTD) 1.1998 +{ 1.1999 + if (mParserContext) { 1.2000 + NS_IF_ADDREF(*aDTD = mDTD); 1.2001 + } 1.2002 + 1.2003 + return NS_OK; 1.2004 +} 1.2005 + 1.2006 +/** 1.2007 + * Get this as nsIStreamListener 1.2008 + */ 1.2009 +nsIStreamListener* 1.2010 +nsParser::GetStreamListener() 1.2011 +{ 1.2012 + return this; 1.2013 +}