michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set sw=2 ts=2 et tw=79: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsIAtom.h" michael@0: #include "nsParser.h" michael@0: #include "nsString.h" michael@0: #include "nsCRT.h" michael@0: #include "nsScanner.h" michael@0: #include "plstr.h" michael@0: #include "nsIStringStream.h" michael@0: #include "nsIChannel.h" michael@0: #include "nsICachingChannel.h" michael@0: #include "nsICacheEntryDescriptor.h" michael@0: #include "nsIInputStream.h" michael@0: #include "CNavDTD.h" michael@0: #include "prenv.h" michael@0: #include "prlock.h" michael@0: #include "prcvar.h" michael@0: #include "nsParserCIID.h" michael@0: #include "nsReadableUtils.h" michael@0: #include "nsCOMPtr.h" michael@0: #include "nsExpatDriver.h" michael@0: #include "nsIServiceManager.h" michael@0: #include "nsICategoryManager.h" michael@0: #include "nsISupportsPrimitives.h" michael@0: #include "nsIFragmentContentSink.h" michael@0: #include "nsStreamUtils.h" michael@0: #include "nsHTMLTokenizer.h" michael@0: #include "nsNetUtil.h" michael@0: #include "nsScriptLoader.h" michael@0: #include "nsDataHashtable.h" michael@0: #include "nsXPCOMCIDInternal.h" michael@0: #include "nsMimeTypes.h" michael@0: #include "mozilla/CondVar.h" michael@0: #include "mozilla/Mutex.h" michael@0: #include "nsParserConstants.h" michael@0: #include "nsCharsetSource.h" michael@0: #include "nsContentUtils.h" michael@0: #include "nsThreadUtils.h" michael@0: #include "nsIHTMLContentSink.h" michael@0: michael@0: #include "mozilla/dom/EncodingUtils.h" michael@0: michael@0: using namespace mozilla; michael@0: using mozilla::dom::EncodingUtils; michael@0: michael@0: #define NS_PARSER_FLAG_PARSER_ENABLED 
0x00000002 michael@0: #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004 michael@0: #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008 michael@0: #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020 michael@0: #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040 michael@0: michael@0: //-------------- Begin ParseContinue Event Definition ------------------------ michael@0: /* michael@0: The parser can be explicitly interrupted by passing a return value of michael@0: NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause michael@0: the parser to stop processing and allow the application to return to the event michael@0: loop. The data which was left at the time of interruption will be processed michael@0: the next time OnDataAvailable is called. If the parser has received its final michael@0: chunk of data then OnDataAvailable will no longer be called by the networking michael@0: module, so the parser will schedule a nsParserContinueEvent which will call michael@0: the parser to process the remaining data after returning to the event loop. michael@0: If the parser is interrupted while processing the remaining data it will michael@0: schedule another ParseContinueEvent. The processing of data followed by michael@0: scheduling of the continue events will proceed until either: michael@0: michael@0: 1) All of the remaining data can be processed without interrupting michael@0: 2) The parser has been cancelled. michael@0: michael@0: michael@0: This capability is currently used in CNavDTD and nsHTMLContentSink. The michael@0: nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be michael@0: processed and when each token is processed. The nsHTML content sink records michael@0: the time when the chunk has started processing and will return michael@0: NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a michael@0: threshold called max tokenizing processing time. 
This allows the content sink
to limit how much data is processed in a single chunk which in turn gates how
much time is spent away from the event loop. Processing smaller chunks of data
also reduces the time spent in subsequent reflows.

This capability is most apparent when loading large documents. If the maximum
token processing time is set small enough the application will remain
responsive during document load.

A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
been interrupted when the last chunk of data arrived. The document is complete
when all of the document has been tokenized and there aren't any pending
nsParserContinueEvents. This can cause problems if the application assumes
that it can monitor the load requests to determine when the document load has
been completed. This is what happens in Mozilla. The document is considered
completely loaded when all of the load requests have been satisfied. To delay
the document load until all of the parsing has been completed the
nsHTMLContentSink adds a dummy parser load request which is not removed until
the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
DidBuildModel until the final chunk of data has been passed to the parser
through the OnDataAvailable and there aren't any pending
nsParserContinueEvents.

Currently the parser ignores requests to be interrupted during the
processing of script. This is because a document.write followed by JavaScript
calls to manipulate the DOM may fail if the parser was interrupted during the
document.write.
michael@0: michael@0: For more details @see bugzilla bug 76722 michael@0: */ michael@0: michael@0: michael@0: class nsParserContinueEvent : public nsRunnable michael@0: { michael@0: public: michael@0: nsRefPtr mParser; michael@0: michael@0: nsParserContinueEvent(nsParser* aParser) michael@0: : mParser(aParser) michael@0: {} michael@0: michael@0: NS_IMETHOD Run() michael@0: { michael@0: mParser->HandleParserContinueEvent(this); michael@0: return NS_OK; michael@0: } michael@0: }; michael@0: michael@0: //-------------- End ParseContinue Event Definition ------------------------ michael@0: michael@0: /** michael@0: * default constructor michael@0: */ michael@0: nsParser::nsParser() michael@0: { michael@0: Initialize(true); michael@0: } michael@0: michael@0: nsParser::~nsParser() michael@0: { michael@0: Cleanup(); michael@0: } michael@0: michael@0: void michael@0: nsParser::Initialize(bool aConstructor) michael@0: { michael@0: if (aConstructor) { michael@0: // Raw pointer michael@0: mParserContext = 0; michael@0: } michael@0: else { michael@0: // nsCOMPtrs michael@0: mObserver = nullptr; michael@0: mUnusedInput.Truncate(); michael@0: } michael@0: michael@0: mContinueEvent = nullptr; michael@0: mCharsetSource = kCharsetUninitialized; michael@0: mCharset.AssignLiteral("ISO-8859-1"); michael@0: mInternalState = NS_OK; michael@0: mStreamStatus = NS_OK; michael@0: mCommand = eViewNormal; michael@0: mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | michael@0: NS_PARSER_FLAG_PARSER_ENABLED | michael@0: NS_PARSER_FLAG_CAN_TOKENIZE; michael@0: michael@0: mProcessingNetworkData = false; michael@0: mIsAboutBlank = false; michael@0: } michael@0: michael@0: void michael@0: nsParser::Cleanup() michael@0: { michael@0: #ifdef DEBUG michael@0: if (mParserContext && mParserContext->mPrevContext) { michael@0: NS_WARNING("Extra parser contexts still on the parser stack"); michael@0: } michael@0: #endif michael@0: michael@0: while (mParserContext) { michael@0: CParserContext *pc = 
mParserContext->mPrevContext; michael@0: delete mParserContext; michael@0: mParserContext = pc; michael@0: } michael@0: michael@0: // It should not be possible for this flag to be set when we are getting michael@0: // destroyed since this flag implies a pending nsParserContinueEvent, which michael@0: // has an owning reference to |this|. michael@0: NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad"); michael@0: } michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_END michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD) michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink) michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) michael@0: CParserContext *pc = tmp->mParserContext; michael@0: while (pc) { michael@0: cb.NoteXPCOMChild(pc->mTokenizer); michael@0: pc = pc->mPrevContext; michael@0: } michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END michael@0: michael@0: NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser) michael@0: NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser) michael@0: NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) michael@0: NS_INTERFACE_MAP_ENTRY(nsIStreamListener) michael@0: NS_INTERFACE_MAP_ENTRY(nsIParser) michael@0: NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) michael@0: NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) michael@0: NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser) michael@0: NS_INTERFACE_MAP_END michael@0: michael@0: // The parser continue event is posted only if michael@0: // all of the data to parse has been passed to ::OnDataAvailable michael@0: // and the parser has been interrupted by the content sink michael@0: // because the processing of tokens took too long. 
michael@0: michael@0: nsresult michael@0: nsParser::PostContinueEvent() michael@0: { michael@0: if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) { michael@0: // If this flag isn't set, then there shouldn't be a live continue event! michael@0: NS_ASSERTION(!mContinueEvent, "bad"); michael@0: michael@0: // This creates a reference cycle between this and the event that is michael@0: // broken when the event fires. michael@0: nsCOMPtr event = new nsParserContinueEvent(this); michael@0: if (NS_FAILED(NS_DispatchToCurrentThread(event))) { michael@0: NS_WARNING("failed to dispatch parser continuation event"); michael@0: } else { michael@0: mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; michael@0: mContinueEvent = event; michael@0: } michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::GetCommand(nsCString& aCommand) michael@0: { michael@0: aCommand = mCommandStr; michael@0: } michael@0: michael@0: /** michael@0: * Call this method once you've created a parser, and want to instruct it michael@0: * about the command which caused the parser to be constructed. For example, michael@0: * this allows us to select a DTD which can do, say, view-source. michael@0: * michael@0: * @param aCommand the command string to set michael@0: */ michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::SetCommand(const char* aCommand) michael@0: { michael@0: mCommandStr.Assign(aCommand); michael@0: if (mCommandStr.Equals("view-source")) { michael@0: mCommand = eViewSource; michael@0: } else if (mCommandStr.Equals("view-fragment")) { michael@0: mCommand = eViewFragment; michael@0: } else { michael@0: mCommand = eViewNormal; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Call this method once you've created a parser, and want to instruct it michael@0: * about the command which caused the parser to be constructed. For example, michael@0: * this allows us to select a DTD which can do, say, view-source. 
michael@0: * michael@0: * @param aParserCommand the command to set michael@0: */ michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::SetCommand(eParserCommands aParserCommand) michael@0: { michael@0: mCommand = aParserCommand; michael@0: } michael@0: michael@0: /** michael@0: * Call this method once you've created a parser, and want to instruct it michael@0: * about what charset to load michael@0: * michael@0: * @param aCharset- the charset of a document michael@0: * @param aCharsetSource- the source of the charset michael@0: */ michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource) michael@0: { michael@0: mCharset = aCharset; michael@0: mCharsetSource = aCharsetSource; michael@0: if (mParserContext && mParserContext->mScanner) { michael@0: mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource); michael@0: } michael@0: } michael@0: michael@0: void michael@0: nsParser::SetSinkCharset(nsACString& aCharset) michael@0: { michael@0: if (mSink) { michael@0: mSink->SetDocumentCharset(aCharset); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * This method gets called in order to set the content michael@0: * sink for this parser to dump nodes to. 
michael@0: * michael@0: * @param nsIContentSink interface for node receiver michael@0: */ michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::SetContentSink(nsIContentSink* aSink) michael@0: { michael@0: NS_PRECONDITION(aSink, "sink cannot be null!"); michael@0: mSink = aSink; michael@0: michael@0: if (mSink) { michael@0: mSink->SetParser(this); michael@0: nsCOMPtr htmlSink = do_QueryInterface(mSink); michael@0: if (htmlSink) { michael@0: mIsAboutBlank = true; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * retrieve the sink set into the parser michael@0: * @return current sink michael@0: */ michael@0: NS_IMETHODIMP_(nsIContentSink*) michael@0: nsParser::GetContentSink() michael@0: { michael@0: return mSink; michael@0: } michael@0: michael@0: /** michael@0: * Determine what DTD mode (and thus what layout nsCompatibility mode) michael@0: * to use for this document based on the first chunk of data received michael@0: * from the network (each parsercontext can have its own mode). (No, michael@0: * this is not an optimal solution -- we really don't need to know until michael@0: * after we've received the DOCTYPE, and this could easily be part of michael@0: * the regular parsing process if the parser were designed in a way that michael@0: * made such modifications easy.) michael@0: */ michael@0: michael@0: // Parse the PS production in the SGML spec (excluding the part dealing michael@0: // with entity references) starting at theIndex into theBuffer, and michael@0: // return the first index after the end of the production. 
michael@0: static int32_t michael@0: ParsePS(const nsString& aBuffer, int32_t aIndex) michael@0: { michael@0: for (;;) { michael@0: char16_t ch = aBuffer.CharAt(aIndex); michael@0: if ((ch == char16_t(' ')) || (ch == char16_t('\t')) || michael@0: (ch == char16_t('\n')) || (ch == char16_t('\r'))) { michael@0: ++aIndex; michael@0: } else if (ch == char16_t('-')) { michael@0: int32_t tmpIndex; michael@0: if (aBuffer.CharAt(aIndex+1) == char16_t('-') && michael@0: kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) { michael@0: aIndex = tmpIndex + 2; michael@0: } else { michael@0: return aIndex; michael@0: } michael@0: } else { michael@0: return aIndex; michael@0: } michael@0: } michael@0: } michael@0: michael@0: #define PARSE_DTD_HAVE_DOCTYPE (1<<0) michael@0: #define PARSE_DTD_HAVE_PUBLIC_ID (1<<1) michael@0: #define PARSE_DTD_HAVE_SYSTEM_ID (1<<2) michael@0: #define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3) michael@0: michael@0: // return true on success (includes not present), false on failure michael@0: static bool michael@0: ParseDocTypeDecl(const nsString &aBuffer, michael@0: int32_t *aResultFlags, michael@0: nsString &aPublicID, michael@0: nsString &aSystemID) michael@0: { michael@0: bool haveDoctype = false; michael@0: *aResultFlags = 0; michael@0: michael@0: // Skip through any comments and processing instructions michael@0: // The PI-skipping is a bit of a hack. 
michael@0: int32_t theIndex = 0; michael@0: do { michael@0: theIndex = aBuffer.FindChar('<', theIndex); michael@0: if (theIndex == kNotFound) break; michael@0: char16_t nextChar = aBuffer.CharAt(theIndex+1); michael@0: if (nextChar == char16_t('!')) { michael@0: int32_t tmpIndex = theIndex + 2; michael@0: if (kNotFound != michael@0: (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) { michael@0: haveDoctype = true; michael@0: theIndex += 7; // skip "DOCTYPE" michael@0: break; michael@0: } michael@0: theIndex = ParsePS(aBuffer, tmpIndex); michael@0: theIndex = aBuffer.FindChar('>', theIndex); michael@0: } else if (nextChar == char16_t('?')) { michael@0: theIndex = aBuffer.FindChar('>', theIndex); michael@0: } else { michael@0: break; michael@0: } michael@0: } while (theIndex != kNotFound); michael@0: michael@0: if (!haveDoctype) michael@0: return true; michael@0: *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE; michael@0: michael@0: theIndex = ParsePS(aBuffer, theIndex); michael@0: theIndex = aBuffer.Find("HTML", true, theIndex, 0); michael@0: if (kNotFound == theIndex) michael@0: return false; michael@0: theIndex = ParsePS(aBuffer, theIndex+4); michael@0: int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0); michael@0: michael@0: if (kNotFound != tmpIndex) { michael@0: theIndex = ParsePS(aBuffer, tmpIndex+6); michael@0: michael@0: // We get here only if we've read ')) { michael@0: // There was a public identifier, but no system michael@0: // identifier, michael@0: // so do nothing. michael@0: // This is needed to avoid the else at the end, and it's michael@0: // also the most common case. michael@0: } else if ((next == char16_t('\"')) || michael@0: (next == char16_t('\''))) { michael@0: // We found a system identifier. 
michael@0: *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; michael@0: int32_t SystemIDStart = theIndex + 1; michael@0: int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); michael@0: if (kNotFound == SystemIDEnd) michael@0: return false; michael@0: aSystemID = michael@0: Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); michael@0: } else if (next == char16_t('[')) { michael@0: // We found an internal subset. michael@0: *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; michael@0: } else { michael@0: // Something's wrong. michael@0: return false; michael@0: } michael@0: michael@0: // Since a public ID is a minimum literal, we must trim michael@0: // and collapse whitespace michael@0: aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart); michael@0: aPublicID.CompressWhitespace(true, true); michael@0: *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID; michael@0: } else { michael@0: tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0); michael@0: if (kNotFound != tmpIndex) { michael@0: // DOCTYPES with system ID but no Public ID michael@0: *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; michael@0: michael@0: theIndex = ParsePS(aBuffer, tmpIndex+6); michael@0: char16_t next = aBuffer.CharAt(theIndex); michael@0: if (next != char16_t('\"') && next != char16_t('\'')) michael@0: return false; michael@0: michael@0: int32_t SystemIDStart = theIndex + 1; michael@0: int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); michael@0: michael@0: if (kNotFound == SystemIDEnd) michael@0: return false; michael@0: aSystemID = michael@0: Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); michael@0: theIndex = ParsePS(aBuffer, SystemIDEnd + 1); michael@0: } michael@0: michael@0: char16_t nextChar = aBuffer.CharAt(theIndex); michael@0: if (nextChar == char16_t('[')) michael@0: *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; michael@0: else if (nextChar != char16_t('>')) michael@0: return false; michael@0: } michael@0: return true; michael@0: } 
// One row of the public-identifier table: the (lower-cased) public ID and
// the mode to pick depending on whether the DOCTYPE also carried a system
// identifier.
struct PubIDInfo
{
  enum eMode {
    eQuirks,         /* always quirks mode, unless there's an internal subset */
    eAlmostStandards,/* eCompatibility_AlmostStandards */
    eFullStandards   /* eCompatibility_FullStandards */
      /*
       * public IDs that should trigger strict mode are not listed
       * since we want all future public IDs to trigger strict mode as
       * well
       */
  };

  const char* name;        // public identifier, lower case
  eMode mode_if_no_sysid;  // mode when the DOCTYPE has no system identifier
  eMode mode_if_sysid;     // mode when the DOCTYPE has a system identifier
};

// Number of elements in a true array (not a pointer). The argument is
// parenthesised so that any array-valued expression can be passed safely.
#define ELEMENTS_OF(array_) (sizeof(array_)/sizeof((array_)[0]))

// These must be in nsCRT::strcmp order so binary-search can be used.
// This is verified, |#ifdef DEBUG|, below.

// Even though public identifiers should be case sensitive, we will do
// all comparisons after converting to lower case in order to do
// case-insensitive comparison since there are a number of existing web
// sites that use the incorrect case.  Therefore all of the public
// identifiers below are in lower case (with the correct case following,
// in comments).  The case is verified, |#ifdef DEBUG|, below.
michael@0: static const PubIDInfo kPublicIDs[] = { michael@0: {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" 
*/, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 3//en" /* 
"-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//microsoft//dtd internet 
explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, 
michael@0: {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, michael@0: {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, michael@0: {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, michael@0: {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, michael@0: {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, 
PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, michael@0: }; michael@0: michael@0: #ifdef DEBUG michael@0: static void michael@0: VerifyPublicIDs() michael@0: { michael@0: static bool gVerified = false; michael@0: if (!gVerified) { michael@0: gVerified = true; michael@0: uint32_t i; michael@0: for (i = 0; i < ELEMENTS_OF(kPublicIDs) - 1; ++i) { michael@0: if (nsCRT::strcmp(kPublicIDs[i].name, kPublicIDs[i+1].name) >= 0) { michael@0: NS_NOTREACHED("doctypes out of order"); michael@0: printf("Doctypes %s and %s out of order.\n", michael@0: kPublicIDs[i].name, kPublicIDs[i+1].name); michael@0: } michael@0: } michael@0: for (i = 0; i < ELEMENTS_OF(kPublicIDs); ++i) { michael@0: nsAutoCString lcPubID(kPublicIDs[i].name); michael@0: ToLowerCase(lcPubID); michael@0: if (nsCRT::strcmp(kPublicIDs[i].name, lcPubID.get()) != 0) { michael@0: NS_NOTREACHED("doctype not lower case"); michael@0: printf("Doctype %s not lower case.\n", kPublicIDs[i].name); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: static void michael@0: DetermineHTMLParseMode(const nsString& aBuffer, michael@0: nsDTDMode& aParseMode, michael@0: eParserDocType& aDocType) michael@0: { michael@0: #ifdef DEBUG michael@0: VerifyPublicIDs(); 
michael@0: #endif michael@0: int32_t resultFlags; michael@0: nsAutoString publicIDUCS2, sysIDUCS2; michael@0: if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) { michael@0: if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) { michael@0: // no DOCTYPE michael@0: aParseMode = eDTDMode_quirks; michael@0: aDocType = eHTML_Quirks; michael@0: } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) || michael@0: !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) { michael@0: // A doctype with an internal subset is always full_standards. michael@0: // A doctype without a public ID is always full_standards. michael@0: aDocType = eHTML_Strict; michael@0: aParseMode = eDTDMode_full_standards; michael@0: michael@0: // Special hack for IBM's custom DOCTYPE. michael@0: if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) && michael@0: sysIDUCS2 == NS_LITERAL_STRING( michael@0: "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { michael@0: aParseMode = eDTDMode_quirks; michael@0: aDocType = eHTML_Quirks; michael@0: } michael@0: michael@0: } else { michael@0: // We have to check our list of public IDs to see what to do. michael@0: // Yes, we want UCS2 to ASCII lossy conversion. michael@0: nsAutoCString publicID; michael@0: publicID.AssignWithConversion(publicIDUCS2); michael@0: michael@0: // See comment above definition of kPublicIDs about case michael@0: // sensitivity. michael@0: ToLowerCase(publicID); michael@0: michael@0: // Binary search to see if we can find the correct public ID michael@0: // These must be signed since maximum can go below zero and we'll michael@0: // crash if it's unsigned. 
michael@0: int32_t minimum = 0; michael@0: int32_t maximum = ELEMENTS_OF(kPublicIDs) - 1; michael@0: int32_t index; michael@0: for (;;) { michael@0: index = (minimum + maximum) / 2; michael@0: int32_t comparison = michael@0: nsCRT::strcmp(publicID.get(), kPublicIDs[index].name); michael@0: if (comparison == 0) michael@0: break; michael@0: if (comparison < 0) michael@0: maximum = index - 1; michael@0: else michael@0: minimum = index + 1; michael@0: michael@0: if (maximum < minimum) { michael@0: // The DOCTYPE is not in our list, so it must be full_standards. michael@0: aParseMode = eDTDMode_full_standards; michael@0: aDocType = eHTML_Strict; michael@0: return; michael@0: } michael@0: } michael@0: michael@0: switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) michael@0: ? kPublicIDs[index].mode_if_sysid michael@0: : kPublicIDs[index].mode_if_no_sysid) michael@0: { michael@0: case PubIDInfo::eQuirks: michael@0: aParseMode = eDTDMode_quirks; michael@0: aDocType = eHTML_Quirks; michael@0: break; michael@0: case PubIDInfo::eAlmostStandards: michael@0: aParseMode = eDTDMode_almost_standards; michael@0: aDocType = eHTML_Strict; michael@0: break; michael@0: case PubIDInfo::eFullStandards: michael@0: aParseMode = eDTDMode_full_standards; michael@0: aDocType = eHTML_Strict; michael@0: break; michael@0: default: michael@0: NS_NOTREACHED("no other cases!"); michael@0: } michael@0: } michael@0: } else { michael@0: // badly formed DOCTYPE -> quirks michael@0: aParseMode = eDTDMode_quirks; michael@0: aDocType = eHTML_Quirks; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode, michael@0: eParserDocType& aDocType, const nsACString& aMimeType) michael@0: { michael@0: if (aMimeType.EqualsLiteral(TEXT_HTML)) { michael@0: DetermineHTMLParseMode(aBuffer, aParseMode, aDocType); michael@0: } else if (nsContentUtils::IsPlainTextType(aMimeType)) { michael@0: aDocType = ePlainText; michael@0: aParseMode = 
eDTDMode_quirks; michael@0: } else { // Some form of XML michael@0: aDocType = eXML; michael@0: aParseMode = eDTDMode_full_standards; michael@0: } michael@0: } michael@0: michael@0: static nsIDTD* michael@0: FindSuitableDTD(CParserContext& aParserContext) michael@0: { michael@0: // We always find a DTD. michael@0: aParserContext.mAutoDetectStatus = ePrimaryDetect; michael@0: michael@0: // Quick check for view source. michael@0: NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource, michael@0: "The old parser is not supposed to be used for View Source anymore."); michael@0: michael@0: // Now see if we're parsing HTML (which, as far as we're concerned, simply michael@0: // means "not XML"). michael@0: if (aParserContext.mDocType != eXML) { michael@0: return new CNavDTD(); michael@0: } michael@0: michael@0: // If we're here, then we'd better be parsing XML. michael@0: NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?"); michael@0: return new nsExpatDriver(); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParser::CancelParsingEvents() michael@0: { michael@0: if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { michael@0: NS_ASSERTION(mContinueEvent, "mContinueEvent is null"); michael@0: // Revoke the pending continue parsing event michael@0: mContinueEvent = nullptr; michael@0: mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: //////////////////////////////////////////////////////////////////////// michael@0: michael@0: /** michael@0: * Evalutes EXPR1 and EXPR2 exactly once each, in that order. Stores the value michael@0: * of EXPR2 in RV is EXPR2 fails, otherwise RV contains the result of EXPR1 michael@0: * (which could be success or failure). 
michael@0: * michael@0: * To understand the motivation for this construct, consider these example michael@0: * methods: michael@0: * michael@0: * nsresult nsSomething::DoThatThing(nsIWhatever* obj) { michael@0: * nsresult rv = NS_OK; michael@0: * ... michael@0: * return obj->DoThatThing(); michael@0: * NS_ENSURE_SUCCESS(rv, rv); michael@0: * ... michael@0: * return rv; michael@0: * } michael@0: * michael@0: * void nsCaller::MakeThingsHappen() { michael@0: * return mSomething->DoThatThing(mWhatever); michael@0: * } michael@0: * michael@0: * Suppose, for whatever reason*, we want to shift responsibility for calling michael@0: * mWhatever->DoThatThing() from nsSomething::DoThatThing up to michael@0: * nsCaller::MakeThingsHappen. We might rewrite the two methods as follows: michael@0: * michael@0: * nsresult nsSomething::DoThatThing() { michael@0: * nsresult rv = NS_OK; michael@0: * ... michael@0: * ... michael@0: * return rv; michael@0: * } michael@0: * michael@0: * void nsCaller::MakeThingsHappen() { michael@0: * nsresult rv; michael@0: * PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(), michael@0: * mWhatever->DoThatThing(), michael@0: * rv); michael@0: * return rv; michael@0: * } michael@0: * michael@0: * *Possible reasons include: nsCaller doesn't want to give mSomething access michael@0: * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will michael@0: * be called regardless of how nsSomething::DoThatThing behaves, &c. michael@0: */ michael@0: #define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) { \ michael@0: nsresult RV##__temp = EXPR1; \ michael@0: RV = EXPR2; \ michael@0: if (NS_FAILED(RV)) { \ michael@0: RV = RV##__temp; \ michael@0: } \ michael@0: } michael@0: michael@0: /** michael@0: * This gets called just prior to the model actually michael@0: * being constructed. 
It's important to make this the michael@0: * last thing that happens right before parsing, so we michael@0: * can delay until the last moment the resolution of michael@0: * which DTD to use (unless of course we're assigned one). michael@0: */ michael@0: nsresult michael@0: nsParser::WillBuildModel(nsString& aFilename) michael@0: { michael@0: if (!mParserContext) michael@0: return kInvalidParserContext; michael@0: michael@0: if (eUnknownDetect != mParserContext->mAutoDetectStatus) michael@0: return NS_OK; michael@0: michael@0: if (eDTDMode_unknown == mParserContext->mDTDMode || michael@0: eDTDMode_autodetect == mParserContext->mDTDMode) { michael@0: char16_t buf[1025]; michael@0: nsFixedString theBuffer(buf, 1024, 0); michael@0: michael@0: // Grab 1024 characters, starting at the first non-whitespace michael@0: // character, to look for the doctype in. michael@0: mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition()); michael@0: DetermineParseMode(theBuffer, mParserContext->mDTDMode, michael@0: mParserContext->mDocType, mParserContext->mMimeType); michael@0: } michael@0: michael@0: NS_ASSERTION(!mDTD || !mParserContext->mPrevContext, michael@0: "Clobbering DTD for non-root parser context!"); michael@0: mDTD = FindSuitableDTD(*mParserContext); michael@0: NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY); michael@0: michael@0: nsITokenizer* tokenizer; michael@0: nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink); michael@0: nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode()); michael@0: // nsIDTD::WillBuildModel used to be responsible for calling michael@0: // nsIContentSink::WillBuildModel, but that obligation isn't expressible michael@0: // in the nsIDTD interface itself, so it's sounder and simpler to give that michael@0: // responsibility back to the parser. 
The former behavior of the DTD was to michael@0: // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns michael@0: // failure we should use sinkResult instead of rv, to preserve the old error michael@0: // handling behavior of the DTD: michael@0: return NS_FAILED(sinkResult) ? sinkResult : rv; michael@0: } michael@0: michael@0: /** michael@0: * This gets called when the parser is done with its input. michael@0: * Note that the parser may have been called recursively, so we michael@0: * have to check for a prev. context before closing out the DTD/sink. michael@0: */ michael@0: nsresult michael@0: nsParser::DidBuildModel(nsresult anErrorCode) michael@0: { michael@0: nsresult result = anErrorCode; michael@0: michael@0: if (IsComplete()) { michael@0: if (mParserContext && !mParserContext->mPrevContext) { michael@0: // Let sink know if we're about to end load because we've been terminated. michael@0: // In that case we don't want it to run deferred scripts. michael@0: bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING; michael@0: if (mDTD && mSink) { michael@0: nsresult dtdResult = mDTD->DidBuildModel(anErrorCode), michael@0: sinkResult = mSink->DidBuildModel(terminated); michael@0: // nsIDTD::DidBuildModel used to be responsible for calling michael@0: // nsIContentSink::DidBuildModel, but that obligation isn't expressible michael@0: // in the nsIDTD interface itself, so it's sounder and simpler to give michael@0: // that responsibility back to the parser. The former behavior of the michael@0: // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the michael@0: // sink returns failure we should use sinkResult instead of dtdResult, michael@0: // to preserve the old error handling behavior of the DTD: michael@0: result = NS_FAILED(sinkResult) ? sinkResult : dtdResult; michael@0: } michael@0: michael@0: //Ref. to bug 61462. 
michael@0: mParserContext->mRequest = 0; michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * This method adds a new parser context to the list, michael@0: * pushing the current one to the next position. michael@0: * michael@0: * @param ptr to new context michael@0: */ michael@0: void michael@0: nsParser::PushContext(CParserContext& aContext) michael@0: { michael@0: NS_ASSERTION(aContext.mPrevContext == mParserContext, michael@0: "Trying to push a context whose previous context differs from " michael@0: "the current parser context."); michael@0: mParserContext = &aContext; michael@0: } michael@0: michael@0: /** michael@0: * This method pops the topmost context off the stack, michael@0: * returning it to the user. The next context (if any) michael@0: * becomes the current context. michael@0: * @update gess7/22/98 michael@0: * @return prev. context michael@0: */ michael@0: CParserContext* michael@0: nsParser::PopContext() michael@0: { michael@0: CParserContext* oldContext = mParserContext; michael@0: if (oldContext) { michael@0: mParserContext = oldContext->mPrevContext; michael@0: if (mParserContext) { michael@0: // If the old context was blocked, propagate the blocked state michael@0: // back to the new one. Also, propagate the stream listener state michael@0: // but don't override onStop state to guarantee the call to DidBuildModel(). michael@0: if (mParserContext->mStreamListenerState != eOnStop) { michael@0: mParserContext->mStreamListenerState = oldContext->mStreamListenerState; michael@0: } michael@0: } michael@0: } michael@0: return oldContext; michael@0: } michael@0: michael@0: /** michael@0: * Call this when you want control whether or not the parser will parse michael@0: * and tokenize input (TRUE), or whether it just caches input to be michael@0: * parsed later (FALSE). michael@0: * michael@0: * @param aState determines whether we parse/tokenize or just cache. 
michael@0: * @return current state michael@0: */ michael@0: void michael@0: nsParser::SetUnusedInput(nsString& aBuffer) michael@0: { michael@0: mUnusedInput = aBuffer; michael@0: } michael@0: michael@0: /** michael@0: * Call this when you want to *force* the parser to terminate the michael@0: * parsing process altogether. This is binary -- so once you terminate michael@0: * you can't resume without restarting altogether. michael@0: */ michael@0: NS_IMETHODIMP michael@0: nsParser::Terminate(void) michael@0: { michael@0: // We should only call DidBuildModel once, so don't do anything if this is michael@0: // the second time that Terminate has been called. michael@0: if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult result = NS_OK; michael@0: // XXX - [ until we figure out a way to break parser-sink circularity ] michael@0: // Hack - Hold a reference until we are completely done... michael@0: nsCOMPtr kungFuDeathGrip(this); michael@0: mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; michael@0: michael@0: // CancelParsingEvents must be called to avoid leaking the nsParser object michael@0: // @see bug 108049 michael@0: // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents michael@0: // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note: michael@0: // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag. michael@0: CancelParsingEvents(); michael@0: michael@0: // If we got interrupted in the middle of a document.write, then we might michael@0: // have more than one parser context on our parsercontext stack. This has michael@0: // the effect of making DidBuildModel a no-op, meaning that we never call michael@0: // our sink's DidBuildModel and break the reference cycle, causing a leak. michael@0: // Since we're getting terminated, we manually clean up our context stack. 
michael@0: while (mParserContext && mParserContext->mPrevContext) { michael@0: CParserContext *prev = mParserContext->mPrevContext; michael@0: delete mParserContext; michael@0: mParserContext = prev; michael@0: } michael@0: michael@0: if (mDTD) { michael@0: mDTD->Terminate(); michael@0: DidBuildModel(result); michael@0: } else if (mSink) { michael@0: // We have no parser context or no DTD yet (so we got terminated before we michael@0: // got any data). Manually break the reference cycle with the sink. michael@0: result = mSink->DidBuildModel(true); michael@0: NS_ENSURE_SUCCESS(result, result); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParser::ContinueInterruptedParsing() michael@0: { michael@0: // If there are scripts executing, then the content sink is jumping the gun michael@0: // (probably due to a synchronous XMLHttpRequest) and will re-enable us michael@0: // later, see bug 460706. michael@0: if (!IsOkToProcessNetworkData()) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: // If the stream has already finished, there's a good chance michael@0: // that we might start closing things down when the parser michael@0: // is reenabled. To make sure that we're not deleted across michael@0: // the reenabling process, hold a reference to ourselves. michael@0: nsresult result=NS_OK; michael@0: nsCOMPtr kungFuDeathGrip(this); michael@0: nsCOMPtr sinkDeathGrip(mSink); michael@0: michael@0: #ifdef DEBUG michael@0: if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { michael@0: NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser."); michael@0: } michael@0: #endif michael@0: michael@0: bool isFinalChunk = mParserContext && michael@0: mParserContext->mStreamListenerState == eOnStop; michael@0: michael@0: mProcessingNetworkData = true; michael@0: if (mSink) { michael@0: mSink->WillParse(); michael@0: } michael@0: result = ResumeParse(true, isFinalChunk); // Ref. 
bug 57999 michael@0: mProcessingNetworkData = false; michael@0: michael@0: if (result != NS_OK) { michael@0: result=mInternalState; michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * Stops parsing temporarily. That's it will prevent the michael@0: * parser from building up content model. michael@0: */ michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::BlockParser() michael@0: { michael@0: mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED; michael@0: } michael@0: michael@0: /** michael@0: * Open up the parser for tokenization, building up content michael@0: * model..etc. However, this method does not resume parsing michael@0: * automatically. It's the callers' responsibility to restart michael@0: * the parsing engine. michael@0: */ michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::UnblockParser() michael@0: { michael@0: if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { michael@0: mFlags |= NS_PARSER_FLAG_PARSER_ENABLED; michael@0: } else { michael@0: NS_WARNING("Trying to unblock an unblocked parser."); michael@0: } michael@0: } michael@0: michael@0: NS_IMETHODIMP_(void) michael@0: nsParser::ContinueInterruptedParsingAsync() michael@0: { michael@0: mSink->ContinueInterruptedParsingAsync(); michael@0: } michael@0: michael@0: /** michael@0: * Call this to query whether the parser is enabled or not. michael@0: */ michael@0: NS_IMETHODIMP_(bool) michael@0: nsParser::IsParserEnabled() michael@0: { michael@0: return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0; michael@0: } michael@0: michael@0: /** michael@0: * Call this to query whether the parser thinks it's done with parsing. michael@0: */ michael@0: NS_IMETHODIMP_(bool) michael@0: nsParser::IsComplete() michael@0: { michael@0: return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); michael@0: } michael@0: michael@0: michael@0: void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev) michael@0: { michael@0: // Ignore any revoked continue events... 
michael@0: if (mContinueEvent != ev) michael@0: return; michael@0: michael@0: mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; michael@0: mContinueEvent = nullptr; michael@0: michael@0: NS_ASSERTION(IsOkToProcessNetworkData(), michael@0: "Interrupted in the middle of a script?"); michael@0: ContinueInterruptedParsing(); michael@0: } michael@0: michael@0: bool michael@0: nsParser::IsInsertionPointDefined() michael@0: { michael@0: return false; michael@0: } michael@0: michael@0: void michael@0: nsParser::BeginEvaluatingParserInsertedScript() michael@0: { michael@0: } michael@0: michael@0: void michael@0: nsParser::EndEvaluatingParserInsertedScript() michael@0: { michael@0: } michael@0: michael@0: void michael@0: nsParser::MarkAsNotScriptCreated(const char* aCommand) michael@0: { michael@0: } michael@0: michael@0: bool michael@0: nsParser::IsScriptCreated() michael@0: { michael@0: return false; michael@0: } michael@0: michael@0: /** michael@0: * This is the main controlling routine in the parsing process. michael@0: * Note that it may get called multiple times for the same scanner, michael@0: * since this is a pushed based system, and all the tokens may michael@0: * not have been consumed by the scanner during a given invocation michael@0: * of this method. 
michael@0: */ michael@0: NS_IMETHODIMP michael@0: nsParser::Parse(nsIURI* aURL, michael@0: nsIRequestObserver* aListener, michael@0: void* aKey, michael@0: nsDTDMode aMode) michael@0: { michael@0: michael@0: NS_PRECONDITION(aURL, "Error: Null URL given"); michael@0: michael@0: nsresult result=kBadURL; michael@0: mObserver = aListener; michael@0: michael@0: if (aURL) { michael@0: nsAutoCString spec; michael@0: nsresult rv = aURL->GetSpec(spec); michael@0: if (rv != NS_OK) { michael@0: return rv; michael@0: } michael@0: NS_ConvertUTF8toUTF16 theName(spec); michael@0: michael@0: nsScanner* theScanner = new nsScanner(theName, false); michael@0: CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey, michael@0: mCommand, aListener); michael@0: if (pc && theScanner) { michael@0: pc->mMultipart = true; michael@0: pc->mContextType = CParserContext::eCTURL; michael@0: pc->mDTDMode = aMode; michael@0: PushContext(*pc); michael@0: michael@0: result = NS_OK; michael@0: } else { michael@0: result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT; michael@0: } michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * Used by XML fragment parsing below. michael@0: * michael@0: * @param aSourceBuffer contains a string-full of real content michael@0: */ michael@0: nsresult michael@0: nsParser::Parse(const nsAString& aSourceBuffer, michael@0: void* aKey, michael@0: bool aLastCall) michael@0: { michael@0: nsresult result = NS_OK; michael@0: michael@0: // Don't bother if we're never going to parse this. michael@0: if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { michael@0: return result; michael@0: } michael@0: michael@0: if (!aLastCall && aSourceBuffer.IsEmpty()) { michael@0: // Nothing is being passed to the parser so return michael@0: // immediately. mUnusedInput will get processed when michael@0: // some data is actually passed in. 
michael@0: // But if this is the last call, make sure to finish up michael@0: // stuff correctly. michael@0: return result; michael@0: } michael@0: michael@0: // Maintain a reference to ourselves so we don't go away michael@0: // till we're completely done. michael@0: nsCOMPtr kungFuDeathGrip(this); michael@0: michael@0: if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) { michael@0: // Note: The following code will always find the parser context associated michael@0: // with the given key, even if that context has been suspended (e.g., for michael@0: // another document.write call). This doesn't appear to be exactly what IE michael@0: // does in the case where this happens, but this makes more sense. michael@0: CParserContext* pc = mParserContext; michael@0: while (pc && pc->mKey != aKey) { michael@0: pc = pc->mPrevContext; michael@0: } michael@0: michael@0: if (!pc) { michael@0: // Only make a new context if we don't have one, OR if we do, but has a michael@0: // different context key. michael@0: nsScanner* theScanner = new nsScanner(mUnusedInput); michael@0: NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY); michael@0: michael@0: eAutoDetectResult theStatus = eUnknownDetect; michael@0: michael@0: if (mParserContext && michael@0: mParserContext->mMimeType.EqualsLiteral("application/xml")) { michael@0: // Ref. Bug 90379 michael@0: NS_ASSERTION(mDTD, "How come the DTD is null?"); michael@0: michael@0: if (mParserContext) { michael@0: theStatus = mParserContext->mAutoDetectStatus; michael@0: // Added this to fix bug 32022. 
michael@0: } michael@0: } michael@0: michael@0: pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, michael@0: 0, theStatus, aLastCall); michael@0: NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY); michael@0: michael@0: PushContext(*pc); michael@0: michael@0: pc->mMultipart = !aLastCall; // By default michael@0: if (pc->mPrevContext) { michael@0: pc->mMultipart |= pc->mPrevContext->mMultipart; michael@0: } michael@0: michael@0: // Start fix bug 40143 michael@0: if (pc->mMultipart) { michael@0: pc->mStreamListenerState = eOnDataAvail; michael@0: if (pc->mScanner) { michael@0: pc->mScanner->SetIncremental(true); michael@0: } michael@0: } else { michael@0: pc->mStreamListenerState = eOnStop; michael@0: if (pc->mScanner) { michael@0: pc->mScanner->SetIncremental(false); michael@0: } michael@0: } michael@0: // end fix for 40143 michael@0: michael@0: pc->mContextType=CParserContext::eCTString; michael@0: pc->SetMimeType(NS_LITERAL_CSTRING("application/xml")); michael@0: pc->mDTDMode = eDTDMode_full_standards; michael@0: michael@0: mUnusedInput.Truncate(); michael@0: michael@0: pc->mScanner->Append(aSourceBuffer); michael@0: // Do not interrupt document.write() - bug 95487 michael@0: result = ResumeParse(false, false, false); michael@0: } else { michael@0: pc->mScanner->Append(aSourceBuffer); michael@0: if (!pc->mPrevContext) { michael@0: // Set stream listener state to eOnStop, on the final context - Fix 68160, michael@0: // to guarantee DidBuildModel() call - Fix 36148 michael@0: if (aLastCall) { michael@0: pc->mStreamListenerState = eOnStop; michael@0: pc->mScanner->SetIncremental(false); michael@0: } michael@0: michael@0: if (pc == mParserContext) { michael@0: // If pc is not mParserContext, then this call to ResumeParse would michael@0: // do the wrong thing and try to continue parsing using michael@0: // mParserContext. We need to wait to actually resume parsing on pc. 
michael@0: ResumeParse(false, false, false); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsParser::ParseFragment(const nsAString& aSourceBuffer, michael@0: nsTArray& aTagStack) michael@0: { michael@0: nsresult result = NS_OK; michael@0: nsAutoString theContext; michael@0: uint32_t theCount = aTagStack.Length(); michael@0: uint32_t theIndex = 0; michael@0: michael@0: // Disable observers for fragments michael@0: mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED; michael@0: michael@0: for (theIndex = 0; theIndex < theCount; theIndex++) { michael@0: theContext.AppendLiteral("<"); michael@0: theContext.Append(aTagStack[theCount - theIndex - 1]); michael@0: theContext.AppendLiteral(">"); michael@0: } michael@0: michael@0: if (theCount == 0) { michael@0: // Ensure that the buffer is not empty. Because none of the DTDs care michael@0: // about leading whitespace, this doesn't change the result. michael@0: theContext.AssignLiteral(" "); michael@0: } michael@0: michael@0: // First, parse the context to build up the DTD's tag stack. Note that we michael@0: // pass false for the aLastCall parameter. michael@0: result = Parse(theContext, michael@0: (void*)&theContext, michael@0: false); michael@0: if (NS_FAILED(result)) { michael@0: mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; michael@0: return result; michael@0: } michael@0: michael@0: if (!mSink) { michael@0: // Parse must have failed in the XML case and so the sink was killed. michael@0: return NS_ERROR_HTMLPARSER_STOPPARSING; michael@0: } michael@0: michael@0: nsCOMPtr fragSink = do_QueryInterface(mSink); michael@0: NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink"); michael@0: michael@0: fragSink->WillBuildContent(); michael@0: // Now, parse the actual content. Note that this is the last call michael@0: // for HTML content, but for XML, we will want to build and parse michael@0: // the end tags. 
However, if tagStack is empty, it's the last call michael@0: // for XML as well. michael@0: if (theCount == 0) { michael@0: result = Parse(aSourceBuffer, michael@0: &theContext, michael@0: true); michael@0: fragSink->DidBuildContent(); michael@0: } else { michael@0: // Add an end tag chunk, so expat will read the whole source buffer, michael@0: // and not worry about ']]' etc. michael@0: result = Parse(aSourceBuffer + NS_LITERAL_STRING("DidBuildContent(); michael@0: michael@0: if (NS_SUCCEEDED(result)) { michael@0: nsAutoString endContext; michael@0: for (theIndex = 0; theIndex < theCount; theIndex++) { michael@0: // we already added an end tag chunk above michael@0: if (theIndex > 0) { michael@0: endContext.AppendLiteral(""); michael@0: } michael@0: michael@0: result = Parse(endContext, michael@0: &theContext, michael@0: true); michael@0: } michael@0: } michael@0: michael@0: mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; michael@0: michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * This routine is called to cause the parser to continue parsing its michael@0: * underlying stream. This call allows the parse process to happen in michael@0: * chunks, such as when the content is push based, and we need to parse in michael@0: * pieces. michael@0: * michael@0: * An interesting change in how the parser gets used has led us to add extra michael@0: * processing to this method. The case occurs when the parser is blocked in michael@0: * one context, and gets a parse(string) call in another context. In this michael@0: * case, the parserContexts are linked. No problem. michael@0: * michael@0: * The problem is that Parse(string) assumes that it can proceed unabated, michael@0: * but if the parser is already blocked that assumption is false. So we michael@0: * needed to add a mechanism here to allow the parser to continue to process michael@0: * (the pop and free) contexts until 1) it get's blocked again; 2) it runs michael@0: * out of contexts. 
michael@0: * michael@0: * michael@0: * @param allowItertion : set to true if non-script resumption is requested michael@0: * @param aIsFinalChunk : tells us when the last chunk of data is provided. michael@0: * @return error code -- 0 if ok, non-zero if error. michael@0: */ michael@0: nsresult michael@0: nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk, michael@0: bool aCanInterrupt) michael@0: { michael@0: nsresult result = NS_OK; michael@0: michael@0: if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) && michael@0: mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { michael@0: michael@0: result = WillBuildModel(mParserContext->mScanner->GetFilename()); michael@0: if (NS_FAILED(result)) { michael@0: mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE; michael@0: return result; michael@0: } michael@0: michael@0: if (mDTD) { michael@0: mSink->WillResume(); michael@0: bool theIterationIsOk = true; michael@0: michael@0: while (result == NS_OK && theIterationIsOk) { michael@0: if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) { michael@0: // -- Ref: Bug# 22485 -- michael@0: // Insert the unused input into the source buffer michael@0: // as if it was read from the input stream. michael@0: // Adding UngetReadable() per vidur!! michael@0: mParserContext->mScanner->UngetReadable(mUnusedInput); michael@0: mUnusedInput.Truncate(0); michael@0: } michael@0: michael@0: // Only allow parsing to be interrupted in the subsequent call to michael@0: // build model. michael@0: nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) michael@0: ? Tokenize(aIsFinalChunk) michael@0: : NS_OK; michael@0: result = BuildModel(); michael@0: michael@0: if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) { michael@0: PostContinueEvent(); michael@0: } michael@0: michael@0: theIterationIsOk = theTokenizerResult != kEOF && michael@0: result != NS_ERROR_HTMLPARSER_INTERRUPTED; michael@0: michael@0: // Make sure not to stop parsing too early. 
Therefore, before shutting michael@0: // down the parser, it's important to check whether the input buffer michael@0: // has been scanned to completion (theTokenizerResult should be kEOF). michael@0: // kEOF -> End of buffer. michael@0: michael@0: // If we're told to block the parser, we disable all further parsing michael@0: // (and cache any data coming in) until the parser is re-enabled. michael@0: if (NS_ERROR_HTMLPARSER_BLOCK == result) { michael@0: mSink->WillInterrupt(); michael@0: if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) { michael@0: // If we were blocked by a recursive invocation, don't re-block. michael@0: BlockParser(); michael@0: } michael@0: return NS_OK; michael@0: } michael@0: if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { michael@0: // Note: Parser Terminate() calls DidBuildModel. michael@0: if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { michael@0: DidBuildModel(mStreamStatus); michael@0: mInternalState = result; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: if ((NS_OK == result && theTokenizerResult == kEOF) || michael@0: result == NS_ERROR_HTMLPARSER_INTERRUPTED) { michael@0: bool theContextIsStringBased = michael@0: CParserContext::eCTString == mParserContext->mContextType; michael@0: michael@0: if (mParserContext->mStreamListenerState == eOnStop || michael@0: !mParserContext->mMultipart || theContextIsStringBased) { michael@0: if (!mParserContext->mPrevContext) { michael@0: if (mParserContext->mStreamListenerState == eOnStop) { michael@0: DidBuildModel(mStreamStatus); michael@0: return NS_OK; michael@0: } michael@0: } else { michael@0: CParserContext* theContext = PopContext(); michael@0: if (theContext) { michael@0: theIterationIsOk = allowIteration && theContextIsStringBased; michael@0: if (theContext->mCopyUnused) { michael@0: theContext->mScanner->CopyUnusedData(mUnusedInput); michael@0: } michael@0: michael@0: delete theContext; michael@0: } michael@0: michael@0: result = mInternalState; michael@0: 
aIsFinalChunk = mParserContext && michael@0: mParserContext->mStreamListenerState == eOnStop; michael@0: // ...then intentionally fall through to mSink->WillInterrupt()... michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (theTokenizerResult == kEOF || michael@0: result == NS_ERROR_HTMLPARSER_INTERRUPTED) { michael@0: result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; michael@0: mSink->WillInterrupt(); michael@0: } michael@0: } michael@0: } else { michael@0: mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD; michael@0: } michael@0: } michael@0: michael@0: return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; michael@0: } michael@0: michael@0: /** michael@0: * This is where we loop over the tokens created in the michael@0: * tokenization phase, and try to make sense out of them. michael@0: */ michael@0: nsresult michael@0: nsParser::BuildModel() michael@0: { michael@0: nsITokenizer* theTokenizer = nullptr; michael@0: michael@0: nsresult result = NS_OK; michael@0: if (mParserContext) { michael@0: result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); michael@0: } michael@0: michael@0: if (NS_SUCCEEDED(result)) { michael@0: if (mDTD) { michael@0: result = mDTD->BuildModel(theTokenizer, mSink); michael@0: } michael@0: } else { michael@0: mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: /******************************************************************* michael@0: These methods are used to talk to the netlib system... 
michael@0: *******************************************************************/ michael@0: michael@0: nsresult michael@0: nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext) michael@0: { michael@0: NS_PRECONDITION(eNone == mParserContext->mStreamListenerState, michael@0: "Parser's nsIStreamListener API was not setup " michael@0: "correctly in constructor."); michael@0: if (mObserver) { michael@0: mObserver->OnStartRequest(request, aContext); michael@0: } michael@0: mParserContext->mStreamListenerState = eOnStart; michael@0: mParserContext->mAutoDetectStatus = eUnknownDetect; michael@0: mParserContext->mRequest = request; michael@0: michael@0: NS_ASSERTION(!mParserContext->mPrevContext, michael@0: "Clobbering DTD for non-root parser context!"); michael@0: mDTD = nullptr; michael@0: michael@0: nsresult rv; michael@0: nsAutoCString contentType; michael@0: nsCOMPtr channel = do_QueryInterface(request); michael@0: if (channel) { michael@0: rv = channel->GetContentType(contentType); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: mParserContext->SetMimeType(contentType); michael@0: } michael@0: } michael@0: michael@0: rv = NS_OK; michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: static bool michael@0: ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen, michael@0: nsCString& oCharset) michael@0: { michael@0: // This code is rather pointless to have. Might as well reuse expat as michael@0: // seen in nsHtml5StreamParser. -- hsivonen michael@0: oCharset.Truncate(); michael@0: if ((aLen >= 5) && michael@0: ('<' == aBytes[0]) && michael@0: ('?' == aBytes[1]) && michael@0: ('x' == aBytes[2]) && michael@0: ('m' == aBytes[3]) && michael@0: ('l' == aBytes[4])) { michael@0: int32_t i; michael@0: bool versionFound = false, encodingFound = false; michael@0: for (i = 6; i < aLen && !encodingFound; ++i) { michael@0: // end of XML declaration? 
michael@0: if ((((char*) aBytes)[i] == '?') && michael@0: ((i + 1) < aLen) && michael@0: (((char*) aBytes)[i + 1] == '>')) { michael@0: break; michael@0: } michael@0: // Version is required. michael@0: if (!versionFound) { michael@0: // Want to avoid string comparisons, hence looking for 'n' michael@0: // and only if found check the string leading to it. Not michael@0: // foolproof, but fast. michael@0: // The shortest string allowed before this is (strlen==13): michael@0: // = 12) && michael@0: (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) { michael@0: // Fast forward through version michael@0: char q = 0; michael@0: for (++i; i < aLen; ++i) { michael@0: char qi = ((char*) aBytes)[i]; michael@0: if (qi == '\'' || qi == '"') { michael@0: if (q && q == qi) { michael@0: // ending quote michael@0: versionFound = true; michael@0: break; michael@0: } else { michael@0: // Starting quote michael@0: q = qi; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } else { michael@0: // encoding must follow version michael@0: // Want to avoid string comparisons, hence looking for 'g' michael@0: // and only if found check the string leading to it. Not michael@0: // foolproof, but fast. 
michael@0: // The shortest allowed string before this (strlen==26): michael@0: // = 25) && (0 == PL_strncmp( michael@0: "encodin", (char*) (aBytes + i - 7), 7))) { michael@0: int32_t encStart = 0; michael@0: char q = 0; michael@0: for (++i; i < aLen; ++i) { michael@0: char qi = ((char*) aBytes)[i]; michael@0: if (qi == '\'' || qi == '"') { michael@0: if (q && q == qi) { michael@0: int32_t count = i - encStart; michael@0: // encoding value is invalid if it is UTF-16 michael@0: if (count > 0 && PL_strncasecmp("UTF-16", michael@0: (char*) (aBytes + encStart), count)) { michael@0: oCharset.Assign((char*) (aBytes + encStart), count); michael@0: } michael@0: encodingFound = true; michael@0: break; michael@0: } else { michael@0: encStart = i + 1; michael@0: q = qi; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } // if (!versionFound) michael@0: } // for michael@0: } michael@0: return !oCharset.IsEmpty(); michael@0: } michael@0: michael@0: inline const char michael@0: GetNextChar(nsACString::const_iterator& aStart, michael@0: nsACString::const_iterator& aEnd) michael@0: { michael@0: NS_ASSERTION(aStart != aEnd, "end of buffer"); michael@0: return (++aStart != aEnd) ? *aStart : '\0'; michael@0: } michael@0: michael@0: static NS_METHOD michael@0: NoOpParserWriteFunc(nsIInputStream* in, michael@0: void* closure, michael@0: const char* fromRawSegment, michael@0: uint32_t toOffset, michael@0: uint32_t count, michael@0: uint32_t *writeCount) michael@0: { michael@0: *writeCount = count; michael@0: return NS_OK; michael@0: } michael@0: michael@0: typedef struct { michael@0: bool mNeedCharsetCheck; michael@0: nsParser* mParser; michael@0: nsScanner* mScanner; michael@0: nsIRequest* mRequest; michael@0: } ParserWriteStruct; michael@0: michael@0: /* michael@0: * This function is invoked as a result of a call to a stream's michael@0: * ReadSegments() method. It is called for each contiguous buffer michael@0: * of data in the underlying stream or pipe. 
Using ReadSegments michael@0: * allows us to avoid copying data to read out of the stream. michael@0: */ michael@0: static NS_METHOD michael@0: ParserWriteFunc(nsIInputStream* in, michael@0: void* closure, michael@0: const char* fromRawSegment, michael@0: uint32_t toOffset, michael@0: uint32_t count, michael@0: uint32_t *writeCount) michael@0: { michael@0: nsresult result; michael@0: ParserWriteStruct* pws = static_cast(closure); michael@0: const unsigned char* buf = michael@0: reinterpret_cast (fromRawSegment); michael@0: uint32_t theNumRead = count; michael@0: michael@0: if (!pws) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: if (pws->mNeedCharsetCheck) { michael@0: pws->mNeedCharsetCheck = false; michael@0: int32_t source; michael@0: nsAutoCString preferred; michael@0: nsAutoCString maybePrefer; michael@0: pws->mParser->GetDocumentCharset(preferred, source); michael@0: michael@0: // This code was bogus when I found it. It expects the BOM or the XML michael@0: // declaration to be entirely in the first network buffer. -- hsivonen michael@0: if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) { michael@0: // The decoder will swallow the BOM. The UTF-16 will re-sniff for michael@0: // endianness. The value of preferred is now either "UTF-8" or "UTF-16". 
michael@0: preferred.Assign(maybePrefer); michael@0: source = kCharsetFromByteOrderMark; michael@0: } else if (source < kCharsetFromChannel) { michael@0: nsAutoCString declCharset; michael@0: michael@0: if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) { michael@0: if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) { michael@0: preferred.Assign(maybePrefer); michael@0: source = kCharsetFromMetaTag; michael@0: } michael@0: } michael@0: } michael@0: michael@0: pws->mParser->SetDocumentCharset(preferred, source); michael@0: pws->mParser->SetSinkCharset(preferred); michael@0: michael@0: } michael@0: michael@0: result = pws->mScanner->Append(fromRawSegment, theNumRead, pws->mRequest); michael@0: if (NS_SUCCEEDED(result)) { michael@0: *writeCount = count; michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: nsresult michael@0: nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext, michael@0: nsIInputStream *pIStream, uint64_t sourceOffset, michael@0: uint32_t aLength) michael@0: { michael@0: NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState || michael@0: eOnDataAvail == mParserContext->mStreamListenerState), michael@0: "Error: OnStartRequest() must be called before OnDataAvailable()"); michael@0: NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream), michael@0: "Must have a buffered input stream"); michael@0: michael@0: nsresult rv = NS_OK; michael@0: michael@0: if (mIsAboutBlank) { michael@0: MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank"); michael@0: // ... but if an extension tries to feed us data for about:blank in a michael@0: // release build, silently ignore the data. 
michael@0: uint32_t totalRead; michael@0: rv = pIStream->ReadSegments(NoOpParserWriteFunc, michael@0: nullptr, michael@0: aLength, michael@0: &totalRead); michael@0: return rv; michael@0: } michael@0: michael@0: CParserContext *theContext = mParserContext; michael@0: michael@0: while (theContext && theContext->mRequest != request) { michael@0: theContext = theContext->mPrevContext; michael@0: } michael@0: michael@0: if (theContext) { michael@0: theContext->mStreamListenerState = eOnDataAvail; michael@0: michael@0: if (eInvalidDetect == theContext->mAutoDetectStatus) { michael@0: if (theContext->mScanner) { michael@0: nsScannerIterator iter; michael@0: theContext->mScanner->EndReading(iter); michael@0: theContext->mScanner->SetPosition(iter, true); michael@0: } michael@0: } michael@0: michael@0: uint32_t totalRead; michael@0: ParserWriteStruct pws; michael@0: pws.mNeedCharsetCheck = true; michael@0: pws.mParser = this; michael@0: pws.mScanner = theContext->mScanner; michael@0: pws.mRequest = request; michael@0: michael@0: rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead); michael@0: if (NS_FAILED(rv)) { michael@0: return rv; michael@0: } michael@0: michael@0: // Don't bother to start parsing until we've seen some michael@0: // non-whitespace data michael@0: if (IsOkToProcessNetworkData() && michael@0: theContext->mScanner->FirstNonWhitespacePosition() >= 0) { michael@0: nsCOMPtr kungFuDeathGrip(this); michael@0: nsCOMPtr sinkDeathGrip(mSink); michael@0: mProcessingNetworkData = true; michael@0: if (mSink) { michael@0: mSink->WillParse(); michael@0: } michael@0: rv = ResumeParse(); michael@0: mProcessingNetworkData = false; michael@0: } michael@0: } else { michael@0: rv = NS_ERROR_UNEXPECTED; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: /** michael@0: * This is called by the networking library once the last block of data michael@0: * has been collected from the net. 
michael@0: */ michael@0: nsresult michael@0: nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext, michael@0: nsresult status) michael@0: { michael@0: nsresult rv = NS_OK; michael@0: michael@0: CParserContext *pc = mParserContext; michael@0: while (pc) { michael@0: if (pc->mRequest == request) { michael@0: pc->mStreamListenerState = eOnStop; michael@0: pc->mScanner->SetIncremental(false); michael@0: break; michael@0: } michael@0: michael@0: pc = pc->mPrevContext; michael@0: } michael@0: michael@0: mStreamStatus = status; michael@0: michael@0: if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) { michael@0: mProcessingNetworkData = true; michael@0: if (mSink) { michael@0: mSink->WillParse(); michael@0: } michael@0: rv = ResumeParse(true, true); michael@0: mProcessingNetworkData = false; michael@0: } michael@0: michael@0: // If the parser isn't enabled, we don't finish parsing till michael@0: // it is reenabled. michael@0: michael@0: michael@0: // XXX Should we wait to notify our observers as well if the michael@0: // parser isn't yet enabled? michael@0: if (mObserver) { michael@0: mObserver->OnStopRequest(request, aContext, status); michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: michael@0: /******************************************************************* michael@0: Here come the tokenization methods... michael@0: *******************************************************************/ michael@0: michael@0: michael@0: /** michael@0: * Part of the code sandwich, this gets called right before michael@0: * the tokenization process begins. The main reason for michael@0: * this call is to allow the delegate to do initialization. 
michael@0: */ michael@0: bool michael@0: nsParser::WillTokenize(bool aIsFinalChunk) michael@0: { michael@0: if (!mParserContext) { michael@0: return true; michael@0: } michael@0: michael@0: nsITokenizer* theTokenizer; michael@0: nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); michael@0: NS_ENSURE_SUCCESS(result, false); michael@0: return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk)); michael@0: } michael@0: michael@0: michael@0: /** michael@0: * This is the primary control routine to consume tokens. michael@0: * It iteratively consumes tokens until an error occurs or michael@0: * you run out of data. michael@0: */ michael@0: nsresult nsParser::Tokenize(bool aIsFinalChunk) michael@0: { michael@0: nsITokenizer* theTokenizer; michael@0: michael@0: nsresult result = NS_ERROR_NOT_AVAILABLE; michael@0: if (mParserContext) { michael@0: result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); michael@0: } michael@0: michael@0: if (NS_SUCCEEDED(result)) { michael@0: bool flushTokens = false; michael@0: michael@0: bool killSink = false; michael@0: michael@0: WillTokenize(aIsFinalChunk); michael@0: while (NS_SUCCEEDED(result)) { michael@0: mParserContext->mScanner->Mark(); michael@0: result = theTokenizer->ConsumeToken(*mParserContext->mScanner, michael@0: flushTokens); michael@0: if (NS_FAILED(result)) { michael@0: mParserContext->mScanner->RewindToMark(); michael@0: if (kEOF == result){ michael@0: break; michael@0: } michael@0: if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { michael@0: killSink = true; michael@0: result = Terminate(); michael@0: break; michael@0: } michael@0: } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) { michael@0: // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931. michael@0: // Flush tokens on seeing -- Ref: Bug# 22485 -- michael@0: // Also remember to update the marked position. 
michael@0: mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS; michael@0: mParserContext->mScanner->Mark(); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (killSink) { michael@0: mSink = nullptr; michael@0: } michael@0: } else { michael@0: result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * Get the channel associated with this parser michael@0: * michael@0: * @param aChannel out param that will contain the result michael@0: * @return NS_OK if successful michael@0: */ michael@0: NS_IMETHODIMP michael@0: nsParser::GetChannel(nsIChannel** aChannel) michael@0: { michael@0: nsresult result = NS_ERROR_NOT_AVAILABLE; michael@0: if (mParserContext && mParserContext->mRequest) { michael@0: result = CallQueryInterface(mParserContext->mRequest, aChannel); michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * Get the DTD associated with this parser michael@0: */ michael@0: NS_IMETHODIMP michael@0: nsParser::GetDTD(nsIDTD** aDTD) michael@0: { michael@0: if (mParserContext) { michael@0: NS_IF_ADDREF(*aDTD = mDTD); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: /** michael@0: * Get this as nsIStreamListener michael@0: */ michael@0: nsIStreamListener* michael@0: nsParser::GetStreamListener() michael@0: { michael@0: return this; michael@0: }