michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim: set sw=2 ts=2 et tw=79: */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include "nsIAtom.h"
michael@0: #include "nsParser.h"
michael@0: #include "nsString.h"
michael@0: #include "nsCRT.h"
michael@0: #include "nsScanner.h"
michael@0: #include "plstr.h"
michael@0: #include "nsIStringStream.h"
michael@0: #include "nsIChannel.h"
michael@0: #include "nsICachingChannel.h"
michael@0: #include "nsICacheEntryDescriptor.h"
michael@0: #include "nsIInputStream.h"
michael@0: #include "CNavDTD.h"
michael@0: #include "prenv.h"
michael@0: #include "prlock.h"
michael@0: #include "prcvar.h"
michael@0: #include "nsParserCIID.h"
michael@0: #include "nsReadableUtils.h"
michael@0: #include "nsCOMPtr.h"
michael@0: #include "nsExpatDriver.h"
michael@0: #include "nsIServiceManager.h"
michael@0: #include "nsICategoryManager.h"
michael@0: #include "nsISupportsPrimitives.h"
michael@0: #include "nsIFragmentContentSink.h"
michael@0: #include "nsStreamUtils.h"
michael@0: #include "nsHTMLTokenizer.h"
michael@0: #include "nsNetUtil.h"
michael@0: #include "nsScriptLoader.h"
michael@0: #include "nsDataHashtable.h"
michael@0: #include "nsXPCOMCIDInternal.h"
michael@0: #include "nsMimeTypes.h"
michael@0: #include "mozilla/CondVar.h"
michael@0: #include "mozilla/Mutex.h"
michael@0: #include "nsParserConstants.h"
michael@0: #include "nsCharsetSource.h"
michael@0: #include "nsContentUtils.h"
michael@0: #include "nsThreadUtils.h"
michael@0: #include "nsIHTMLContentSink.h"
michael@0: 
michael@0: #include "mozilla/dom/EncodingUtils.h"
michael@0: 
michael@0: using namespace mozilla;
michael@0: using mozilla::dom::EncodingUtils;
michael@0: 
michael@0: #define NS_PARSER_FLAG_PARSER_ENABLED         0x00000002 // mFlags bit; set by default in Initialize()
michael@0: #define NS_PARSER_FLAG_OBSERVERS_ENABLED      0x00000004 // mFlags bit; set by default in Initialize()
michael@0: #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008 // mFlags bit; set by PostContinueEvent() once an nsParserContinueEvent is dispatched
michael@0: #define NS_PARSER_FLAG_FLUSH_TOKENS           0x00000020
michael@0: #define NS_PARSER_FLAG_CAN_TOKENIZE           0x00000040 // mFlags bit; set by default in Initialize()
michael@0: 
michael@0: //-------------- Begin ParseContinue Event Definition ------------------------
michael@0: /*
michael@0: The parser can be explicitly interrupted by passing a return value of
michael@0: NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
michael@0: the parser to stop processing and allow the application to return to the event
michael@0: loop. The data which was left at the time of interruption will be processed
michael@0: the next time OnDataAvailable is called. If the parser has received its final
michael@0: chunk of data then OnDataAvailable will no longer be called by the networking
michael@0: module, so the parser will schedule a nsParserContinueEvent which will call
michael@0: the parser to process the remaining data after returning to the event loop.
michael@0: If the parser is interrupted while processing the remaining data it will
michael@0: schedule another ParseContinueEvent. The processing of data followed by
michael@0: scheduling of the continue events will proceed until either:
michael@0: 
michael@0:   1) All of the remaining data can be processed without interrupting
michael@0:   2) The parser has been cancelled.
michael@0: 
michael@0: 
michael@0: This capability is currently used in CNavDTD and nsHTMLContentSink. The
michael@0: nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
michael@0: processed and when each token is processed. The nsHTML content sink records
michael@0: the time when the chunk has started processing and will return
michael@0: NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
michael@0: threshold called max tokenizing processing time. This allows the content sink
michael@0: to limit how much data is processed in a single chunk which in turn gates how
michael@0: much time is spent away from the event loop. Processing smaller chunks of data
michael@0: also reduces the time spent in subsequent reflows.
michael@0: 
michael@0: This capability is most apparent when loading large documents. If the maximum
michael@0: token processing time is set small enough the application will remain
michael@0: responsive during document load.
michael@0: 
michael@0: A side-effect of this capability is that document load is not complete when
michael@0: the last chunk of data is passed to OnDataAvailable since the parser may have
michael@0: been interrupted when the last chunk of data arrived. The document is complete
michael@0: when all of the document has been tokenized and there aren't any pending
michael@0: nsParserContinueEvents. This can cause problems if the application assumes
michael@0: that it can monitor the load requests to determine when the document load has
michael@0: been completed. This is what happens in Mozilla. The document is considered
michael@0: completely loaded when all of the load requests have been satisfied. To delay
michael@0: the document load until all of the parsing has been completed the
michael@0: nsHTMLContentSink adds a dummy parser load request which is not removed until
michael@0: the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
michael@0: DidBuildModel until the final chunk of data has been passed to the parser
michael@0: through the OnDataAvailable and there aren't any pending
michael@0: nsParserContinueEvents.
michael@0: 
michael@0: Currently the parser ignores requests to be interrupted during the
michael@0: processing of script.  This is because a document.write followed by JavaScript
michael@0: calls to manipulate the DOM may fail if the parser was interrupted during the
michael@0: document.write.
michael@0: 
michael@0: For more details @see bugzilla bug 76722
michael@0: */
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Runnable created by nsParser::PostContinueEvent(). When it runs it calls
michael@0:  * back into the parser through HandleParserContinueEvent(), passing itself
michael@0:  * so the parser can tell which event fired.
michael@0:  */
michael@0: class nsParserContinueEvent : public nsRunnable
michael@0: {
michael@0: public:
michael@0:   // Owning reference to the parser; this is what forms the parser <-> event
michael@0:   // cycle described in PostContinueEvent().
michael@0:   nsRefPtr<nsParser> mParser;
michael@0: 
michael@0:   // explicit: an nsParser* must never convert silently into an event object.
michael@0:   explicit nsParserContinueEvent(nsParser* aParser)
michael@0:     : mParser(aParser)
michael@0:   {}
michael@0: 
michael@0:   // Hands control back to the parser. Always returns NS_OK; the parser's
michael@0:   // handler has no return value to propagate.
michael@0:   NS_IMETHOD Run()
michael@0:   {
michael@0:     mParser->HandleParserContinueEvent(this);
michael@0:     return NS_OK;
michael@0:   }
michael@0: };
michael@0: 
michael@0: //-------------- End ParseContinue Event Definition ------------------------
michael@0: 
michael@0: /**
michael@0:  *  Default constructor. All member setup is delegated to Initialize(), which
michael@0:  *  is told that it is being called on a freshly constructed object.
michael@0:  */
michael@0: nsParser::nsParser()
michael@0: {
michael@0:   const bool calledFromConstructor = true;
michael@0:   Initialize(calledFromConstructor);
michael@0: }
michael@0: 
michael@0: // Destructor: all teardown work lives in Cleanup().
michael@0: nsParser::~nsParser()
michael@0: {
michael@0:   this->Cleanup();
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Put the parser's members into their default state.
michael@0:  *
michael@0:  *  @param   aConstructor true when called from the constructor: only the raw
michael@0:  *           context pointer is cleared. When false, the observer reference
michael@0:  *           is dropped and the unused input is emptied instead, and the
michael@0:  *           context pointer is left alone.
michael@0:  */
michael@0: void
michael@0: nsParser::Initialize(bool aConstructor)
michael@0: {
michael@0:   if (!aConstructor) {
michael@0:     // Re-initialisation: release the observer and discard leftover input.
michael@0:     mObserver = nullptr;
michael@0:     mUnusedInput.Truncate();
michael@0:   } else {
michael@0:     // First-time setup: mParserContext is a raw pointer and must be cleared
michael@0:     // by hand.
michael@0:     mParserContext = nullptr;
michael@0:   }
michael@0: 
michael@0:   // Everything below is reset the same way on both paths.
michael@0:   mContinueEvent = nullptr;
michael@0: 
michael@0:   mCharsetSource = kCharsetUninitialized;
michael@0:   mCharset.AssignLiteral("ISO-8859-1");
michael@0: 
michael@0:   mInternalState = NS_OK;
michael@0:   mStreamStatus = NS_OK;
michael@0:   mCommand = eViewNormal;
michael@0: 
michael@0:   // Default flag set: parser enabled, observers enabled, tokenizing allowed.
michael@0:   mFlags = NS_PARSER_FLAG_PARSER_ENABLED |
michael@0:            NS_PARSER_FLAG_OBSERVERS_ENABLED |
michael@0:            NS_PARSER_FLAG_CAN_TOKENIZE;
michael@0: 
michael@0:   mProcessingNetworkData = false;
michael@0:   mIsAboutBlank = false;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Delete every parser context, starting with the current one and following
michael@0:  *  the mPrevContext links until the stack is empty.
michael@0:  */
michael@0: void
michael@0: nsParser::Cleanup()
michael@0: {
michael@0: #ifdef DEBUG
michael@0:   // More than one context left at this point is unexpected; say so.
michael@0:   const bool extraContexts = mParserContext && mParserContext->mPrevContext;
michael@0:   if (extraContexts) {
michael@0:     NS_WARNING("Extra parser contexts still on the parser stack");
michael@0:   }
michael@0: #endif
michael@0: 
michael@0:   for (CParserContext* previous = nullptr; mParserContext;
michael@0:        mParserContext = previous) {
michael@0:     // Read the link before destroying the node that holds it.
michael@0:     previous = mParserContext->mPrevContext;
michael@0:     delete mParserContext;
michael@0:   }
michael@0: 
michael@0:   // The pending-continue flag implies a live nsParserContinueEvent, and such
michael@0:   // an event holds an owning reference to |this|, so the flag should never
michael@0:   // be set while we are being destroyed.
michael@0:   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
michael@0: }
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) // cycle-collection support for nsParser
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) // unlink covers mDTD, mSink and mObserver
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_END
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) // traverse reports the same three members...
michael@0:   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
michael@0:   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
michael@0:   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
michael@0:   CParserContext *pc = tmp->mParserContext; // ...and then walks the context stack
michael@0:   while (pc) {
michael@0:     cb.NoteXPCOMChild(pc->mTokenizer); // NOTE(review): tokenizers are reported here but not cleared by unlink -- confirm this is intended
michael@0:     pc = pc->mPrevContext;
michael@0:   }
michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
michael@0: NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
michael@0: NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) // interfaces nsParser answers QueryInterface for
michael@0:   NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
michael@0:   NS_INTERFACE_MAP_ENTRY(nsIParser)
michael@0:   NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
michael@0:   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
michael@0:   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
michael@0: NS_INTERFACE_MAP_END
michael@0: 
michael@0: // Queue an nsParserContinueEvent on the current thread unless one is already
michael@0: // pending. The event is meant to be posted only once all of the data to
michael@0: // parse has been passed to ::OnDataAvailable and the content sink has
michael@0: // interrupted the parser because token processing took too long.
michael@0: //
michael@0: // Always returns NS_OK; a failed dispatch is reported with a warning only.
michael@0: nsresult
michael@0: nsParser::PostContinueEvent()
michael@0: {
michael@0:   // An event is already queued: nothing more to do.
michael@0:   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // With the flag clear there must not be a live continue event either.
michael@0:   NS_ASSERTION(!mContinueEvent, "bad");
michael@0: 
michael@0:   // The new event references this parser, forming a cycle that is broken
michael@0:   // when the event fires.
michael@0:   nsCOMPtr<nsIRunnable> continueEvent = new nsParserContinueEvent(this);
michael@0:   const nsresult dispatchStatus = NS_DispatchToCurrentThread(continueEvent);
michael@0: 
michael@0:   if (NS_SUCCEEDED(dispatchStatus)) {
michael@0:     // Only record the event once it is really in the queue.
michael@0:     mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
michael@0:     mContinueEvent = continueEvent;
michael@0:   } else {
michael@0:     NS_WARNING("failed to dispatch parser continuation event");
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Copy the stored command string into the caller's string.
michael@0:  *
michael@0:  *  @param   aCommand receives a copy of mCommandStr
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::GetCommand(nsCString& aCommand)
michael@0: {
michael@0:   aCommand.Assign(mCommandStr);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Record the command that caused this parser to be created and derive the
michael@0:  *  matching eParserCommands value from it. For example, this allows us to
michael@0:  *  select a DTD which can do, say, view-source.
michael@0:  *
michael@0:  *  "view-source" selects eViewSource, "view-fragment" selects eViewFragment,
michael@0:  *  and every other string selects eViewNormal.
michael@0:  *
michael@0:  *  @param   aCommand the command string to set
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::SetCommand(const char* aCommand)
michael@0: {
michael@0:   mCommandStr.Assign(aCommand);
michael@0: 
michael@0:   if (mCommandStr.Equals("view-source")) {
michael@0:     mCommand = eViewSource;
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   if (mCommandStr.Equals("view-fragment")) {
michael@0:     mCommand = eViewFragment;
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Anything unrecognised is treated as a normal view.
michael@0:   mCommand = eViewNormal;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Overload of SetCommand() for callers that already hold the enum value.
michael@0:  *  Only mCommand is updated; the stored command string is not touched.
michael@0:  *
michael@0:  *  @param   aParserCommand the command to set
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::SetCommand(eParserCommands aParserCommand)
michael@0: {
michael@0:   this->mCommand = aParserCommand;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Tell the parser which charset the document uses and where that
michael@0:  *  information came from. The values are stored on the parser and, when the
michael@0:  *  current parser context has a scanner, forwarded to that scanner too.
michael@0:  *
michael@0:  *  @param   aCharset- the charset of a document
michael@0:  *  @param   aCharsetSource- the source of the charset
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)
michael@0: {
michael@0:   mCharset.Assign(aCharset);
michael@0:   mCharsetSource = aCharsetSource;
michael@0: 
michael@0:   // No context, or a context without a scanner: nothing to forward to.
michael@0:   if (!mParserContext || !mParserContext->mScanner) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
michael@0: }
michael@0: 
michael@0: // Pass aCharset on to the content sink. Does nothing when no sink is set.
michael@0: void
michael@0: nsParser::SetSinkCharset(nsACString& aCharset)
michael@0: {
michael@0:   if (!mSink) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   mSink->SetDocumentCharset(aCharset);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Install the content sink that this parser will dump nodes to, and give
michael@0:  *  the sink a pointer back to this parser. If the sink also implements
michael@0:  *  nsIHTMLContentSink, mIsAboutBlank is switched on.
michael@0:  *
michael@0:  *  @param   aSink the node receiver; expected to be non-null
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::SetContentSink(nsIContentSink* aSink)
michael@0: {
michael@0:   NS_PRECONDITION(aSink, "sink cannot be null!");
michael@0:   mSink = aSink;
michael@0: 
michael@0:   // The precondition is only a debug check, so a null sink is tolerated.
michael@0:   if (!mSink) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   mSink->SetParser(this);
michael@0: 
michael@0:   nsCOMPtr<nsIHTMLContentSink> asHtmlSink(do_QueryInterface(mSink));
michael@0:   if (asHtmlSink) {
michael@0:     mIsAboutBlank = true;
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Hand back the sink previously installed with SetContentSink().
michael@0:  * @return  the current sink as a raw pointer (null when none is set)
michael@0:  */
michael@0: NS_IMETHODIMP_(nsIContentSink*)
michael@0: nsParser::GetContentSink()
michael@0: {
michael@0:   return mSink.get();
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Determine what DTD mode (and thus what layout nsCompatibility mode)
michael@0:  * to use for this document based on the first chunk of data received
michael@0:  * from the network (each parsercontext can have its own mode).  (No,
michael@0:  * this is not an optimal solution -- we really don't need to know until
michael@0:  * after we've received the DOCTYPE, and this could easily be part of
michael@0:  * the regular parsing process if the parser were designed in a way that
michael@0:  * made such modifications easy.)
michael@0:  */
michael@0: 
michael@0: // Skip over the PS production of the SGML spec (leaving out the part that
michael@0: // deals with entity references): a run of whitespace characters and
michael@0: // "--"-delimited comments starting at aIndex in aBuffer. Returns the first
michael@0: // index after the end of that run. A "--" opener that has no matching "--"
michael@0: // closer is not consumed.
michael@0: static int32_t
michael@0: ParsePS(const nsString& aBuffer, int32_t aIndex)
michael@0: {
michael@0:   while (true) {
michael@0:     switch (aBuffer.CharAt(aIndex)) {
michael@0:       case char16_t(' '):
michael@0:       case char16_t('\t'):
michael@0:       case char16_t('\n'):
michael@0:       case char16_t('\r'):
michael@0:         // Plain whitespace: step past it and keep going.
michael@0:         ++aIndex;
michael@0:         continue;
michael@0: 
michael@0:       case char16_t('-'): {
michael@0:         // A single '-' is not part of PS.
michael@0:         if (aBuffer.CharAt(aIndex + 1) != char16_t('-')) {
michael@0:           return aIndex;
michael@0:         }
michael@0: 
michael@0:         // Find the "--" that closes the comment opened at aIndex.
michael@0:         const int32_t closerIndex = aBuffer.Find("--", false, aIndex + 2, -1);
michael@0:         if (closerIndex == kNotFound) {
michael@0:           return aIndex;
michael@0:         }
michael@0: 
michael@0:         aIndex = closerIndex + 2;
michael@0:         continue;
michael@0:       }
michael@0: 
michael@0:       default:
michael@0:         return aIndex;
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: #define PARSE_DTD_HAVE_DOCTYPE          (1<<0) // a DOCTYPE declaration was found
michael@0: #define PARSE_DTD_HAVE_PUBLIC_ID        (1<<1) // aPublicID was filled in
michael@0: #define PARSE_DTD_HAVE_SYSTEM_ID        (1<<2) // a system identifier (or the SYSTEM keyword) was seen
michael@0: #define PARSE_DTD_HAVE_INTERNAL_SUBSET  (1<<3) // a '[' came where the declaration could have ended
michael@0: 
michael@0: /*
michael@0:  * Look for a DOCTYPE declaration in aBuffer and pick out its identifiers.
michael@0:  *
michael@0:  * Leading "<!" markup that is not a DOCTYPE and "<?" processing
michael@0:  * instructions are skipped; any other tag ends the search.
michael@0:  *
michael@0:  * aBuffer      - text to scan
michael@0:  * aResultFlags - out: cleared first, then OR-ed with PARSE_DTD_HAVE_* bits
michael@0:  *                as parts are recognised. Bits set before a failure is
michael@0:  *                detected stay set when false is returned.
michael@0:  * aPublicID    - out: the public identifier with whitespace trimmed and
michael@0:  *                collapsed; assigned only when the PUBLIC path succeeds
michael@0:  * aSystemID    - out: the system identifier; assigned only when a complete
michael@0:  *                quoted system literal is found
michael@0:  *
michael@0:  * return true on success (includes not present), false on failure
michael@0:  *
michael@0:  * NOTE(review): several Find() calls pass 0 as their last argument;
michael@0:  * presumably that anchors the match at the given offset -- confirm against
michael@0:  * the nsString implementation.
michael@0:  */
michael@0: static bool
michael@0: ParseDocTypeDecl(const nsString &aBuffer,
michael@0:                  int32_t *aResultFlags,
michael@0:                  nsString &aPublicID,
michael@0:                  nsString &aSystemID)
michael@0: {
michael@0:   bool haveDoctype = false;
michael@0:   *aResultFlags = 0;
michael@0: 
michael@0:   // Skip through any comments and processing instructions
michael@0:   // The PI-skipping is a bit of a hack.
michael@0:   int32_t theIndex = 0;
michael@0:   do {
michael@0:     theIndex = aBuffer.FindChar('<', theIndex);
michael@0:     if (theIndex == kNotFound) break;
michael@0:     char16_t nextChar = aBuffer.CharAt(theIndex+1);
michael@0:     if (nextChar == char16_t('!')) {
michael@0:       int32_t tmpIndex = theIndex + 2;
michael@0:       if (kNotFound !=
michael@0:           (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
michael@0:         haveDoctype = true;
michael@0:         theIndex += 7; // skip "DOCTYPE"
michael@0:         break;
michael@0:       }
michael@0:       theIndex = ParsePS(aBuffer, tmpIndex); // not a DOCTYPE: skip PS, then on to the next '>'
michael@0:       theIndex = aBuffer.FindChar('>', theIndex);
michael@0:     } else if (nextChar == char16_t('?')) {
michael@0:       theIndex = aBuffer.FindChar('>', theIndex); // processing instruction: jump to its '>'
michael@0:     } else {
michael@0:       break; // any other tag ends the search
michael@0:     }
michael@0:   } while (theIndex != kNotFound);
michael@0: 
michael@0:   if (!haveDoctype)
michael@0:     return true; // a missing DOCTYPE still counts as success
michael@0:   *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
michael@0: 
michael@0:   theIndex = ParsePS(aBuffer, theIndex);
michael@0:   theIndex = aBuffer.Find("HTML", true, theIndex, 0);
michael@0:   if (kNotFound == theIndex)
michael@0:     return false;
michael@0:   theIndex = ParsePS(aBuffer, theIndex+4);
michael@0:   int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
michael@0: 
michael@0:   if (kNotFound != tmpIndex) {
michael@0:     theIndex = ParsePS(aBuffer, tmpIndex+6);
michael@0: 
michael@0:     // We get here only if we've read <!DOCTYPE HTML PUBLIC
michael@0:     // (not case sensitive) possibly with comments within.
michael@0: 
michael@0:     // Now find the beginning and end of the public identifier
michael@0:     // and the system identifier (if present).
michael@0: 
michael@0:     char16_t lit = aBuffer.CharAt(theIndex);
michael@0:     if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
michael@0:       return false;
michael@0: 
michael@0:     // Start is the first character, excluding the quote, and End is
michael@0:     // the final quote, so there are (end-start) characters.
michael@0: 
michael@0:     int32_t PublicIDStart = theIndex + 1;
michael@0:     int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
michael@0:     if (kNotFound == PublicIDEnd)
michael@0:       return false;
michael@0:     theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
michael@0:     char16_t next = aBuffer.CharAt(theIndex);
michael@0:     if (next == char16_t('>')) {
michael@0:       // There was a public identifier, but no system
michael@0:       // identifier,
michael@0:       // so do nothing.
michael@0:       // This is needed to avoid the else at the end, and it's
michael@0:       // also the most common case.
michael@0:     } else if ((next == char16_t('\"')) ||
michael@0:                (next == char16_t('\''))) {
michael@0:       // We found a system identifier.
michael@0:       *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
michael@0:       int32_t SystemIDStart = theIndex + 1;
michael@0:       int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
michael@0:       if (kNotFound == SystemIDEnd)
michael@0:         return false;
michael@0:       aSystemID =
michael@0:         Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
michael@0:     } else if (next == char16_t('[')) {
michael@0:       // We found an internal subset.
michael@0:       *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
michael@0:     } else {
michael@0:       // Something's wrong.
michael@0:       return false;
michael@0:     }
michael@0: 
michael@0:     // Since a public ID is a minimum literal, we must trim
michael@0:     // and collapse whitespace
michael@0:     aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
michael@0:     aPublicID.CompressWhitespace(true, true);
michael@0:     *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
michael@0:   } else {
michael@0:     tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
michael@0:     if (kNotFound != tmpIndex) {
michael@0:       // DOCTYPES with system ID but no Public ID
michael@0:       *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
michael@0: 
michael@0:       theIndex = ParsePS(aBuffer, tmpIndex+6);
michael@0:       char16_t next = aBuffer.CharAt(theIndex);
michael@0:       if (next != char16_t('\"') && next != char16_t('\''))
michael@0:         return false;
michael@0: 
michael@0:       int32_t SystemIDStart = theIndex + 1;
michael@0:       int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
michael@0: 
michael@0:       if (kNotFound == SystemIDEnd)
michael@0:         return false;
michael@0:       aSystemID =
michael@0:         Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
michael@0:       theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
michael@0:     }
michael@0: 
michael@0:     char16_t nextChar = aBuffer.CharAt(theIndex); // only '[' or '>' may follow at this point
michael@0:     if (nextChar == char16_t('['))
michael@0:       *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
michael@0:     else if (nextChar != char16_t('>'))
michael@0:       return false;
michael@0:   }
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: struct PubIDInfo // element type of the kPublicIDs table
michael@0: {
michael@0:   enum eMode {
michael@0:     eQuirks,         /* always quirks mode, unless there's an internal subset */
michael@0:     eAlmostStandards,/* eCompatibility_AlmostStandards */
michael@0:     eFullStandards   /* eCompatibility_FullStandards */
michael@0:       /*
michael@0:        * Public IDs that should trigger strict mode are deliberately not
michael@0:        * listed in the table, since we want all future public IDs to
michael@0:        * trigger strict mode as well.
michael@0:        */
michael@0:   };
michael@0: 
michael@0:   const char* name;       // public identifier text (the table stores it in lower case)
michael@0:   eMode mode_if_no_sysid; // presumably the mode used when the doctype has no system ID -- confirm at the lookup site
michael@0:   eMode mode_if_sysid;    // presumably the mode used when the doctype has a system ID -- confirm at the lookup site
michael@0: };
michael@0: 
michael@0: #define ELEMENTS_OF(array_) (sizeof(array_)/sizeof(array_[0])) // element count; array_ must be a real array, not a pointer
michael@0: 
michael@0: // These must be in nsCRT::strcmp order so binary-search can be used.
michael@0: // This is verified, |#ifdef DEBUG|, below.
michael@0: 
michael@0: // Even though public identifiers should be case sensitive, we will do
michael@0: // all comparisons after converting to lower case in order to do
michael@0: // case-insensitive comparison since there are a number of existing web
michael@0: // sites that use the incorrect case.  Therefore all of the public
michael@0: // identifiers below are in lower case (with the correct case following,
michael@0: // in comments).  The case is verified, |#ifdef DEBUG|, below.
michael@0: static const PubIDInfo kPublicIDs[] = {
michael@0:   {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
michael@0:   {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
michael@0:   {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
michael@0:   {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
michael@0:   {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0:   {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
michael@0: };
michael@0: 
michael@0: #ifdef DEBUG
michael@0: /**
michael@0:  * Debug-only consistency check of the kPublicIDs table.  The checks run on
michael@0:  * the first call only; later calls return immediately.
michael@0:  *
michael@0:  * Reports (via NS_NOTREACHED and printf) any adjacent pair of entries whose
michael@0:  * names are not in strictly ascending nsCRT::strcmp order, and any entry
michael@0:  * whose name is changed by ToLowerCase.
michael@0:  */
michael@0: static void
michael@0: VerifyPublicIDs()
michael@0: {
michael@0:   static bool gVerified = false;
michael@0:   if (gVerified) {
michael@0:     return;
michael@0:   }
michael@0:   gVerified = true;
michael@0: 
michael@0:   const uint32_t count = ELEMENTS_OF(kPublicIDs);
michael@0: 
michael@0:   // Pass 1: every name must compare strictly less than its successor.
michael@0:   for (uint32_t cur = 0; cur < count - 1; ++cur) {
michael@0:     const char* thisName = kPublicIDs[cur].name;
michael@0:     const char* nextName = kPublicIDs[cur + 1].name;
michael@0:     if (nsCRT::strcmp(thisName, nextName) >= 0) {
michael@0:       NS_NOTREACHED("doctypes out of order");
michael@0:       printf("Doctypes %s and %s out of order.\n", thisName, nextName);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // Pass 2: every name must already be lower case.
michael@0:   for (uint32_t cur = 0; cur < count; ++cur) {
michael@0:     const char* thisName = kPublicIDs[cur].name;
michael@0:     nsAutoCString lowered(thisName);
michael@0:     ToLowerCase(lowered);
michael@0:     if (nsCRT::strcmp(thisName, lowered.get()) != 0) {
michael@0:       NS_NOTREACHED("doctype not lower case");
michael@0:       printf("Doctype %s not lower case.\n", thisName);
michael@0:     }
michael@0:   }
michael@0: }
michael@0: #endif
michael@0: 
michael@0: /**
michael@0:  * Chooses the DTD mode and parser doctype for an HTML document from the
michael@0:  * DOCTYPE declaration (if any) that ParseDocTypeDecl finds in aBuffer.
michael@0:  *
michael@0:  *  - malformed DOCTYPE, or no DOCTYPE at all: quirks;
michael@0:  *  - internal subset present, or no public ID: full standards, except for
michael@0:  *    IBM's custom system ID (without internal subset), which is quirks;
michael@0:  *  - otherwise the lower-cased public ID is looked up in kPublicIDs; an
michael@0:  *    unlisted ID is full standards, a listed one uses the table's mode for
michael@0:  *    the "system ID present" / "system ID absent" case.
michael@0:  *
michael@0:  * @param aBuffer     text to search for the DOCTYPE declaration
michael@0:  * @param aParseMode  [out] receives the selected DTD mode
michael@0:  * @param aDocType    [out] receives the selected doctype
michael@0:  */
michael@0: static void
michael@0: DetermineHTMLParseMode(const nsString& aBuffer,
michael@0:                        nsDTDMode& aParseMode,
michael@0:                        eParserDocType& aDocType)
michael@0: {
michael@0: #ifdef DEBUG
michael@0:   VerifyPublicIDs();
michael@0: #endif
michael@0:   int32_t flags;
michael@0:   nsAutoString publicIDUCS2, sysIDUCS2;
michael@0: 
michael@0:   // A badly formed DOCTYPE and a missing DOCTYPE both mean quirks.  The
michael@0:   // flags are only inspected when ParseDocTypeDecl succeeded.
michael@0:   if (!ParseDocTypeDecl(aBuffer, &flags, publicIDUCS2, sysIDUCS2) ||
michael@0:       !(flags & PARSE_DTD_HAVE_DOCTYPE)) {
michael@0:     aParseMode = eDTDMode_quirks;
michael@0:     aDocType = eHTML_Quirks;
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   const bool hasInternalSubset =
michael@0:     (flags & PARSE_DTD_HAVE_INTERNAL_SUBSET) != 0;
michael@0:   if (hasInternalSubset || !(flags & PARSE_DTD_HAVE_PUBLIC_ID)) {
michael@0:     // A doctype with an internal subset is always full_standards.
michael@0:     // A doctype without a public ID is always full_standards, apart from
michael@0:     // the special hack for IBM's custom DOCTYPE.
michael@0:     const bool isIBMDoctype =
michael@0:       !hasInternalSubset &&
michael@0:       sysIDUCS2 == NS_LITERAL_STRING(
michael@0:            "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd");
michael@0:     if (isIBMDoctype) {
michael@0:       aParseMode = eDTDMode_quirks;
michael@0:       aDocType = eHTML_Quirks;
michael@0:     } else {
michael@0:       aParseMode = eDTDMode_full_standards;
michael@0:       aDocType = eHTML_Strict;
michael@0:     }
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // We have to check our list of public IDs to see what to do.
michael@0:   // Yes, we want UCS2 to ASCII lossy conversion.
michael@0:   nsAutoCString publicID;
michael@0:   publicID.AssignWithConversion(publicIDUCS2);
michael@0: 
michael@0:   // See comment above definition of kPublicIDs about case
michael@0:   // sensitivity.
michael@0:   ToLowerCase(publicID);
michael@0: 
michael@0:   // Binary search of the sorted table.  The bounds must be signed because
michael@0:   // the upper bound can drop below zero.
michael@0:   int32_t low = 0;
michael@0:   int32_t high = ELEMENTS_OF(kPublicIDs) - 1;
michael@0:   int32_t found = -1;
michael@0:   while (low <= high) {
michael@0:     const int32_t middle = (low + high) / 2;
michael@0:     const int32_t order =
michael@0:         nsCRT::strcmp(publicID.get(), kPublicIDs[middle].name);
michael@0:     if (order == 0) {
michael@0:       found = middle;
michael@0:       break;
michael@0:     }
michael@0:     if (order < 0) {
michael@0:       high = middle - 1;
michael@0:     } else {
michael@0:       low = middle + 1;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (found < 0) {
michael@0:     // The DOCTYPE is not in our list, so it must be full_standards.
michael@0:     aParseMode = eDTDMode_full_standards;
michael@0:     aDocType = eHTML_Strict;
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   switch ((flags & PARSE_DTD_HAVE_SYSTEM_ID)
michael@0:             ? kPublicIDs[found].mode_if_sysid
michael@0:             : kPublicIDs[found].mode_if_no_sysid)
michael@0:   {
michael@0:     case PubIDInfo::eQuirks:
michael@0:       aParseMode = eDTDMode_quirks;
michael@0:       aDocType = eHTML_Quirks;
michael@0:       break;
michael@0:     case PubIDInfo::eAlmostStandards:
michael@0:       aParseMode = eDTDMode_almost_standards;
michael@0:       aDocType = eHTML_Strict;
michael@0:       break;
michael@0:     case PubIDInfo::eFullStandards:
michael@0:       aParseMode = eDTDMode_full_standards;
michael@0:       aDocType = eHTML_Strict;
michael@0:       break;
michael@0:     default:
michael@0:       NS_NOTREACHED("no other cases!");
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Picks the DTD mode and doctype for a document based on its MIME type.
michael@0:  *
michael@0:  * TEXT_HTML documents are classified by DetermineHTMLParseMode from the
michael@0:  * contents of aBuffer.  Types for which nsContentUtils::IsPlainTextType
michael@0:  * returns true are plain text in quirks mode.  Everything else is treated
michael@0:  * as some form of XML in full-standards mode.
michael@0:  *
michael@0:  * @param aBuffer     document text, only consulted for TEXT_HTML
michael@0:  * @param aParseMode  [out] receives the selected DTD mode
michael@0:  * @param aDocType    [out] receives the selected doctype
michael@0:  * @param aMimeType   MIME type of the document
michael@0:  */
michael@0: static void
michael@0: DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
michael@0:                    eParserDocType& aDocType, const nsACString& aMimeType)
michael@0: {
michael@0:   if (aMimeType.EqualsLiteral(TEXT_HTML)) {
michael@0:     DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   const bool isPlainText = nsContentUtils::IsPlainTextType(aMimeType);
michael@0:   aDocType = isPlainText ? ePlainText : eXML;
michael@0:   aParseMode = isPlainText ? eDTDMode_quirks : eDTDMode_full_standards;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Allocates the DTD matching the context's doctype: an nsExpatDriver when
michael@0:  * the doctype is eXML, a CNavDTD for anything else.  Also marks the
michael@0:  * context's auto-detect status as ePrimaryDetect, since a DTD is always
michael@0:  * found.
michael@0:  *
michael@0:  * @param aParserContext context whose mDocType selects the DTD
michael@0:  * @return the newly allocated DTD
michael@0:  */
michael@0: static nsIDTD*
michael@0: FindSuitableDTD(CParserContext& aParserContext)
michael@0: {
michael@0:   // We always find a DTD.
michael@0:   aParserContext.mAutoDetectStatus = ePrimaryDetect;
michael@0: 
michael@0:   // Quick check for view source.
michael@0:   NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource,
michael@0:     "The old parser is not supposed to be used for View Source anymore.");
michael@0: 
michael@0:   // As far as we're concerned, "HTML" simply means "not XML".
michael@0:   const bool parsingXML = (aParserContext.mDocType == eXML);
michael@0:   if (!parsingXML) {
michael@0:     return new CNavDTD();
michael@0:   }
michael@0: 
michael@0:   // If we're here, then we'd better be parsing XML.
michael@0:   NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
michael@0:   return new nsExpatDriver();
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Revokes the pending continue-parsing event, if there is one, by dropping
michael@0:  * mContinueEvent and clearing NS_PARSER_FLAG_PENDING_CONTINUE_EVENT.
michael@0:  *
michael@0:  * @return always NS_OK
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::CancelParsingEvents()
michael@0: {
michael@0:   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
michael@0:     // Nothing is pending, so there is nothing to revoke.
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
michael@0:   mContinueEvent = nullptr;
michael@0:   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: ////////////////////////////////////////////////////////////////////////
michael@0: 
michael@0: /**
michael@0:  * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the value
michael@0:  * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
michael@0:  * (which could be success or failure).
michael@0:  *
michael@0:  * To understand the motivation for this construct, consider these example
michael@0:  * methods:
michael@0:  *
michael@0:  *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
michael@0:  *     nsresult rv = NS_OK;
michael@0:  *     ...
michael@0:  *     rv = obj->DoThatThing();
michael@0:  *     NS_ENSURE_SUCCESS(rv, rv);
michael@0:  *     ...
michael@0:  *     return rv;
michael@0:  *   }
michael@0:  *
michael@0:  *   nsresult nsCaller::MakeThingsHappen() {
michael@0:  *     return mSomething->DoThatThing(mWhatever);
michael@0:  *   }
michael@0:  *
michael@0:  * Suppose, for whatever reason*, we want to shift responsibility for calling
michael@0:  * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
michael@0:  * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
michael@0:  *
michael@0:  *   nsresult nsSomething::DoThatThing() {
michael@0:  *     nsresult rv = NS_OK;
michael@0:  *     ...
michael@0:  *     ...
michael@0:  *     return rv;
michael@0:  *   }
michael@0:  *
michael@0:  *   nsresult nsCaller::MakeThingsHappen() {
michael@0:  *     nsresult rv;
michael@0:  *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
michael@0:  *                              mWhatever->DoThatThing(),
michael@0:  *                              rv);
michael@0:  *     return rv;
michael@0:  *   }
michael@0:  *
michael@0:  * *Possible reasons include: nsCaller doesn't want to give mSomething access
michael@0:  * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
michael@0:  * be called regardless of how nsSomething::DoThatThing behaves, &c.
michael@0:  *
michael@0:  * Implementation note: RV is first assigned the result of EXPR2 and is only
michael@0:  * replaced by the saved result of EXPR1 when EXPR2 succeeded, so a failure
michael@0:  * code from EXPR2 always wins.
michael@0:  */
michael@0: #define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
michael@0:   nsresult RV##__temp = EXPR1;                                                \
michael@0:   RV = EXPR2;                                                                 \
michael@0:   if (NS_SUCCEEDED(RV)) {                                                     \
michael@0:     RV = RV##__temp;                                                          \
michael@0:   }                                                                           \
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * This gets called just prior to the model actually
michael@0:  * being constructed. It's important to make this the
michael@0:  * last thing that happens right before parsing, so we
michael@0:  * can delay until the last moment the resolution of
michael@0:  * which DTD to use (unless of course we're assigned one).
michael@0:  */
michael@0: nsresult
michael@0: nsParser::WillBuildModel(nsString& aFilename)
michael@0: {
michael@0:   if (!mParserContext)
michael@0:     return kInvalidParserContext;
michael@0: 
michael@0:   if (eUnknownDetect != mParserContext->mAutoDetectStatus)
michael@0:     return NS_OK;
michael@0: 
michael@0:   if (eDTDMode_unknown == mParserContext->mDTDMode ||
michael@0:       eDTDMode_autodetect == mParserContext->mDTDMode) {
michael@0:     char16_t buf[1025];
michael@0:     nsFixedString theBuffer(buf, 1024, 0);
michael@0: 
michael@0:     // Grab 1024 characters, starting at the first non-whitespace
michael@0:     // character, to look for the doctype in.
michael@0:     mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
michael@0:     DetermineParseMode(theBuffer, mParserContext->mDTDMode,
michael@0:                        mParserContext->mDocType, mParserContext->mMimeType);
michael@0:   }
michael@0: 
michael@0:   NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
michael@0:                "Clobbering DTD for non-root parser context!");
michael@0:   mDTD = FindSuitableDTD(*mParserContext);
michael@0:   NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
michael@0: 
michael@0:   nsITokenizer* tokenizer;
michael@0:   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
michael@0:   nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
michael@0:   // nsIDTD::WillBuildModel used to be responsible for calling
michael@0:   // nsIContentSink::WillBuildModel, but that obligation isn't expressible
michael@0:   // in the nsIDTD interface itself, so it's sounder and simpler to give that
michael@0:   // responsibility back to the parser. The former behavior of the DTD was to
michael@0:   // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
michael@0:   // failure we should use sinkResult instead of rv, to preserve the old error
michael@0:   // handling behavior of the DTD:
michael@0:   return NS_FAILED(sinkResult) ? sinkResult : rv;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * This gets called when the parser is done with its input.
michael@0:  * Note that the parser may have been called recursively, so we
michael@0:  * have to check for a prev. context before closing out the DTD/sink.
michael@0:  */
michael@0: nsresult
michael@0: nsParser::DidBuildModel(nsresult anErrorCode)
michael@0: {
michael@0:   nsresult result = anErrorCode;
michael@0: 
michael@0:   if (IsComplete()) {
michael@0:     if (mParserContext && !mParserContext->mPrevContext) {
michael@0:       // Let sink know if we're about to end load because we've been terminated.
michael@0:       // In that case we don't want it to run deferred scripts.
michael@0:       bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
michael@0:       if (mDTD && mSink) {
michael@0:         nsresult dtdResult =  mDTD->DidBuildModel(anErrorCode),
michael@0:                 sinkResult = mSink->DidBuildModel(terminated);
michael@0:         // nsIDTD::DidBuildModel used to be responsible for calling
michael@0:         // nsIContentSink::DidBuildModel, but that obligation isn't expressible
michael@0:         // in the nsIDTD interface itself, so it's sounder and simpler to give
michael@0:         // that responsibility back to the parser. The former behavior of the
michael@0:         // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
michael@0:         // sink returns failure we should use sinkResult instead of dtdResult,
michael@0:         // to preserve the old error handling behavior of the DTD:
michael@0:         result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
michael@0:       }
michael@0: 
michael@0:       //Ref. to bug 61462.
michael@0:       mParserContext->mRequest = 0;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return result;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Makes aContext the current parser context.  The context being pushed is
michael@0:  * expected to already link to the current context through mPrevContext;
michael@0:  * this is asserted, not established, here.
michael@0:  *
michael@0:  * @param aContext the context that becomes current
michael@0:  */
michael@0: void
michael@0: nsParser::PushContext(CParserContext& aContext)
michael@0: {
michael@0:   NS_ASSERTION(aContext.mPrevContext == mParserContext,
michael@0:                "Trying to push a context whose previous context differs from "
michael@0:                "the current parser context.");
michael@0: 
michael@0:   CParserContext* incoming = &aContext;
michael@0:   mParserContext = incoming;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Removes the topmost context from the context stack and hands it back to
michael@0:  * the caller.  Its previous context (if any) becomes the current context
michael@0:  * and inherits the popped context's stream listener state, unless its own
michael@0:  * state is already eOnStop.
michael@0:  *
michael@0:  * @return the context that was current on entry; null if there was none
michael@0:  */
michael@0: CParserContext*
michael@0: nsParser::PopContext()
michael@0: {
michael@0:   CParserContext* popped = mParserContext;
michael@0:   if (!popped) {
michael@0:     return nullptr;
michael@0:   }
michael@0: 
michael@0:   mParserContext = popped->mPrevContext;
michael@0: 
michael@0:   // Propagate the stream listener state, but don't override onStop state
michael@0:   // to guarantee the call to DidBuildModel().
michael@0:   if (mParserContext && mParserContext->mStreamListenerState != eOnStop) {
michael@0:     mParserContext->mStreamListenerState = popped->mStreamListenerState;
michael@0:   }
michael@0: 
michael@0:   return popped;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Replaces the parser's stored unused input (mUnusedInput) with a copy of
michael@0:  *  the given buffer.
michael@0:  *
michael@0:  *  @param   aBuffer text to store as the unused input
michael@0:  */
michael@0: void
michael@0: nsParser::SetUnusedInput(nsString& aBuffer)
michael@0: {
michael@0:   mUnusedInput.Assign(aBuffer);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Forces the parser to stop parsing altogether.  This is one-way: once
michael@0:  *  terminated, parsing cannot be resumed without starting over.  A second
michael@0:  *  call is a no-op, so DidBuildModel is only triggered once.
michael@0:  *
michael@0:  *  @return NS_OK, or the sink's failure code when there is no DTD and the
michael@0:  *          sink's DidBuildModel fails
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::Terminate(void)
michael@0: {
michael@0:   // We should only call DidBuildModel once, so don't do anything if this is
michael@0:   // the second time that Terminate has been called.
michael@0:   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // XXX - [ until we figure out a way to break parser-sink circularity ]
michael@0:   // Hack - Hold a reference until we are completely done...
michael@0:   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0:   nsresult result = NS_ERROR_HTMLPARSER_STOPPARSING;
michael@0:   mInternalState = result;
michael@0: 
michael@0:   // CancelParsingEvents must be called to avoid leaking the nsParser object
michael@0:   // @see bug 108049
michael@0:   // It clears NS_PARSER_FLAG_PENDING_CONTINUE_EVENT if set, which is what
michael@0:   // the IsComplete() check inside DidBuildModel looks at, so DidBuildModel
michael@0:   // will go on to call DidBuildModel on the DTD.
michael@0:   CancelParsingEvents();
michael@0: 
michael@0:   // If we got interrupted in the middle of a document.write, then we might
michael@0:   // have more than one parser context on our parsercontext stack. This has
michael@0:   // the effect of making DidBuildModel a no-op, meaning that we never call
michael@0:   // our sink's DidBuildModel and break the reference cycle, causing a leak.
michael@0:   // Since we're getting terminated, we manually clean up our context stack,
michael@0:   // deleting every context except the bottom-most one.
michael@0:   while (mParserContext) {
michael@0:     CParserContext* parent = mParserContext->mPrevContext;
michael@0:     if (!parent) {
michael@0:       break;
michael@0:     }
michael@0:     delete mParserContext;
michael@0:     mParserContext = parent;
michael@0:   }
michael@0: 
michael@0:   if (!mDTD) {
michael@0:     if (mSink) {
michael@0:       // We have no parser context or no DTD yet (so we got terminated before
michael@0:       // we got any data).  Manually break the reference cycle with the sink.
michael@0:       result = mSink->DidBuildModel(true);
michael@0:       NS_ENSURE_SUCCESS(result, result);
michael@0:     }
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   mDTD->Terminate();
michael@0:   DidBuildModel(result);
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Resumes parsing after an interruption.  Does nothing (and returns NS_OK)
michael@0:  * when IsOkToProcessNetworkData() is false.  Otherwise notifies the sink
michael@0:  * via WillParse() and calls ResumeParse, flagging the chunk as final when
michael@0:  * the current context's stream listener state is eOnStop.
michael@0:  *
michael@0:  * @return NS_OK if ResumeParse returned NS_OK, otherwise mInternalState
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::ContinueInterruptedParsing()
michael@0: {
michael@0:   // If there are scripts executing, then the content sink is jumping the gun
michael@0:   // (probably due to a synchronous XMLHttpRequest) and will re-enable us
michael@0:   // later, see bug 460706.
michael@0:   if (!IsOkToProcessNetworkData()) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // If the stream has already finished, there's a good chance
michael@0:   // that we might start closing things down when the parser
michael@0:   // is reenabled. To make sure that we're not deleted across
michael@0:   // the reenabling process, hold a reference to ourselves
michael@0:   // (and to the sink).
michael@0:   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0:   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
michael@0: 
michael@0: #ifdef DEBUG
michael@0:   if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
michael@0:     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
michael@0:   }
michael@0: #endif
michael@0: 
michael@0:   bool isFinalChunk = false;
michael@0:   if (mParserContext) {
michael@0:     isFinalChunk = mParserContext->mStreamListenerState == eOnStop;
michael@0:   }
michael@0: 
michael@0:   mProcessingNetworkData = true;
michael@0:   if (mSink) {
michael@0:     mSink->WillParse();
michael@0:   }
michael@0:   const nsresult parseResult = ResumeParse(true, isFinalChunk); // Ref. bug 57999
michael@0:   mProcessingNetworkData = false;
michael@0: 
michael@0:   // Anything other than NS_OK is reported as the parser's internal state.
michael@0:   if (parseResult == NS_OK) {
michael@0:     return parseResult;
michael@0:   }
michael@0:   return mInternalState;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Temporarily stops parsing by clearing NS_PARSER_FLAG_PARSER_ENABLED,
michael@0:  *  which prevents the parser from building up the content model.
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::BlockParser()
michael@0: {
michael@0:   // Clear only the "enabled" bit; all other flags are left untouched.
michael@0:   mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Re-enables the parser for tokenization and content model building by
michael@0:  *  setting NS_PARSER_FLAG_PARSER_ENABLED.  Parsing is not resumed
michael@0:  *  automatically; it is the caller's responsibility to restart the parsing
michael@0:  *  engine.  Unblocking a parser that is not blocked only emits a warning.
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::UnblockParser()
michael@0: {
michael@0:   if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
michael@0:     NS_WARNING("Trying to unblock an unblocked parser.");
michael@0:   } else {
michael@0:     mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Asks the content sink to continue the interrupted parse asynchronously by
michael@0:  * forwarding to the sink's ContinueInterruptedParsingAsync().
michael@0:  *
michael@0:  * Does nothing when there is no sink.  Other methods of this class (e.g.
michael@0:  * ContinueInterruptedParsing and Terminate) already treat mSink as possibly
michael@0:  * null, so it is checked here as well instead of being dereferenced blindly.
michael@0:  */
michael@0: NS_IMETHODIMP_(void)
michael@0: nsParser::ContinueInterruptedParsingAsync()
michael@0: {
michael@0:   if (mSink) {
michael@0:     mSink->ContinueInterruptedParsingAsync();
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Reports whether the parser is currently enabled.
michael@0:  *
michael@0:  * @return true if NS_PARSER_FLAG_PARSER_ENABLED is set in mFlags
michael@0:  */
michael@0: NS_IMETHODIMP_(bool)
michael@0: nsParser::IsParserEnabled()
michael@0: {
michael@0:   return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) ? true : false;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Reports whether the parser considers itself done with parsing.
michael@0:  *
michael@0:  * @return true if no continue event is pending, i.e.
michael@0:  *         NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is not set in mFlags
michael@0:  */
michael@0: NS_IMETHODIMP_(bool)
michael@0: nsParser::IsComplete()
michael@0: {
michael@0:   return (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) == 0;
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Handles a continue-parsing event.  Events other than the one currently
michael@0:  * stored in mContinueEvent (i.e. revoked ones) are ignored.  For the
michael@0:  * current event, the pending flag and the stored event are cleared and
michael@0:  * parsing is continued.
michael@0:  *
michael@0:  * @param ev the event being delivered
michael@0:  */
michael@0: void
michael@0: nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
michael@0: {
michael@0:   if (mContinueEvent == ev) {
michael@0:     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
michael@0:     mContinueEvent = nullptr;
michael@0: 
michael@0:     NS_ASSERTION(IsOkToProcessNetworkData(),
michael@0:                  "Interrupted in the middle of a script?");
michael@0:     ContinueInterruptedParsing();
michael@0:   }
michael@0:   // Otherwise the event was revoked; there is nothing to do.
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * This parser never reports a defined insertion point.
michael@0:  *
michael@0:  * @return always false
michael@0:  */
michael@0: bool
michael@0: nsParser::IsInsertionPointDefined()
michael@0: {
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * No-op in this parser implementation.
michael@0:  */
michael@0: void
michael@0: nsParser::BeginEvaluatingParserInsertedScript()
michael@0: {
michael@0:   // Intentionally empty.
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * No-op in this parser implementation.
michael@0:  */
michael@0: void
michael@0: nsParser::EndEvaluatingParserInsertedScript()
michael@0: {
michael@0:   // Intentionally empty.
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * No-op in this parser implementation.
michael@0:  *
michael@0:  * @param aCommand ignored
michael@0:  */
michael@0: void
michael@0: nsParser::MarkAsNotScriptCreated(const char* aCommand)
michael@0: {
michael@0:   // Intentionally empty.
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * This parser never reports itself as script-created.
michael@0:  *
michael@0:  * @return always false
michael@0:  */
michael@0: bool
michael@0: nsParser::IsScriptCreated()
michael@0: {
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Sets the parser up to parse the document at the given URL: records the
michael@0:  *  observer, creates a scanner named after the URL's spec, and pushes a new
michael@0:  *  URL-type parser context (marked multipart, with the requested DTD mode)
michael@0:  *  onto the context stack.
michael@0:  *
michael@0:  *  @param   aURL      URL of the document; must not be null
michael@0:  *  @param   aListener observer stored in mObserver and passed to the new
michael@0:  *                     context
michael@0:  *  @param   aKey      key passed to the new context
michael@0:  *  @param   aMode     DTD mode stored in the new context
michael@0:  *  @return  kBadURL for a null URL, the GetSpec result if that is not NS_OK,
michael@0:  *           NS_ERROR_HTMLPARSER_BADCONTEXT if the scanner or context is
michael@0:  *           null, otherwise NS_OK
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::Parse(nsIURI* aURL,
michael@0:                 nsIRequestObserver* aListener,
michael@0:                 void* aKey,
michael@0:                 nsDTDMode aMode)
michael@0: {
michael@0:   NS_PRECONDITION(aURL, "Error: Null URL given");
michael@0: 
michael@0:   // The observer is recorded even when the URL turns out to be unusable.
michael@0:   mObserver = aListener;
michael@0: 
michael@0:   if (!aURL) {
michael@0:     return kBadURL;
michael@0:   }
michael@0: 
michael@0:   nsAutoCString spec;
michael@0:   nsresult rv = aURL->GetSpec(spec);
michael@0:   if (rv != NS_OK) {
michael@0:     return rv;
michael@0:   }
michael@0:   NS_ConvertUTF8toUTF16 theName(spec);
michael@0: 
michael@0:   nsScanner* theScanner = new nsScanner(theName, false);
michael@0:   CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
michael@0:                                           mCommand, aListener);
michael@0:   if (!pc || !theScanner) {
michael@0:     mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
michael@0:     return mInternalState;
michael@0:   }
michael@0: 
michael@0:   pc->mMultipart = true;
michael@0:   pc->mContextType = CParserContext::eCTURL;
michael@0:   pc->mDTDMode = aMode;
michael@0:   PushContext(*pc);
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Feeds a string of source text to the parser.  Used by XML fragment
michael@0:  * parsing below.
michael@0:  *
michael@0:  * Returns NS_OK without doing anything when parsing has already been
michael@0:  * stopped (mInternalState is NS_ERROR_HTMLPARSER_STOPPARSING), or when
michael@0:  * aSourceBuffer is empty and this is not the last call.
michael@0:  *
michael@0:  * Otherwise the context stack is searched, from the current context
michael@0:  * downwards, for a context whose mKey equals aKey:
michael@0:  *
michael@0:  *  - If there is none, a new string-type context is created on top of the
michael@0:  *    stack, with a scanner constructed from mUnusedInput, mime type
michael@0:  *    "application/xml" and full-standards DTD mode.  aSourceBuffer is
michael@0:  *    appended to its scanner and ResumeParse is called; its result is
michael@0:  *    returned.
michael@0:  *  - If there is one, aSourceBuffer is appended to its scanner.  Parsing is
michael@0:  *    only resumed when that context has no previous context and is the
michael@0:  *    current context; in that case the result of ResumeParse is not
michael@0:  *    propagated and NS_OK is returned.
michael@0:  *    NOTE(review): confirm that discarding this result is intended.
michael@0:  *
michael@0:  * @param   aSourceBuffer contains a string-full of real content
michael@0:  * @param   aKey          key identifying the parser context the data
michael@0:  *                        belongs to
michael@0:  * @param   aLastCall     true when this is the last chunk of data; a newly
michael@0:  *                        created context is then not multipart (unless its
michael@0:  *                        previous context is), and an existing bottom-most
michael@0:  *                        context is switched to eOnStop and non-incremental
michael@0:  *                        scanning
michael@0:  * @return  NS_OK, NS_ERROR_OUT_OF_MEMORY if the scanner or context is null
michael@0:  *          after allocation, or the result of ResumeParse for a newly
michael@0:  *          created context
michael@0:  */
michael@0: nsresult
michael@0: nsParser::Parse(const nsAString& aSourceBuffer,
michael@0:                 void* aKey,
michael@0:                 bool aLastCall)
michael@0: {
michael@0:   nsresult result = NS_OK;
michael@0: 
michael@0:   // Don't bother if we're never going to parse this.
michael@0:   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0:     return result;
michael@0:   }
michael@0: 
michael@0:   if (!aLastCall && aSourceBuffer.IsEmpty()) {
michael@0:     // Nothing is being passed to the parser so return
michael@0:     // immediately. mUnusedInput will get processed when
michael@0:     // some data is actually passed in.
michael@0:     // But if this is the last call, make sure to finish up
michael@0:     // stuff correctly.
michael@0:     return result;
michael@0:   }
michael@0: 
michael@0:   // Maintain a reference to ourselves so we don't go away
michael@0:   // till we're completely done.
michael@0:   nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0: 
michael@0:   if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
michael@0:     // Note: The following code will always find the parser context associated
michael@0:     // with the given key, even if that context has been suspended (e.g., for
michael@0:     // another document.write call). This doesn't appear to be exactly what IE
michael@0:     // does in the case where this happens, but this makes more sense.
michael@0:     CParserContext* pc = mParserContext;
michael@0:     while (pc && pc->mKey != aKey) {
michael@0:       pc = pc->mPrevContext;
michael@0:     }
michael@0: 
michael@0:     if (!pc) {
michael@0:       // No context on the stack carries aKey, so make a new one.  Its
michael@0:       // scanner is constructed from whatever is left in mUnusedInput.
michael@0:       nsScanner* theScanner = new nsScanner(mUnusedInput);
michael@0:       NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
michael@0: 
michael@0:       eAutoDetectResult theStatus = eUnknownDetect;
michael@0: 
michael@0:       if (mParserContext &&
michael@0:           mParserContext->mMimeType.EqualsLiteral("application/xml")) {
michael@0:         // Ref. Bug 90379
michael@0:         NS_ASSERTION(mDTD, "How come the DTD is null?");
michael@0: 
michael@0:         if (mParserContext) {
michael@0:           theStatus = mParserContext->mAutoDetectStatus;
michael@0:           // Added this to fix bug 32022.
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
michael@0:                               0, theStatus, aLastCall);
michael@0:       NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
michael@0: 
michael@0:       PushContext(*pc);
michael@0: 
michael@0:       pc->mMultipart = !aLastCall; // By default
michael@0:       if (pc->mPrevContext) {
michael@0:         pc->mMultipart |= pc->mPrevContext->mMultipart;
michael@0:       }
michael@0: 
michael@0:       // Start fix bug 40143
michael@0:       if (pc->mMultipart) {
michael@0:         pc->mStreamListenerState = eOnDataAvail;
michael@0:         if (pc->mScanner) {
michael@0:           pc->mScanner->SetIncremental(true);
michael@0:         }
michael@0:       } else {
michael@0:         pc->mStreamListenerState = eOnStop;
michael@0:         if (pc->mScanner) {
michael@0:           pc->mScanner->SetIncremental(false);
michael@0:         }
michael@0:       }
michael@0:       // end fix for 40143
michael@0: 
michael@0:       pc->mContextType=CParserContext::eCTString;
michael@0:       pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
michael@0:       pc->mDTDMode = eDTDMode_full_standards;
michael@0: 
michael@0:       mUnusedInput.Truncate();
michael@0: 
michael@0:       pc->mScanner->Append(aSourceBuffer);
michael@0:       // Do not interrupt document.write() - bug 95487
michael@0:       result = ResumeParse(false, false, false);
michael@0:     } else {
michael@0:       pc->mScanner->Append(aSourceBuffer);
michael@0:       if (!pc->mPrevContext) {
michael@0:         // Set stream listener state to eOnStop, on the final context - Fix 68160,
michael@0:         // to guarantee DidBuildModel() call - Fix 36148
michael@0:         if (aLastCall) {
michael@0:           pc->mStreamListenerState = eOnStop;
michael@0:           pc->mScanner->SetIncremental(false);
michael@0:         }
michael@0: 
michael@0:         if (pc == mParserContext) {
michael@0:           // If pc is not mParserContext, then this call to ResumeParse would
michael@0:           // do the wrong thing and try to continue parsing using
michael@0:           // mParserContext. We need to wait to actually resume parsing on pc.
michael@0:           ResumeParse(false, false, false);
michael@0:         }
michael@0:       }
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return result;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Parse aSourceBuffer as a document fragment inside the element context
michael@0:  * described by aTagStack.
michael@0:  *
michael@0:  * The work is done with up to three Parse() calls that share one key:
michael@0:  *   1. a synthetic prefix of "<tag>" start tags built from aTagStack (last
michael@0:  *      entry first), or a single space when the stack is empty;
michael@0:  *   2. the fragment source itself, bracketed by WillBuildContent() and
michael@0:  *      DidBuildContent() on the fragment sink;
michael@0:  *   3. when the stack is not empty, the matching end tags (first entry
michael@0:  *      first).
michael@0:  *
michael@0:  * NS_PARSER_FLAG_OBSERVERS_ENABLED is cleared for the duration of the call
michael@0:  * and set again on every exit path.
michael@0:  *
michael@0:  * @param aSourceBuffer the fragment markup to parse.
michael@0:  * @param aTagStack     start-tag contents without the angle brackets, one
michael@0:  *                      per enclosing element; entry 0 is closed first.
michael@0:  *                      Anything after the first space in an entry is
michael@0:  *                      dropped when the end tag is built.
michael@0:  * @return the failing Parse() result, NS_ERROR_HTMLPARSER_STOPPARSING if the
michael@0:  *         sink is gone after the context parse, NS_ERROR_UNEXPECTED if the
michael@0:  *         sink is not an nsIFragmentContentSink, otherwise the result of
michael@0:  *         the last Parse() call.
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::ParseFragment(const nsAString& aSourceBuffer,
michael@0:                         nsTArray<nsString>& aTagStack)
michael@0: {
michael@0:   nsresult result = NS_OK;
michael@0:   nsAutoString  theContext;
michael@0:   uint32_t theCount = aTagStack.Length();
michael@0:   uint32_t theIndex = 0;
michael@0: 
michael@0:   // Disable observers for fragments
michael@0:   mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0: 
michael@0:   for (theIndex = 0; theIndex < theCount; theIndex++) {
michael@0:     theContext.AppendLiteral("<");
michael@0:     theContext.Append(aTagStack[theCount - theIndex - 1]);
michael@0:     theContext.AppendLiteral(">");
michael@0:   }
michael@0: 
michael@0:   if (theCount == 0) {
michael@0:     // Ensure that the buffer is not empty. Because none of the DTDs care
michael@0:     // about leading whitespace, this doesn't change the result.
michael@0:     theContext.AssignLiteral(" ");
michael@0:   }
michael@0: 
michael@0:   // First, parse the context to build up the DTD's tag stack. Note that we
michael@0:   // pass false for the aLastCall parameter. The address of theContext is
michael@0:   // used as the key for every Parse() call made by this function.
michael@0:   result = Parse(theContext,
michael@0:                  &theContext,
michael@0:                  false);
michael@0:   if (NS_FAILED(result)) {
michael@0:     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0:     return result;
michael@0:   }
michael@0: 
michael@0:   if (!mSink) {
michael@0:     // Parse must have failed in the XML case and so the sink was killed.
michael@0:     // Re-enable observers here too, so that no exit leaves them disabled.
michael@0:     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0:     return NS_ERROR_HTMLPARSER_STOPPARSING;
michael@0:   }
michael@0: 
michael@0:   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
michael@0:   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
michael@0:   if (!fragSink) {
michael@0:     // The assertion above is debug-only; fail cleanly instead of
michael@0:     // dereferencing a null sink in release builds.
michael@0:     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0:     return NS_ERROR_UNEXPECTED;
michael@0:   }
michael@0: 
michael@0:   fragSink->WillBuildContent();
michael@0:   // Now, parse the actual content. Note that this is the last call
michael@0:   // for HTML content, but for XML, we will want to build and parse
michael@0:   // the end tags.  However, if tagStack is empty, it's the last call
michael@0:   // for XML as well.
michael@0:   if (theCount == 0) {
michael@0:     result = Parse(aSourceBuffer,
michael@0:                    &theContext,
michael@0:                    true);
michael@0:     fragSink->DidBuildContent();
michael@0:   } else {
michael@0:     // Add an end tag chunk, so expat will read the whole source buffer,
michael@0:     // and not worry about ']]' etc.
michael@0:     result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
michael@0:                    &theContext,
michael@0:                    false);
michael@0:     fragSink->DidBuildContent();
michael@0: 
michael@0:     if (NS_SUCCEEDED(result)) {
michael@0:       nsAutoString endContext;
michael@0:       for (theIndex = 0; theIndex < theCount; theIndex++) {
michael@0:         // The "</" of the first end tag was already appended to the source
michael@0:         // buffer above, so only later tags need their own.
michael@0:         if (theIndex > 0) {
michael@0:           endContext.AppendLiteral("</");
michael@0:         }
michael@0: 
michael@0:         nsString& thisTag = aTagStack[theIndex];
michael@0:         // was there an xmlns=? If so, keep only the name before the space.
michael@0:         int32_t endOfTag = thisTag.FindChar(char16_t(' '));
michael@0:         if (endOfTag == -1) {
michael@0:           endContext.Append(thisTag);
michael@0:         } else {
michael@0:           endContext.Append(Substring(thisTag,0,endOfTag));
michael@0:         }
michael@0: 
michael@0:         endContext.AppendLiteral(">");
michael@0:       }
michael@0: 
michael@0:       result = Parse(endContext,
michael@0:                      &theContext,
michael@0:                      true);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
michael@0: 
michael@0:   return result;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  This routine is called to cause the parser to continue parsing its
michael@0:  *  underlying stream.  This call allows the parse process to happen in
michael@0:  *  chunks, such as when the content is push based, and we need to parse in
michael@0:  *  pieces.
michael@0:  *
michael@0:  *  An interesting change in how the parser gets used has led us to add extra
michael@0:  *  processing to this method.  The case occurs when the parser is blocked in
michael@0:  *  one context, and gets a parse(string) call in another context.  In this
michael@0:  *  case, the parserContexts are linked. No problem.
michael@0:  *
michael@0:  *  The problem is that Parse(string) assumes that it can proceed unabated,
michael@0:  *  but if the parser is already blocked that assumption is false. So we
michael@0:  *  needed to add a mechanism here to allow the parser to continue to process
michael@0:  *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
michael@0:  *  out of contexts.
michael@0:  *
michael@0:  *  Nothing is done (and NS_OK is returned) unless
michael@0:  *  NS_PARSER_FLAG_PARSER_ENABLED is set and mInternalState is not
michael@0:  *  NS_ERROR_HTMLPARSER_STOPPARSING.
michael@0:  *
michael@0:  *  @param   allowIteration : set to true if non-script resumption is
michael@0:  *           requested. It is only consulted after a context has been
michael@0:  *           popped: the loop keeps going only if this is true and the
michael@0:  *           context that was current before the pop was string based.
michael@0:  *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
michael@0:  *           It is recomputed from the new current context after a pop.
michael@0:  *  @param   aCanInterrupt : not referenced anywhere in this function body.
michael@0:  *  @return  error code -- 0 if ok, non-zero if error.
michael@0:  *           NS_ERROR_HTMLPARSER_BLOCK, NS_ERROR_HTMLPARSER_STOPPARSING and
michael@0:  *           NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel() are all
michael@0:  *           reported to the caller as NS_OK. A failing WillBuildModel() is
michael@0:  *           returned as is; a missing DTD yields
michael@0:  *           NS_ERROR_HTMLPARSER_UNRESOLVEDDTD.
michael@0:  */
michael@0: nsresult
michael@0: nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
michael@0:                       bool aCanInterrupt)
michael@0: {
michael@0:   nsresult result = NS_OK;
michael@0: 
michael@0:   if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
michael@0:       mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0: 
michael@0:     result = WillBuildModel(mParserContext->mScanner->GetFilename());
michael@0:     if (NS_FAILED(result)) {
michael@0:       mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
michael@0:       return result;
michael@0:     }
michael@0: 
michael@0:     if (mDTD) {
michael@0:       mSink->WillResume();
michael@0:       bool theIterationIsOk = true;
michael@0: 
michael@0:       while (result == NS_OK && theIterationIsOk) {
michael@0:         if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
michael@0:           // -- Ref: Bug# 22485 --
michael@0:           // Insert the unused input into the source buffer
michael@0:           // as if it was read from the input stream.
michael@0:           // Adding UngetReadable() per vidur!!
michael@0:           mParserContext->mScanner->UngetReadable(mUnusedInput);
michael@0:           mUnusedInput.Truncate(0);
michael@0:         }
michael@0: 
michael@0:         // Only allow parsing to be interrupted in the subsequent call to
michael@0:         // build model.
michael@0:         nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
michael@0:                                       ? Tokenize(aIsFinalChunk)
michael@0:                                       : NS_OK;
michael@0:         result = BuildModel();
michael@0: 
michael@0:         if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
michael@0:           PostContinueEvent();
michael@0:         }
michael@0: 
michael@0:         theIterationIsOk = theTokenizerResult != kEOF &&
michael@0:                            result != NS_ERROR_HTMLPARSER_INTERRUPTED;
michael@0: 
michael@0:         // Make sure not to stop parsing too early. Therefore, before shutting
michael@0:         // down the parser, it's important to check whether the input buffer
michael@0:         // has been scanned to completion (theTokenizerResult should be kEOF).
michael@0:         // kEOF -> End of buffer.
michael@0: 
michael@0:         // If we're told to block the parser, we disable all further parsing
michael@0:         // (and cache any data coming in) until the parser is re-enabled.
michael@0:         if (NS_ERROR_HTMLPARSER_BLOCK == result) {
michael@0:           mSink->WillInterrupt();
michael@0:           if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
michael@0:             // If we were blocked by a recursive invocation, don't re-block.
michael@0:             BlockParser();
michael@0:           }
michael@0:           return NS_OK; // blocking is not reported as an error
michael@0:         }
michael@0:         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
michael@0:           // Note: Parser Terminate() calls DidBuildModel.
michael@0:           if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
michael@0:             DidBuildModel(mStreamStatus);
michael@0:             mInternalState = result;
michael@0:           }
michael@0: 
michael@0:           return NS_OK; // a stop request is not reported as an error
michael@0:         }
michael@0:         if ((NS_OK == result && theTokenizerResult == kEOF) ||
michael@0:              result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
michael@0:           bool theContextIsStringBased =
michael@0:             CParserContext::eCTString == mParserContext->mContextType;
michael@0: 
michael@0:           if (mParserContext->mStreamListenerState == eOnStop ||
michael@0:               !mParserContext->mMultipart || theContextIsStringBased) {
michael@0:             if (!mParserContext->mPrevContext) {
michael@0:               if (mParserContext->mStreamListenerState == eOnStop) {
michael@0:                 DidBuildModel(mStreamStatus);
michael@0:                 return NS_OK;
michael@0:               }
michael@0:             } else {
michael@0:               CParserContext* theContext = PopContext();
michael@0:               if (theContext) {
michael@0:                 theIterationIsOk = allowIteration && theContextIsStringBased;
michael@0:                 if (theContext->mCopyUnused) {
michael@0:                   theContext->mScanner->CopyUnusedData(mUnusedInput);
michael@0:                 }
michael@0: 
michael@0:                 delete theContext;
michael@0:               }
michael@0: 
michael@0:               result = mInternalState; // carry on with the parser-wide state
michael@0:               aIsFinalChunk = mParserContext &&
michael@0:                               mParserContext->mStreamListenerState == eOnStop;
michael@0:               // ...then intentionally fall through to mSink->WillInterrupt()...
michael@0:             }
michael@0:           }
michael@0:         }
michael@0: 
michael@0:         if (theTokenizerResult == kEOF ||
michael@0:             result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
michael@0:           result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
michael@0:           mSink->WillInterrupt();
michael@0:         }
michael@0:       }
michael@0:     } else {
michael@0:       mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Hands the current context's tokenizer to the DTD so that it can turn the
michael@0:  *  tokens produced by the tokenization phase into content.
michael@0:  *
michael@0:  *  @return NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in mInternalState)
michael@0:  *          when the tokenizer cannot be obtained; otherwise the result of
michael@0:  *          mDTD->BuildModel(), or the tokenizer lookup result (NS_OK when
michael@0:  *          there is no parser context) when there is no DTD.
michael@0:  */
michael@0: nsresult
michael@0: nsParser::BuildModel()
michael@0: {
michael@0:   nsITokenizer* tokenizer = nullptr;
michael@0:   nsresult rv = NS_OK;
michael@0: 
michael@0:   // Without a context there is no tokenizer to fetch; the DTD is then
michael@0:   // handed a null tokenizer below.
michael@0:   if (mParserContext) {
michael@0:     rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
michael@0:   }
michael@0: 
michael@0:   if (NS_FAILED(rv)) {
michael@0:     mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
michael@0:     return NS_ERROR_HTMLPARSER_BADTOKENIZER;
michael@0:   }
michael@0: 
michael@0:   if (mDTD) {
michael@0:     return mDTD->BuildModel(tokenizer, mSink);
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: /*******************************************************************
michael@0:   These methods are used to talk to the netlib system...
michael@0:  *******************************************************************/
michael@0: 
michael@0: /**
michael@0:  *  Stream-listener entry point for the start of |request|.
michael@0:  *
michael@0:  *  Forwards the notification to mObserver (when set), moves the current
michael@0:  *  context to eOnStart, resets its auto-detect status, remembers the
michael@0:  *  request, drops the current DTD and, when the request is a channel that
michael@0:  *  can report a content type, records that type on the context.
michael@0:  *
michael@0:  *  @return always NS_OK.
michael@0:  */
michael@0: nsresult
michael@0: nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
michael@0: {
michael@0:   NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
michael@0:                   "Parser's nsIStreamListener API was not setup "
michael@0:                   "correctly in constructor.");
michael@0: 
michael@0:   // The observer hears about the request before any parser state changes.
michael@0:   if (mObserver) {
michael@0:     mObserver->OnStartRequest(request, aContext);
michael@0:   }
michael@0: 
michael@0:   mParserContext->mStreamListenerState = eOnStart;
michael@0:   mParserContext->mAutoDetectStatus = eUnknownDetect;
michael@0:   mParserContext->mRequest = request;
michael@0: 
michael@0:   NS_ASSERTION(!mParserContext->mPrevContext,
michael@0:                "Clobbering DTD for non-root parser context!");
michael@0:   mDTD = nullptr;
michael@0: 
michael@0:   // A failure to read the content type is ignored; the context then keeps
michael@0:   // whatever MIME type it already had.
michael@0:   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
michael@0:   if (channel) {
michael@0:     nsAutoCString contentType;
michael@0:     if (NS_SUCCEEDED(channel->GetContentType(contentType))) {
michael@0:       mParserContext->SetMimeType(contentType);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Best-effort scan of the start of a byte buffer for an XML declaration
michael@0:  * ("<?xml ... ?>") and its encoding pseudo-attribute.
michael@0:  *
michael@0:  * The scan is deliberately cheap rather than exact: it looks for the last
michael@0:  * letter of "version" / "encoding", confirms the preceding letters, and
michael@0:  * then takes the next quoted run as the value. "encoding" is only looked
michael@0:  * for once a quoted version value has been seen. Scanning stops at "?>".
michael@0:  *
michael@0:  * @param aBytes   start of the buffer; only single-byte ASCII patterns are
michael@0:  *                 matched.
michael@0:  * @param aLen     number of bytes available at aBytes.
michael@0:  * @param oCharset always truncated first; receives the raw encoding value
michael@0:  *                 when one is found, is not empty, and is not exactly
michael@0:  *                 "UTF-16" (compared case-insensitively).
michael@0:  * @return true if oCharset was filled in, false otherwise.
michael@0:  */
michael@0: static bool
michael@0: ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
michael@0:                                  nsCString& oCharset)
michael@0: {
michael@0:   // This code is rather pointless to have. Might as well reuse expat as
michael@0:   // seen in nsHtml5StreamParser. -- hsivonen
michael@0:   oCharset.Truncate();
michael@0: 
michael@0:   // View the buffer as plain chars once, without casting away const.
michael@0:   const char* bytes = reinterpret_cast<const char*>(aBytes);
michael@0:   // Length of "UTF-16", the one encoding value that is rejected below.
michael@0:   const int32_t kUtf16LabelLen = 6;
michael@0: 
michael@0:   if ((aLen >= 5) &&
michael@0:       ('<' == bytes[0]) &&
michael@0:       ('?' == bytes[1]) &&
michael@0:       ('x' == bytes[2]) &&
michael@0:       ('m' == bytes[3]) &&
michael@0:       ('l' == bytes[4])) {
michael@0:     int32_t i;
michael@0:     bool versionFound = false, encodingFound = false;
michael@0:     for (i = 6; i < aLen && !encodingFound; ++i) {
michael@0:       // end of XML declaration?
michael@0:       if ((bytes[i] == '?') &&
michael@0:           ((i + 1) < aLen) &&
michael@0:           (bytes[i + 1] == '>')) {
michael@0:         break;
michael@0:       }
michael@0:       // Version is required.
michael@0:       if (!versionFound) {
michael@0:         // Want to avoid string comparisons, hence looking for 'n'
michael@0:         // and only if found check the string leading to it. Not
michael@0:         // foolproof, but fast.
michael@0:         // The shortest string allowed before this is  (strlen==13):
michael@0:         // <?xml version
michael@0:         if ((bytes[i] == 'n') &&
michael@0:             (i >= 12) &&
michael@0:             (0 == PL_strncmp("versio", bytes + i - 6, 6))) {
michael@0:           // Fast forward through version
michael@0:           char q = 0;
michael@0:           for (++i; i < aLen; ++i) {
michael@0:             char qi = bytes[i];
michael@0:             if (qi == '\'' || qi == '"') {
michael@0:               if (q && q == qi) {
michael@0:                 //  ending quote
michael@0:                 versionFound = true;
michael@0:                 break;
michael@0:               } else {
michael@0:                 // Starting quote
michael@0:                 q = qi;
michael@0:               }
michael@0:             }
michael@0:           }
michael@0:         }
michael@0:       } else {
michael@0:         // encoding must follow version
michael@0:         // Want to avoid string comparisons, hence looking for 'g'
michael@0:         // and only if found check the string leading to it. Not
michael@0:         // foolproof, but fast.
michael@0:         // The shortest allowed string before this (strlen==26):
michael@0:         // <?xml version="1" encoding
michael@0:         if ((bytes[i] == 'g') && (i >= 25) &&
michael@0:             (0 == PL_strncmp("encodin", bytes + i - 7, 7))) {
michael@0:           int32_t encStart = 0;
michael@0:           char q = 0;
michael@0:           for (++i; i < aLen; ++i) {
michael@0:             char qi = bytes[i];
michael@0:             if (qi == '\'' || qi == '"') {
michael@0:               if (q && q == qi) {
michael@0:                 int32_t count = i - encStart;
michael@0:                 // encoding value is invalid if it is UTF-16. Compare the
michael@0:                 // whole label: limiting the comparison to |count| chars
michael@0:                 // would also reject every shorter prefix of "UTF-16".
michael@0:                 bool isUtf16 =
michael@0:                   (count == kUtf16LabelLen) &&
michael@0:                   (0 == PL_strncasecmp("UTF-16", bytes + encStart,
michael@0:                                        kUtf16LabelLen));
michael@0:                 if (count > 0 && !isUtf16) {
michael@0:                   oCharset.Assign(bytes + encStart, count);
michael@0:                 }
michael@0:                 encodingFound = true;
michael@0:                 break;
michael@0:               } else {
michael@0:                 // Starting quote: the value begins right after it.
michael@0:                 encStart = i + 1;
michael@0:                 q = qi;
michael@0:               }
michael@0:             }
michael@0:           }
michael@0:         }
michael@0:       } // if (!versionFound)
michael@0:     } // for
michael@0:   }
michael@0:   return !oCharset.IsEmpty();
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Advances aStart by one position and returns the character found there,
michael@0:  * or '\0' when that advance reaches aEnd. aStart must not already be at
michael@0:  * aEnd on entry (asserted).
michael@0:  */
michael@0: inline const char
michael@0: GetNextChar(nsACString::const_iterator& aStart,
michael@0:             nsACString::const_iterator& aEnd)
michael@0: {
michael@0:   NS_ASSERTION(aStart != aEnd, "end of buffer");
michael@0:   ++aStart;
michael@0:   if (aStart != aEnd) {
michael@0:     return *aStart;
michael@0:   }
michael@0:   return '\0';
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * ReadSegments() writer callback that throws the data away: it reports the
michael@0:  * whole segment as consumed and looks at none of its other arguments.
michael@0:  */
michael@0: static NS_METHOD
michael@0: NoOpParserWriteFunc(nsIInputStream* in,
michael@0:                     void* closure,
michael@0:                     const char* fromRawSegment,
michael@0:                     uint32_t toOffset,
michael@0:                     uint32_t count,
michael@0:                     uint32_t *writeCount)
michael@0: {
michael@0:   // Claim every byte so that the stream advances past the segment.
michael@0:   *writeCount = count;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Closure handed to ParserWriteFunc through ReadSegments().
michael@0:  */
michael@0: struct ParserWriteStruct {
michael@0:   // True until ParserWriteFunc has run its one-time charset check.
michael@0:   bool mNeedCharsetCheck;
michael@0:   // Parser whose document/sink charset is read and updated.
michael@0:   nsParser* mParser;
michael@0:   // Scanner that the incoming bytes are appended to.
michael@0:   nsScanner* mScanner;
michael@0:   // Request passed along with each append.
michael@0:   nsIRequest* mRequest;
michael@0: };
michael@0: 
michael@0: /*
michael@0:  * Writer callback for a stream's ReadSegments() method; it runs once for
michael@0:  * each contiguous buffer of data in the underlying stream or pipe, which
michael@0:  * lets the data reach the scanner without an intermediate copy.
michael@0:  *
michael@0:  * |closure| must point at a ParserWriteStruct. The first segment seen with
michael@0:  * mNeedCharsetCheck set is inspected for a byte order mark or an XML
michael@0:  * declaration and the parser's charset is updated from it; every segment is
michael@0:  * then appended to the scanner.
michael@0:  *
michael@0:  * Returns NS_ERROR_FAILURE when |closure| is null, otherwise the result of
michael@0:  * the scanner append. *writeCount is set to |count| only on success.
michael@0:  */
michael@0: static NS_METHOD
michael@0: ParserWriteFunc(nsIInputStream* in,
michael@0:                 void* closure,
michael@0:                 const char* fromRawSegment,
michael@0:                 uint32_t toOffset,
michael@0:                 uint32_t count,
michael@0:                 uint32_t *writeCount)
michael@0: {
michael@0:   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
michael@0:   if (!pws) {
michael@0:     return NS_ERROR_FAILURE;
michael@0:   }
michael@0: 
michael@0:   if (pws->mNeedCharsetCheck) {
michael@0:     pws->mNeedCharsetCheck = false;
michael@0: 
michael@0:     const unsigned char* buf =
michael@0:       reinterpret_cast<const unsigned char*> (fromRawSegment);
michael@0: 
michael@0:     int32_t source;
michael@0:     nsAutoCString preferred;
michael@0:     nsAutoCString maybePrefer;
michael@0:     pws->mParser->GetDocumentCharset(preferred, source);
michael@0: 
michael@0:     // Known limitation (noted by hsivonen): only this one segment is
michael@0:     // examined, so the BOM or the XML declaration has to sit entirely in
michael@0:     // the first network buffer.
michael@0:     if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) {
michael@0:       // The decoder will swallow the BOM. The UTF-16 will re-sniff for
michael@0:       // endianness. The value of preferred is now either "UTF-8" or "UTF-16".
michael@0:       preferred.Assign(maybePrefer);
michael@0:       source = kCharsetFromByteOrderMark;
michael@0:     } else if (source < kCharsetFromChannel) {
michael@0:       // No BOM and nothing at channel level or above has fixed the charset:
michael@0:       // fall back to a label from the XML declaration, if it maps to a
michael@0:       // known encoding.
michael@0:       nsAutoCString declCharset;
michael@0:       if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset) &&
michael@0:           EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) {
michael@0:         preferred.Assign(maybePrefer);
michael@0:         source = kCharsetFromMetaTag;
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     pws->mParser->SetDocumentCharset(preferred, source);
michael@0:     pws->mParser->SetSinkCharset(preferred);
michael@0:   }
michael@0: 
michael@0:   nsresult result = pws->mScanner->Append(fromRawSegment, count,
michael@0:                                           pws->mRequest);
michael@0:   if (NS_SUCCEEDED(result)) {
michael@0:     *writeCount = count;
michael@0:   }
michael@0: 
michael@0:   return result;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Stream-listener entry point for a new block of network data.
michael@0:  *
michael@0:  *  The data is read from pIStream into the scanner of the parser context
michael@0:  *  whose request is |request|; the current context and then its previous
michael@0:  *  contexts are searched. Parsing is resumed afterwards when network data
michael@0:  *  may be processed and the scanner holds some non-whitespace data.
michael@0:  *  aContext and sourceOffset are not used here.
michael@0:  *
michael@0:  *  @return NS_ERROR_UNEXPECTED if no context belongs to |request|, the
michael@0:  *          ReadSegments() failure if reading fails, otherwise the
michael@0:  *          ReadSegments() result or, when parsing was resumed, the result
michael@0:  *          of ResumeParse(). For about:blank the data is drained and the
michael@0:  *          drain result is returned.
michael@0:  */
michael@0: nsresult
michael@0: nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
michael@0:                           nsIInputStream *pIStream, uint64_t sourceOffset,
michael@0:                           uint32_t aLength)
michael@0: {
michael@0:   NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
michael@0:                    eOnDataAvail == mParserContext->mStreamListenerState),
michael@0:             "Error: OnStartRequest() must be called before OnDataAvailable()");
michael@0:   NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
michael@0:                   "Must have a buffered input stream");
michael@0: 
michael@0:   if (mIsAboutBlank) {
michael@0:     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
michael@0:     // ... but if an extension tries to feed us data for about:blank in a
michael@0:     // release build, silently ignore the data.
michael@0:     uint32_t discarded;
michael@0:     return pIStream->ReadSegments(NoOpParserWriteFunc,
michael@0:                                   nullptr,
michael@0:                                   aLength,
michael@0:                                   &discarded);
michael@0:   }
michael@0: 
michael@0:   // Locate the context that owns this request.
michael@0:   CParserContext* owner = mParserContext;
michael@0:   for (; owner; owner = owner->mPrevContext) {
michael@0:     if (owner->mRequest == request) {
michael@0:       break;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (!owner) {
michael@0:     return NS_ERROR_UNEXPECTED;
michael@0:   }
michael@0: 
michael@0:   owner->mStreamListenerState = eOnDataAvail;
michael@0: 
michael@0:   // After an invalid detection, move the scanner position to the end of
michael@0:   // what it already holds before appending the new data.
michael@0:   if (eInvalidDetect == owner->mAutoDetectStatus && owner->mScanner) {
michael@0:     nsScannerIterator iter;
michael@0:     owner->mScanner->EndReading(iter);
michael@0:     owner->mScanner->SetPosition(iter, true);
michael@0:   }
michael@0: 
michael@0:   ParserWriteStruct pws;
michael@0:   pws.mNeedCharsetCheck = true;
michael@0:   pws.mParser = this;
michael@0:   pws.mScanner = owner->mScanner;
michael@0:   pws.mRequest = request;
michael@0: 
michael@0:   uint32_t totalRead;
michael@0:   nsresult rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength,
michael@0:                                        &totalRead);
michael@0:   if (NS_FAILED(rv)) {
michael@0:     return rv;
michael@0:   }
michael@0: 
michael@0:   // Don't bother to start parsing until we've seen some
michael@0:   // non-whitespace data
michael@0:   if (IsOkToProcessNetworkData() &&
michael@0:       owner->mScanner->FirstNonWhitespacePosition() >= 0) {
michael@0:     // Hold references to the parser and the sink across the parse.
michael@0:     nsCOMPtr<nsIParser> kungFuDeathGrip(this);
michael@0:     nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
michael@0:     mProcessingNetworkData = true;
michael@0:     if (mSink) {
michael@0:       mSink->WillParse();
michael@0:     }
michael@0:     rv = ResumeParse();
michael@0:     mProcessingNetworkData = false;
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Called by the networking library once the last block of data for
michael@0:  *  |request| has been collected from the net.
michael@0:  *
michael@0:  *  The context whose request matches is marked eOnStop and its scanner is
michael@0:  *  switched out of incremental mode; |status| is remembered in
michael@0:  *  mStreamStatus. If network data may be processed right now, the parse is
michael@0:  *  resumed as the final chunk. mObserver, when set, is notified last.
michael@0:  *
michael@0:  *  @return NS_OK, or the result of ResumeParse() when it was run.
michael@0:  */
michael@0: nsresult
michael@0: nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
michael@0:                         nsresult status)
michael@0: {
michael@0:   for (CParserContext* pc = mParserContext; pc; pc = pc->mPrevContext) {
michael@0:     if (pc->mRequest == request) {
michael@0:       pc->mStreamListenerState = eOnStop;
michael@0:       pc->mScanner->SetIncremental(false);
michael@0:       break;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   mStreamStatus = status;
michael@0: 
michael@0:   nsresult rv = NS_OK;
michael@0:   if (IsOkToProcessNetworkData()) {
michael@0:     mProcessingNetworkData = true;
michael@0:     if (mSink) {
michael@0:       mSink->WillParse();
michael@0:     }
michael@0:     rv = ResumeParse(true, true);
michael@0:     mProcessingNetworkData = false;
michael@0:   }
michael@0: 
michael@0:   // If the parser isn't enabled, we don't finish parsing till
michael@0:   // it is reenabled.
michael@0: 
michael@0:   // XXX Should we wait to notify our observers as well if the
michael@0:   // parser isn't yet enabled?
michael@0:   if (mObserver) {
michael@0:     mObserver->OnStopRequest(request, aContext, status);
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: 
michael@0: /*******************************************************************
michael@0:   Here come the tokenization methods...
michael@0:  *******************************************************************/
michael@0: 
michael@0: 
michael@0: /**
michael@0:  *  Part of the code sandwich: called right before tokenization starts so
michael@0:  *  that the current context's tokenizer can initialize itself.
michael@0:  *
michael@0:  *  @param  aIsFinalChunk passed through to the tokenizer.
michael@0:  *  @return true when there is no parser context or the tokenizer's
michael@0:  *          WillTokenize() succeeds; false when the tokenizer cannot be
michael@0:  *          obtained or its WillTokenize() fails.
michael@0:  */
michael@0: bool
michael@0: nsParser::WillTokenize(bool aIsFinalChunk)
michael@0: {
michael@0:   if (mParserContext) {
michael@0:     nsITokenizer* theTokenizer;
michael@0:     nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
michael@0:     NS_ENSURE_SUCCESS(result, false);
michael@0:     return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
michael@0:   }
michael@0: 
michael@0:   // Nothing to prepare without a context.
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * This is the primary control routine to consume tokens. It keeps asking
michael@0:  * the current context's tokenizer for tokens until the tokenizer fails
michael@0:  * (running out of data is reported as kEOF) or asks for a flush while
michael@0:  * observers are enabled.
michael@0:  *
michael@0:  * @param  aIsFinalChunk passed through to WillTokenize().
michael@0:  * @return NS_ERROR_HTMLPARSER_BADTOKENIZER (also stored in mInternalState)
michael@0:  *         when there is no context or no tokenizer; the result of
michael@0:  *         Terminate() when the tokenizer asked to stop parsing, in which
michael@0:  *         case mSink is also dropped; otherwise the last ConsumeToken()
michael@0:  *         result.
michael@0:  */
michael@0: nsresult nsParser::Tokenize(bool aIsFinalChunk)
michael@0: {
michael@0:   nsITokenizer* tokenizer;
michael@0: 
michael@0:   nsresult rv = NS_ERROR_NOT_AVAILABLE;
michael@0:   if (mParserContext) {
michael@0:     rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
michael@0:   }
michael@0: 
michael@0:   if (NS_FAILED(rv)) {
michael@0:     mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
michael@0:     return NS_ERROR_HTMLPARSER_BADTOKENIZER;
michael@0:   }
michael@0: 
michael@0:   bool flushTokens = false;
michael@0: 
michael@0:   WillTokenize(aIsFinalChunk);
michael@0:   for (;;) {
michael@0:     // Remember where this token starts so that a failed attempt can be
michael@0:     // undone.
michael@0:     mParserContext->mScanner->Mark();
michael@0:     rv = tokenizer->ConsumeToken(*mParserContext->mScanner, flushTokens);
michael@0: 
michael@0:     if (NS_FAILED(rv)) {
michael@0:       // Any failure ends the loop; kEOF and other errors are returned
michael@0:       // unchanged.
michael@0:       mParserContext->mScanner->RewindToMark();
michael@0:       if (NS_ERROR_HTMLPARSER_STOPPARSING == rv) {
michael@0:         // Shut the parser down and let go of the sink.
michael@0:         rv = Terminate();
michael@0:         mSink = nullptr;
michael@0:       }
michael@0:       break;
michael@0:     }
michael@0: 
michael@0:     if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
michael@0:       // The NS_PARSER_FLAG_OBSERVERS_ENABLED test is there for Bug# 23931.
michael@0:       // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
michael@0:       // Also remember to update the marked position.
michael@0:       mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
michael@0:       mParserContext->mScanner->Mark();
michael@0:       break;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Get the channel associated with this parser, i.e. the current context's
michael@0:  * request queried for nsIChannel.
michael@0:  *
michael@0:  * @param aChannel out param that will contain the result
michael@0:  * @return NS_ERROR_NOT_AVAILABLE when there is no current context or it has
michael@0:  *         no request; otherwise the result of the interface query.
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::GetChannel(nsIChannel** aChannel)
michael@0: {
michael@0:   if (!mParserContext || !mParserContext->mRequest) {
michael@0:     return NS_ERROR_NOT_AVAILABLE;
michael@0:   }
michael@0: 
michael@0:   return CallQueryInterface(mParserContext->mRequest, aChannel);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Get the DTD associated with this parser
michael@0:  *
michael@0:  * @param aDTD out param; always written. Receives an AddRef'd mDTD when
michael@0:  *             there is a parser context and a DTD, nullptr otherwise.
michael@0:  * @return always NS_OK.
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsParser::GetDTD(nsIDTD** aDTD)
michael@0: {
michael@0:   // Give the out param a defined value even without a parser context, so
michael@0:   // that a caller never reads an uninitialized pointer after NS_OK.
michael@0:   *aDTD = nullptr;
michael@0: 
michael@0:   if (mParserContext) {
michael@0:     NS_IF_ADDREF(*aDTD = mDTD);
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Get this as nsIStreamListener
michael@0:  *
michael@0:  * @return this parser object itself, typed as nsIStreamListener*. The
michael@0:  *         pointer is returned as is; no AddRef is done here.
michael@0:  */
michael@0: nsIStreamListener*
michael@0: nsParser::GetStreamListener()
michael@0: {
michael@0:   return this;
michael@0: }