michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim: set sw=2 ts=2 et tw=79: */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include "mozilla/DebugOnly.h"
michael@0: 
michael@0: #include "nsHtml5StreamParser.h"
michael@0: #include "nsContentUtils.h"
michael@0: #include "nsHtml5Tokenizer.h"
michael@0: #include "nsIHttpChannel.h"
michael@0: #include "nsHtml5Parser.h"
michael@0: #include "nsHtml5TreeBuilder.h"
michael@0: #include "nsHtml5AtomTable.h"
michael@0: #include "nsHtml5Module.h"
michael@0: #include "nsHtml5RefPtr.h"
michael@0: #include "nsIScriptError.h"
michael@0: #include "mozilla/Preferences.h"
michael@0: #include "nsHtml5Highlighter.h"
michael@0: #include "expat_config.h"
michael@0: #include "expat.h"
michael@0: #include "nsINestedURI.h"
michael@0: #include "nsCharsetSource.h"
michael@0: #include "nsIWyciwygChannel.h"
michael@0: #include "nsIThreadRetargetableRequest.h"
michael@0: #include "nsPrintfCString.h"
michael@0: #include "nsNetUtil.h"
michael@0: 
michael@0: #include "mozilla/dom/EncodingUtils.h"
michael@0: 
michael@0: using namespace mozilla;
michael@0: using mozilla::dom::EncodingUtils;
michael@0: 
michael@0: int32_t nsHtml5StreamParser::sTimerInitialDelay = 120; // start value; InitializeStatics() ties it to pref html5.flushtimer.initialdelay (unit presumably ms - confirm)
michael@0: int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120; // start value; InitializeStatics() ties it to pref html5.flushtimer.subsequentdelay (unit presumably ms - confirm)
michael@0: 
michael@0: /**
michael@0:  * Hooks the two static flush timer delays up to their preferences through
michael@0:  * Preferences::AddIntVarCache().
michael@0:  */
michael@0: // static
michael@0: void
michael@0: nsHtml5StreamParser::InitializeStatics()
michael@0: {
michael@0:   // Preference names backing the static delay variables.
michael@0:   static const char kInitialDelayPref[] = "html5.flushtimer.initialdelay";
michael@0:   static const char kSubsequentDelayPref[] =
michael@0:     "html5.flushtimer.subsequentdelay";
michael@0: 
michael@0:   Preferences::AddIntVarCache(&sTimerInitialDelay, kInitialDelayPref);
michael@0:   Preferences::AddIntVarCache(&sTimerSubsequentDelay, kSubsequentDelayPref);
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Note that nsHtml5StreamParser implements cycle collecting AddRef and
michael@0:  * Release. Therefore, nsHtml5StreamParser must never be refcounted from
michael@0:  * the parser thread!
michael@0:  *
michael@0:  * To work around this limitation, runnables posted by the main thread to the
michael@0:  * parser thread hold their reference to the stream parser in an
michael@0:  * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds
michael@0:  * just like a regular nsRefPtr. This is OK, since the creation of the
michael@0:  * runnable and the nsHtml5RefPtr happens on the main thread.
michael@0:  *
michael@0:  * When the runnable is done on the parser thread, the destructor of
michael@0:  * nsHtml5RefPtr runs there. It doesn't call Release on the held object
michael@0:  * directly. Instead, it posts another runnable back to the main thread where
michael@0:  * that runnable calls Release on the wrapped object.
michael@0:  *
michael@0:  * When posting runnables in the other direction, the runnables have to be
michael@0:  * created on the main thread when nsHtml5StreamParser is instantiated and
michael@0:  * held for the lifetime of the nsHtml5StreamParser. This works, because the
michael@0:  * same runnable can be dispatched multiple times and currently runnables
michael@0:  * posted from the parser thread to the main thread don't need to wrap any
michael@0:  * runnable-specific data. (In the other direction, the runnables most notably
michael@0:  * wrap the byte data of the stream.)
michael@0:  */
michael@0: NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser) // cycle collecting AddRef: never call from the parser thread
michael@0: NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser) // cycle collecting Release: never call from the parser thread
michael@0: 
michael@0: NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
michael@0:   NS_INTERFACE_TABLE(nsHtml5StreamParser,
michael@0:                      nsICharsetDetectionObserver) // interface listed in the table for this class
michael@0:   NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
michael@0: NS_INTERFACE_MAP_END
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser) // the Unlink and Traverse bodies follow below
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser) // Unlink: clears the members whose edges Traverse reports
michael@0:   tmp->DropTimer(); // the destructor asserts that the flush timer is already gone
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest)
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner)
michael@0:   tmp->mExecutorFlusher = nullptr; // this runnable holds its own strong reference to the executor
michael@0:   tmp->mLoadFlusher = nullptr; // so does this one
michael@0:   tmp->mExecutor = nullptr;
michael@0:   NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet) // the constructor passed this object to mChardet->Init()
michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_END
michael@0: 
michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
michael@0:   // Members that can be traversed directly.
michael@0:   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
michael@0:   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest)
michael@0:   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner)
michael@0:   // Hack: the executor flusher runnable wraps a strong reference to the
michael@0:   // executor, so report that edge here while the runnable exists.
michael@0:   if (tmp->mExecutorFlusher) {
michael@0:     nsIContentSink* heldByExecutorFlusher =
michael@0:       static_cast<nsIContentSink*>(tmp->mExecutor);
michael@0:     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor");
michael@0:     cb.NoteXPCOMChild(heldByExecutorFlusher);
michael@0:   }
michael@0:   // Hack: same for the strong reference wrapped in the load flusher runnable.
michael@0:   if (tmp->mLoadFlusher) {
michael@0:     nsIContentSink* heldByLoadFlusher =
michael@0:       static_cast<nsIContentSink*>(tmp->mExecutor);
michael@0:     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
michael@0:     cb.NoteXPCOMChild(heldByLoadFlusher);
michael@0:   }
michael@0:   // Hack: the charset detector was initialized with this object as its
michael@0:   // observer, so report the edge back to ourselves while it exists.
michael@0:   if (tmp->mChardet) {
michael@0:     nsICharsetDetectionObserver* selfAsObserver =
michael@0:       static_cast<nsICharsetDetectionObserver*>(tmp);
michael@0:     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver");
michael@0:     cb.NoteXPCOMChild(selfAsObserver);
michael@0:   }
michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
michael@0: 
michael@0: /**
michael@0:  * Runnable that makes the tree op executor run its flush loop.
michael@0:  *
michael@0:  * The runnable keeps a strong reference to the executor for as long as the
michael@0:  * runnable itself is alive.
michael@0:  */
michael@0: class nsHtml5ExecutorFlusher : public nsRunnable
michael@0: {
michael@0:   private:
michael@0:     nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
michael@0:   public:
michael@0:     /**
michael@0:      * @param aExecutor the executor to flush; must not be null because Run()
michael@0:      *                  dereferences it unconditionally
michael@0:      *
michael@0:      * explicit, so that an executor pointer is never silently converted
michael@0:      * into a runnable.
michael@0:      */
michael@0:     explicit nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor)
michael@0:       : mExecutor(aExecutor)
michael@0:     {}
michael@0:     /**
michael@0:      * Runs the executor's flush loop unless the executor reports
michael@0:      * isInList() (presumably meaning it is already queued for flushing
michael@0:      * elsewhere - confirm against nsHtml5TreeOpExecutor).
michael@0:      *
michael@0:      * @return always NS_OK
michael@0:      */
michael@0:     NS_IMETHODIMP Run()
michael@0:     {
michael@0:       if (!mExecutor->isInList()) {
michael@0:         mExecutor->RunFlushLoop();
michael@0:       }
michael@0:       return NS_OK;
michael@0:     }
michael@0: };
michael@0: 
michael@0: /**
michael@0:  * Runnable that makes the tree op executor flush its speculative loads.
michael@0:  *
michael@0:  * The runnable keeps a strong reference to the executor for as long as the
michael@0:  * runnable itself is alive.
michael@0:  */
michael@0: class nsHtml5LoadFlusher : public nsRunnable
michael@0: {
michael@0:   private:
michael@0:     nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
michael@0:   public:
michael@0:     /**
michael@0:      * @param aExecutor the executor to flush; must not be null because Run()
michael@0:      *                  dereferences it unconditionally
michael@0:      *
michael@0:      * explicit, so that an executor pointer is never silently converted
michael@0:      * into a runnable.
michael@0:      */
michael@0:     explicit nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor)
michael@0:       : mExecutor(aExecutor)
michael@0:     {}
michael@0:     /**
michael@0:      * Calls FlushSpeculativeLoads() on the executor.
michael@0:      *
michael@0:      * @return always NS_OK
michael@0:      */
michael@0:     NS_IMETHODIMP Run()
michael@0:     {
michael@0:       mExecutor->FlushSpeculativeLoads();
michael@0:       return NS_OK;
michael@0:     }
michael@0: };
michael@0: 
michael@0: /**
michael@0:  * Sets up the tokenizer, the tree builder, the flusher runnables and the
michael@0:  * flush timer for one stream. Expected to run on the main thread (asserted).
michael@0:  *
michael@0:  * @param aExecutor executor whose stage is handed to the tree builder and
michael@0:  *                  which the flusher runnables keep a reference to
michael@0:  * @param aOwner    stored in mOwner
michael@0:  * @param aMode     parser mode; the view source modes additionally get a
michael@0:  *                  highlighter attached to the tokenizer and tree builder
michael@0:  */
michael@0: nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
michael@0:                                          nsHtml5Parser* aOwner,
michael@0:                                          eParserMode aMode)
michael@0:   : mFirstBuffer(nullptr) // Will be filled when starting
michael@0:   , mLastBuffer(nullptr) // Will be filled when starting
michael@0:   , mExecutor(aExecutor)
michael@0:   , mTreeBuilder(new nsHtml5TreeBuilder(
michael@0:       (aMode != VIEW_SOURCE_HTML && aMode != VIEW_SOURCE_XML) ?
michael@0:         mExecutor->GetStage() : nullptr,
michael@0:       aMode == NORMAL ? mExecutor->GetStage() : nullptr))
michael@0:   , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML))
michael@0:   , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
michael@0:   , mOwner(aOwner)
michael@0:   , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")
michael@0:   , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex")
michael@0:   , mThread(nsHtml5Module::GetStreamParserThread())
michael@0:   , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor))
michael@0:   , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor))
michael@0:   , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
michael@0:   , mMode(aMode)
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   // The flush timer is targeted at the stream parser thread.
michael@0:   mFlushTimer->SetTarget(mThread);
michael@0: #ifdef DEBUG
michael@0:   mAtomTable.SetPermittedLookupThread(mThread);
michael@0: #endif
michael@0:   mTokenizer->setInterner(&mAtomTable);
michael@0:   mTokenizer->setEncodingDeclarationHandler(this);
michael@0: 
michael@0:   const bool viewingSource =
michael@0:     (aMode == VIEW_SOURCE_HTML) || (aMode == VIEW_SOURCE_XML);
michael@0:   if (viewingSource) {
michael@0:     // One highlighter is shared: the tokenizer owns it, the tree builder
michael@0:     // merely uses it.
michael@0:     nsHtml5Highlighter* sourceHighlighter =
michael@0:       new nsHtml5Highlighter(mExecutor->GetStage());
michael@0:     mTokenizer->EnableViewSource(sourceHighlighter); // takes ownership
michael@0:     mTreeBuilder->EnableViewSource(sourceHighlighter); // doesn't own
michael@0:   }
michael@0: 
michael@0:   // Chardet instantiation adapted from nsDOMFile.
michael@0:   // The detector is created here, even if it later turns out to be
michael@0:   // useless, so that it refcounts its observer (this nsHtml5StreamParser)
michael@0:   // on the main thread.
michael@0:   const nsAdoptingCString& detectorPref =
michael@0:     Preferences::GetLocalizedCString("intl.charset.detector");
michael@0:   if (!detectorPref.IsEmpty()) {
michael@0:     nsAutoCString contractId;
michael@0:     contractId.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
michael@0:     contractId += detectorPref;
michael@0:     mChardet = do_CreateInstance(contractId.get());
michael@0:     if (mChardet) {
michael@0:       (void) mChardet->Init(this);
michael@0:       mFeedChardet = true;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // There's a zeroing operator new for everything else
michael@0: }
michael@0: 
michael@0: nsHtml5StreamParser::~nsHtml5StreamParser() // expected on the main thread; see the assertion below
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   mTokenizer->end(); // called unconditionally, before any member is cleared
michael@0:   NS_ASSERTION(!mFlushTimer, "Flush timer was not dropped before dtor!");
michael@0: #ifdef DEBUG
michael@0:   mRequest = nullptr; // DEBUG builds only: null the members out explicitly, in this order
michael@0:   mObserver = nullptr;
michael@0:   mUnicodeDecoder = nullptr;
michael@0:   mSniffingBuffer = nullptr;
michael@0:   mMetaScanner = nullptr;
michael@0:   mFirstBuffer = nullptr;
michael@0:   mExecutor = nullptr;
michael@0:   mTreeBuilder = nullptr;
michael@0:   mTokenizer = nullptr;
michael@0:   mOwner = nullptr;
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Hands out mRequest as an nsIChannel. Expected on the main thread
michael@0:  * (asserted).
michael@0:  *
michael@0:  * @param aChannel out-param filled by the QueryInterface call on mRequest
michael@0:  * @return NS_ERROR_NOT_AVAILABLE when there is no request, otherwise the
michael@0:  *         result of the QueryInterface call
michael@0:  */
michael@0: nsresult
michael@0: nsHtml5StreamParser::GetChannel(nsIChannel** aChannel)
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   if (!mRequest) {
michael@0:     return NS_ERROR_NOT_AVAILABLE;
michael@0:   }
michael@0:   return CallQueryInterface(mRequest, aChannel);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Charset detector callback. Expected on the parser thread (asserted).
michael@0:  *
michael@0:  * Only eBestAnswer and eSureAnswer are acted upon. Labels that do not map
michael@0:  * to an encoding, or that map to "replacement", are ignored.
michael@0:  *
michael@0:  * @param aCharset the label reported by the detector
michael@0:  * @param aConf    the detector's confidence in that label
michael@0:  * @return always NS_OK
michael@0:  */
michael@0: NS_IMETHODIMP
michael@0: nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   if (aConf != eBestAnswer && aConf != eSureAnswer) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   mFeedChardet = false; // just in case
michael@0:   nsAutoCString detected;
michael@0:   const bool knownLabel =
michael@0:     EncodingUtils::FindEncodingForLabel(nsDependentCString(aCharset),
michael@0:                                         detected);
michael@0:   if (!knownLabel || detected.EqualsLiteral("replacement")) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (!HasDecoder()) {
michael@0:     // The confident answer was derived from the sniffing buffer. The
michael@0:     // sniffing code will set up the decoder from mCharset.
michael@0:     mCharset.Assign(detected);
michael@0:     mCharsetSource = kCharsetFromAutoDetection;
michael@0:     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   if (mCharset.Equals(detected)) {
michael@0:     // The detector agrees with the decoder in use; only the source changes.
michael@0:     NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
michael@0:         "Why are we running chardet at all?");
michael@0:     mCharsetSource = kCharsetFromAutoDetection;
michael@0:     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // A different decoder has been committed to already, so ask the docshell
michael@0:   // for a reload with the detected encoding.
michael@0:   mTreeBuilder->NeedsCharsetSwitchTo(detected,
michael@0:                                      kCharsetFromAutoDetection,
michael@0:                                      0);
michael@0:   FlushTreeOpsAndDisarmTimer();
michael@0:   Interrupt();
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Derives the view source title from a URL and stores it in
michael@0:  * mViewSourceTitle.
michael@0:  *
michael@0:  * For a view-source: URL the inner URL is used. data: URLs are shortened to
michael@0:  * "data:" followed by an ellipsis. The title is left untouched when aURL is
michael@0:  * null or when the inner URL of a view-source: URL cannot be obtained.
michael@0:  *
michael@0:  * @param aURL the URL being viewed; may be null
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL)
michael@0: {
michael@0:   if (!aURL) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   nsCOMPtr<nsIURI> temp;
michael@0:   // Initialized so that a failing SchemeIs() cannot leave the flag
michael@0:   // indeterminate; a failure is treated like "scheme does not match".
michael@0:   bool isViewSource = false;
michael@0:   if (NS_SUCCEEDED(aURL->SchemeIs("view-source", &isViewSource)) &&
michael@0:       isViewSource) {
michael@0:     // The QueryInterface can fail; don't dereference a null pointer then.
michael@0:     nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL);
michael@0:     if (nested) {
michael@0:       nested->GetInnerURI(getter_AddRefs(temp));
michael@0:     }
michael@0:   } else {
michael@0:     temp = aURL;
michael@0:   }
michael@0:   if (!temp) {
michael@0:     // No inner URL could be obtained; there is nothing to build a title from.
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   bool isData = false;
michael@0:   if (NS_SUCCEEDED(temp->SchemeIs("data", &isData)) && isData) {
michael@0:     // Avoid showing potentially huge data: URLs. The three last bytes are
michael@0:     // UTF-8 for an ellipsis.
michael@0:     mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6");
michael@0:   } else {
michael@0:     temp->GetSpec(mViewSourceTitle);
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Creates the decoder for mCharset and feeds it the bytes held back during
michael@0:  * sniffing, followed by the current segment. Expected on the parser thread
michael@0:  * (asserted).
michael@0:  *
michael@0:  * The sniffing buffer is released once it has been written successfully and
michael@0:  * the meta scanner is dropped in any case.
michael@0:  *
michael@0:  * @param aFromSegment current segment; can be null, in which case nothing
michael@0:  *                     beyond the sniffing buffer is written
michael@0:  * @param aCount       number of bytes in aFromSegment
michael@0:  * @param aWriteCount  passed on to WriteStreamBytes() for aFromSegment; not
michael@0:  *                     written to by this method when aFromSegment is null
michael@0:  * @return the first failure from WriteStreamBytes(), else the result of the
michael@0:  *         last write performed (NS_OK when nothing was written)
michael@0:  */
michael@0: nsresult
michael@0: nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null
michael@0:                                                                           uint32_t aCount,
michael@0:                                                                           uint32_t* aWriteCount)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
michael@0: 
michael@0:   nsresult rv = NS_OK;
michael@0:   if (mSniffingBuffer) {
michael@0:     // The count reported for the sniffing buffer is of no interest here.
michael@0:     uint32_t sniffedWriteCount;
michael@0:     rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength,
michael@0:                           &sniffedWriteCount);
michael@0:     NS_ENSURE_SUCCESS(rv, rv);
michael@0:     mSniffingBuffer = nullptr;
michael@0:   }
michael@0:   mMetaScanner = nullptr;
michael@0: 
michael@0:   if (!aFromSegment) {
michael@0:     return rv;
michael@0:   }
michael@0:   return WriteStreamBytes(aFromSegment, aCount, aWriteCount);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Commits to the encoding announced by a byte order mark. Expected on the
michael@0:  * parser thread (asserted).
michael@0:  *
michael@0:  * Sets the charset and its source, creates the decoder, informs the tree
michael@0:  * builder and ends all further sniffing (chardet feeding, sniffing buffer,
michael@0:  * meta scanner, BOM state).
michael@0:  *
michael@0:  * @param aDecoderCharsetName name of the encoding the BOM stands for
michael@0:  * @return always NS_OK
michael@0:  */
michael@0: nsresult
michael@0: nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0: 
michael@0:   // Commit to the encoding.
michael@0:   mCharset.Assign(aDecoderCharsetName);
michael@0:   mCharsetSource = kCharsetFromByteOrderMark;
michael@0:   mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
michael@0:   mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0: 
michael@0:   // Nothing is left to sniff.
michael@0:   mFeedChardet = false;
michael@0:   mBomState = BOM_SNIFFING_OVER;
michael@0:   mSniffingBuffer = nullptr;
michael@0:   mMetaScanner = nullptr;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Heuristic for UTF-16 without a BOM whose content is Basic Latin only.
michael@0:  *
michael@0:  * Looks at the sniffing buffer followed by the start of the current segment.
michael@0:  * The heuristic applies only if zero bytes occur exclusively at even offsets
michael@0:  * and non-zero bytes exclusively at odd offsets, or the other way round. In
michael@0:  * that case mCharset becomes UTF-16LE (non-zero bytes at even offsets) or
michael@0:  * UTF-16BE, chardet feeding stops and a complaint is filed with the tree
michael@0:  * builder. Otherwise nothing is changed.
michael@0:  *
michael@0:  * Does nothing in LOAD_AS_DATA mode or with fewer than 30 bytes available.
michael@0:  *
michael@0:  * @param aFromSegment          current segment; may be null
michael@0:  * @param aCountToSniffingLimit number of bytes of aFromSegment to examine
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment,
michael@0:                                                  uint32_t aCountToSniffingLimit)
michael@0: {
michael@0:   // Avoid underspecified heuristic craziness for XHR
michael@0:   if (mMode == LOAD_AS_DATA) {
michael@0:     return;
michael@0:   }
michael@0:   // Make sure there's enough data. Require room for "<title></title>"
michael@0:   if (mSniffingLength + aCountToSniffingLimit < 30) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Slot 0 records what was seen at even stream offsets, slot 1 what was
michael@0:   // seen at odd stream offsets.
michael@0:   bool sawZero[2] = { false, false };
michael@0:   bool sawNonZero[2] = { false, false };
michael@0: 
michael@0:   // Number of bytes consumed from the sniffing buffer; keeps the parity of
michael@0:   // the segment bytes continuous with the buffered ones.
michael@0:   uint32_t offset = 0;
michael@0:   if (mSniffingBuffer) {
michael@0:     while (offset < mSniffingLength) {
michael@0:       const uint32_t parity = offset % 2;
michael@0:       bool* seenForKind = mSniffingBuffer[offset] ? sawNonZero : sawZero;
michael@0:       if (seenForKind[1 - parity]) {
michael@0:         // The same kind of byte showed up at both parities: not our case.
michael@0:         return;
michael@0:       }
michael@0:       seenForKind[parity] = true;
michael@0:       ++offset;
michael@0:     }
michael@0:   }
michael@0:   if (aFromSegment) {
michael@0:     for (uint32_t k = 0; k < aCountToSniffingLimit; ++k) {
michael@0:       const uint32_t parity = (offset + k) % 2;
michael@0:       bool* seenForKind = aFromSegment[k] ? sawNonZero : sawZero;
michael@0:       if (seenForKind[1 - parity]) {
michael@0:         return;
michael@0:       }
michael@0:       seenForKind[parity] = true;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // Non-zero bytes at even offsets mean the low byte comes first.
michael@0:   mCharset.Assign(sawNonZero[0] ? "UTF-16LE" : "UTF-16BE");
michael@0:   mCharsetSource = kCharsetFromIrreversibleAutoDetection;
michael@0:   mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:   mFeedChardet = false;
michael@0:   mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16",
michael@0:                                           true,
michael@0:                                           0);
michael@0: 
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Takes the encoding name that expat found in an XML declaration.
michael@0:  *
michael@0:  * mCharset becomes the declared encoding if
michael@0:  * PreferredForInternalEncodingDecl() accepts it, and UTF-8 otherwise
michael@0:  * (including when no encoding was declared). In both cases the charset
michael@0:  * source is set to kCharsetFromMetaTag.
michael@0:  *
michael@0:  * @param aEncoding zero-terminated UTF-16 encoding name; may be null
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
michael@0: {
michael@0:   nsAutoCString declared;
michael@0:   bool declaredIsUsable = false;
michael@0:   if (aEncoding) {
michael@0:     CopyUTF16toUTF8(nsDependentString(aEncoding), declared);
michael@0:     declaredIsUsable = PreferredForInternalEncodingDecl(declared);
michael@0:   }
michael@0: 
michael@0:   if (declaredIsUsable) {
michael@0:     mCharset.Assign(declared);
michael@0:   } else {
michael@0:     // Either nothing was declared or the page declared an encoding that
michael@0:     // Gecko doesn't support. Both end up at UTF-8, which is what XML
michael@0:     // defaults to without a BOM.
michael@0:     mCharset.AssignLiteral("UTF-8");
michael@0:   }
michael@0:   // kCharsetFromMetaTag is the closest match for XML and means confident.
michael@0:   mCharsetSource = kCharsetFromMetaTag;
michael@0: }
michael@0: 
michael@0: // A separate user data struct is used instead of passing the
michael@0: // nsHtml5StreamParser instance as user data in order to avoid including
michael@0: // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts.
michael@0: // Using a separate user data struct also avoids bloating nsHtml5StreamParser
michael@0: // by one pointer.
michael@0: struct UserData {
michael@0:   XML_Parser mExpat; // the parser that the handler callbacks stop via XML_StopParser()
michael@0:   nsHtml5StreamParser* mStreamParser; // target of SetEncodingFromExpat(); plain pointer
michael@0: };
michael@0: 
michael@0: // Using no-namespace handler callbacks to avoid including expat.h in
michael@0: // nsHtml5StreamParser.h, since doing so would cause naming conflicts.
michael@0: /**
michael@0:  * expat XML declaration handler: forwards the declared encoding to the
michael@0:  * stream parser and then stops expat (non-resumably).
michael@0:  *
michael@0:  * @param aUserData   the UserData registered with XML_SetUserData()
michael@0:  * @param aVersion    unused
michael@0:  * @param aEncoding   encoding name as passed by expat; forwarded as is
michael@0:  * @param aStandalone unused
michael@0:  */
michael@0: static void
michael@0: HandleXMLDeclaration(void* aUserData,
michael@0:                      const XML_Char* aVersion,
michael@0:                      const XML_Char* aEncoding,
michael@0:                      int aStandalone)
michael@0: {
michael@0:   UserData* sniffState = static_cast<UserData*>(aUserData);
michael@0:   const char16_t* declaredEncoding =
michael@0:     reinterpret_cast<const char16_t*>(aEncoding);
michael@0:   sniffState->mStreamParser->SetEncodingFromExpat(declaredEncoding);
michael@0:   XML_StopParser(sniffState->mExpat, false);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * expat start tag handler: reaching an element means there is nothing more
michael@0:  * to learn here, so expat is stopped (non-resumably).
michael@0:  *
michael@0:  * @param aUserData the UserData registered with XML_SetUserData()
michael@0:  * @param aName     unused
michael@0:  * @param aAtts     unused
michael@0:  */
michael@0: static void
michael@0: HandleStartElement(void* aUserData,
michael@0:                    const XML_Char* aName,
michael@0:                    const XML_Char **aAtts)
michael@0: {
michael@0:   XML_StopParser(static_cast<UserData*>(aUserData)->mExpat, false);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * expat end tag handler: stops expat (non-resumably).
michael@0:  *
michael@0:  * @param aUserData the UserData registered with XML_SetUserData()
michael@0:  * @param aName     unused
michael@0:  */
michael@0: static void
michael@0: HandleEndElement(void* aUserData,
michael@0:                  const XML_Char* aName)
michael@0: {
michael@0:   XML_StopParser(static_cast<UserData*>(aUserData)->mExpat, false);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * expat comment handler: stops expat (non-resumably).
michael@0:  *
michael@0:  * @param aUserData the UserData registered with XML_SetUserData()
michael@0:  * @param aName     unused
michael@0:  */
michael@0: static void
michael@0: HandleComment(void* aUserData,
michael@0:               const XML_Char* aName)
michael@0: {
michael@0:   XML_StopParser(static_cast<UserData*>(aUserData)->mExpat, false);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * expat processing instruction handler: stops expat (non-resumably).
michael@0:  *
michael@0:  * @param aUserData the UserData registered with XML_SetUserData()
michael@0:  * @param aTarget   unused
michael@0:  * @param aData     unused
michael@0:  */
michael@0: static void
michael@0: HandleProcessingInstruction(void* aUserData,
michael@0:                             const XML_Char* aTarget,
michael@0:                             const XML_Char* aData)
michael@0: {
michael@0:   XML_StopParser(static_cast<UserData*>(aUserData)->mExpat, false);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Ends encoding sniffing: settles on a charset, then sets up decoding and
michael@0:  * writes out the sniffing buffer and the current segment. Expected on the
michael@0:  * parser thread (asserted).
michael@0:  *
michael@0:  * In VIEW_SOURCE_XML mode the XML declaration is parsed with expat and UTF-8
michael@0:  * is used when that yields nothing. In the other modes a sufficiently strong
michael@0:  * existing charset source is kept as is; otherwise the BOM-less UTF-16
michael@0:  * heuristic and, if enabled, the charset detector are consulted.
michael@0:  *
michael@0:  * @param aFromSegment          data of the current event; can be null
michael@0:  * @param aCount                number of bytes in aFromSegment
michael@0:  * @param aWriteCount           passed on when the current segment is written
michael@0:  * @param aCountToSniffingLimit number of bytes of aFromSegment that still
michael@0:  *                              fall within the sniffing limit
michael@0:  * @return a failure from the charset detector, or else the result of
michael@0:  *         SetupDecodingAndWriteSniffingBufferAndCurrentSegment()
michael@0:  */
michael@0: nsresult
michael@0: nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be null
michael@0:                                       uint32_t aCount,
michael@0:                                       uint32_t* aWriteCount,
michael@0:                                       uint32_t aCountToSniffingLimit)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
michael@0:       "Should not finalize sniffing when using forced charset.");
michael@0:   if (mMode == VIEW_SOURCE_XML) {
michael@0:     static const XML_Memory_Handling_Suite memsuite =
michael@0:       {
michael@0:         (void *(*)(size_t))moz_xmalloc,
michael@0:         (void *(*)(void *, size_t))moz_xrealloc,
michael@0:         moz_free
michael@0:       };
michael@0: 
michael@0:     static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' };
michael@0: 
michael@0:     static const char16_t kISO88591[] =
michael@0:         { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };
michael@0: 
michael@0:     UserData sniffState;
michael@0:     sniffState.mStreamParser = this;
michael@0: 
michael@0:     // Getting here means the stream had no BOM. UTF-16-encoded XML MUST
michael@0:     // start with a BOM and EBCDIC and the like are not supported, so the
michael@0:     // data is either garbage or in some rough superset of ASCII. Decoding
michael@0:     // as ISO-8859-1 handles the ASCII bytes, doesn't throw errors for bytes
michael@0:     // with the most significant bit set and stays clear of expat's unknown
michael@0:     // encoding code paths. That suffices for letting expat parse the XML
michael@0:     // declaration so that the encoding name can be extracted from it.
michael@0:     sniffState.mExpat =
michael@0:       XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator);
michael@0:     XML_Parser expat = sniffState.mExpat;
michael@0:     XML_SetXmlDeclHandler(expat, HandleXMLDeclaration);
michael@0:     XML_SetElementHandler(expat, HandleStartElement, HandleEndElement);
michael@0:     XML_SetCommentHandler(expat, HandleComment);
michael@0:     XML_SetProcessingInstructionHandler(expat, HandleProcessingInstruction);
michael@0:     XML_SetUserData(expat, static_cast<void*>(&sniffState));
michael@0: 
michael@0:     XML_Status parseStatus = XML_STATUS_OK;
michael@0: 
michael@0:     // mSniffingBuffer (if present) holds the data received before the
michael@0:     // current network event and aFromSegment the data of the current one.
michael@0:     // mSniffingLength bytes of the former followed by aCountToSniffingLimit
michael@0:     // bytes of the latter therefore make up the first 1024 bytes of the
michael@0:     // file (or all of it if the file is no longer than that). Both are
michael@0:     // parsed, but the second one is skipped if the first one already
michael@0:     // produced an encoding.
michael@0:     if (mSniffingBuffer) {
michael@0:       parseStatus =
michael@0:         XML_Parse(expat,
michael@0:                   reinterpret_cast<const char*>(mSniffingBuffer.get()),
michael@0:                   mSniffingLength,
michael@0:                   false);
michael@0:     }
michael@0:     if (aFromSegment &&
michael@0:         parseStatus == XML_STATUS_OK &&
michael@0:         mCharsetSource < kCharsetFromMetaTag) {
michael@0:       parseStatus = XML_Parse(expat,
michael@0:                               reinterpret_cast<const char*>(aFromSegment),
michael@0:                               aCountToSniffingLimit,
michael@0:                               false);
michael@0:     }
michael@0:     XML_ParserFree(expat);
michael@0: 
michael@0:     if (mCharsetSource < kCharsetFromMetaTag) {
michael@0:       // The XML declaration gave no encoding, in which case XML confidently
michael@0:       // defaults to UTF-8. (The declaration could also be longer than 1024
michael@0:       // bytes, but that case is not worth worrying about.)
michael@0:       mCharset.AssignLiteral("UTF-8");
michael@0:       mCharsetSource = kCharsetFromMetaTag; // means confident
michael@0:     }
michael@0: 
michael@0:     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0:                                                                 aCount,
michael@0:                                                                 aWriteCount);
michael@0:   }
michael@0: 
michael@0:   // meta scan failed.
michael@0:   if (mCharsetSource >= kCharsetFromHintPrevDoc) {
michael@0:     mFeedChardet = false;
michael@0:     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0:                                                                 aCount,
michael@0:                                                                 aWriteCount);
michael@0:   }
michael@0:   // For compat with IE, check for BOMless UTF-16 with Basic Latin content.
michael@0:   // See bug 631751. This may set the charset.
michael@0:   SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit);
michael@0:   // Give chardet a try now unless feeding it has been switched off.
michael@0:   if (mFeedChardet) {
michael@0:     bool detectorDone;
michael@0:     nsresult rv;
michael@0:     if (mSniffingBuffer) {
michael@0:       rv = mChardet->DoIt(reinterpret_cast<const char*>(mSniffingBuffer.get()),
michael@0:                           mSniffingLength,
michael@0:                           &detectorDone);
michael@0:       mFeedChardet = !detectorDone;
michael@0:       NS_ENSURE_SUCCESS(rv, rv);
michael@0:     }
michael@0:     if (mFeedChardet && aFromSegment) {
michael@0:       // When reparsing is forbidden, behave as if only the first 1024 bytes
michael@0:       // had been seen so that the outcome does not depend on where the
michael@0:       // buffer boundaries fall. When reparsing is allowed, buffer
michael@0:       // boundaries can affect whether the page is loaded once or twice. :-(
michael@0:       const uint32_t feedLength =
michael@0:         mReparseForbidden ? aCountToSniffingLimit : aCount;
michael@0:       rv = mChardet->DoIt(reinterpret_cast<const char*>(aFromSegment),
michael@0:                           feedLength,
michael@0:                           &detectorDone);
michael@0:       mFeedChardet = !detectorDone;
michael@0:       NS_ENSURE_SUCCESS(rv, rv);
michael@0:     }
michael@0:     if (mFeedChardet && (!aFromSegment || mReparseForbidden)) {
michael@0:       // mReparseForbidden is part of the condition so that the best guess
michael@0:       // so far gets applied to the sniffing buffer when no better guess
michael@0:       // would be allowed to take effect later.
michael@0:       mFeedChardet = false;
michael@0:       rv = mChardet->Done();
michael@0:       NS_ENSURE_SUCCESS(rv, rv);
michael@0:     }
michael@0:     // Fall through; the detector callback may have changed the charset.
michael@0:   }
michael@0:   if (mCharsetSource == kCharsetUninitialized) {
michael@0:     // Hopefully this case is never needed, but dealing with it anyway
michael@0:     mCharset.AssignLiteral("windows-1252");
michael@0:     mCharsetSource = kCharsetFromFallback;
michael@0:     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:   } else if (mMode == LOAD_AS_DATA &&
michael@0:              mCharsetSource == kCharsetFromFallback) {
michael@0:     NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
michael@0:     NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
michael@0:     NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
michael@0:                  "XHR should default to UTF-8");
michael@0:     // Mark the charset source as non-weak to signal that a decision has
michael@0:     // been made.
michael@0:     mCharsetSource = kCharsetFromDocTypeDefault;
michael@0:     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:   }
michael@0:   return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0:                                                               aCount,
michael@0:                                                               aWriteCount);
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
michael@0:                                       uint32_t aCount,
michael@0:                                       uint32_t* aWriteCount)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   nsresult rv = NS_OK;
michael@0:   uint32_t writeCount;
michael@0: 
michael@0:   // mCharset and mCharsetSource potentially have come from channel or higher
michael@0:   // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
michael@0:   // If we don't find a BOM, the previously set values of mCharset and
michael@0:   // mCharsetSource are not modified by the BOM sniffing here.
michael@0:   for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
michael@0:     switch (mBomState) {
michael@0:       case BOM_SNIFFING_NOT_STARTED:
michael@0:         NS_ASSERTION(i == 0, "Bad BOM sniffing state.");
michael@0:         switch (*aFromSegment) {
michael@0:           case 0xEF:
michael@0:             mBomState = SEEN_UTF_8_FIRST_BYTE;
michael@0:             break;
michael@0:           case 0xFF:
michael@0:             mBomState = SEEN_UTF_16_LE_FIRST_BYTE;
michael@0:             break;
michael@0:           case 0xFE:
michael@0:             mBomState = SEEN_UTF_16_BE_FIRST_BYTE;
michael@0:             break;
michael@0:           default:
michael@0:             mBomState = BOM_SNIFFING_OVER;
michael@0:             break;
michael@0:         }
michael@0:         break;
michael@0:       case SEEN_UTF_16_LE_FIRST_BYTE:
michael@0:         if (aFromSegment[i] == 0xFE) {
michael@0:           rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form
michael@0:           NS_ENSURE_SUCCESS(rv, rv);
michael@0:           uint32_t count = aCount - (i + 1);
michael@0:           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
michael@0:           NS_ENSURE_SUCCESS(rv, rv);
michael@0:           *aWriteCount = writeCount + (i + 1);
michael@0:           return rv;
michael@0:         }
michael@0:         mBomState = BOM_SNIFFING_OVER;
michael@0:         break;
michael@0:       case SEEN_UTF_16_BE_FIRST_BYTE:
michael@0:         if (aFromSegment[i] == 0xFF) {
michael@0:           rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form
michael@0:           NS_ENSURE_SUCCESS(rv, rv);
michael@0:           uint32_t count = aCount - (i + 1);
michael@0:           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
michael@0:           NS_ENSURE_SUCCESS(rv, rv);
michael@0:           *aWriteCount = writeCount + (i + 1);
michael@0:           return rv;
michael@0:         }
michael@0:         mBomState = BOM_SNIFFING_OVER;
michael@0:         break;
michael@0:       case SEEN_UTF_8_FIRST_BYTE:
michael@0:         if (aFromSegment[i] == 0xBB) {
michael@0:           mBomState = SEEN_UTF_8_SECOND_BYTE;
michael@0:         } else {
michael@0:           mBomState = BOM_SNIFFING_OVER;
michael@0:         }
michael@0:         break;
michael@0:       case SEEN_UTF_8_SECOND_BYTE:
michael@0:         if (aFromSegment[i] == 0xBF) {
michael@0:           rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form
michael@0:           NS_ENSURE_SUCCESS(rv, rv);
michael@0:           uint32_t count = aCount - (i + 1);
michael@0:           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
michael@0:           NS_ENSURE_SUCCESS(rv, rv);
michael@0:           *aWriteCount = writeCount + (i + 1);
michael@0:           return rv;
michael@0:         }
michael@0:         mBomState = BOM_SNIFFING_OVER;
michael@0:         break;
michael@0:       default:
michael@0:         mBomState = BOM_SNIFFING_OVER;
michael@0:         break;
michael@0:     }
michael@0:   }
michael@0:   // if we get here, there either was no BOM or the BOM sniffing isn't complete
michael@0:   // yet
michael@0:   
michael@0:   MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark,
michael@0:              "Should not come here if BOM was found.");
michael@0:   MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
michael@0:              "kCharsetFromOtherComponent is for XSLT.");
michael@0: 
michael@0:   if (mBomState == BOM_SNIFFING_OVER &&
michael@0:     mCharsetSource == kCharsetFromChannel) {
michael@0:     // There was no BOM and the charset came from channel. mCharset
michael@0:     // still contains the charset from the channel as set by an
michael@0:     // earlier call to SetDocumentCharset(), since we didn't find a BOM and
michael@0:     // overwrite mCharset. (Note that if the user has overridden the charset,
michael@0:     // we don't come here but check <meta> for XSS-dangerous charsets first.)
michael@0:     mFeedChardet = false;
michael@0:     mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0:       aCount, aWriteCount);
michael@0:   }
michael@0: 
michael@0:   if (!mMetaScanner && (mMode == NORMAL ||
michael@0:                         mMode == VIEW_SOURCE_HTML ||
michael@0:                         mMode == LOAD_AS_DATA)) {
michael@0:     mMetaScanner = new nsHtml5MetaScanner();
michael@0:   }
michael@0:   
michael@0:   if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) {
michael@0:     // this is the last buffer
michael@0:     uint32_t countToSniffingLimit =
michael@0:         NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
michael@0:     if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
michael@0:       nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
michael@0:           countToSniffingLimit);
michael@0:       nsAutoCString encoding;
michael@0:       mMetaScanner->sniff(&readable, encoding);
michael@0:       if (!encoding.IsEmpty()) {
michael@0:         // meta scan successful; honor overrides unless meta is XSS-dangerous
michael@0:         if ((mCharsetSource == kCharsetFromParentForced ||
michael@0:              mCharsetSource == kCharsetFromUserForced) &&
michael@0:             EncodingUtils::IsAsciiCompatible(encoding)) {
michael@0:           // Honor override
michael@0:           return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
michael@0:             aFromSegment, aCount, aWriteCount);
michael@0:         }
michael@0:         mCharset.Assign(encoding);
michael@0:         mCharsetSource = kCharsetFromMetaPrescan;
michael@0:         mFeedChardet = false;
michael@0:         mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:         return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
michael@0:           aFromSegment, aCount, aWriteCount);
michael@0:       }
michael@0:     }
michael@0:     if (mCharsetSource == kCharsetFromParentForced ||
michael@0:         mCharsetSource == kCharsetFromUserForced) {
michael@0:       // meta not found, honor override
michael@0:       return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
michael@0:         aFromSegment, aCount, aWriteCount);
michael@0:     }
michael@0:     return FinalizeSniffing(aFromSegment, aCount, aWriteCount,
michael@0:         countToSniffingLimit);
michael@0:   }
michael@0: 
michael@0:   // not the last buffer
michael@0:   if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
michael@0:     nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
michael@0:     nsAutoCString encoding;
michael@0:     mMetaScanner->sniff(&readable, encoding);
michael@0:     if (!encoding.IsEmpty()) {
michael@0:       // meta scan successful; honor overrides unless meta is XSS-dangerous
michael@0:       if ((mCharsetSource == kCharsetFromParentForced ||
michael@0:            mCharsetSource == kCharsetFromUserForced) &&
michael@0:           EncodingUtils::IsAsciiCompatible(encoding)) {
michael@0:         // Honor override
michael@0:         return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0:             aCount, aWriteCount);
michael@0:       }
michael@0:       mCharset.Assign(encoding);
michael@0:       mCharsetSource = kCharsetFromMetaPrescan;
michael@0:       mFeedChardet = false;
michael@0:       mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0:       return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0:         aCount, aWriteCount);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (!mSniffingBuffer) {
michael@0:     const mozilla::fallible_t fallible = mozilla::fallible_t();
michael@0:     mSniffingBuffer = new (fallible)
michael@0:       uint8_t[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE];
michael@0:     if (!mSniffingBuffer) {
michael@0:       return NS_ERROR_OUT_OF_MEMORY;
michael@0:     }
michael@0:   }
michael@0:   memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount);
michael@0:   mSniffingLength += aCount;
michael@0:   *aWriteCount = aCount;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment,
michael@0:                                       uint32_t aCount,
michael@0:                                       uint32_t* aWriteCount)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   // mLastBuffer should always point to a buffer of the size
michael@0:   // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
michael@0:   if (!mLastBuffer) {
michael@0:     NS_WARNING("mLastBuffer should not be null!");
michael@0:     MarkAsBroken(NS_ERROR_NULL_POINTER);
michael@0:     return NS_ERROR_NULL_POINTER;
michael@0:   }
michael@0:   if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
michael@0:     nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
michael@0:       nsHtml5OwningUTF16Buffer::FalliblyCreate(
michael@0:         NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
michael@0:     if (!newBuf) {
michael@0:       return NS_ERROR_OUT_OF_MEMORY;
michael@0:     }
michael@0:     mLastBuffer = (mLastBuffer->next = newBuf.forget());
michael@0:   }
michael@0:   int32_t totalByteCount = 0;
michael@0:   for (;;) {
michael@0:     int32_t end = mLastBuffer->getEnd();
michael@0:     int32_t byteCount = aCount - totalByteCount;
michael@0:     int32_t utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end;
michael@0: 
michael@0:     NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!");
michael@0:     // byteCount may be zero to force the decoder to output a pending surrogate
michael@0:     // pair.
michael@0: 
michael@0:     nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count);
michael@0:     MOZ_ASSERT(NS_SUCCEEDED(convResult));
michael@0: 
michael@0:     end += utf16Count;
michael@0:     mLastBuffer->setEnd(end);
michael@0:     totalByteCount += byteCount;
michael@0:     aFromSegment += byteCount;
michael@0: 
michael@0:     NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE,
michael@0:         "The Unicode decoder wrote too much data.");
michael@0:     NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes.");
michael@0: 
michael@0:     if (convResult == NS_PARTIAL_MORE_OUTPUT) {
michael@0:       nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
michael@0:         nsHtml5OwningUTF16Buffer::FalliblyCreate(
michael@0:           NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
michael@0:       if (!newBuf) {
michael@0:         return NS_ERROR_OUT_OF_MEMORY;
michael@0:       }
michael@0:       mLastBuffer = (mLastBuffer->next = newBuf.forget());
michael@0:       // All input may have been consumed if there is a pending surrogate pair
michael@0:       // that doesn't fit in the output buffer. Loop back to push a zero-length
michael@0:       // input to the decoder in that case.
michael@0:     } else {
michael@0:       NS_ASSERTION(totalByteCount == (int32_t)aCount,
michael@0:           "The Unicode decoder consumed the wrong number of bytes.");
michael@0:       *aWriteCount = (uint32_t)totalByteCount;
michael@0:       return NS_OK;
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: // Main-thread notification that the request has started. Forwards the
michael@0: // notification to mObserver (if set), starts the tokenizer, tree builder and
michael@0: // executor for the current mMode, allocates the first read buffer and
michael@0: // decides whether reparsing and charset detection stay enabled for this load.
michael@0: //
michael@0: // Returns the failure code of WillBuildModel(), the result of
michael@0: // mExecutor->MarkAsBroken() when the first buffer cannot be allocated, and
michael@0: // NS_OK otherwise.
michael@0: nsresult
michael@0: nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
michael@0: {
michael@0:   NS_PRECONDITION(STREAM_NOT_STARTED == mStreamState,
michael@0:                   "Got OnStartRequest when the stream had already started.");
michael@0:   NS_PRECONDITION(!mExecutor->HasStarted(), 
michael@0:                   "Got OnStartRequest at the wrong stage in the executor life cycle.");
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   if (mObserver) {
michael@0:     mObserver->OnStartRequest(aRequest, aContext);
michael@0:   }
michael@0:   mRequest = aRequest;
michael@0:   mStreamState = STREAM_BEING_READ;
michael@0: 
michael@0:   const bool isViewSourceMarkup =
michael@0:     (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML);
michael@0:   if (isViewSourceMarkup) {
michael@0:     mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
michael@0:   }
michael@0: 
michael@0:   // Loading as data never has scripting enabled. For View Source, the
michael@0:   // parser should run with scripts "enabled" if a normal load would have
michael@0:   // scripts enabled.
michael@0:   const bool scriptingEnabled =
michael@0:     (mMode != LOAD_AS_DATA) && mExecutor->IsScriptEnabled();
michael@0:   mOwner->StartTokenizer(scriptingEnabled);
michael@0: 
michael@0:   nsCOMPtr<nsIChannel> channel;
michael@0:   nsresult rv = GetChannel(getter_AddRefs(channel));
michael@0:   const bool isSrcdoc = NS_SUCCEEDED(rv) && NS_IsSrcdocChannel(channel);
michael@0:   mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
michael@0:   mTreeBuilder->setScriptingEnabled(scriptingEnabled);
michael@0:   // Script execution is only allowed for a normal load with scripting on.
michael@0:   mTreeBuilder->SetPreventScriptExecution(mMode != NORMAL ||
michael@0:                                           !scriptingEnabled);
michael@0:   mTokenizer->start();
michael@0:   mExecutor->Start();
michael@0:   mExecutor->StartReadingFromStage();
michael@0: 
michael@0:   if (mMode == PLAIN_TEXT) {
michael@0:     mTreeBuilder->StartPlainText();
michael@0:     mTokenizer->StartPlainText();
michael@0:   } else if (mMode == VIEW_SOURCE_PLAIN) {
michael@0:     mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
michael@0:     mTokenizer->StartPlainText();
michael@0:   }
michael@0: 
michael@0:   /*
michael@0:    * Be very careful when moving the WillBuildModel call below: it must not
michael@0:    * run before the document has had its script global object set.
michael@0:    */
michael@0:   rv = mExecutor->WillBuildModel(eDTDMode_unknown);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
michael@0:     nsHtml5OwningUTF16Buffer::FalliblyCreate(
michael@0:       NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
michael@0:   if (!newBuf) {
michael@0:     // Marking the executor as broken marks this stream parser as
michael@0:     // terminated, which prevents entry to code paths that would use
michael@0:     // mFirstBuffer or mLastBuffer.
michael@0:     return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
michael@0:   }
michael@0:   NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?");
michael@0:   NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?");
michael@0:   mLastBuffer = newBuf;
michael@0:   mFirstBuffer = mLastBuffer;
michael@0: 
michael@0:   // Only normal and plain-text loads may be reparsed. This means that the
michael@0:   // encoding can end up being wrong if a view-source URL is loaded without
michael@0:   // having the encoding hint from a previous normal load in the history.
michael@0:   mReparseForbidden = (mMode != NORMAL && mMode != PLAIN_TEXT);
michael@0: 
michael@0:   nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(mRequest, &rv);
michael@0:   if (NS_SUCCEEDED(rv)) {
michael@0:     nsAutoCString method;
michael@0:     httpChannel->GetRequestMethod(method);
michael@0:     // XXX does Necko have a way to renavigate POST, etc. without hitting
michael@0:     // the network?
michael@0:     if (!method.EqualsLiteral("GET")) {
michael@0:       // Old Gecko behavior (the HTML5 spec disagrees): never reparse
michael@0:       // anything other than GET, e.g. POST.
michael@0:       mReparseForbidden = true;
michael@0:       mFeedChardet = false; // can't restart anyway
michael@0:     }
michael@0: 
michael@0:     // Try to have OnDataAvailable delivered straight to the parser thread
michael@0:     // instead of going through the main thread.
michael@0:     nsCOMPtr<nsIThreadRetargetableRequest> retargetableRequest =
michael@0:       do_QueryInterface(mRequest);
michael@0:     if (retargetableRequest) {
michael@0:       retargetableRequest->RetargetDeliveryTo(mThread);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (mCharsetSource == kCharsetFromParentFrame) {
michael@0:     // Remember this in case chardet overwrites mCharsetSource
michael@0:     mInitialEncodingWasFromParentFrame = true;
michael@0:   }
michael@0: 
michael@0:   if (mCharsetSource >= kCharsetFromAutoDetection) {
michael@0:     mFeedChardet = false;
michael@0:   }
michael@0: 
michael@0:   nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
michael@0:   if (wyciwygChannel) {
michael@0:     // We are reloading a document.open()ed doc.
michael@0:     mReparseForbidden = true;
michael@0:     mFeedChardet = false;
michael@0: 
michael@0:     // Instantiate the converter here to avoid BOM sniffing.
michael@0:     mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
michael@0:   }
michael@0:   // For any other channel we aren't ready to commit to an encoding yet, so
michael@0:   // the converter stays uninstantiated for now.
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: // Main-thread check of the listener chain behind this parser. With no
michael@0: // observer there is nothing to check and the result is NS_OK; otherwise the
michael@0: // observer is queried for nsIThreadRetargetableStreamListener and its own
michael@0: // CheckListenerChain() result is returned. If the query does not yield a
michael@0: // listener, the query's result code is returned instead.
michael@0: nsresult
michael@0: nsHtml5StreamParser::CheckListenerChain()
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!");
michael@0:   if (!mObserver) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   nsresult queryResult;
michael@0:   nsCOMPtr<nsIThreadRetargetableStreamListener> listener(
michael@0:     do_QueryInterface(mObserver, &queryResult));
michael@0:   if (NS_FAILED(queryResult) || !listener) {
michael@0:     return queryResult;
michael@0:   }
michael@0:   return listener->CheckListenerChain();
michael@0: }
michael@0: 
michael@0: // Parser-thread handling of the end of the stream; mTokenizerMutex must be
michael@0: // held by the calling thread. Marks the stream as ended, finishes encoding
michael@0: // sniffing if no decoder has been set up yet (otherwise tells chardet that
michael@0: // the input is done when it is still being fed) and parses what remains.
michael@0: void
michael@0: nsHtml5StreamParser::DoStopRequest()
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   NS_PRECONDITION(STREAM_BEING_READ == mStreamState,
michael@0:                   "Stream ended without being open.");
michael@0:   mTokenizerMutex.AssertCurrentThreadOwns();
michael@0: 
michael@0:   if (IsTerminated()) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   mStreamState = STREAM_ENDED;
michael@0: 
michael@0:   if (mUnicodeDecoder) {
michael@0:     if (mFeedChardet) {
michael@0:       mChardet->Done();
michael@0:     }
michael@0:   } else {
michael@0:     // No decoder yet: the stream ended while still sniffing, so finalize
michael@0:     // sniffing with an empty final segment.
michael@0:     uint32_t writeCount;
michael@0:     nsresult rv = FinalizeSniffing(nullptr, 0, &writeCount, 0);
michael@0:     if (NS_FAILED(rv)) {
michael@0:       MarkAsBroken(rv);
michael@0:       return;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (IsTerminatedOrInterrupted()) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   ParseAvailableData();
michael@0: }
michael@0: 
michael@0: // Runnable that calls DoStopRequest() on the stream parser it was created
michael@0: // for, holding the parser's mTokenizerMutex for the duration of the call.
michael@0: class nsHtml5RequestStopper : public nsRunnable
michael@0: {
michael@0:   private:
michael@0:     // Keeps the stream parser referenced while the runnable exists.
michael@0:     nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0:   public:
michael@0:     // explicit: a raw parser pointer must never convert to a runnable
michael@0:     // implicitly.
michael@0:     explicit nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser)
michael@0:       : mStreamParser(aStreamParser)
michael@0:     {}
michael@0:     NS_IMETHODIMP Run()
michael@0:     {
michael@0:       // DoStopRequest() asserts that the current thread owns this mutex.
michael@0:       mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
michael@0:       mStreamParser->DoStopRequest();
michael@0:       return NS_OK;
michael@0:     }
michael@0: };
michael@0: 
michael@0: // Main-thread notification that the request has finished. Forwards the
michael@0: // notification to mObserver (if set) and dispatches an
michael@0: // nsHtml5RequestStopper to mThread. A failed dispatch is only warned about;
michael@0: // the method always returns NS_OK.
michael@0: nsresult
michael@0: nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
michael@0:                              nsISupports* aContext,
michael@0:                              nsresult status)
michael@0: {
michael@0:   NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream.");
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   if (mObserver) {
michael@0:     mObserver->OnStopRequest(aRequest, aContext, status);
michael@0:   }
michael@0: 
michael@0:   nsCOMPtr<nsIRunnable> stopEvent = new nsHtml5RequestStopper(this);
michael@0:   nsresult dispatchResult =
michael@0:     mThread->Dispatch(stopEvent, nsIThread::DISPATCH_NORMAL);
michael@0:   if (NS_FAILED(dispatchResult)) {
michael@0:     NS_WARNING("Dispatching StopRequest event failed.");
michael@0:   }
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: // Parser-thread handling of a chunk of stream data; mTokenizerMutex must be
michael@0: // held by the calling thread.
michael@0: //
michael@0: // aBuffer/aLength describe the incoming bytes. If a decoder already exists
michael@0: // the bytes are (optionally) fed to chardet and decoded via
michael@0: // WriteStreamBytes(); otherwise they go through SniffStreamBytes(). Any
michael@0: // failure marks the parser as broken. Afterwards the available data is
michael@0: // parsed and, unless the flush timer is already armed or the parser is
michael@0: // speculating, the flush timer is armed.
michael@0: void
michael@0: nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   NS_PRECONDITION(STREAM_BEING_READ == mStreamState,
michael@0:                   "DoDataAvailable called when stream not open.");
michael@0:   mTokenizerMutex.AssertCurrentThreadOwns();
michael@0: 
michael@0:   if (IsTerminated()) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   uint32_t writeCount;
michael@0:   nsresult rv;
michael@0:   if (HasDecoder()) {
michael@0:     if (mFeedChardet) {
michael@0:       // chardet reports through dontFeed whether it wants further input.
michael@0:       bool dontFeed;
michael@0:       mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed);
michael@0:       mFeedChardet = !dontFeed;
michael@0:     }
michael@0:     rv = WriteStreamBytes(aBuffer, aLength, &writeCount);
michael@0:   } else {
michael@0:     rv = SniffStreamBytes(aBuffer, aLength, &writeCount);
michael@0:   }
michael@0:   if (NS_FAILED(rv)) {
michael@0:     MarkAsBroken(rv);
michael@0:     return;
michael@0:   }
michael@0:   NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed.");
michael@0: 
michael@0:   if (IsTerminatedOrInterrupted()) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   ParseAvailableData();
michael@0: 
michael@0:   if (mFlushTimerArmed || mSpeculating) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Arm the one-shot flush timer. Until the timer has fired once the
michael@0:   // initial delay ("html5.flushtimer.initialdelay") applies; afterwards the
michael@0:   // subsequent delay ("html5.flushtimer.subsequentdelay") is used.
michael@0:   // NOTE(review): this relies on mFlushTimerEverFired meaning what its name
michael@0:   // says; it is set outside this function - confirm.
michael@0:   mFlushTimer->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback,
michael@0:                                     static_cast<void*> (this),
michael@0:                                     mFlushTimerEverFired ?
michael@0:                                         sTimerSubsequentDelay :
michael@0:                                         sTimerInitialDelay,
michael@0:                                     nsITimer::TYPE_ONE_SHOT);
michael@0:   mFlushTimerArmed = true;
michael@0: }
michael@0: 
michael@0: // Runnable that hands a copied chunk of stream data to
michael@0: // nsHtml5StreamParser::DoDataAvailable() while holding the parser's
michael@0: // mTokenizerMutex. The runnable takes ownership of the byte array passed to
michael@0: // its constructor.
michael@0: class nsHtml5DataAvailable : public nsRunnable
michael@0: {
michael@0:   private:
michael@0:     // Parser that receives the data.
michael@0:     nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0:     // Owned copy of the data and the number of valid bytes in it.
michael@0:     nsAutoArrayPtr<uint8_t>            mData;
michael@0:     uint32_t                           mLength;
michael@0: 
michael@0:   public:
michael@0:     nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser,
michael@0:                          uint8_t*             aData,
michael@0:                          uint32_t             aLength)
michael@0:       : mStreamParser(aStreamParser),
michael@0:         mData(aData),
michael@0:         mLength(aLength)
michael@0:     {
michael@0:     }
michael@0: 
michael@0:     NS_IMETHODIMP Run()
michael@0:     {
michael@0:       // DoDataAvailable() asserts that the current thread owns this mutex.
michael@0:       mozilla::MutexAutoLock tokenizerLock(mStreamParser->mTokenizerMutex);
michael@0:       mStreamParser->DoDataAvailable(mData, mLength);
michael@0:       return NS_OK;
michael@0:     }
michael@0: };
michael@0: 
michael@0: // Receives aLength bytes of stream data from aInStream.
michael@0: //
michael@0: // Off the main thread the data is read segment by segment straight into the
michael@0: // parser via CopySegmentsToParser() while holding mTokenizerMutex. On the
michael@0: // main thread the data is first copied into a heap buffer and then handed
michael@0: // to mThread in an nsHtml5DataAvailable runnable.
michael@0: //
michael@0: // Returns the executor's failure code if it is already broken, the result
michael@0: // of marking it broken when the copy buffer cannot be allocated, or the
michael@0: // failure code of reading the stream; otherwise a success code.
michael@0: nsresult
michael@0: nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
michael@0:                                      nsISupports* aContext,
michael@0:                                      nsIInputStream* aInStream,
michael@0:                                      uint64_t aSourceOffset,
michael@0:                                      uint32_t aLength)
michael@0: {
michael@0:   nsresult rv = mExecutor->IsBroken();
michael@0:   if (NS_FAILED(rv)) {
michael@0:     return rv;
michael@0:   }
michael@0: 
michael@0:   NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream.");
michael@0:   uint32_t totalRead;
michael@0: 
michael@0:   if (!NS_IsMainThread()) {
michael@0:     NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:     mozilla::MutexAutoLock autoLock(mTokenizerMutex);
michael@0: 
michael@0:     // Read directly from response buffer.
michael@0:     rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength,
michael@0:                                  &totalRead);
michael@0:     if (NS_FAILED(rv)) {
michael@0:       NS_WARNING("Failed reading response data to parser");
michael@0:       return rv;
michael@0:     }
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // Main thread to parser thread dispatch requires copying to buffer first.
michael@0:   const mozilla::fallible_t fallible = mozilla::fallible_t();
michael@0:   nsAutoArrayPtr<uint8_t> data(new (fallible) uint8_t[aLength]);
michael@0:   if (!data) {
michael@0:     return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
michael@0:   }
michael@0:   rv = aInStream->Read(reinterpret_cast<char*>(data.get()),
michael@0:                        aLength, &totalRead);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0:   NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?");
michael@0: 
michael@0:   // The runnable takes over ownership of the copied bytes.
michael@0:   nsCOMPtr<nsIRunnable> dataAvailable =
michael@0:     new nsHtml5DataAvailable(this, data.forget(), totalRead);
michael@0:   if (NS_FAILED(mThread->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) {
michael@0:     NS_WARNING("Dispatching DataAvailable event failed.");
michael@0:   }
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: /* static */
michael@0: // Segment callback passed to nsIInputStream::ReadSegments() by
michael@0: // OnDataAvailable(). aClosure is the nsHtml5StreamParser; the segment is
michael@0: // passed on to its DoDataAvailable() and reported as fully consumed.
michael@0: NS_METHOD
michael@0: nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream,
michael@0:                                           void *aClosure,
michael@0:                                           const char *aFromSegment,
michael@0:                                           uint32_t aToOffset,
michael@0:                                           uint32_t aCount,
michael@0:                                           uint32_t *aWriteCount)
michael@0: {
michael@0:   nsHtml5StreamParser* self = static_cast<nsHtml5StreamParser*>(aClosure);
michael@0:   const uint8_t* segment = reinterpret_cast<const uint8_t*>(aFromSegment);
michael@0: 
michael@0:   self->DoDataAvailable(segment, aCount);
michael@0: 
michael@0:   // DoDataAvailable has no return value, so assume it consumed every byte.
michael@0:   *aWriteCount = aCount;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: // Resolves the encoding label in aEncoding (taken from an internal encoding
michael@0: // declaration) and decides whether it asks for a change of encoding.
michael@0: //
michael@0: // Returns true, with aEncoding replaced by the resolved encoding name, when
michael@0: // the resolved encoding differs from mCharset. Returns false when the label
michael@0: // is unknown, or when it resolves to the current mCharset; in the latter
michael@0: // case the charset source becomes kCharsetFromMetaTag and chardet feeding
michael@0: // stops.
michael@0: bool
michael@0: nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
michael@0: {
michael@0:   nsAutoCString newEncoding;
michael@0:   if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) {
michael@0:     // the encoding name is bogus
michael@0:     mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported",
michael@0:                                             true,
michael@0:                                             mTokenizer->getLineNumber());
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (newEncoding.EqualsLiteral("UTF-16BE") ||
michael@0:       newEncoding.EqualsLiteral("UTF-16LE")) {
michael@0:     // A declared UTF-16 encoding is treated as UTF-8.
michael@0:     mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16",
michael@0:                                             true,
michael@0:                                             mTokenizer->getLineNumber());
michael@0:     newEncoding.Assign("UTF-8");
michael@0:   } else if (newEncoding.EqualsLiteral("x-user-defined")) {
michael@0:     // WebKit/Blink hack for Indian and Armenian legacy sites
michael@0:     mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined",
michael@0:                                             true,
michael@0:                                             mTokenizer->getLineNumber());
michael@0:     newEncoding.Assign("windows-1252");
michael@0:   }
michael@0: 
michael@0:   if (!newEncoding.Equals(mCharset)) {
michael@0:     // A different encoding was declared; report it back to the caller.
michael@0:     aEncoding.Assign(newEncoding);
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   // The declaration matches the encoding already in use.
michael@0:   if (mCharsetSource < kCharsetFromMetaPrescan) {
michael@0:     const char* lateMetaMsg = mInitialEncodingWasFromParentFrame
michael@0:                             ? "EncLateMetaFrame"
michael@0:                             : "EncLateMeta";
michael@0:     mTreeBuilder->MaybeComplainAboutCharset(lateMetaMsg,
michael@0:                                             false,
michael@0:                                             mTokenizer->getLineNumber());
michael@0:   }
michael@0:   mCharsetSource = kCharsetFromMetaTag; // become confident
michael@0:   mFeedChardet = false; // don't feed chardet when confident
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: // Parser-thread handling of an internal encoding declaration whose label is
michael@0: // *aEncoding. Returns true when a charset switch to a different encoding has
michael@0: // been requested from the tree builder and the parser has been interrupted;
michael@0: // returns false when the current encoding is kept.
michael@0: bool
michael@0: nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
michael@0: {
michael@0:   // Keep this method in sync with nsHtml5MetaScanner::tryCharset; the
michael@0:   // trickery with member fields there unfortunately leads to some
michael@0:   // copy-paste reuse. :-(
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0: 
michael@0:   // kCharsetFromMetaTag is the threshold that corresponds to "confident" in
michael@0:   // the HTML5 spec; once confident, declarations are ignored.
michael@0:   if (mCharsetSource >= kCharsetFromMetaTag) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   nsAutoCString declaredEncoding;
michael@0:   CopyUTF16toUTF8(*aEncoding, declaredEncoding);
michael@0:   if (!PreferredForInternalEncodingDecl(declaredEncoding)) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   // mReparseForbidden is deliberately checked after the call to
michael@0:   // PreferredForInternalEncodingDecl: if that method calls
michael@0:   // MaybeComplainAboutCharset, its charset complaint wins over the one
michael@0:   // issued here.
michael@0:   if (mReparseForbidden) {
michael@0:     mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaTooLate",
michael@0:                                             true,
michael@0:                                             mTokenizer->getLineNumber());
michael@0:     return false; // not reparsing even if we wanted to
michael@0:   }
michael@0: 
michael@0:   // Stop feeding chardet so that it cannot ask for another restart after
michael@0:   // this restart request.
michael@0:   mFeedChardet = false;
michael@0:   mTreeBuilder->NeedsCharsetSwitchTo(declaredEncoding,
michael@0:                                      kCharsetFromMetaTag,
michael@0:                                      mTokenizer->getLineNumber());
michael@0:   FlushTreeOpsAndDisarmTimer();
michael@0:   Interrupt();
michael@0:   // The tree op executor will cause the stream parser to terminate if the
michael@0:   // charset switch request is accepted, or it will uninterrupt if the
michael@0:   // request failed. If the restart request fails, we don't bother trying to
michael@0:   // make chardet resume: chardet-requested restarts would presumably fail,
michael@0:   // too.
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // Parser-thread helper: cancels the flush timer if it is armed, flushes the
michael@0: // view source output (in the view source markup modes) and the tree
michael@0: // builder, and dispatches mExecutorFlusher to the main thread. A failed
michael@0: // dispatch is only warned about.
michael@0: void
michael@0: nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer()
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0: 
michael@0:   // Cancel() is skipped when the timer isn't armed so that no mutex has to
michael@0:   // be acquired needlessly.
michael@0:   if (mFlushTimerArmed) {
michael@0:     mFlushTimer->Cancel();
michael@0:     mFlushTimerArmed = false;
michael@0:   }
michael@0: 
michael@0:   const bool isViewSourceMarkup =
michael@0:     (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML);
michael@0:   if (isViewSourceMarkup) {
michael@0:     mTokenizer->FlushViewSource();
michael@0:   }
michael@0:   mTreeBuilder->Flush();
michael@0: 
michael@0:   nsresult dispatchResult = NS_DispatchToMainThread(mExecutorFlusher);
michael@0:   if (NS_FAILED(dispatchResult)) {
michael@0:     NS_WARNING("failed to dispatch executor flush event");
michael@0:   }
michael@0: }
michael@0: 
michael@0: void
michael@0: nsHtml5StreamParser::ParseAvailableData()
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   mTokenizerMutex.AssertCurrentThreadOwns();
michael@0: 
michael@0:   if (IsTerminatedOrInterrupted()) {
michael@0:     return;
michael@0:   }
michael@0:   
michael@0:   for (;;) {
michael@0:     if (!mFirstBuffer->hasMore()) {
michael@0:       if (mFirstBuffer == mLastBuffer) {
michael@0:         switch (mStreamState) {
michael@0:           case STREAM_BEING_READ:
michael@0:             // never release the last buffer.
michael@0:             if (!mSpeculating) {
michael@0:               // reuse buffer space if not speculating
michael@0:               mFirstBuffer->setStart(0);
michael@0:               mFirstBuffer->setEnd(0);
michael@0:             }
michael@0:             mTreeBuilder->FlushLoads();
michael@0:             // Dispatch this runnable unconditionally, because the loads
michael@0:             // that need flushing may have been flushed earlier even if the
michael@0:             // flush right above here did nothing.
michael@0:             if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher))) {
michael@0:               NS_WARNING("failed to dispatch load flush event");
michael@0:             }
michael@0:             return; // no more data for now but expecting more
michael@0:           case STREAM_ENDED:
michael@0:             if (mAtEOF) {
michael@0:               return;
michael@0:             }
michael@0:             mAtEOF = true;
michael@0:             if (mCharsetSource < kCharsetFromMetaTag) {
michael@0:               if (mInitialEncodingWasFromParentFrame) {
michael@0:                 // Unfortunately, this check doesn't take effect for
michael@0:                 // cross-origin frames, so cross-origin ad frames that have
michael@0:                 // no text and only an image or a Flash embed get the more
michael@0:                 // severe message from the next if block. The message is
michael@0:                 // technically accurate, though.
michael@0:                 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationFrame",
michael@0:                                                         false,
michael@0:                                                         0);
michael@0:               } else if (mMode == NORMAL) {
michael@0:                 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclaration",
michael@0:                                                         true,
michael@0:                                                         0);
michael@0:               } else if (mMode == PLAIN_TEXT) {
michael@0:                 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationPlain",
michael@0:                                                         true,
michael@0:                                                         0);
michael@0:               }
michael@0:             }
michael@0:             mTokenizer->eof();
michael@0:             mTreeBuilder->StreamEnded();
michael@0:             if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
michael@0:               mTokenizer->EndViewSource();
michael@0:             }
michael@0:             FlushTreeOpsAndDisarmTimer();
michael@0:             return; // no more data and not expecting more
michael@0:           default:
michael@0:             NS_NOTREACHED("It should be impossible to reach this.");
michael@0:             return;
michael@0:         }
michael@0:       }
michael@0:       mFirstBuffer = mFirstBuffer->next;
michael@0:       continue;
michael@0:     }
michael@0: 
michael@0:     // now we have a non-empty buffer
michael@0:     mFirstBuffer->adjust(mLastWasCR);
michael@0:     mLastWasCR = false;
michael@0:     if (mFirstBuffer->hasMore()) {
michael@0:       mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
michael@0:       // At this point, internalEncodingDeclaration() may have called 
michael@0:       // Terminate, but that never happens together with script.
michael@0:       // Can't assert that here, though, because it's possible that the main
michael@0:       // thread has called Terminate() while this thread was parsing.
michael@0:       if (mTreeBuilder->HasScript()) {
michael@0:         // HasScript() cannot return true if the tree builder is preventing
michael@0:         // script execution.
michael@0:         MOZ_ASSERT(mMode == NORMAL);
michael@0:         mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
michael@0:         nsHtml5Speculation* speculation = 
michael@0:           new nsHtml5Speculation(mFirstBuffer,
michael@0:                                  mFirstBuffer->getStart(),
michael@0:                                  mTokenizer->getLineNumber(),
michael@0:                                  mTreeBuilder->newSnapshot());
michael@0:         mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(), 
michael@0:                                           speculation->GetStartLineNumber());
michael@0:         FlushTreeOpsAndDisarmTimer();
michael@0:         mTreeBuilder->SetOpSink(speculation);
michael@0:         mSpeculations.AppendElement(speculation); // adopts the pointer
michael@0:         mSpeculating = true;
michael@0:       }
michael@0:       if (IsTerminatedOrInterrupted()) {
michael@0:         return;
michael@0:       }
michael@0:     }
michael@0:     continue;
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Runnable that resumes parsing for the stream parser it holds. When run,
michael@0:  * it acquires the stream parser's tokenizer mutex, calls Uninterrupt() and
michael@0:  * then ParseAvailableData() on that stream parser.
michael@0:  */
michael@0: class nsHtml5StreamParserContinuation : public nsRunnable
michael@0: {
michael@0: private:
michael@0:   // Reference to the stream parser held for the lifetime of this runnable.
michael@0:   nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0: public:
michael@0:   // explicit: a raw stream parser pointer must never silently convert into
michael@0:   // a runnable.
michael@0:   explicit nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser)
michael@0:     : mStreamParser(aStreamParser)
michael@0:   {}
michael@0:   NS_IMETHODIMP Run()
michael@0:   {
michael@0:     // The tokenizer mutex is held for the whole duration of the parse call.
michael@0:     mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
michael@0:     mStreamParser->Uninterrupt();
michael@0:     mStreamParser->ParseAvailableData();
michael@0:     return NS_OK;
michael@0:   }
michael@0: };
michael@0: 
michael@0: /**
michael@0:  * Main-thread entry point that settles the oldest speculation (element 0
michael@0:  * of mSpeculations).
michael@0:  *
michael@0:  * The speculation is treated as failed when aLastWasCR is true, when
michael@0:  * aTokenizer is not in the data state, or when aTreeBuilder does not match
michael@0:  * the snapshot stored in the speculation. On failure the stream is rewound
michael@0:  * to the speculation's start, all speculations are dropped and the state of
michael@0:  * aTokenizer / aTreeBuilder is loaded into this stream parser's tokenizer
michael@0:  * and tree builder. On success the speculation's ops are flushed to the
michael@0:  * executor and the speculation is removed.
michael@0:  *
michael@0:  * Unless the successful speculation was not the only one in the queue (in
michael@0:  * which case the method returns right after flushing it), a
michael@0:  * nsHtml5StreamParserContinuation is dispatched to mThread at the end.
michael@0:  *
michael@0:  * Returns without doing anything if the executor reports being broken or
michael@0:  * if there are no speculations.
michael@0:  *
michael@0:  * @param aTokenizer   tokenizer whose state is checked and, on failure,
michael@0:  *                     loaded into mTokenizer
michael@0:  * @param aTreeBuilder tree builder compared against the speculation's
michael@0:  *                     snapshot and, on failure, loaded into mTreeBuilder
michael@0:  * @param aLastWasCR   a true value always fails the speculation; on failure
michael@0:  *                     the value is copied into mLastWasCR
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer, 
michael@0:                                           nsHtml5TreeBuilder* aTreeBuilder,
michael@0:                                           bool aLastWasCR)
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML),
michael@0:       "ContinueAfterScripts called in view source mode!");
michael@0:   if (NS_FAILED(mExecutor->IsBroken())) {
michael@0:     return;
michael@0:   }
michael@0:   #ifdef DEBUG
michael@0:     mExecutor->AssertStageEmpty();
michael@0:   #endif
michael@0:   // Decided while holding the speculation mutex, acted upon while holding
michael@0:   // the tokenizer mutex in the second scope below.
michael@0:   bool speculationFailed = false;
michael@0:   {
michael@0:     mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
michael@0:     if (mSpeculations.IsEmpty()) {
michael@0:       NS_NOTREACHED("ContinueAfterScripts called without speculations.");
michael@0:       return;
michael@0:     }
michael@0:     nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
michael@0:     if (aLastWasCR || 
michael@0:         !aTokenizer->isInDataState() || 
michael@0:         !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) {
michael@0:       speculationFailed = true;
michael@0:       // We've got a failed speculation :-(
michael@0:       Interrupt(); // Make the parser thread release the tokenizer mutex sooner
michael@0:       // now fall out of the speculationAutoLock into the tokenizerAutoLock block
michael@0:     } else {
michael@0:       // We've got a successful speculation!
michael@0:       if (mSpeculations.Length() > 1) {
michael@0:         // the first speculation isn't the current speculation, so there's 
michael@0:         // no need to bother the parser thread.
michael@0:         speculation->FlushToSink(mExecutor);
michael@0:         NS_ASSERTION(!mExecutor->IsScriptExecuting(),
michael@0:           "ParseUntilBlocked() was supposed to ensure we don't come "
michael@0:           "here when scripts are executing.");
michael@0:         NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if "
michael@0:           "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
michael@0:           "only caller of this method?");
michael@0:         mSpeculations.RemoveElementAt(0);
michael@0:         return;
michael@0:       }
michael@0:       // else
michael@0:       Interrupt(); // Make the parser thread release the tokenizer mutex sooner
michael@0:       
michael@0:       // now fall through
michael@0:       // the first speculation is the current speculation. Need to 
michael@0:       // release the speculation mutex and acquire the tokenizer 
michael@0:       // mutex. (Just acquiring the other mutex here would deadlock)
michael@0:     }
michael@0:   }
michael@0:   {
michael@0:     mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex);
michael@0:     #ifdef DEBUG
michael@0:     {
michael@0:       // Debug builds only: let the main thread do atom table lookups while
michael@0:       // it works under the tokenizer mutex; reset to mThread further down.
michael@0:       nsCOMPtr<nsIThread> mainThread;
michael@0:       NS_GetMainThread(getter_AddRefs(mainThread));
michael@0:       mAtomTable.SetPermittedLookupThread(mainThread);
michael@0:     }
michael@0:     #endif
michael@0:     // In principle, the speculation mutex should be acquired here,
michael@0:     // but there's no point, because the parser thread only acquires it
michael@0:     // when it has also acquired the tokenizer mutex and we are already
michael@0:     // holding the tokenizer mutex.
michael@0:     if (speculationFailed) {
michael@0:       // Rewind the stream
michael@0:       mAtEOF = false;
michael@0:       nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
michael@0:       mFirstBuffer = speculation->GetBuffer();
michael@0:       mFirstBuffer->setStart(speculation->GetStart());
michael@0:       mTokenizer->setLineNumber(speculation->GetStartLineNumber());
michael@0: 
michael@0:       // Report the failed speculation as a console warning, using the
michael@0:       // speculation's start line number.
michael@0:       nsContentUtils::ReportToConsole(nsIScriptError::warningFlag,
michael@0:                                       NS_LITERAL_CSTRING("DOM Events"),
michael@0:                                       mExecutor->GetDocument(),
michael@0:                                       nsContentUtils::eDOM_PROPERTIES,
michael@0:                                       "SpeculationFailed",
michael@0:                                       nullptr, 0,
michael@0:                                       nullptr,
michael@0:                                       EmptyString(),
michael@0:                                       speculation->GetStartLineNumber());
michael@0: 
michael@0:       // Every buffer after the rewound one is reset to start at offset 0.
michael@0:       nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
michael@0:       while (buffer) {
michael@0:         buffer->setStart(0);
michael@0:         buffer = buffer->next;
michael@0:       }
michael@0:       
michael@0:       mSpeculations.Clear(); // potentially a huge number of destructors 
michael@0:                              // run here synchronously on the main thread...
michael@0: 
michael@0:       mTreeBuilder->flushCharacters(); // empty the pending buffer
michael@0:       mTreeBuilder->ClearOps(); // now get rid of the failed ops
michael@0: 
michael@0:       mTreeBuilder->SetOpSink(mExecutor->GetStage());
michael@0:       mExecutor->StartReadingFromStage();
michael@0:       mSpeculating = false;
michael@0: 
michael@0:       // Copy state over
michael@0:       mLastWasCR = aLastWasCR;
michael@0:       mTokenizer->loadState(aTokenizer);
michael@0:       mTreeBuilder->loadState(aTreeBuilder, &mAtomTable);
michael@0:     } else {    
michael@0:       // We've got a successful speculation and at least a moment ago it was
michael@0:       // the current speculation
michael@0:       mSpeculations.ElementAt(0)->FlushToSink(mExecutor);
michael@0:       NS_ASSERTION(!mExecutor->IsScriptExecuting(),
michael@0:         "ParseUntilBlocked() was supposed to ensure we don't come "
michael@0:         "here when scripts are executing.");
michael@0:       NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if "
michael@0:         "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
michael@0:         "only caller of this method?");
michael@0:       mSpeculations.RemoveElementAt(0);
michael@0:       if (mSpeculations.IsEmpty()) {
michael@0:         // yes, it was still the only speculation. Now stop speculating
michael@0:         // However, before telling the executor to read from stage, flush
michael@0:         // any pending ops straight to the executor, because otherwise
michael@0:         // they remain unflushed until we get more data from the network.
michael@0:         mTreeBuilder->SetOpSink(mExecutor);
michael@0:         mTreeBuilder->Flush(true);
michael@0:         mTreeBuilder->SetOpSink(mExecutor->GetStage());
michael@0:         mExecutor->StartReadingFromStage();
michael@0:         mSpeculating = false;
michael@0:       }
michael@0:     }
michael@0:     // Dispatched while still holding the tokenizer mutex; the continuation's
michael@0:     // Run() acquires the same mutex before it parses.
michael@0:     nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
michael@0:     if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
michael@0:       NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
michael@0:     }
michael@0:     // A stream event might run before this event runs, but that's harmless.
michael@0:     #ifdef DEBUG
michael@0:       mAtomTable.SetPermittedLookupThread(mThread);
michael@0:     #endif
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Main-thread method that dispatches a nsHtml5StreamParserContinuation for
michael@0:  * this stream parser to mThread. A dispatch failure only produces a warning.
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch()
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   nsCOMPtr<nsIRunnable> continuation =
michael@0:     new nsHtml5StreamParserContinuation(this);
michael@0:   nsresult rv = mThread->Dispatch(continuation, nsIThread::DISPATCH_NORMAL);
michael@0:   if (NS_FAILED(rv)) {
michael@0:     NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Runnable that cancels and drops the flush timer of the stream parser it
michael@0:  * holds. When run, it does nothing if the stream parser has no flush timer;
michael@0:  * otherwise it calls Cancel() on the timer and nulls out mFlushTimer.
michael@0:  */
michael@0: class nsHtml5TimerKungFu : public nsRunnable
michael@0: {
michael@0: private:
michael@0:   // Reference to the stream parser held for the lifetime of this runnable.
michael@0:   nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0: public:
michael@0:   // explicit: a raw stream parser pointer must never silently convert into
michael@0:   // a runnable.
michael@0:   explicit nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser)
michael@0:     : mStreamParser(aStreamParser)
michael@0:   {}
michael@0:   NS_IMETHODIMP Run()
michael@0:   {
michael@0:     if (mStreamParser->mFlushTimer) {
michael@0:       // Cancel before releasing, so an armed timer cannot fire afterwards.
michael@0:       mStreamParser->mFlushTimer->Cancel();
michael@0:       mStreamParser->mFlushTimer = nullptr;
michael@0:     }
michael@0:     return NS_OK;
michael@0:   }
michael@0: };
michael@0: 
michael@0: void
michael@0: nsHtml5StreamParser::DropTimer()
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0:   /*
michael@0:    * Why the timer is dropped through a runnable instead of directly:
michael@0:    *
michael@0:    *  - Just nulling out the timer is not enough: an armed timer has to be
michael@0:    *    canceled first.
michael@0:    *  - Just canceling it here is not possible: nsTimerImpl::Cancel is not
michael@0:    *    safe to call from outside the thread where nsTimerImpl::Fire would
michael@0:    *    run.
michael@0:    *  - Dispatching the canceling runnable from the destructor of this class
michael@0:    *    is not safe either: the timer has a weak (void*) pointer back to this
michael@0:    *    stream parser, so a timer firing before the runnable cancels it would
michael@0:    *    access a deleted object.
michael@0:    *
michael@0:    * Hence this method, which must be called on the main thread before the
michael@0:    * destructor of this class is reached. The nsHtml5TimerKungFu runnable
michael@0:    * holds an nsHtml5RefPtr that addrefs this stream parser and keeps it
michael@0:    * alive until the runnable is done. The runnable cancels the timer on the
michael@0:    * parser thread, drops the timer and lets nsHtml5RefPtr send a runnable
michael@0:    * back to the main thread to release the stream parser.
michael@0:    */
michael@0:   if (!mFlushTimer) {
michael@0:     // No timer, nothing to drop.
michael@0:     return;
michael@0:   }
michael@0:   nsCOMPtr<nsIRunnable> timerDropper = new nsHtml5TimerKungFu(this);
michael@0:   nsresult rv = mThread->Dispatch(timerDropper, nsIThread::DISPATCH_NORMAL);
michael@0:   if (NS_FAILED(rv)) {
michael@0:     NS_WARNING("Failed to dispatch TimerKungFu event");
michael@0:   }
michael@0: }
michael@0: 
michael@0: // This is a static function because the method name Notify is already
michael@0: // taken by the chardet callback. aClosure is cast back to the stream parser
michael@0: // whose TimerFlush() is then invoked; aTimer is not used.
michael@0: void
michael@0: nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure)
michael@0: {
michael@0:   nsHtml5StreamParser* streamParser =
michael@0:     static_cast<nsHtml5StreamParser*>(aClosure);
michael@0:   streamParser->TimerFlush();
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Flush-timer handler. Runs on the parser thread and holds the tokenizer
michael@0:  * mutex for its whole duration. Marks the timer as no longer armed, records
michael@0:  * that it has fired at least once and, unless the parser is terminated or
michael@0:  * interrupted, flushes pending output and asks the main thread to run the
michael@0:  * executor flusher when the flush reports that there is something to do.
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::TimerFlush()
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   mozilla::MutexAutoLock autoLock(mTokenizerMutex);
michael@0: 
michael@0:   NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating.");
michael@0: 
michael@0:   // Getting here means the timer fired, so there is nothing to cancel;
michael@0:   // only the bookkeeping needs updating.
michael@0:   mFlushTimerArmed = false;
michael@0:   mFlushTimerEverFired = true;
michael@0: 
michael@0:   if (IsTerminatedOrInterrupted()) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   bool notifyExecutor;
michael@0:   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
michael@0:     mTreeBuilder->Flush(); // delete useless ops
michael@0:     notifyExecutor = mTokenizer->FlushViewSource();
michael@0:   } else {
michael@0:     // Not speculating, and there is no telling when new data is going to
michael@0:     // arrive, so send data to the main thread.
michael@0:     notifyExecutor = mTreeBuilder->Flush(true);
michael@0:   }
michael@0: 
michael@0:   if (notifyExecutor &&
michael@0:       NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
michael@0:     NS_WARNING("failed to dispatch executor flush event");
michael@0:   }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Terminates this stream parser and forwards the failure code to the tree
michael@0:  * builder, then flushes the tree builder and dispatches the executor
michael@0:  * flusher to the main thread. Must be called on the parser thread with the
michael@0:  * tokenizer mutex held (both are asserted).
michael@0:  *
michael@0:  * @param aRv the failure code handed to mTreeBuilder->MarkAsBroken()
michael@0:  */
michael@0: void
michael@0: nsHtml5StreamParser::MarkAsBroken(nsresult aRv)
michael@0: {
michael@0:   NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0:   mTokenizerMutex.AssertCurrentThreadOwns();
michael@0: 
michael@0:   Terminate();
michael@0:   mTreeBuilder->MarkAsBroken(aRv);
michael@0: 
michael@0:   // The flush result is only inspected by the assertion below.
michael@0:   mozilla::DebugOnly<bool> flushedOps = mTreeBuilder->Flush(false);
michael@0:   NS_ASSERTION(flushedOps, "Should have had the markAsBroken op!");
michael@0: 
michael@0:   nsresult dispatchRv = NS_DispatchToMainThread(mExecutorFlusher);
michael@0:   if (NS_FAILED(dispatchRv)) {
michael@0:     NS_WARNING("failed to dispatch executor flush event");
michael@0:   }
michael@0: }