michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set sw=2 ts=2 et tw=79: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/DebugOnly.h" michael@0: michael@0: #include "nsHtml5StreamParser.h" michael@0: #include "nsContentUtils.h" michael@0: #include "nsHtml5Tokenizer.h" michael@0: #include "nsIHttpChannel.h" michael@0: #include "nsHtml5Parser.h" michael@0: #include "nsHtml5TreeBuilder.h" michael@0: #include "nsHtml5AtomTable.h" michael@0: #include "nsHtml5Module.h" michael@0: #include "nsHtml5RefPtr.h" michael@0: #include "nsIScriptError.h" michael@0: #include "mozilla/Preferences.h" michael@0: #include "nsHtml5Highlighter.h" michael@0: #include "expat_config.h" michael@0: #include "expat.h" michael@0: #include "nsINestedURI.h" michael@0: #include "nsCharsetSource.h" michael@0: #include "nsIWyciwygChannel.h" michael@0: #include "nsIThreadRetargetableRequest.h" michael@0: #include "nsPrintfCString.h" michael@0: #include "nsNetUtil.h" michael@0: michael@0: #include "mozilla/dom/EncodingUtils.h" michael@0: michael@0: using namespace mozilla; michael@0: using mozilla::dom::EncodingUtils; michael@0: michael@0: int32_t nsHtml5StreamParser::sTimerInitialDelay = 120; michael@0: int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120; michael@0: michael@0: // static michael@0: void michael@0: nsHtml5StreamParser::InitializeStatics() michael@0: { michael@0: Preferences::AddIntVarCache(&sTimerInitialDelay, michael@0: "html5.flushtimer.initialdelay"); michael@0: Preferences::AddIntVarCache(&sTimerSubsequentDelay, michael@0: "html5.flushtimer.subsequentdelay"); michael@0: } michael@0: michael@0: /* michael@0: * Note that nsHtml5StreamParser implements cycle collecting AddRef and michael@0: * 
Release. Therefore, nsHtml5StreamParser must never be refcounted from michael@0: * the parser thread! michael@0: * michael@0: * To work around this limitation, runnables posted by the main thread to the michael@0: * parser thread hold their reference to the stream parser in an michael@0: * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds michael@0: * just like a regular nsRefPtr. This is OK, since the creation of the michael@0: * runnable and the nsHtml5RefPtr happens on the main thread. michael@0: * michael@0: * When the runnable is done on the parser thread, the destructor of michael@0: * nsHtml5RefPtr runs there. It doesn't call Release on the held object michael@0: * directly. Instead, it posts another runnable back to the main thread where michael@0: * that runnable calls Release on the wrapped object. michael@0: * michael@0: * When posting runnables in the other direction, the runnables have to be michael@0: * created on the main thread when nsHtml5StreamParser is instantiated and michael@0: * held for the lifetime of the nsHtml5StreamParser. This works, because the michael@0: * same runnabled can be dispatched multiple times and currently runnables michael@0: * posted from the parser thread to main thread don't need to wrap any michael@0: * runnable-specific data. (In the other direction, the runnables most notably michael@0: * wrap the byte data of the stream.) 
michael@0: */ michael@0: NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser) michael@0: NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser) michael@0: michael@0: NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser) michael@0: NS_INTERFACE_TABLE(nsHtml5StreamParser, michael@0: nsICharsetDetectionObserver) michael@0: NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser) michael@0: NS_INTERFACE_MAP_END michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser) michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser) michael@0: tmp->DropTimer(); michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner) michael@0: tmp->mExecutorFlusher = nullptr; michael@0: tmp->mLoadFlusher = nullptr; michael@0: tmp->mExecutor = nullptr; michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet) michael@0: NS_IMPL_CYCLE_COLLECTION_UNLINK_END michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser) michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest) michael@0: NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner) michael@0: // hack: count the strongly owned edge wrapped in the runnable michael@0: if (tmp->mExecutorFlusher) { michael@0: NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor"); michael@0: cb.NoteXPCOMChild(static_cast (tmp->mExecutor)); michael@0: } michael@0: // hack: count the strongly owned edge wrapped in the runnable michael@0: if (tmp->mLoadFlusher) { michael@0: NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor"); michael@0: cb.NoteXPCOMChild(static_cast (tmp->mExecutor)); michael@0: } michael@0: // hack: count self if held by mChardet michael@0: if (tmp->mChardet) { michael@0: NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver"); michael@0: cb.NoteXPCOMChild(static_cast(tmp)); michael@0: } michael@0: 
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END michael@0: michael@0: class nsHtml5ExecutorFlusher : public nsRunnable michael@0: { michael@0: private: michael@0: nsRefPtr mExecutor; michael@0: public: michael@0: nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor) michael@0: : mExecutor(aExecutor) michael@0: {} michael@0: NS_IMETHODIMP Run() michael@0: { michael@0: if (!mExecutor->isInList()) { michael@0: mExecutor->RunFlushLoop(); michael@0: } michael@0: return NS_OK; michael@0: } michael@0: }; michael@0: michael@0: class nsHtml5LoadFlusher : public nsRunnable michael@0: { michael@0: private: michael@0: nsRefPtr mExecutor; michael@0: public: michael@0: nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor) michael@0: : mExecutor(aExecutor) michael@0: {} michael@0: NS_IMETHODIMP Run() michael@0: { michael@0: mExecutor->FlushSpeculativeLoads(); michael@0: return NS_OK; michael@0: } michael@0: }; michael@0: michael@0: nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, michael@0: nsHtml5Parser* aOwner, michael@0: eParserMode aMode) michael@0: : mFirstBuffer(nullptr) // Will be filled when starting michael@0: , mLastBuffer(nullptr) // Will be filled when starting michael@0: , mExecutor(aExecutor) michael@0: , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML || michael@0: aMode == VIEW_SOURCE_XML) ? michael@0: nullptr : mExecutor->GetStage(), michael@0: aMode == NORMAL ? 
michael@0: mExecutor->GetStage() : nullptr)) michael@0: , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML)) michael@0: , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex") michael@0: , mOwner(aOwner) michael@0: , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex") michael@0: , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex") michael@0: , mThread(nsHtml5Module::GetStreamParserThread()) michael@0: , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)) michael@0: , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)) michael@0: , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1")) michael@0: , mMode(aMode) michael@0: { michael@0: NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); michael@0: mFlushTimer->SetTarget(mThread); michael@0: #ifdef DEBUG michael@0: mAtomTable.SetPermittedLookupThread(mThread); michael@0: #endif michael@0: mTokenizer->setInterner(&mAtomTable); michael@0: mTokenizer->setEncodingDeclarationHandler(this); michael@0: michael@0: if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) { michael@0: nsHtml5Highlighter* highlighter = michael@0: new nsHtml5Highlighter(mExecutor->GetStage()); michael@0: mTokenizer->EnableViewSource(highlighter); // takes ownership michael@0: mTreeBuilder->EnableViewSource(highlighter); // doesn't own michael@0: } michael@0: michael@0: // Chardet instantiation adapted from nsDOMFile. michael@0: // Chardet is initialized here even if it turns out to be useless michael@0: // to make the chardet refcount its observer (nsHtml5StreamParser) michael@0: // on the main thread. 
michael@0: const nsAdoptingCString& detectorName = michael@0: Preferences::GetLocalizedCString("intl.charset.detector"); michael@0: if (!detectorName.IsEmpty()) { michael@0: nsAutoCString detectorContractID; michael@0: detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE); michael@0: detectorContractID += detectorName; michael@0: if ((mChardet = do_CreateInstance(detectorContractID.get()))) { michael@0: (void) mChardet->Init(this); michael@0: mFeedChardet = true; michael@0: } michael@0: } michael@0: michael@0: // There's a zeroing operator new for everything else michael@0: } michael@0: michael@0: nsHtml5StreamParser::~nsHtml5StreamParser() michael@0: { michael@0: NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); michael@0: mTokenizer->end(); michael@0: NS_ASSERTION(!mFlushTimer, "Flush timer was not dropped before dtor!"); michael@0: #ifdef DEBUG michael@0: mRequest = nullptr; michael@0: mObserver = nullptr; michael@0: mUnicodeDecoder = nullptr; michael@0: mSniffingBuffer = nullptr; michael@0: mMetaScanner = nullptr; michael@0: mFirstBuffer = nullptr; michael@0: mExecutor = nullptr; michael@0: mTreeBuilder = nullptr; michael@0: mTokenizer = nullptr; michael@0: mOwner = nullptr; michael@0: #endif michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::GetChannel(nsIChannel** aChannel) michael@0: { michael@0: NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); michael@0: return mRequest ? 
CallQueryInterface(mRequest, aChannel) : michael@0: NS_ERROR_NOT_AVAILABLE; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: if (aConf == eBestAnswer || aConf == eSureAnswer) { michael@0: mFeedChardet = false; // just in case michael@0: nsAutoCString encoding; michael@0: if (!EncodingUtils::FindEncodingForLabel(nsDependentCString(aCharset), michael@0: encoding)) { michael@0: return NS_OK; michael@0: } michael@0: if (encoding.EqualsLiteral("replacement")) { michael@0: return NS_OK; michael@0: } michael@0: if (HasDecoder()) { michael@0: if (mCharset.Equals(encoding)) { michael@0: NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection, michael@0: "Why are we running chardet at all?"); michael@0: mCharsetSource = kCharsetFromAutoDetection; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: } else { michael@0: // We've already committed to a decoder. Request a reload from the michael@0: // docshell. michael@0: mTreeBuilder->NeedsCharsetSwitchTo(encoding, michael@0: kCharsetFromAutoDetection, michael@0: 0); michael@0: FlushTreeOpsAndDisarmTimer(); michael@0: Interrupt(); michael@0: } michael@0: } else { michael@0: // Got a confident answer from the sniffing buffer. That code will michael@0: // take care of setting up the decoder. 
michael@0: mCharset.Assign(encoding); michael@0: mCharsetSource = kCharsetFromAutoDetection; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: } michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) michael@0: { michael@0: if (aURL) { michael@0: nsCOMPtr temp; michael@0: bool isViewSource; michael@0: aURL->SchemeIs("view-source", &isViewSource); michael@0: if (isViewSource) { michael@0: nsCOMPtr nested = do_QueryInterface(aURL); michael@0: nested->GetInnerURI(getter_AddRefs(temp)); michael@0: } else { michael@0: temp = aURL; michael@0: } michael@0: bool isData; michael@0: temp->SchemeIs("data", &isData); michael@0: if (isData) { michael@0: // Avoid showing potentially huge data: URLs. The three last bytes are michael@0: // UTF-8 for an ellipsis. michael@0: mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6"); michael@0: } else { michael@0: temp->GetSpec(mViewSourceTitle); michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null michael@0: uint32_t aCount, michael@0: uint32_t* aWriteCount) michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: nsresult rv = NS_OK; michael@0: mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); michael@0: if (mSniffingBuffer) { michael@0: uint32_t writeCount; michael@0: rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &writeCount); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: mSniffingBuffer = nullptr; michael@0: } michael@0: mMetaScanner = nullptr; michael@0: if (aFromSegment) { michael@0: rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount); michael@0: } michael@0: return rv; michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName) michael@0: { 
michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: mCharset.Assign(aDecoderCharsetName); michael@0: mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); michael@0: mCharsetSource = kCharsetFromByteOrderMark; michael@0: mFeedChardet = false; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: mSniffingBuffer = nullptr; michael@0: mMetaScanner = nullptr; michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment, michael@0: uint32_t aCountToSniffingLimit) michael@0: { michael@0: // Avoid underspecified heuristic craziness for XHR michael@0: if (mMode == LOAD_AS_DATA) { michael@0: return; michael@0: } michael@0: // Make sure there's enough data. Require room for "" michael@0: if (mSniffingLength + aCountToSniffingLimit < 30) { michael@0: return; michael@0: } michael@0: // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1 michael@0: bool byteZero[2] = { false, false }; michael@0: bool byteNonZero[2] = { false, false }; michael@0: uint32_t i = 0; michael@0: if (mSniffingBuffer) { michael@0: for (; i < mSniffingLength; ++i) { michael@0: if (mSniffingBuffer[i]) { michael@0: if (byteNonZero[1 - (i % 2)]) { michael@0: return; michael@0: } michael@0: byteNonZero[i % 2] = true; michael@0: } else { michael@0: if (byteZero[1 - (i % 2)]) { michael@0: return; michael@0: } michael@0: byteZero[i % 2] = true; michael@0: } michael@0: } michael@0: } michael@0: if (aFromSegment) { michael@0: for (uint32_t j = 0; j < aCountToSniffingLimit; ++j) { michael@0: if (aFromSegment[j]) { michael@0: if (byteNonZero[1 - ((i + j) % 2)]) { michael@0: return; michael@0: } michael@0: byteNonZero[(i + j) % 2] = true; michael@0: } else { michael@0: if (byteZero[1 - ((i + j) % 2)]) { michael@0: return; michael@0: } michael@0: byteZero[(i + j) % 2] = true; michael@0: } michael@0: } michael@0: } 
michael@0: michael@0: if (byteNonZero[0]) { michael@0: mCharset.Assign("UTF-16LE"); michael@0: } else { michael@0: mCharset.Assign("UTF-16BE"); michael@0: } michael@0: mCharsetSource = kCharsetFromIrreversibleAutoDetection; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: mFeedChardet = false; michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", michael@0: true, michael@0: 0); michael@0: michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) michael@0: { michael@0: if (aEncoding) { michael@0: nsDependentString utf16(aEncoding); michael@0: nsAutoCString utf8; michael@0: CopyUTF16toUTF8(utf16, utf8); michael@0: if (PreferredForInternalEncodingDecl(utf8)) { michael@0: mCharset.Assign(utf8); michael@0: mCharsetSource = kCharsetFromMetaTag; // closest for XML michael@0: return; michael@0: } michael@0: // else the page declared an encoding Gecko doesn't support and we'd michael@0: // end up defaulting to UTF-8 anyway. Might as well fall through here michael@0: // right away and let the encoding be set to UTF-8 which we'd default to michael@0: // anyway. michael@0: } michael@0: mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM michael@0: mCharsetSource = kCharsetFromMetaTag; // means confident michael@0: } michael@0: michael@0: // A separate user data struct is used instead of passing the michael@0: // nsHtml5StreamParser instance as user data in order to avoid including michael@0: // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts. michael@0: // Using a separate user data struct also avoids bloating nsHtml5StreamParser michael@0: // by one pointer. 
michael@0: struct UserData { michael@0: XML_Parser mExpat; michael@0: nsHtml5StreamParser* mStreamParser; michael@0: }; michael@0: michael@0: // Using no-namespace handler callbacks to avoid including expat.h in michael@0: // nsHtml5StreamParser.h, since doing so would cause naming conclicts. michael@0: static void michael@0: HandleXMLDeclaration(void* aUserData, michael@0: const XML_Char* aVersion, michael@0: const XML_Char* aEncoding, michael@0: int aStandalone) michael@0: { michael@0: UserData* ud = static_cast(aUserData); michael@0: ud->mStreamParser->SetEncodingFromExpat( michael@0: reinterpret_cast(aEncoding)); michael@0: XML_StopParser(ud->mExpat, false); michael@0: } michael@0: michael@0: static void michael@0: HandleStartElement(void* aUserData, michael@0: const XML_Char* aName, michael@0: const XML_Char **aAtts) michael@0: { michael@0: UserData* ud = static_cast(aUserData); michael@0: XML_StopParser(ud->mExpat, false); michael@0: } michael@0: michael@0: static void michael@0: HandleEndElement(void* aUserData, michael@0: const XML_Char* aName) michael@0: { michael@0: UserData* ud = static_cast(aUserData); michael@0: XML_StopParser(ud->mExpat, false); michael@0: } michael@0: michael@0: static void michael@0: HandleComment(void* aUserData, michael@0: const XML_Char* aName) michael@0: { michael@0: UserData* ud = static_cast(aUserData); michael@0: XML_StopParser(ud->mExpat, false); michael@0: } michael@0: michael@0: static void michael@0: HandleProcessingInstruction(void* aUserData, michael@0: const XML_Char* aTarget, michael@0: const XML_Char* aData) michael@0: { michael@0: UserData* ud = static_cast(aUserData); michael@0: XML_StopParser(ud->mExpat, false); michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be null michael@0: uint32_t aCount, michael@0: uint32_t* aWriteCount, michael@0: uint32_t aCountToSniffingLimit) michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong 
thread!"); michael@0: NS_ASSERTION(mCharsetSource < kCharsetFromParentForced, michael@0: "Should not finalize sniffing when using forced charset."); michael@0: if (mMode == VIEW_SOURCE_XML) { michael@0: static const XML_Memory_Handling_Suite memsuite = michael@0: { michael@0: (void *(*)(size_t))moz_xmalloc, michael@0: (void *(*)(void *, size_t))moz_xrealloc, michael@0: moz_free michael@0: }; michael@0: michael@0: static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' }; michael@0: michael@0: static const char16_t kISO88591[] = michael@0: { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' }; michael@0: michael@0: UserData ud; michael@0: ud.mStreamParser = this; michael@0: michael@0: // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML michael@0: // documents MUST begin with a BOM. We don't support EBCDIC and such. michael@0: // Thus, at this point, what we have is garbage or something encoded using michael@0: // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes michael@0: // without throwing errors when bytes have the most significant bit set michael@0: // and without triggering expat's unknown encoding code paths. This is michael@0: // enough to be able to use expat to parse the XML declaration in order michael@0: // to extract the encoding name from it. michael@0: ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator); michael@0: XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration); michael@0: XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement); michael@0: XML_SetCommentHandler(ud.mExpat, HandleComment); michael@0: XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction); michael@0: XML_SetUserData(ud.mExpat, static_cast(&ud)); michael@0: michael@0: XML_Status status = XML_STATUS_OK; michael@0: michael@0: // aFromSegment points to the data obtained from the current network michael@0: // event. 
mSniffingBuffer (if it exists) contains the data obtained before michael@0: // the current event. Thus, mSniffingLenth bytes of mSniffingBuffer michael@0: // followed by aCountToSniffingLimit bytes from aFromSegment are the michael@0: // first 1024 bytes of the file (or the file as a whole if the file is michael@0: // 1024 bytes long or shorter). Thus, we parse both buffers, but if the michael@0: // first call succeeds already, we skip parsing the second buffer. michael@0: if (mSniffingBuffer) { michael@0: status = XML_Parse(ud.mExpat, michael@0: reinterpret_cast(mSniffingBuffer.get()), michael@0: mSniffingLength, michael@0: false); michael@0: } michael@0: if (status == XML_STATUS_OK && michael@0: mCharsetSource < kCharsetFromMetaTag && michael@0: aFromSegment) { michael@0: status = XML_Parse(ud.mExpat, michael@0: reinterpret_cast(aFromSegment), michael@0: aCountToSniffingLimit, michael@0: false); michael@0: } michael@0: XML_ParserFree(ud.mExpat); michael@0: michael@0: if (mCharsetSource < kCharsetFromMetaTag) { michael@0: // Failed to get an encoding from the XML declaration. XML defaults michael@0: // confidently to UTF-8 in this case. michael@0: // It is also possible that the document has an XML declaration that is michael@0: // longer than 1024 bytes, but that case is not worth worrying about. michael@0: mCharset.AssignLiteral("UTF-8"); michael@0: mCharsetSource = kCharsetFromMetaTag; // means confident michael@0: } michael@0: michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, michael@0: aCount, michael@0: aWriteCount); michael@0: } michael@0: michael@0: // meta scan failed. michael@0: if (mCharsetSource >= kCharsetFromHintPrevDoc) { michael@0: mFeedChardet = false; michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); michael@0: } michael@0: // Check for BOMless UTF-16 with Basic michael@0: // Latin content for compat with IE. See bug 631751. 
michael@0: SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit); michael@0: // the charset may have been set now michael@0: // maybe try chardet now; michael@0: if (mFeedChardet) { michael@0: bool dontFeed; michael@0: nsresult rv; michael@0: if (mSniffingBuffer) { michael@0: rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed); michael@0: mFeedChardet = !dontFeed; michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: } michael@0: if (mFeedChardet && aFromSegment) { michael@0: rv = mChardet->DoIt((const char*)aFromSegment, michael@0: // Avoid buffer boundary-dependent behavior when michael@0: // reparsing is forbidden. If reparse is forbidden, michael@0: // act as if we only saw the first 1024 bytes. michael@0: // When reparsing isn't forbidden, buffer boundaries michael@0: // can have an effect on whether the page is loaded michael@0: // once or twice. :-( michael@0: mReparseForbidden ? aCountToSniffingLimit : aCount, michael@0: &dontFeed); michael@0: mFeedChardet = !dontFeed; michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: } michael@0: if (mFeedChardet && (!aFromSegment || mReparseForbidden)) { michael@0: // mReparseForbidden is checked so that we get to use the sniffing michael@0: // buffer with the best guess so far if we aren't allowed to guess michael@0: // better later. 
michael@0: mFeedChardet = false; michael@0: rv = mChardet->Done(); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: } michael@0: // fall thru; callback may have changed charset michael@0: } michael@0: if (mCharsetSource == kCharsetUninitialized) { michael@0: // Hopefully this case is never needed, but dealing with it anyway michael@0: mCharset.AssignLiteral("windows-1252"); michael@0: mCharsetSource = kCharsetFromFallback; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: } else if (mMode == LOAD_AS_DATA && michael@0: mCharsetSource == kCharsetFromFallback) { michael@0: NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR"); michael@0: NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR"); michael@0: NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"), michael@0: "XHR should default to UTF-8"); michael@0: // Now mark charset source as non-weak to signal that we have a decision michael@0: mCharsetSource = kCharsetFromDocTypeDefault; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: } michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment, michael@0: uint32_t aCount, michael@0: uint32_t* aWriteCount) michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: nsresult rv = NS_OK; michael@0: uint32_t writeCount; michael@0: michael@0: // mCharset and mCharsetSource potentially have come from channel or higher michael@0: // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them. michael@0: // If we don't find a BOM, the previously set values of mCharset and michael@0: // mCharsetSource are not modified by the BOM sniffing here. 
michael@0: for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) { michael@0: switch (mBomState) { michael@0: case BOM_SNIFFING_NOT_STARTED: michael@0: NS_ASSERTION(i == 0, "Bad BOM sniffing state."); michael@0: switch (*aFromSegment) { michael@0: case 0xEF: michael@0: mBomState = SEEN_UTF_8_FIRST_BYTE; michael@0: break; michael@0: case 0xFF: michael@0: mBomState = SEEN_UTF_16_LE_FIRST_BYTE; michael@0: break; michael@0: case 0xFE: michael@0: mBomState = SEEN_UTF_16_BE_FIRST_BYTE; michael@0: break; michael@0: default: michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: break; michael@0: } michael@0: break; michael@0: case SEEN_UTF_16_LE_FIRST_BYTE: michael@0: if (aFromSegment[i] == 0xFE) { michael@0: rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: uint32_t count = aCount - (i + 1); michael@0: rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: *aWriteCount = writeCount + (i + 1); michael@0: return rv; michael@0: } michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: break; michael@0: case SEEN_UTF_16_BE_FIRST_BYTE: michael@0: if (aFromSegment[i] == 0xFF) { michael@0: rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: uint32_t count = aCount - (i + 1); michael@0: rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: *aWriteCount = writeCount + (i + 1); michael@0: return rv; michael@0: } michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: break; michael@0: case SEEN_UTF_8_FIRST_BYTE: michael@0: if (aFromSegment[i] == 0xBB) { michael@0: mBomState = SEEN_UTF_8_SECOND_BYTE; michael@0: } else { michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: } michael@0: break; michael@0: case SEEN_UTF_8_SECOND_BYTE: michael@0: if (aFromSegment[i] == 0xBF) { michael@0: rv = SetupDecodingFromBom("UTF-8"); 
// upper case is the raw form michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: uint32_t count = aCount - (i + 1); michael@0: rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: *aWriteCount = writeCount + (i + 1); michael@0: return rv; michael@0: } michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: break; michael@0: default: michael@0: mBomState = BOM_SNIFFING_OVER; michael@0: break; michael@0: } michael@0: } michael@0: // if we get here, there either was no BOM or the BOM sniffing isn't complete michael@0: // yet michael@0: michael@0: MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark, michael@0: "Should not come here if BOM was found."); michael@0: MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent, michael@0: "kCharsetFromOtherComponent is for XSLT."); michael@0: michael@0: if (mBomState == BOM_SNIFFING_OVER && michael@0: mCharsetSource == kCharsetFromChannel) { michael@0: // There was no BOM and the charset came from channel. mCharset michael@0: // still contains the charset from the channel as set by an michael@0: // earlier call to SetDocumentCharset(), since we didn't find a BOM and michael@0: // overwrite mCharset. (Note that if the user has overridden the charset, michael@0: // we don't come here but check for XSS-dangerous charsets first.) 
michael@0: mFeedChardet = false; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, michael@0: aCount, aWriteCount); michael@0: } michael@0: michael@0: if (!mMetaScanner && (mMode == NORMAL || michael@0: mMode == VIEW_SOURCE_HTML || michael@0: mMode == LOAD_AS_DATA)) { michael@0: mMetaScanner = new nsHtml5MetaScanner(); michael@0: } michael@0: michael@0: if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) { michael@0: // this is the last buffer michael@0: uint32_t countToSniffingLimit = michael@0: NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength; michael@0: if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { michael@0: nsHtml5ByteReadable readable(aFromSegment, aFromSegment + michael@0: countToSniffingLimit); michael@0: nsAutoCString encoding; michael@0: mMetaScanner->sniff(&readable, encoding); michael@0: if (!encoding.IsEmpty()) { michael@0: // meta scan successful; honor overrides unless meta is XSS-dangerous michael@0: if ((mCharsetSource == kCharsetFromParentForced || michael@0: mCharsetSource == kCharsetFromUserForced) && michael@0: EncodingUtils::IsAsciiCompatible(encoding)) { michael@0: // Honor override michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( michael@0: aFromSegment, aCount, aWriteCount); michael@0: } michael@0: mCharset.Assign(encoding); michael@0: mCharsetSource = kCharsetFromMetaPrescan; michael@0: mFeedChardet = false; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( michael@0: aFromSegment, aCount, aWriteCount); michael@0: } michael@0: } michael@0: if (mCharsetSource == kCharsetFromParentForced || michael@0: mCharsetSource == kCharsetFromUserForced) { michael@0: // meta not found, honor override michael@0: return 
SetupDecodingAndWriteSniffingBufferAndCurrentSegment( michael@0: aFromSegment, aCount, aWriteCount); michael@0: } michael@0: return FinalizeSniffing(aFromSegment, aCount, aWriteCount, michael@0: countToSniffingLimit); michael@0: } michael@0: michael@0: // not the last buffer michael@0: if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { michael@0: nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount); michael@0: nsAutoCString encoding; michael@0: mMetaScanner->sniff(&readable, encoding); michael@0: if (!encoding.IsEmpty()) { michael@0: // meta scan successful; honor overrides unless meta is XSS-dangerous michael@0: if ((mCharsetSource == kCharsetFromParentForced || michael@0: mCharsetSource == kCharsetFromUserForced) && michael@0: EncodingUtils::IsAsciiCompatible(encoding)) { michael@0: // Honor override michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, michael@0: aCount, aWriteCount); michael@0: } michael@0: mCharset.Assign(encoding); michael@0: mCharsetSource = kCharsetFromMetaPrescan; michael@0: mFeedChardet = false; michael@0: mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); michael@0: return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, michael@0: aCount, aWriteCount); michael@0: } michael@0: } michael@0: michael@0: if (!mSniffingBuffer) { michael@0: const mozilla::fallible_t fallible = mozilla::fallible_t(); michael@0: mSniffingBuffer = new (fallible) michael@0: uint8_t[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE]; michael@0: if (!mSniffingBuffer) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: } michael@0: memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount); michael@0: mSniffingLength += aCount; michael@0: *aWriteCount = aCount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment, michael@0: uint32_t aCount, michael@0: uint32_t* 
aWriteCount) michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: // mLastBuffer should always point to a buffer of the size michael@0: // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE. michael@0: if (!mLastBuffer) { michael@0: NS_WARNING("mLastBuffer should not be null!"); michael@0: MarkAsBroken(NS_ERROR_NULL_POINTER); michael@0: return NS_ERROR_NULL_POINTER; michael@0: } michael@0: if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) { michael@0: nsRefPtr newBuf = michael@0: nsHtml5OwningUTF16Buffer::FalliblyCreate( michael@0: NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); michael@0: if (!newBuf) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: mLastBuffer = (mLastBuffer->next = newBuf.forget()); michael@0: } michael@0: int32_t totalByteCount = 0; michael@0: for (;;) { michael@0: int32_t end = mLastBuffer->getEnd(); michael@0: int32_t byteCount = aCount - totalByteCount; michael@0: int32_t utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end; michael@0: michael@0: NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!"); michael@0: // byteCount may be zero to force the decoder to output a pending surrogate michael@0: // pair. 
michael@0: michael@0: nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count); michael@0: MOZ_ASSERT(NS_SUCCEEDED(convResult)); michael@0: michael@0: end += utf16Count; michael@0: mLastBuffer->setEnd(end); michael@0: totalByteCount += byteCount; michael@0: aFromSegment += byteCount; michael@0: michael@0: NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, michael@0: "The Unicode decoder wrote too much data."); michael@0: NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes."); michael@0: michael@0: if (convResult == NS_PARTIAL_MORE_OUTPUT) { michael@0: nsRefPtr newBuf = michael@0: nsHtml5OwningUTF16Buffer::FalliblyCreate( michael@0: NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); michael@0: if (!newBuf) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: mLastBuffer = (mLastBuffer->next = newBuf.forget()); michael@0: // All input may have been consumed if there is a pending surrogate pair michael@0: // that doesn't fit in the output buffer. Loop back to push a zero-length michael@0: // input to the decoder in that case. 
michael@0: } else { michael@0: NS_ASSERTION(totalByteCount == (int32_t)aCount, michael@0: "The Unicode decoder consumed the wrong number of bytes."); michael@0: *aWriteCount = (uint32_t)totalByteCount; michael@0: return NS_OK; michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext) michael@0: { michael@0: NS_PRECONDITION(STREAM_NOT_STARTED == mStreamState, michael@0: "Got OnStartRequest when the stream had already started."); michael@0: NS_PRECONDITION(!mExecutor->HasStarted(), michael@0: "Got OnStartRequest at the wrong stage in the executor life cycle."); michael@0: NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); michael@0: if (mObserver) { michael@0: mObserver->OnStartRequest(aRequest, aContext); michael@0: } michael@0: mRequest = aRequest; michael@0: michael@0: mStreamState = STREAM_BEING_READ; michael@0: michael@0: if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { michael@0: mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); michael@0: } michael@0: michael@0: // For View Source, the parser should run with scripts "enabled" if a normal michael@0: // load would have scripts enabled. michael@0: bool scriptingEnabled = mMode == LOAD_AS_DATA ? 
michael@0: false : mExecutor->IsScriptEnabled(); michael@0: mOwner->StartTokenizer(scriptingEnabled); michael@0: michael@0: bool isSrcdoc = false; michael@0: nsCOMPtr channel; michael@0: nsresult rv = GetChannel(getter_AddRefs(channel)); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: isSrcdoc = NS_IsSrcdocChannel(channel); michael@0: } michael@0: mTreeBuilder->setIsSrcdocDocument(isSrcdoc); michael@0: mTreeBuilder->setScriptingEnabled(scriptingEnabled); michael@0: mTreeBuilder->SetPreventScriptExecution(!((mMode == NORMAL) && michael@0: scriptingEnabled)); michael@0: mTokenizer->start(); michael@0: mExecutor->Start(); michael@0: mExecutor->StartReadingFromStage(); michael@0: michael@0: if (mMode == PLAIN_TEXT) { michael@0: mTreeBuilder->StartPlainText(); michael@0: mTokenizer->StartPlainText(); michael@0: } else if (mMode == VIEW_SOURCE_PLAIN) { michael@0: mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); michael@0: mTokenizer->StartPlainText(); michael@0: } michael@0: michael@0: /* michael@0: * If you move the following line, be very careful not to cause michael@0: * WillBuildModel to be called before the document has had its michael@0: * script global object set. michael@0: */ michael@0: rv = mExecutor->WillBuildModel(eDTDMode_unknown); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsRefPtr newBuf = michael@0: nsHtml5OwningUTF16Buffer::FalliblyCreate( michael@0: NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); michael@0: if (!newBuf) { michael@0: // marks this stream parser as terminated, michael@0: // which prevents entry to code paths that michael@0: // would use mFirstBuffer or mLastBuffer. 
michael@0: return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); michael@0: } michael@0: NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?"); michael@0: NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?"); michael@0: mFirstBuffer = mLastBuffer = newBuf; michael@0: michael@0: rv = NS_OK; michael@0: michael@0: // The line below means that the encoding can end up being wrong if michael@0: // a view-source URL is loaded without having the encoding hint from a michael@0: // previous normal load in the history. michael@0: mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT); michael@0: michael@0: nsCOMPtr httpChannel(do_QueryInterface(mRequest, &rv)); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: nsAutoCString method; michael@0: httpChannel->GetRequestMethod(method); michael@0: // XXX does Necko have a way to renavigate POST, etc. without hitting michael@0: // the network? michael@0: if (!method.EqualsLiteral("GET")) { michael@0: // This is the old Gecko behavior but the HTML5 spec disagrees. michael@0: // Don't reparse on POST. michael@0: mReparseForbidden = true; michael@0: mFeedChardet = false; // can't restart anyway michael@0: } michael@0: michael@0: // Attempt to retarget delivery of data (via OnDataAvailable) to the parser michael@0: // thread, rather than through the main thread. 
michael@0: nsCOMPtr threadRetargetableRequest = michael@0: do_QueryInterface(mRequest); michael@0: if (threadRetargetableRequest) { michael@0: threadRetargetableRequest->RetargetDeliveryTo(mThread); michael@0: } michael@0: } michael@0: michael@0: if (mCharsetSource == kCharsetFromParentFrame) { michael@0: // Remember this in case chardet overwrites mCharsetSource michael@0: mInitialEncodingWasFromParentFrame = true; michael@0: } michael@0: michael@0: if (mCharsetSource >= kCharsetFromAutoDetection) { michael@0: mFeedChardet = false; michael@0: } michael@0: michael@0: nsCOMPtr wyciwygChannel(do_QueryInterface(mRequest)); michael@0: if (!wyciwygChannel) { michael@0: // we aren't ready to commit to an encoding yet michael@0: // leave converter uninstantiated for now michael@0: return NS_OK; michael@0: } michael@0: michael@0: // We are reloading a document.open()ed doc. michael@0: mReparseForbidden = true; michael@0: mFeedChardet = false; michael@0: michael@0: // Instantiate the converter here to avoid BOM sniffing. 
michael@0: mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::CheckListenerChain() michael@0: { michael@0: NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!"); michael@0: if (!mObserver) { michael@0: return NS_OK; michael@0: } michael@0: nsresult rv; michael@0: nsCOMPtr retargetable = michael@0: do_QueryInterface(mObserver, &rv); michael@0: if (NS_SUCCEEDED(rv) && retargetable) { michael@0: rv = retargetable->CheckListenerChain(); michael@0: } michael@0: return rv; michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::DoStopRequest() michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: NS_PRECONDITION(STREAM_BEING_READ == mStreamState, michael@0: "Stream ended without being open."); michael@0: mTokenizerMutex.AssertCurrentThreadOwns(); michael@0: michael@0: if (IsTerminated()) { michael@0: return; michael@0: } michael@0: michael@0: mStreamState = STREAM_ENDED; michael@0: michael@0: if (!mUnicodeDecoder) { michael@0: uint32_t writeCount; michael@0: nsresult rv; michael@0: if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) { michael@0: MarkAsBroken(rv); michael@0: return; michael@0: } michael@0: } else if (mFeedChardet) { michael@0: mChardet->Done(); michael@0: } michael@0: michael@0: if (IsTerminatedOrInterrupted()) { michael@0: return; michael@0: } michael@0: michael@0: ParseAvailableData(); michael@0: } michael@0: michael@0: class nsHtml5RequestStopper : public nsRunnable michael@0: { michael@0: private: michael@0: nsHtml5RefPtr mStreamParser; michael@0: public: michael@0: nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser) michael@0: : mStreamParser(aStreamParser) michael@0: {} michael@0: NS_IMETHODIMP Run() michael@0: { michael@0: mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); michael@0: mStreamParser->DoStopRequest(); michael@0: return NS_OK; 
michael@0: } michael@0: }; michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest, michael@0: nsISupports* aContext, michael@0: nsresult status) michael@0: { michael@0: NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream."); michael@0: NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); michael@0: if (mObserver) { michael@0: mObserver->OnStopRequest(aRequest, aContext, status); michael@0: } michael@0: nsCOMPtr stopper = new nsHtml5RequestStopper(this); michael@0: if (NS_FAILED(mThread->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) { michael@0: NS_WARNING("Dispatching StopRequest event failed."); michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength) michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: NS_PRECONDITION(STREAM_BEING_READ == mStreamState, michael@0: "DoDataAvailable called when stream not open."); michael@0: mTokenizerMutex.AssertCurrentThreadOwns(); michael@0: michael@0: if (IsTerminated()) { michael@0: return; michael@0: } michael@0: michael@0: uint32_t writeCount; michael@0: nsresult rv; michael@0: if (HasDecoder()) { michael@0: if (mFeedChardet) { michael@0: bool dontFeed; michael@0: mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed); michael@0: mFeedChardet = !dontFeed; michael@0: } michael@0: rv = WriteStreamBytes(aBuffer, aLength, &writeCount); michael@0: } else { michael@0: rv = SniffStreamBytes(aBuffer, aLength, &writeCount); michael@0: } michael@0: if (NS_FAILED(rv)) { michael@0: MarkAsBroken(rv); michael@0: return; michael@0: } michael@0: NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed."); michael@0: michael@0: if (IsTerminatedOrInterrupted()) { michael@0: return; michael@0: } michael@0: michael@0: ParseAvailableData(); michael@0: michael@0: if (mFlushTimerArmed || mSpeculating) { michael@0: return; michael@0: 
} michael@0: michael@0: mFlushTimer->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback, michael@0: static_cast (this), michael@0: mFlushTimerEverFired ? michael@0: sTimerInitialDelay : michael@0: sTimerSubsequentDelay, michael@0: nsITimer::TYPE_ONE_SHOT); michael@0: mFlushTimerArmed = true; michael@0: } michael@0: michael@0: class nsHtml5DataAvailable : public nsRunnable michael@0: { michael@0: private: michael@0: nsHtml5RefPtr mStreamParser; michael@0: nsAutoArrayPtr mData; michael@0: uint32_t mLength; michael@0: public: michael@0: nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser, michael@0: uint8_t* aData, michael@0: uint32_t aLength) michael@0: : mStreamParser(aStreamParser) michael@0: , mData(aData) michael@0: , mLength(aLength) michael@0: {} michael@0: NS_IMETHODIMP Run() michael@0: { michael@0: mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); michael@0: mStreamParser->DoDataAvailable(mData, mLength); michael@0: return NS_OK; michael@0: } michael@0: }; michael@0: michael@0: nsresult michael@0: nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest, michael@0: nsISupports* aContext, michael@0: nsIInputStream* aInStream, michael@0: uint64_t aSourceOffset, michael@0: uint32_t aLength) michael@0: { michael@0: nsresult rv; michael@0: if (NS_FAILED(rv = mExecutor->IsBroken())) { michael@0: return rv; michael@0: } michael@0: michael@0: NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream."); michael@0: uint32_t totalRead; michael@0: // Main thread to parser thread dispatch requires copying to buffer first. 
michael@0: if (NS_IsMainThread()) { michael@0: const mozilla::fallible_t fallible = mozilla::fallible_t(); michael@0: nsAutoArrayPtr data(new (fallible) uint8_t[aLength]); michael@0: if (!data) { michael@0: return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); michael@0: } michael@0: rv = aInStream->Read(reinterpret_cast(data.get()), michael@0: aLength, &totalRead); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?"); michael@0: michael@0: nsCOMPtr dataAvailable = new nsHtml5DataAvailable(this, michael@0: data.forget(), michael@0: totalRead); michael@0: if (NS_FAILED(mThread->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) { michael@0: NS_WARNING("Dispatching DataAvailable event failed."); michael@0: } michael@0: return rv; michael@0: } else { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: mozilla::MutexAutoLock autoLock(mTokenizerMutex); michael@0: michael@0: // Read directly from response buffer. michael@0: rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength, michael@0: &totalRead); michael@0: if (NS_FAILED(rv)) { michael@0: NS_WARNING("Failed reading response data to parser"); michael@0: return rv; michael@0: } michael@0: return NS_OK; michael@0: } michael@0: } michael@0: michael@0: /* static */ michael@0: NS_METHOD michael@0: nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream, michael@0: void *aClosure, michael@0: const char *aFromSegment, michael@0: uint32_t aToOffset, michael@0: uint32_t aCount, michael@0: uint32_t *aWriteCount) michael@0: { michael@0: nsHtml5StreamParser* parser = static_cast(aClosure); michael@0: michael@0: parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount); michael@0: // Assume DoDataAvailable consumed all available bytes. 
michael@0: *aWriteCount = aCount; michael@0: return NS_OK; michael@0: } michael@0: michael@0: bool michael@0: nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) michael@0: { michael@0: nsAutoCString newEncoding; michael@0: if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) { michael@0: // the encoding name is bogus michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", michael@0: true, michael@0: mTokenizer->getLineNumber()); michael@0: return false; michael@0: } michael@0: michael@0: if (newEncoding.EqualsLiteral("UTF-16BE") || michael@0: newEncoding.EqualsLiteral("UTF-16LE")) { michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16", michael@0: true, michael@0: mTokenizer->getLineNumber()); michael@0: newEncoding.Assign("UTF-8"); michael@0: } michael@0: michael@0: if (newEncoding.EqualsLiteral("x-user-defined")) { michael@0: // WebKit/Blink hack for Indian and Armenian legacy sites michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined", michael@0: true, michael@0: mTokenizer->getLineNumber()); michael@0: newEncoding.Assign("windows-1252"); michael@0: } michael@0: michael@0: if (newEncoding.Equals(mCharset)) { michael@0: if (mCharsetSource < kCharsetFromMetaPrescan) { michael@0: if (mInitialEncodingWasFromParentFrame) { michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame", michael@0: false, michael@0: mTokenizer->getLineNumber()); michael@0: } else { michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncLateMeta", michael@0: false, michael@0: mTokenizer->getLineNumber()); michael@0: } michael@0: } michael@0: mCharsetSource = kCharsetFromMetaTag; // become confident michael@0: mFeedChardet = false; // don't feed chardet when confident michael@0: return false; michael@0: } michael@0: michael@0: aEncoding.Assign(newEncoding); michael@0: return true; michael@0: } michael@0: michael@0: bool michael@0: nsHtml5StreamParser::internalEncodingDeclaration(nsString* 
aEncoding) michael@0: { michael@0: // This code needs to stay in sync with michael@0: // nsHtml5MetaScanner::tryCharset. Unfortunately, the michael@0: // trickery with member fields there leads to some copy-paste reuse. :-( michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec michael@0: return false; michael@0: } michael@0: michael@0: nsAutoCString newEncoding; michael@0: CopyUTF16toUTF8(*aEncoding, newEncoding); michael@0: michael@0: if (!PreferredForInternalEncodingDecl(newEncoding)) { michael@0: return false; michael@0: } michael@0: michael@0: if (mReparseForbidden) { michael@0: // This mReparseForbidden check happens after the call to michael@0: // PreferredForInternalEncodingDecl so that if that method calls michael@0: // MaybeComplainAboutCharset, its charset complaint wins over the one michael@0: // below. michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaTooLate", michael@0: true, michael@0: mTokenizer->getLineNumber()); michael@0: return false; // not reparsing even if we wanted to michael@0: } michael@0: michael@0: // Avoid having the chardet ask for another restart after this restart michael@0: // request. michael@0: mFeedChardet = false; michael@0: mTreeBuilder->NeedsCharsetSwitchTo(newEncoding, michael@0: kCharsetFromMetaTag, michael@0: mTokenizer->getLineNumber()); michael@0: FlushTreeOpsAndDisarmTimer(); michael@0: Interrupt(); michael@0: // the tree op executor will cause the stream parser to terminate michael@0: // if the charset switch request is accepted or it'll uninterrupt michael@0: // if the request failed. Note that if the restart request fails, michael@0: // we don't bother trying to make chardet resume. Might as well michael@0: // assume that chardet-requested restarts would fail, too. 
michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: if (mFlushTimerArmed) { michael@0: // avoid calling Cancel if the flush timer isn't armed to avoid acquiring michael@0: // a mutex michael@0: mFlushTimer->Cancel(); michael@0: mFlushTimerArmed = false; michael@0: } michael@0: if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { michael@0: mTokenizer->FlushViewSource(); michael@0: } michael@0: mTreeBuilder->Flush(); michael@0: if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { michael@0: NS_WARNING("failed to dispatch executor flush event"); michael@0: } michael@0: } michael@0: michael@0: void michael@0: nsHtml5StreamParser::ParseAvailableData() michael@0: { michael@0: NS_ASSERTION(IsParserThread(), "Wrong thread!"); michael@0: mTokenizerMutex.AssertCurrentThreadOwns(); michael@0: michael@0: if (IsTerminatedOrInterrupted()) { michael@0: return; michael@0: } michael@0: michael@0: for (;;) { michael@0: if (!mFirstBuffer->hasMore()) { michael@0: if (mFirstBuffer == mLastBuffer) { michael@0: switch (mStreamState) { michael@0: case STREAM_BEING_READ: michael@0: // never release the last buffer. michael@0: if (!mSpeculating) { michael@0: // reuse buffer space if not speculating michael@0: mFirstBuffer->setStart(0); michael@0: mFirstBuffer->setEnd(0); michael@0: } michael@0: mTreeBuilder->FlushLoads(); michael@0: // Dispatch this runnable unconditionally, because the loads michael@0: // that need flushing may have been flushed earlier even if the michael@0: // flush right above here did nothing. 
michael@0: if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher))) { michael@0: NS_WARNING("failed to dispatch load flush event"); michael@0: } michael@0: return; // no more data for now but expecting more michael@0: case STREAM_ENDED: michael@0: if (mAtEOF) { michael@0: return; michael@0: } michael@0: mAtEOF = true; michael@0: if (mCharsetSource < kCharsetFromMetaTag) { michael@0: if (mInitialEncodingWasFromParentFrame) { michael@0: // Unfortunately, this check doesn't take effect for michael@0: // cross-origin frames, so cross-origin ad frames that have michael@0: // no text and only an image or a Flash embed get the more michael@0: // severe message from the next if block. The message is michael@0: // technically accurate, though. michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationFrame", michael@0: false, michael@0: 0); michael@0: } else if (mMode == NORMAL) { michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclaration", michael@0: true, michael@0: 0); michael@0: } else if (mMode == PLAIN_TEXT) { michael@0: mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationPlain", michael@0: true, michael@0: 0); michael@0: } michael@0: } michael@0: mTokenizer->eof(); michael@0: mTreeBuilder->StreamEnded(); michael@0: if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { michael@0: mTokenizer->EndViewSource(); michael@0: } michael@0: FlushTreeOpsAndDisarmTimer(); michael@0: return; // no more data and not expecting more michael@0: default: michael@0: NS_NOTREACHED("It should be impossible to reach this."); michael@0: return; michael@0: } michael@0: } michael@0: mFirstBuffer = mFirstBuffer->next; michael@0: continue; michael@0: } michael@0: michael@0: // now we have a non-empty buffer michael@0: mFirstBuffer->adjust(mLastWasCR); michael@0: mLastWasCR = false; michael@0: if (mFirstBuffer->hasMore()) { michael@0: mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer); michael@0: // At this point, internalEncodingDeclaration() may have called 
michael@0: // Terminate, but that never happens together with script. michael@0: // Can't assert that here, though, because it's possible that the main michael@0: // thread has called Terminate() while this thread was parsing. michael@0: if (mTreeBuilder->HasScript()) { michael@0: // HasScript() cannot return true if the tree builder is preventing michael@0: // script execution. michael@0: MOZ_ASSERT(mMode == NORMAL); michael@0: mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex); michael@0: nsHtml5Speculation* speculation = michael@0: new nsHtml5Speculation(mFirstBuffer, michael@0: mFirstBuffer->getStart(), michael@0: mTokenizer->getLineNumber(), michael@0: mTreeBuilder->newSnapshot()); michael@0: mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(), michael@0: speculation->GetStartLineNumber()); michael@0: FlushTreeOpsAndDisarmTimer(); michael@0: mTreeBuilder->SetOpSink(speculation); michael@0: mSpeculations.AppendElement(speculation); // adopts the pointer michael@0: mSpeculating = true; michael@0: } michael@0: if (IsTerminatedOrInterrupted()) { michael@0: return; michael@0: } michael@0: } michael@0: continue; michael@0: } michael@0: } michael@0: michael@0: class nsHtml5StreamParserContinuation : public nsRunnable michael@0: { michael@0: private: michael@0: nsHtml5RefPtr mStreamParser; michael@0: public: michael@0: nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser) michael@0: : mStreamParser(aStreamParser) michael@0: {} michael@0: NS_IMETHODIMP Run() michael@0: { michael@0: mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); michael@0: mStreamParser->Uninterrupt(); michael@0: mStreamParser->ParseAvailableData(); michael@0: return NS_OK; michael@0: } michael@0: }; michael@0: michael@0: void michael@0: nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer, michael@0: nsHtml5TreeBuilder* aTreeBuilder, michael@0: bool aLastWasCR) michael@0: { michael@0: NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); 
michael@0: NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML), michael@0: "ContinueAfterScripts called in view source mode!"); michael@0: if (NS_FAILED(mExecutor->IsBroken())) { michael@0: return; michael@0: } michael@0: #ifdef DEBUG michael@0: mExecutor->AssertStageEmpty(); michael@0: #endif michael@0: bool speculationFailed = false; michael@0: { michael@0: mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex); michael@0: if (mSpeculations.IsEmpty()) { michael@0: NS_NOTREACHED("ContinueAfterScripts called without speculations."); michael@0: return; michael@0: } michael@0: nsHtml5Speculation* speculation = mSpeculations.ElementAt(0); michael@0: if (aLastWasCR || michael@0: !aTokenizer->isInDataState() || michael@0: !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) { michael@0: speculationFailed = true; michael@0: // We've got a failed speculation :-( michael@0: Interrupt(); // Make the parser thread release the tokenizer mutex sooner michael@0: // now fall out of the speculationAutoLock into the tokenizerAutoLock block michael@0: } else { michael@0: // We've got a successful speculation! michael@0: if (mSpeculations.Length() > 1) { michael@0: // the first speculation isn't the current speculation, so there's michael@0: // no need to bother the parser thread. 
michael@0: speculation->FlushToSink(mExecutor); michael@0: NS_ASSERTION(!mExecutor->IsScriptExecuting(), michael@0: "ParseUntilBlocked() was supposed to ensure we don't come " michael@0: "here when scripts are executing."); michael@0: NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if " michael@0: "RunFlushLoop() didn't call ParseUntilBlocked() which is the " michael@0: "only caller of this method?"); michael@0: mSpeculations.RemoveElementAt(0); michael@0: return; michael@0: } michael@0: // else michael@0: Interrupt(); // Make the parser thread release the tokenizer mutex sooner michael@0: michael@0: // now fall through michael@0: // the first speculation is the current speculation. Need to michael@0: // release the the speculation mutex and acquire the tokenizer michael@0: // mutex. (Just acquiring the other mutex here would deadlock) michael@0: } michael@0: } michael@0: { michael@0: mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex); michael@0: #ifdef DEBUG michael@0: { michael@0: nsCOMPtr mainThread; michael@0: NS_GetMainThread(getter_AddRefs(mainThread)); michael@0: mAtomTable.SetPermittedLookupThread(mainThread); michael@0: } michael@0: #endif michael@0: // In principle, the speculation mutex should be acquired here, michael@0: // but there's no point, because the parser thread only acquires it michael@0: // when it has also acquired the tokenizer mutex and we are already michael@0: // holding the tokenizer mutex. 
michael@0: if (speculationFailed) { michael@0: // Rewind the stream michael@0: mAtEOF = false; michael@0: nsHtml5Speculation* speculation = mSpeculations.ElementAt(0); michael@0: mFirstBuffer = speculation->GetBuffer(); michael@0: mFirstBuffer->setStart(speculation->GetStart()); michael@0: mTokenizer->setLineNumber(speculation->GetStartLineNumber()); michael@0: michael@0: nsContentUtils::ReportToConsole(nsIScriptError::warningFlag, michael@0: NS_LITERAL_CSTRING("DOM Events"), michael@0: mExecutor->GetDocument(), michael@0: nsContentUtils::eDOM_PROPERTIES, michael@0: "SpeculationFailed", michael@0: nullptr, 0, michael@0: nullptr, michael@0: EmptyString(), michael@0: speculation->GetStartLineNumber()); michael@0: michael@0: nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next; michael@0: while (buffer) { michael@0: buffer->setStart(0); michael@0: buffer = buffer->next; michael@0: } michael@0: michael@0: mSpeculations.Clear(); // potentially a huge number of destructors michael@0: // run here synchronously on the main thread... 
michael@0: michael@0: mTreeBuilder->flushCharacters(); // empty the pending buffer michael@0: mTreeBuilder->ClearOps(); // now get rid of the failed ops michael@0: michael@0: mTreeBuilder->SetOpSink(mExecutor->GetStage()); michael@0: mExecutor->StartReadingFromStage(); michael@0: mSpeculating = false; michael@0: michael@0: // Copy state over michael@0: mLastWasCR = aLastWasCR; michael@0: mTokenizer->loadState(aTokenizer); michael@0: mTreeBuilder->loadState(aTreeBuilder, &mAtomTable); michael@0: } else { michael@0: // We've got a successful speculation and at least a moment ago it was michael@0: // the current speculation michael@0: mSpeculations.ElementAt(0)->FlushToSink(mExecutor); michael@0: NS_ASSERTION(!mExecutor->IsScriptExecuting(), michael@0: "ParseUntilBlocked() was supposed to ensure we don't come " michael@0: "here when scripts are executing."); michael@0: NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if " michael@0: "RunFlushLoop() didn't call ParseUntilBlocked() which is the " michael@0: "only caller of this method?"); michael@0: mSpeculations.RemoveElementAt(0); michael@0: if (mSpeculations.IsEmpty()) { michael@0: // yes, it was still the only speculation. Now stop speculating michael@0: // However, before telling the executor to read from stage, flush michael@0: // any pending ops straight to the executor, because otherwise michael@0: // they remain unflushed until we get more data from the network. 
michael@0: mTreeBuilder->SetOpSink(mExecutor); michael@0: mTreeBuilder->Flush(true); michael@0: mTreeBuilder->SetOpSink(mExecutor->GetStage()); michael@0: mExecutor->StartReadingFromStage(); michael@0: mSpeculating = false; michael@0: } michael@0: } michael@0: nsCOMPtr event = new nsHtml5StreamParserContinuation(this); michael@0: if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { michael@0: NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation"); michael@0: } michael@0: // A stream event might run before this event runs, but that's harmless. michael@0: #ifdef DEBUG michael@0: mAtomTable.SetPermittedLookupThread(mThread); michael@0: #endif michael@0: } michael@0: }

/**
 * Posts an nsHtml5StreamParserContinuation for this stream parser to
 * mThread. Must be called on the main thread (asserted). A failed dispatch
 * is only reported with a warning; nothing is returned to the caller.
 */
void
nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch()
{
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
  nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
  if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
  }
}

/**
 * Runnable that cancels and drops the flush timer of the stream parser it
 * was created for. It is dispatched to mThread by
 * nsHtml5StreamParser::DropTimer(); see the comment there for why the
 * cancellation has to be done from a runnable.
 */
class nsHtml5TimerKungFu : public nsRunnable
{
private:
  // Holds a reference to the stream parser so that it stays alive until
  // this runnable is done.
  nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
public:
  // explicit: a raw stream parser pointer must never convert silently into
  // a runnable.
  explicit nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser)
    : mStreamParser(aStreamParser)
  {}

  /**
   * Cancels the flush timer if there still is one and nulls out the stream
   * parser's pointer to it. Always returns NS_OK.
   */
  NS_IMETHODIMP Run()
  {
    if (mStreamParser->mFlushTimer) {
      mStreamParser->mFlushTimer->Cancel();
      mStreamParser->mFlushTimer = nullptr;
    }
    return NS_OK;
  }
};

/**
 * Arranges for the flush timer (if any) to be canceled and released by
 * dispatching an nsHtml5TimerKungFu runnable to mThread. Must be called on
 * the main thread (asserted). Does nothing when there is no flush timer.
 */
void
nsHtml5StreamParser::DropTimer()
{
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
  /*
   * Simply nulling out the timer wouldn't work, because if the timer is
   * armed, it needs to be canceled first. Simply canceling it first wouldn't
   * work, because nsTimerImpl::Cancel is not safe for calling from outside
   * the thread where nsTimerImpl::Fire would run. It's not safe to
   * dispatch a runnable to cancel the timer from the destructor of this
   * class, because the timer has a weak (void*) pointer back to this instance
   * of the stream parser and having the timer fire before the runnable
   * cancels it would make the timer access a deleted object.
   *
   * This DropTimer method addresses these issues. This method must be called
   * on the main thread before the destructor of this class is reached.
   * The nsHtml5TimerKungFu object has an nsHtml5RefPtr that addrefs this
   * stream parser object to keep it alive until the runnable is done.
   * The runnable cancels the timer on the parser thread, drops the timer
   * and lets nsHtml5RefPtr send a runnable back to the main thread to
   * release the stream parser.
   */
  if (mFlushTimer) {
    nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this);
    if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
      NS_WARNING("Failed to dispatch TimerKungFu event");
    }
  }
}

// Using a static, because the method name Notify is taken by the chardet
// callback.
/**
 * Static timer trampoline: forwards to TimerFlush() on the stream parser
 * passed as the closure.
 *
 * @param aTimer   the timer that fired (unused here)
 * @param aClosure the nsHtml5StreamParser instance, passed as void*
 */
void
nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure)
{
  (static_cast<nsHtml5StreamParser*> (aClosure))->TimerFlush();
}

/**
 * Flush-timer handler. Runs on the parser thread (asserted) with the
 * tokenizer mutex held for its whole duration. Marks the timer as no longer
 * armed, records that it has fired at least once and, unless the parser is
 * terminated or interrupted, flushes pending output and asks the main
 * thread to run mExecutorFlusher when the flush reports work to do.
 */
void
nsHtml5StreamParser::TimerFlush()
{
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  mozilla::MutexAutoLock autoLock(mTokenizerMutex);

  NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating.");

  // The timer fired if we got here. No need to cancel it. Mark it as
  // not armed, though.
  mFlushTimerArmed = false;

  mFlushTimerEverFired = true;

  if (IsTerminatedOrInterrupted()) {
    return;
  }

  if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    mTreeBuilder->Flush(); // delete useless ops
    if (mTokenizer->FlushViewSource()) {
      if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
        NS_WARNING("failed to dispatch executor flush event");
      }
    }
  } else {
    // we aren't speculating and we don't know when new data is
    // going to arrive. Send data to the main thread.
    if (mTreeBuilder->Flush(true)) {
      if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
        NS_WARNING("failed to dispatch executor flush event");
      }
    }
  }
}

/**
 * Terminates this stream parser, marks the tree builder as broken with the
 * given status, flushes the tree builder and dispatches mExecutorFlusher
 * to the main thread. Must run on the parser thread with the tokenizer
 * mutex held by the caller (both asserted).
 *
 * @param aRv the failure code handed to the tree builder's MarkAsBroken()
 */
void
nsHtml5StreamParser::MarkAsBroken(nsresult aRv)
{
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  mTokenizerMutex.AssertCurrentThreadOwns();

  Terminate();
  mTreeBuilder->MarkAsBroken(aRv);
  // DebugOnly: the flush result is only inspected by the assertion below.
  mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false);
  NS_ASSERTION(hadOps, "Should have had the markAsBroken op!");
  if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
    NS_WARNING("failed to dispatch executor flush event");
  }
}