parser/html/nsHtml5StreamParser.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set sw=2 ts=2 et tw=79: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "mozilla/DebugOnly.h"
michael@0 8
michael@0 9 #include "nsHtml5StreamParser.h"
michael@0 10 #include "nsContentUtils.h"
michael@0 11 #include "nsHtml5Tokenizer.h"
michael@0 12 #include "nsIHttpChannel.h"
michael@0 13 #include "nsHtml5Parser.h"
michael@0 14 #include "nsHtml5TreeBuilder.h"
michael@0 15 #include "nsHtml5AtomTable.h"
michael@0 16 #include "nsHtml5Module.h"
michael@0 17 #include "nsHtml5RefPtr.h"
michael@0 18 #include "nsIScriptError.h"
michael@0 19 #include "mozilla/Preferences.h"
michael@0 20 #include "nsHtml5Highlighter.h"
michael@0 21 #include "expat_config.h"
michael@0 22 #include "expat.h"
michael@0 23 #include "nsINestedURI.h"
michael@0 24 #include "nsCharsetSource.h"
michael@0 25 #include "nsIWyciwygChannel.h"
michael@0 26 #include "nsIThreadRetargetableRequest.h"
michael@0 27 #include "nsPrintfCString.h"
michael@0 28 #include "nsNetUtil.h"
michael@0 29
michael@0 30 #include "mozilla/dom/EncodingUtils.h"
michael@0 31
michael@0 32 using namespace mozilla;
michael@0 33 using mozilla::dom::EncodingUtils;
michael@0 34
michael@0 35 int32_t nsHtml5StreamParser::sTimerInitialDelay = 120;
michael@0 36 int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120;
michael@0 37
michael@0 38 // static
michael@0 39 void
michael@0 40 nsHtml5StreamParser::InitializeStatics()
michael@0 41 {
michael@0 42 Preferences::AddIntVarCache(&sTimerInitialDelay,
michael@0 43 "html5.flushtimer.initialdelay");
michael@0 44 Preferences::AddIntVarCache(&sTimerSubsequentDelay,
michael@0 45 "html5.flushtimer.subsequentdelay");
michael@0 46 }
michael@0 47
michael@0 48 /*
michael@0 49 * Note that nsHtml5StreamParser implements cycle collecting AddRef and
michael@0 50 * Release. Therefore, nsHtml5StreamParser must never be refcounted from
michael@0 51 * the parser thread!
michael@0 52 *
michael@0 53 * To work around this limitation, runnables posted by the main thread to the
michael@0 54 * parser thread hold their reference to the stream parser in an
michael@0 55 * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds
michael@0 56 * just like a regular nsRefPtr. This is OK, since the creation of the
michael@0 57 * runnable and the nsHtml5RefPtr happens on the main thread.
michael@0 58 *
michael@0 59 * When the runnable is done on the parser thread, the destructor of
michael@0 60 * nsHtml5RefPtr runs there. It doesn't call Release on the held object
michael@0 61 * directly. Instead, it posts another runnable back to the main thread where
michael@0 62 * that runnable calls Release on the wrapped object.
michael@0 63 *
michael@0 64 * When posting runnables in the other direction, the runnables have to be
michael@0 65 * created on the main thread when nsHtml5StreamParser is instantiated and
michael@0 66 * held for the lifetime of the nsHtml5StreamParser. This works, because the
michael@0 67 * same runnable can be dispatched multiple times and currently runnables
michael@0 68 * posted from the parser thread to main thread don't need to wrap any
michael@0 69 * runnable-specific data. (In the other direction, the runnables most notably
michael@0 70 * wrap the byte data of the stream.)
michael@0 71 */
// AddRef/Release for nsHtml5StreamParser use the cycle-collecting
// implementations; the comment block above describes the threading
// restriction that follows from this.
michael@0 72 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
michael@0 73 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)
michael@0 74
// Interface table: nsICharsetDetectionObserver is the only interface listed
// here; the segue macro continues into the cycle-collection interface map.
michael@0 75 NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
michael@0 76 NS_INTERFACE_TABLE(nsHtml5StreamParser,
michael@0 77 nsICharsetDetectionObserver)
michael@0 78 NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
michael@0 79 NS_INTERFACE_MAP_END
michael@0 80
// Cycle-collection participant for nsHtml5StreamParser.
michael@0 81 NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser)
michael@0 82
// Unlink: drops the flush timer first, then releases the observer, request,
// owner and chardet references. The two flusher runnables and mExecutor are
// cleared by plain assignment rather than through the UNLINK macro.
michael@0 83 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser)
michael@0 84 tmp->DropTimer();
michael@0 85 NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
michael@0 86 NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest)
michael@0 87 NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner)
michael@0 88 tmp->mExecutorFlusher = nullptr;
michael@0 89 tmp->mLoadFlusher = nullptr;
michael@0 90 tmp->mExecutor = nullptr;
michael@0 91 NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet)
michael@0 92 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
michael@0 93
// Traverse: reports mObserver, mRequest and mOwner directly. The executor is
// reported once per live flusher runnable (each runnable holds its own strong
// reference to it), and this object is reported as a child of itself when
// mChardet is set, standing in for the detector's reference to its observer.
michael@0 94 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
michael@0 95 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
michael@0 96 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest)
michael@0 97 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner)
michael@0 98 // hack: count the strongly owned edge wrapped in the runnable
michael@0 99 if (tmp->mExecutorFlusher) {
michael@0 100 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor");
michael@0 101 cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor));
michael@0 102 }
michael@0 103 // hack: count the strongly owned edge wrapped in the runnable
michael@0 104 if (tmp->mLoadFlusher) {
michael@0 105 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
michael@0 106 cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor));
michael@0 107 }
michael@0 108 // hack: count self if held by mChardet
michael@0 109 if (tmp->mChardet) {
michael@0 110 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver");
michael@0 111 cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp));
michael@0 112 }
michael@0 113 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
michael@0 114
michael@0 115 class nsHtml5ExecutorFlusher : public nsRunnable
michael@0 116 {
michael@0 117 private:
michael@0 118 nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
michael@0 119 public:
michael@0 120 nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor)
michael@0 121 : mExecutor(aExecutor)
michael@0 122 {}
michael@0 123 NS_IMETHODIMP Run()
michael@0 124 {
michael@0 125 if (!mExecutor->isInList()) {
michael@0 126 mExecutor->RunFlushLoop();
michael@0 127 }
michael@0 128 return NS_OK;
michael@0 129 }
michael@0 130 };
michael@0 131
michael@0 132 class nsHtml5LoadFlusher : public nsRunnable
michael@0 133 {
michael@0 134 private:
michael@0 135 nsRefPtr<nsHtml5TreeOpExecutor> mExecutor;
michael@0 136 public:
michael@0 137 nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor)
michael@0 138 : mExecutor(aExecutor)
michael@0 139 {}
michael@0 140 NS_IMETHODIMP Run()
michael@0 141 {
michael@0 142 mExecutor->FlushSpeculativeLoads();
michael@0 143 return NS_OK;
michael@0 144 }
michael@0 145 };
michael@0 146
michael@0 147 nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
michael@0 148 nsHtml5Parser* aOwner,
michael@0 149 eParserMode aMode)
michael@0 150 : mFirstBuffer(nullptr) // Will be filled when starting
michael@0 151 , mLastBuffer(nullptr) // Will be filled when starting
michael@0 152 , mExecutor(aExecutor)
michael@0 153 , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML ||
michael@0 154 aMode == VIEW_SOURCE_XML) ?
michael@0 155 nullptr : mExecutor->GetStage(),
michael@0 156 aMode == NORMAL ?
michael@0 157 mExecutor->GetStage() : nullptr))
michael@0 158 , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML))
michael@0 159 , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
michael@0 160 , mOwner(aOwner)
michael@0 161 , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")
michael@0 162 , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex")
michael@0 163 , mThread(nsHtml5Module::GetStreamParserThread())
michael@0 164 , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor))
michael@0 165 , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor))
michael@0 166 , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1"))
michael@0 167 , mMode(aMode)
michael@0 168 {
michael@0 169 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 170 mFlushTimer->SetTarget(mThread);
michael@0 171 #ifdef DEBUG
michael@0 172 mAtomTable.SetPermittedLookupThread(mThread);
michael@0 173 #endif
michael@0 174 mTokenizer->setInterner(&mAtomTable);
michael@0 175 mTokenizer->setEncodingDeclarationHandler(this);
michael@0 176
michael@0 177 if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) {
michael@0 178 nsHtml5Highlighter* highlighter =
michael@0 179 new nsHtml5Highlighter(mExecutor->GetStage());
michael@0 180 mTokenizer->EnableViewSource(highlighter); // takes ownership
michael@0 181 mTreeBuilder->EnableViewSource(highlighter); // doesn't own
michael@0 182 }
michael@0 183
michael@0 184 // Chardet instantiation adapted from nsDOMFile.
michael@0 185 // Chardet is initialized here even if it turns out to be useless
michael@0 186 // to make the chardet refcount its observer (nsHtml5StreamParser)
michael@0 187 // on the main thread.
michael@0 188 const nsAdoptingCString& detectorName =
michael@0 189 Preferences::GetLocalizedCString("intl.charset.detector");
michael@0 190 if (!detectorName.IsEmpty()) {
michael@0 191 nsAutoCString detectorContractID;
michael@0 192 detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
michael@0 193 detectorContractID += detectorName;
michael@0 194 if ((mChardet = do_CreateInstance(detectorContractID.get()))) {
michael@0 195 (void) mChardet->Init(this);
michael@0 196 mFeedChardet = true;
michael@0 197 }
michael@0 198 }
michael@0 199
michael@0 200 // There's a zeroing operator new for everything else
michael@0 201 }
michael@0 202
// Destructor. Asserts that it runs on the main thread, ends the tokenizer and
// asserts that the flush timer has already been dropped. In DEBUG builds the
// members listed below are additionally cleared explicitly, in this order.
michael@0 203 nsHtml5StreamParser::~nsHtml5StreamParser()
michael@0 204 {
michael@0 205 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 206 mTokenizer->end();
michael@0 207 NS_ASSERTION(!mFlushTimer, "Flush timer was not dropped before dtor!");
michael@0 208 #ifdef DEBUG
michael@0 209 mRequest = nullptr;
michael@0 210 mObserver = nullptr;
michael@0 211 mUnicodeDecoder = nullptr;
michael@0 212 mSniffingBuffer = nullptr;
michael@0 213 mMetaScanner = nullptr;
michael@0 214 mFirstBuffer = nullptr;
michael@0 215 mExecutor = nullptr;
michael@0 216 mTreeBuilder = nullptr;
michael@0 217 mTokenizer = nullptr;
michael@0 218 mOwner = nullptr;
michael@0 219 #endif
michael@0 220 }
michael@0 221
michael@0 222 nsresult
michael@0 223 nsHtml5StreamParser::GetChannel(nsIChannel** aChannel)
michael@0 224 {
michael@0 225 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 226 return mRequest ? CallQueryInterface(mRequest, aChannel) :
michael@0 227 NS_ERROR_NOT_AVAILABLE;
michael@0 228 }
michael@0 229
michael@0 230 NS_IMETHODIMP
michael@0 231 nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
michael@0 232 {
michael@0 233 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 234 if (aConf == eBestAnswer || aConf == eSureAnswer) {
michael@0 235 mFeedChardet = false; // just in case
michael@0 236 nsAutoCString encoding;
michael@0 237 if (!EncodingUtils::FindEncodingForLabel(nsDependentCString(aCharset),
michael@0 238 encoding)) {
michael@0 239 return NS_OK;
michael@0 240 }
michael@0 241 if (encoding.EqualsLiteral("replacement")) {
michael@0 242 return NS_OK;
michael@0 243 }
michael@0 244 if (HasDecoder()) {
michael@0 245 if (mCharset.Equals(encoding)) {
michael@0 246 NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
michael@0 247 "Why are we running chardet at all?");
michael@0 248 mCharsetSource = kCharsetFromAutoDetection;
michael@0 249 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 250 } else {
michael@0 251 // We've already committed to a decoder. Request a reload from the
michael@0 252 // docshell.
michael@0 253 mTreeBuilder->NeedsCharsetSwitchTo(encoding,
michael@0 254 kCharsetFromAutoDetection,
michael@0 255 0);
michael@0 256 FlushTreeOpsAndDisarmTimer();
michael@0 257 Interrupt();
michael@0 258 }
michael@0 259 } else {
michael@0 260 // Got a confident answer from the sniffing buffer. That code will
michael@0 261 // take care of setting up the decoder.
michael@0 262 mCharset.Assign(encoding);
michael@0 263 mCharsetSource = kCharsetFromAutoDetection;
michael@0 264 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 265 }
michael@0 266 }
michael@0 267 return NS_OK;
michael@0 268 }
michael@0 269
michael@0 270 void
michael@0 271 nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL)
michael@0 272 {
michael@0 273 if (aURL) {
michael@0 274 nsCOMPtr<nsIURI> temp;
michael@0 275 bool isViewSource;
michael@0 276 aURL->SchemeIs("view-source", &isViewSource);
michael@0 277 if (isViewSource) {
michael@0 278 nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL);
michael@0 279 nested->GetInnerURI(getter_AddRefs(temp));
michael@0 280 } else {
michael@0 281 temp = aURL;
michael@0 282 }
michael@0 283 bool isData;
michael@0 284 temp->SchemeIs("data", &isData);
michael@0 285 if (isData) {
michael@0 286 // Avoid showing potentially huge data: URLs. The three last bytes are
michael@0 287 // UTF-8 for an ellipsis.
michael@0 288 mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6");
michael@0 289 } else {
michael@0 290 temp->GetSpec(mViewSourceTitle);
michael@0 291 }
michael@0 292 }
michael@0 293 }
michael@0 294
michael@0 295 nsresult
michael@0 296 nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null
michael@0 297 uint32_t aCount,
michael@0 298 uint32_t* aWriteCount)
michael@0 299 {
michael@0 300 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 301 nsresult rv = NS_OK;
michael@0 302 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
michael@0 303 if (mSniffingBuffer) {
michael@0 304 uint32_t writeCount;
michael@0 305 rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &writeCount);
michael@0 306 NS_ENSURE_SUCCESS(rv, rv);
michael@0 307 mSniffingBuffer = nullptr;
michael@0 308 }
michael@0 309 mMetaScanner = nullptr;
michael@0 310 if (aFromSegment) {
michael@0 311 rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount);
michael@0 312 }
michael@0 313 return rv;
michael@0 314 }
michael@0 315
michael@0 316 nsresult
michael@0 317 nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName)
michael@0 318 {
michael@0 319 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 320 mCharset.Assign(aDecoderCharsetName);
michael@0 321 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
michael@0 322 mCharsetSource = kCharsetFromByteOrderMark;
michael@0 323 mFeedChardet = false;
michael@0 324 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 325 mSniffingBuffer = nullptr;
michael@0 326 mMetaScanner = nullptr;
michael@0 327 mBomState = BOM_SNIFFING_OVER;
michael@0 328 return NS_OK;
michael@0 329 }
michael@0 330
michael@0 331 void
michael@0 332 nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment,
michael@0 333 uint32_t aCountToSniffingLimit)
michael@0 334 {
michael@0 335 // Avoid underspecified heuristic craziness for XHR
michael@0 336 if (mMode == LOAD_AS_DATA) {
michael@0 337 return;
michael@0 338 }
michael@0 339 // Make sure there's enough data. Require room for "<title></title>"
michael@0 340 if (mSniffingLength + aCountToSniffingLimit < 30) {
michael@0 341 return;
michael@0 342 }
michael@0 343 // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
michael@0 344 bool byteZero[2] = { false, false };
michael@0 345 bool byteNonZero[2] = { false, false };
michael@0 346 uint32_t i = 0;
michael@0 347 if (mSniffingBuffer) {
michael@0 348 for (; i < mSniffingLength; ++i) {
michael@0 349 if (mSniffingBuffer[i]) {
michael@0 350 if (byteNonZero[1 - (i % 2)]) {
michael@0 351 return;
michael@0 352 }
michael@0 353 byteNonZero[i % 2] = true;
michael@0 354 } else {
michael@0 355 if (byteZero[1 - (i % 2)]) {
michael@0 356 return;
michael@0 357 }
michael@0 358 byteZero[i % 2] = true;
michael@0 359 }
michael@0 360 }
michael@0 361 }
michael@0 362 if (aFromSegment) {
michael@0 363 for (uint32_t j = 0; j < aCountToSniffingLimit; ++j) {
michael@0 364 if (aFromSegment[j]) {
michael@0 365 if (byteNonZero[1 - ((i + j) % 2)]) {
michael@0 366 return;
michael@0 367 }
michael@0 368 byteNonZero[(i + j) % 2] = true;
michael@0 369 } else {
michael@0 370 if (byteZero[1 - ((i + j) % 2)]) {
michael@0 371 return;
michael@0 372 }
michael@0 373 byteZero[(i + j) % 2] = true;
michael@0 374 }
michael@0 375 }
michael@0 376 }
michael@0 377
michael@0 378 if (byteNonZero[0]) {
michael@0 379 mCharset.Assign("UTF-16LE");
michael@0 380 } else {
michael@0 381 mCharset.Assign("UTF-16BE");
michael@0 382 }
michael@0 383 mCharsetSource = kCharsetFromIrreversibleAutoDetection;
michael@0 384 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 385 mFeedChardet = false;
michael@0 386 mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16",
michael@0 387 true,
michael@0 388 0);
michael@0 389
michael@0 390 }
michael@0 391
michael@0 392 void
michael@0 393 nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
michael@0 394 {
michael@0 395 if (aEncoding) {
michael@0 396 nsDependentString utf16(aEncoding);
michael@0 397 nsAutoCString utf8;
michael@0 398 CopyUTF16toUTF8(utf16, utf8);
michael@0 399 if (PreferredForInternalEncodingDecl(utf8)) {
michael@0 400 mCharset.Assign(utf8);
michael@0 401 mCharsetSource = kCharsetFromMetaTag; // closest for XML
michael@0 402 return;
michael@0 403 }
michael@0 404 // else the page declared an encoding Gecko doesn't support and we'd
michael@0 405 // end up defaulting to UTF-8 anyway. Might as well fall through here
michael@0 406 // right away and let the encoding be set to UTF-8 which we'd default to
michael@0 407 // anyway.
michael@0 408 }
michael@0 409 mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM
michael@0 410 mCharsetSource = kCharsetFromMetaTag; // means confident
michael@0 411 }
michael@0 412
michael@0 413 // A separate user data struct is used instead of passing the
michael@0 414 // nsHtml5StreamParser instance as user data in order to avoid including
michael@0 415 // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts.
michael@0 416 // Using a separate user data struct also avoids bloating nsHtml5StreamParser
michael@0 417 // by one pointer.
michael@0 418 struct UserData {
// The expat parser the callbacks stop via XML_StopParser().
michael@0 419 XML_Parser mExpat;
// The stream parser that receives the encoding found in the XML declaration.
michael@0 420 nsHtml5StreamParser* mStreamParser;
michael@0 421 };
michael@0 422
michael@0 423 // Using no-namespace handler callbacks to avoid including expat.h in
michael@0 424 // nsHtml5StreamParser.h, since doing so would cause naming conclicts.
michael@0 425 static void
michael@0 426 HandleXMLDeclaration(void* aUserData,
michael@0 427 const XML_Char* aVersion,
michael@0 428 const XML_Char* aEncoding,
michael@0 429 int aStandalone)
michael@0 430 {
michael@0 431 UserData* ud = static_cast<UserData*>(aUserData);
michael@0 432 ud->mStreamParser->SetEncodingFromExpat(
michael@0 433 reinterpret_cast<const char16_t*>(aEncoding));
michael@0 434 XML_StopParser(ud->mExpat, false);
michael@0 435 }
michael@0 436
michael@0 437 static void
michael@0 438 HandleStartElement(void* aUserData,
michael@0 439 const XML_Char* aName,
michael@0 440 const XML_Char **aAtts)
michael@0 441 {
michael@0 442 UserData* ud = static_cast<UserData*>(aUserData);
michael@0 443 XML_StopParser(ud->mExpat, false);
michael@0 444 }
michael@0 445
michael@0 446 static void
michael@0 447 HandleEndElement(void* aUserData,
michael@0 448 const XML_Char* aName)
michael@0 449 {
michael@0 450 UserData* ud = static_cast<UserData*>(aUserData);
michael@0 451 XML_StopParser(ud->mExpat, false);
michael@0 452 }
michael@0 453
michael@0 454 static void
michael@0 455 HandleComment(void* aUserData,
michael@0 456 const XML_Char* aName)
michael@0 457 {
michael@0 458 UserData* ud = static_cast<UserData*>(aUserData);
michael@0 459 XML_StopParser(ud->mExpat, false);
michael@0 460 }
michael@0 461
michael@0 462 static void
michael@0 463 HandleProcessingInstruction(void* aUserData,
michael@0 464 const XML_Char* aTarget,
michael@0 465 const XML_Char* aData)
michael@0 466 {
michael@0 467 UserData* ud = static_cast<UserData*>(aUserData);
michael@0 468 XML_StopParser(ud->mExpat, false);
michael@0 469 }
michael@0 470
michael@0 471 nsresult
michael@0 472 nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be null
michael@0 473 uint32_t aCount,
michael@0 474 uint32_t* aWriteCount,
michael@0 475 uint32_t aCountToSniffingLimit)
michael@0 476 {
michael@0 477 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 478 NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
michael@0 479 "Should not finalize sniffing when using forced charset.");
michael@0 480 if (mMode == VIEW_SOURCE_XML) {
michael@0 481 static const XML_Memory_Handling_Suite memsuite =
michael@0 482 {
michael@0 483 (void *(*)(size_t))moz_xmalloc,
michael@0 484 (void *(*)(void *, size_t))moz_xrealloc,
michael@0 485 moz_free
michael@0 486 };
michael@0 487
michael@0 488 static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' };
michael@0 489
michael@0 490 static const char16_t kISO88591[] =
michael@0 491 { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };
michael@0 492
michael@0 493 UserData ud;
michael@0 494 ud.mStreamParser = this;
michael@0 495
michael@0 496 // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML
michael@0 497 // documents MUST begin with a BOM. We don't support EBCDIC and such.
michael@0 498 // Thus, at this point, what we have is garbage or something encoded using
michael@0 499 // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes
michael@0 500 // without throwing errors when bytes have the most significant bit set
michael@0 501 // and without triggering expat's unknown encoding code paths. This is
michael@0 502 // enough to be able to use expat to parse the XML declaration in order
michael@0 503 // to extract the encoding name from it.
michael@0 504 ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator);
michael@0 505 XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration);
michael@0 506 XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement);
michael@0 507 XML_SetCommentHandler(ud.mExpat, HandleComment);
michael@0 508 XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction);
michael@0 509 XML_SetUserData(ud.mExpat, static_cast<void*>(&ud));
michael@0 510
michael@0 511 XML_Status status = XML_STATUS_OK;
michael@0 512
michael@0 513 // aFromSegment points to the data obtained from the current network
michael@0 514 // event. mSniffingBuffer (if it exists) contains the data obtained before
michael@0 515 // the current event. Thus, mSniffingLenth bytes of mSniffingBuffer
michael@0 516 // followed by aCountToSniffingLimit bytes from aFromSegment are the
michael@0 517 // first 1024 bytes of the file (or the file as a whole if the file is
michael@0 518 // 1024 bytes long or shorter). Thus, we parse both buffers, but if the
michael@0 519 // first call succeeds already, we skip parsing the second buffer.
michael@0 520 if (mSniffingBuffer) {
michael@0 521 status = XML_Parse(ud.mExpat,
michael@0 522 reinterpret_cast<const char*>(mSniffingBuffer.get()),
michael@0 523 mSniffingLength,
michael@0 524 false);
michael@0 525 }
michael@0 526 if (status == XML_STATUS_OK &&
michael@0 527 mCharsetSource < kCharsetFromMetaTag &&
michael@0 528 aFromSegment) {
michael@0 529 status = XML_Parse(ud.mExpat,
michael@0 530 reinterpret_cast<const char*>(aFromSegment),
michael@0 531 aCountToSniffingLimit,
michael@0 532 false);
michael@0 533 }
michael@0 534 XML_ParserFree(ud.mExpat);
michael@0 535
michael@0 536 if (mCharsetSource < kCharsetFromMetaTag) {
michael@0 537 // Failed to get an encoding from the XML declaration. XML defaults
michael@0 538 // confidently to UTF-8 in this case.
michael@0 539 // It is also possible that the document has an XML declaration that is
michael@0 540 // longer than 1024 bytes, but that case is not worth worrying about.
michael@0 541 mCharset.AssignLiteral("UTF-8");
michael@0 542 mCharsetSource = kCharsetFromMetaTag; // means confident
michael@0 543 }
michael@0 544
michael@0 545 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0 546 aCount,
michael@0 547 aWriteCount);
michael@0 548 }
michael@0 549
michael@0 550 // meta scan failed.
michael@0 551 if (mCharsetSource >= kCharsetFromHintPrevDoc) {
michael@0 552 mFeedChardet = false;
michael@0 553 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
michael@0 554 }
michael@0 555 // Check for BOMless UTF-16 with Basic
michael@0 556 // Latin content for compat with IE. See bug 631751.
michael@0 557 SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit);
michael@0 558 // the charset may have been set now
michael@0 559 // maybe try chardet now;
michael@0 560 if (mFeedChardet) {
michael@0 561 bool dontFeed;
michael@0 562 nsresult rv;
michael@0 563 if (mSniffingBuffer) {
michael@0 564 rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed);
michael@0 565 mFeedChardet = !dontFeed;
michael@0 566 NS_ENSURE_SUCCESS(rv, rv);
michael@0 567 }
michael@0 568 if (mFeedChardet && aFromSegment) {
michael@0 569 rv = mChardet->DoIt((const char*)aFromSegment,
michael@0 570 // Avoid buffer boundary-dependent behavior when
michael@0 571 // reparsing is forbidden. If reparse is forbidden,
michael@0 572 // act as if we only saw the first 1024 bytes.
michael@0 573 // When reparsing isn't forbidden, buffer boundaries
michael@0 574 // can have an effect on whether the page is loaded
michael@0 575 // once or twice. :-(
michael@0 576 mReparseForbidden ? aCountToSniffingLimit : aCount,
michael@0 577 &dontFeed);
michael@0 578 mFeedChardet = !dontFeed;
michael@0 579 NS_ENSURE_SUCCESS(rv, rv);
michael@0 580 }
michael@0 581 if (mFeedChardet && (!aFromSegment || mReparseForbidden)) {
michael@0 582 // mReparseForbidden is checked so that we get to use the sniffing
michael@0 583 // buffer with the best guess so far if we aren't allowed to guess
michael@0 584 // better later.
michael@0 585 mFeedChardet = false;
michael@0 586 rv = mChardet->Done();
michael@0 587 NS_ENSURE_SUCCESS(rv, rv);
michael@0 588 }
michael@0 589 // fall thru; callback may have changed charset
michael@0 590 }
michael@0 591 if (mCharsetSource == kCharsetUninitialized) {
michael@0 592 // Hopefully this case is never needed, but dealing with it anyway
michael@0 593 mCharset.AssignLiteral("windows-1252");
michael@0 594 mCharsetSource = kCharsetFromFallback;
michael@0 595 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 596 } else if (mMode == LOAD_AS_DATA &&
michael@0 597 mCharsetSource == kCharsetFromFallback) {
michael@0 598 NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
michael@0 599 NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
michael@0 600 NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
michael@0 601 "XHR should default to UTF-8");
michael@0 602 // Now mark charset source as non-weak to signal that we have a decision
michael@0 603 mCharsetSource = kCharsetFromDocTypeDefault;
michael@0 604 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 605 }
michael@0 606 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
michael@0 607 }
michael@0 608
michael@0 609 nsresult
michael@0 610 nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
michael@0 611 uint32_t aCount,
michael@0 612 uint32_t* aWriteCount)
michael@0 613 {
michael@0 614 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 615 nsresult rv = NS_OK;
michael@0 616 uint32_t writeCount;
michael@0 617
michael@0 618 // mCharset and mCharsetSource potentially have come from channel or higher
michael@0 619 // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
michael@0 620 // If we don't find a BOM, the previously set values of mCharset and
michael@0 621 // mCharsetSource are not modified by the BOM sniffing here.
michael@0 622 for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
michael@0 623 switch (mBomState) {
michael@0 624 case BOM_SNIFFING_NOT_STARTED:
michael@0 625 NS_ASSERTION(i == 0, "Bad BOM sniffing state.");
michael@0 626 switch (*aFromSegment) {
michael@0 627 case 0xEF:
michael@0 628 mBomState = SEEN_UTF_8_FIRST_BYTE;
michael@0 629 break;
michael@0 630 case 0xFF:
michael@0 631 mBomState = SEEN_UTF_16_LE_FIRST_BYTE;
michael@0 632 break;
michael@0 633 case 0xFE:
michael@0 634 mBomState = SEEN_UTF_16_BE_FIRST_BYTE;
michael@0 635 break;
michael@0 636 default:
michael@0 637 mBomState = BOM_SNIFFING_OVER;
michael@0 638 break;
michael@0 639 }
michael@0 640 break;
michael@0 641 case SEEN_UTF_16_LE_FIRST_BYTE:
michael@0 642 if (aFromSegment[i] == 0xFE) {
michael@0 643 rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form
michael@0 644 NS_ENSURE_SUCCESS(rv, rv);
michael@0 645 uint32_t count = aCount - (i + 1);
michael@0 646 rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
michael@0 647 NS_ENSURE_SUCCESS(rv, rv);
michael@0 648 *aWriteCount = writeCount + (i + 1);
michael@0 649 return rv;
michael@0 650 }
michael@0 651 mBomState = BOM_SNIFFING_OVER;
michael@0 652 break;
michael@0 653 case SEEN_UTF_16_BE_FIRST_BYTE:
michael@0 654 if (aFromSegment[i] == 0xFF) {
michael@0 655 rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form
michael@0 656 NS_ENSURE_SUCCESS(rv, rv);
michael@0 657 uint32_t count = aCount - (i + 1);
michael@0 658 rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
michael@0 659 NS_ENSURE_SUCCESS(rv, rv);
michael@0 660 *aWriteCount = writeCount + (i + 1);
michael@0 661 return rv;
michael@0 662 }
michael@0 663 mBomState = BOM_SNIFFING_OVER;
michael@0 664 break;
michael@0 665 case SEEN_UTF_8_FIRST_BYTE:
michael@0 666 if (aFromSegment[i] == 0xBB) {
michael@0 667 mBomState = SEEN_UTF_8_SECOND_BYTE;
michael@0 668 } else {
michael@0 669 mBomState = BOM_SNIFFING_OVER;
michael@0 670 }
michael@0 671 break;
michael@0 672 case SEEN_UTF_8_SECOND_BYTE:
michael@0 673 if (aFromSegment[i] == 0xBF) {
michael@0 674 rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form
michael@0 675 NS_ENSURE_SUCCESS(rv, rv);
michael@0 676 uint32_t count = aCount - (i + 1);
michael@0 677 rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
michael@0 678 NS_ENSURE_SUCCESS(rv, rv);
michael@0 679 *aWriteCount = writeCount + (i + 1);
michael@0 680 return rv;
michael@0 681 }
michael@0 682 mBomState = BOM_SNIFFING_OVER;
michael@0 683 break;
michael@0 684 default:
michael@0 685 mBomState = BOM_SNIFFING_OVER;
michael@0 686 break;
michael@0 687 }
michael@0 688 }
michael@0 689 // if we get here, there either was no BOM or the BOM sniffing isn't complete
michael@0 690 // yet
michael@0 691
michael@0 692 MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark,
michael@0 693 "Should not come here if BOM was found.");
michael@0 694 MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
michael@0 695 "kCharsetFromOtherComponent is for XSLT.");
michael@0 696
michael@0 697 if (mBomState == BOM_SNIFFING_OVER &&
michael@0 698 mCharsetSource == kCharsetFromChannel) {
michael@0 699 // There was no BOM and the charset came from channel. mCharset
michael@0 700 // still contains the charset from the channel as set by an
michael@0 701 // earlier call to SetDocumentCharset(), since we didn't find a BOM and
michael@0 702 // overwrite mCharset. (Note that if the user has overridden the charset,
michael@0 703 // we don't come here but check <meta> for XSS-dangerous charsets first.)
michael@0 704 mFeedChardet = false;
michael@0 705 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 706 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0 707 aCount, aWriteCount);
michael@0 708 }
michael@0 709
michael@0 710 if (!mMetaScanner && (mMode == NORMAL ||
michael@0 711 mMode == VIEW_SOURCE_HTML ||
michael@0 712 mMode == LOAD_AS_DATA)) {
michael@0 713 mMetaScanner = new nsHtml5MetaScanner();
michael@0 714 }
michael@0 715
michael@0 716 if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) {
michael@0 717 // this is the last buffer
michael@0 718 uint32_t countToSniffingLimit =
michael@0 719 NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
michael@0 720 if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
michael@0 721 nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
michael@0 722 countToSniffingLimit);
michael@0 723 nsAutoCString encoding;
michael@0 724 mMetaScanner->sniff(&readable, encoding);
michael@0 725 if (!encoding.IsEmpty()) {
michael@0 726 // meta scan successful; honor overrides unless meta is XSS-dangerous
michael@0 727 if ((mCharsetSource == kCharsetFromParentForced ||
michael@0 728 mCharsetSource == kCharsetFromUserForced) &&
michael@0 729 EncodingUtils::IsAsciiCompatible(encoding)) {
michael@0 730 // Honor override
michael@0 731 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
michael@0 732 aFromSegment, aCount, aWriteCount);
michael@0 733 }
michael@0 734 mCharset.Assign(encoding);
michael@0 735 mCharsetSource = kCharsetFromMetaPrescan;
michael@0 736 mFeedChardet = false;
michael@0 737 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 738 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
michael@0 739 aFromSegment, aCount, aWriteCount);
michael@0 740 }
michael@0 741 }
michael@0 742 if (mCharsetSource == kCharsetFromParentForced ||
michael@0 743 mCharsetSource == kCharsetFromUserForced) {
michael@0 744 // meta not found, honor override
michael@0 745 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
michael@0 746 aFromSegment, aCount, aWriteCount);
michael@0 747 }
michael@0 748 return FinalizeSniffing(aFromSegment, aCount, aWriteCount,
michael@0 749 countToSniffingLimit);
michael@0 750 }
michael@0 751
michael@0 752 // not the last buffer
michael@0 753 if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
michael@0 754 nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
michael@0 755 nsAutoCString encoding;
michael@0 756 mMetaScanner->sniff(&readable, encoding);
michael@0 757 if (!encoding.IsEmpty()) {
michael@0 758 // meta scan successful; honor overrides unless meta is XSS-dangerous
michael@0 759 if ((mCharsetSource == kCharsetFromParentForced ||
michael@0 760 mCharsetSource == kCharsetFromUserForced) &&
michael@0 761 EncodingUtils::IsAsciiCompatible(encoding)) {
michael@0 762 // Honor override
michael@0 763 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0 764 aCount, aWriteCount);
michael@0 765 }
michael@0 766 mCharset.Assign(encoding);
michael@0 767 mCharsetSource = kCharsetFromMetaPrescan;
michael@0 768 mFeedChardet = false;
michael@0 769 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
michael@0 770 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
michael@0 771 aCount, aWriteCount);
michael@0 772 }
michael@0 773 }
michael@0 774
michael@0 775 if (!mSniffingBuffer) {
michael@0 776 const mozilla::fallible_t fallible = mozilla::fallible_t();
michael@0 777 mSniffingBuffer = new (fallible)
michael@0 778 uint8_t[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE];
michael@0 779 if (!mSniffingBuffer) {
michael@0 780 return NS_ERROR_OUT_OF_MEMORY;
michael@0 781 }
michael@0 782 }
michael@0 783 memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount);
michael@0 784 mSniffingLength += aCount;
michael@0 785 *aWriteCount = aCount;
michael@0 786 return NS_OK;
michael@0 787 }
michael@0 788
// Decodes aCount bytes starting at aFromSegment with mUnicodeDecoder and
// appends the resulting UTF-16 code units to the buffer chain that ends at
// mLastBuffer. A new buffer of NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE is
// chained on whenever the current one is full or the decoder reports
// NS_PARTIAL_MORE_OUTPUT.
//
// aFromSegment - the raw bytes to decode.
// aCount       - number of bytes available at aFromSegment.
// aWriteCount  - out-param; set to the number of bytes consumed on success.
//
// Returns NS_OK on success, NS_ERROR_NULL_POINTER (after marking the parser
// as broken) if mLastBuffer is null, or NS_ERROR_OUT_OF_MEMORY if a new
// buffer cannot be allocated. *aWriteCount is not written on the error paths.
// Must be called on the parser thread.
michael@0 789 nsresult
michael@0 790 nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment,
michael@0 791 uint32_t aCount,
michael@0 792 uint32_t* aWriteCount)
michael@0 793 {
michael@0 794 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 795 // mLastBuffer should always point to a buffer of the size
michael@0 796 // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
michael@0 797 if (!mLastBuffer) {
michael@0 798 NS_WARNING("mLastBuffer should not be null!");
michael@0 799 MarkAsBroken(NS_ERROR_NULL_POINTER);
michael@0 800 return NS_ERROR_NULL_POINTER;
michael@0 801 }
// The current last buffer is already full: chain a fresh one so that the
// decoder always has some free output space on the first iteration below.
michael@0 802 if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
michael@0 803 nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
michael@0 804 nsHtml5OwningUTF16Buffer::FalliblyCreate(
michael@0 805 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
michael@0 806 if (!newBuf) {
michael@0 807 return NS_ERROR_OUT_OF_MEMORY;
michael@0 808 }
michael@0 809 mLastBuffer = (mLastBuffer->next = newBuf.forget());
michael@0 810 }
// Running total of input bytes consumed across all decoder calls.
michael@0 811 int32_t totalByteCount = 0;
michael@0 812 for (;;) {
// On entry to Convert: byteCount is the number of input bytes still to be
// decoded and utf16Count is the free space left in the last buffer. The code
// below reads both back as the amounts actually consumed / produced.
michael@0 813 int32_t end = mLastBuffer->getEnd();
michael@0 814 int32_t byteCount = aCount - totalByteCount;
michael@0 815 int32_t utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end;
michael@0 816
michael@0 817 NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!");
michael@0 818 // byteCount may be zero to force the decoder to output a pending surrogate
michael@0 819 // pair.
michael@0 820
michael@0 821 nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count);
michael@0 822 MOZ_ASSERT(NS_SUCCEEDED(convResult));
michael@0 823
// Advance the output end marker and the input cursor by what was produced
// and consumed in this round.
michael@0 824 end += utf16Count;
michael@0 825 mLastBuffer->setEnd(end);
michael@0 826 totalByteCount += byteCount;
michael@0 827 aFromSegment += byteCount;
michael@0 828
michael@0 829 NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE,
michael@0 830 "The Unicode decoder wrote too much data.");
michael@0 831 NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes.");
michael@0 832
// Output buffer ran out: chain another buffer and go around again. Any other
// result is treated as "all input handled" and ends the loop.
michael@0 833 if (convResult == NS_PARTIAL_MORE_OUTPUT) {
michael@0 834 nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
michael@0 835 nsHtml5OwningUTF16Buffer::FalliblyCreate(
michael@0 836 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
michael@0 837 if (!newBuf) {
michael@0 838 return NS_ERROR_OUT_OF_MEMORY;
michael@0 839 }
michael@0 840 mLastBuffer = (mLastBuffer->next = newBuf.forget());
michael@0 841 // All input may have been consumed if there is a pending surrogate pair
michael@0 842 // that doesn't fit in the output buffer. Loop back to push a zero-length
michael@0 843 // input to the decoder in that case.
michael@0 844 } else {
michael@0 845 NS_ASSERTION(totalByteCount == (int32_t)aCount,
michael@0 846 "The Unicode decoder consumed the wrong number of bytes.");
michael@0 847 *aWriteCount = (uint32_t)totalByteCount;
michael@0 848 return NS_OK;
michael@0 849 }
michael@0 850 }
michael@0 851 }
michael@0 852
// Main-thread handler for the start of the network request.
//
// Forwards the notification to mObserver (if any), records aRequest in
// mRequest, moves the stream state to STREAM_BEING_READ, starts the
// tokenizer, tree builder and executor according to mMode, allocates the
// first UTF-16 buffer, and decides whether reparsing (mReparseForbidden) and
// character-set detection (mFeedChardet) are allowed for this load.
//
// aRequest - the request that is starting; stored in mRequest.
// aContext - passed through unchanged to mObserver.
//
// Returns the failure code of mExecutor->WillBuildModel() if that fails, the
// result of mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY) if the first
// buffer cannot be allocated, and NS_OK otherwise.
michael@0 853 nsresult
michael@0 854 nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
michael@0 855 {
michael@0 856 NS_PRECONDITION(STREAM_NOT_STARTED == mStreamState,
michael@0 857 "Got OnStartRequest when the stream had already started.");
michael@0 858 NS_PRECONDITION(!mExecutor->HasStarted(),
michael@0 859 "Got OnStartRequest at the wrong stage in the executor life cycle.");
michael@0 860 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 861 if (mObserver) {
michael@0 862 mObserver->OnStartRequest(aRequest, aContext);
michael@0 863 }
michael@0 864 mRequest = aRequest;
michael@0 865
michael@0 866 mStreamState = STREAM_BEING_READ;
michael@0 867
michael@0 868 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
michael@0 869 mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
michael@0 870 }
michael@0 871
michael@0 872 // For View Source, the parser should run with scripts "enabled" if a normal
michael@0 873 // load would have scripts enabled.
michael@0 874 bool scriptingEnabled = mMode == LOAD_AS_DATA ?
michael@0 875 false : mExecutor->IsScriptEnabled();
michael@0 876 mOwner->StartTokenizer(scriptingEnabled);
michael@0 877
// A failure to obtain the channel is not an error here: the document is then
// simply treated as not being a srcdoc document.
michael@0 878 bool isSrcdoc = false;
michael@0 879 nsCOMPtr<nsIChannel> channel;
michael@0 880 nsresult rv = GetChannel(getter_AddRefs(channel));
michael@0 881 if (NS_SUCCEEDED(rv)) {
michael@0 882 isSrcdoc = NS_IsSrcdocChannel(channel);
michael@0 883 }
michael@0 884 mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
michael@0 885 mTreeBuilder->setScriptingEnabled(scriptingEnabled);
// Script execution is only permitted for NORMAL loads with scripting enabled;
// every other mode/setting combination prevents it.
michael@0 886 mTreeBuilder->SetPreventScriptExecution(!((mMode == NORMAL) &&
michael@0 887 scriptingEnabled));
michael@0 888 mTokenizer->start();
michael@0 889 mExecutor->Start();
michael@0 890 mExecutor->StartReadingFromStage();
michael@0 891
michael@0 892 if (mMode == PLAIN_TEXT) {
michael@0 893 mTreeBuilder->StartPlainText();
michael@0 894 mTokenizer->StartPlainText();
michael@0 895 } else if (mMode == VIEW_SOURCE_PLAIN) {
michael@0 896 mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
michael@0 897 mTokenizer->StartPlainText();
michael@0 898 }
michael@0 899
michael@0 900 /*
michael@0 901 * If you move the following line, be very careful not to cause
michael@0 902 * WillBuildModel to be called before the document has had its
michael@0 903 * script global object set.
michael@0 904 */
michael@0 905 rv = mExecutor->WillBuildModel(eDTDMode_unknown);
michael@0 906 NS_ENSURE_SUCCESS(rv, rv);
michael@0 907
// Allocate the single initial buffer; both ends of the buffer chain point at
// it until WriteStreamBytes chains further buffers.
michael@0 908 nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
michael@0 909 nsHtml5OwningUTF16Buffer::FalliblyCreate(
michael@0 910 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
michael@0 911 if (!newBuf) {
michael@0 912 // marks this stream parser as terminated,
michael@0 913 // which prevents entry to code paths that
michael@0 914 // would use mFirstBuffer or mLastBuffer.
michael@0 915 return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
michael@0 916 }
michael@0 917 NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?");
michael@0 918 NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?");
michael@0 919 mFirstBuffer = mLastBuffer = newBuf;
michael@0 920
michael@0 921 rv = NS_OK;
michael@0 922
michael@0 923 // The line below means that the encoding can end up being wrong if
michael@0 924 // a view-source URL is loaded without having the encoding hint from a
michael@0 925 // previous normal load in the history.
michael@0 926 mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT);
michael@0 927
// Everything in the block below, including the attempt to retarget data
// delivery to the parser thread, only happens when mRequest can be queried
// to nsIHttpChannel. The rv written by this query is not returned: both
// remaining exits of this function return NS_OK explicitly.
michael@0 928 nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
michael@0 929 if (NS_SUCCEEDED(rv)) {
michael@0 930 nsAutoCString method;
michael@0 931 httpChannel->GetRequestMethod(method);
michael@0 932 // XXX does Necko have a way to renavigate POST, etc. without hitting
michael@0 933 // the network?
michael@0 934 if (!method.EqualsLiteral("GET")) {
michael@0 935 // This is the old Gecko behavior but the HTML5 spec disagrees.
michael@0 936 // Don't reparse on POST.
michael@0 937 mReparseForbidden = true;
michael@0 938 mFeedChardet = false; // can't restart anyway
michael@0 939 }
michael@0 940
michael@0 941 // Attempt to retarget delivery of data (via OnDataAvailable) to the parser
michael@0 942 // thread, rather than through the main thread.
michael@0 943 nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest =
michael@0 944 do_QueryInterface(mRequest);
michael@0 945 if (threadRetargetableRequest) {
michael@0 946 threadRetargetableRequest->RetargetDeliveryTo(mThread);
michael@0 947 }
michael@0 948 }
michael@0 949
michael@0 950 if (mCharsetSource == kCharsetFromParentFrame) {
michael@0 951 // Remember this in case chardet overwrites mCharsetSource
michael@0 952 mInitialEncodingWasFromParentFrame = true;
michael@0 953 }
michael@0 954
// A charset source at or above kCharsetFromAutoDetection means there is
// nothing left for the detector to contribute, so stop feeding it.
michael@0 955 if (mCharsetSource >= kCharsetFromAutoDetection) {
michael@0 956 mFeedChardet = false;
michael@0 957 }
michael@0 958
michael@0 959 nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
michael@0 960 if (!wyciwygChannel) {
michael@0 961 // we aren't ready to commit to an encoding yet
michael@0 962 // leave converter uninstantiated for now
michael@0 963 return NS_OK;
michael@0 964 }
michael@0 965
michael@0 966 // We are reloading a document.open()ed doc.
michael@0 967 mReparseForbidden = true;
michael@0 968 mFeedChardet = false;
michael@0 969
michael@0 970 // Instantiate the converter here to avoid BOM sniffing.
michael@0 971 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
michael@0 972 return NS_OK;
michael@0 973 }
michael@0 974
michael@0 975 nsresult
michael@0 976 nsHtml5StreamParser::CheckListenerChain()
michael@0 977 {
michael@0 978 NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!");
michael@0 979 if (!mObserver) {
michael@0 980 return NS_OK;
michael@0 981 }
michael@0 982 nsresult rv;
michael@0 983 nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable =
michael@0 984 do_QueryInterface(mObserver, &rv);
michael@0 985 if (NS_SUCCEEDED(rv) && retargetable) {
michael@0 986 rv = retargetable->CheckListenerChain();
michael@0 987 }
michael@0 988 return rv;
michael@0 989 }
michael@0 990
michael@0 991 void
michael@0 992 nsHtml5StreamParser::DoStopRequest()
michael@0 993 {
michael@0 994 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 995 NS_PRECONDITION(STREAM_BEING_READ == mStreamState,
michael@0 996 "Stream ended without being open.");
michael@0 997 mTokenizerMutex.AssertCurrentThreadOwns();
michael@0 998
michael@0 999 if (IsTerminated()) {
michael@0 1000 return;
michael@0 1001 }
michael@0 1002
michael@0 1003 mStreamState = STREAM_ENDED;
michael@0 1004
michael@0 1005 if (!mUnicodeDecoder) {
michael@0 1006 uint32_t writeCount;
michael@0 1007 nsresult rv;
michael@0 1008 if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) {
michael@0 1009 MarkAsBroken(rv);
michael@0 1010 return;
michael@0 1011 }
michael@0 1012 } else if (mFeedChardet) {
michael@0 1013 mChardet->Done();
michael@0 1014 }
michael@0 1015
michael@0 1016 if (IsTerminatedOrInterrupted()) {
michael@0 1017 return;
michael@0 1018 }
michael@0 1019
michael@0 1020 ParseAvailableData();
michael@0 1021 }
michael@0 1022
michael@0 1023 class nsHtml5RequestStopper : public nsRunnable
michael@0 1024 {
michael@0 1025 private:
michael@0 1026 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0 1027 public:
michael@0 1028 nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser)
michael@0 1029 : mStreamParser(aStreamParser)
michael@0 1030 {}
michael@0 1031 NS_IMETHODIMP Run()
michael@0 1032 {
michael@0 1033 mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
michael@0 1034 mStreamParser->DoStopRequest();
michael@0 1035 return NS_OK;
michael@0 1036 }
michael@0 1037 };
michael@0 1038
michael@0 1039 nsresult
michael@0 1040 nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
michael@0 1041 nsISupports* aContext,
michael@0 1042 nsresult status)
michael@0 1043 {
michael@0 1044 NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream.");
michael@0 1045 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 1046 if (mObserver) {
michael@0 1047 mObserver->OnStopRequest(aRequest, aContext, status);
michael@0 1048 }
michael@0 1049 nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this);
michael@0 1050 if (NS_FAILED(mThread->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) {
michael@0 1051 NS_WARNING("Dispatching StopRequest event failed.");
michael@0 1052 }
michael@0 1053 return NS_OK;
michael@0 1054 }
michael@0 1055
// Parser-thread half of data delivery; the caller must hold mTokenizerMutex.
//
// Feeds aLength bytes at aBuffer either to the decoder (optionally passing
// them through the charset detector first) or, when no decoder exists yet,
// to the encoding sniffer. On failure the parser is marked as broken.
// Otherwise the newly available data is parsed and, unless a flush timer is
// already armed or the parser is speculating, a one-shot flush timer is
// armed.
//
// aBuffer - the bytes received from the stream.
// aLength - number of bytes at aBuffer.
michael@0 1056 void
michael@0 1057 nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength)
michael@0 1058 {
michael@0 1059 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 1060 NS_PRECONDITION(STREAM_BEING_READ == mStreamState,
michael@0 1061 "DoDataAvailable called when stream not open.");
michael@0 1062 mTokenizerMutex.AssertCurrentThreadOwns();
michael@0 1063
michael@0 1064 if (IsTerminated()) {
michael@0 1065 return;
michael@0 1066 }
michael@0 1067
michael@0 1068 uint32_t writeCount;
michael@0 1069 nsresult rv;
michael@0 1070 if (HasDecoder()) {
michael@0 1071 if (mFeedChardet) {
// The detector reports through dontFeed whether it wants further input;
// once it does not, stop feeding it.
michael@0 1072 bool dontFeed;
michael@0 1073 mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed);
michael@0 1074 mFeedChardet = !dontFeed;
michael@0 1075 }
michael@0 1076 rv = WriteStreamBytes(aBuffer, aLength, &writeCount);
michael@0 1077 } else {
michael@0 1078 rv = SniffStreamBytes(aBuffer, aLength, &writeCount);
michael@0 1079 }
michael@0 1080 if (NS_FAILED(rv)) {
michael@0 1081 MarkAsBroken(rv);
michael@0 1082 return;
michael@0 1083 }
michael@0 1084 NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed.");
michael@0 1085
michael@0 1086 if (IsTerminatedOrInterrupted()) {
michael@0 1087 return;
michael@0 1088 }
michael@0 1089
michael@0 1090 ParseAvailableData();
michael@0 1091
michael@0 1092 if (mFlushTimerArmed || mSpeculating) {
michael@0 1093 return;
michael@0 1094 }
michael@0 1095
// NOTE(review): the delay selection below uses sTimerInitialDelay when
// mFlushTimerEverFired is true and sTimerSubsequentDelay when it is false,
// which reads as the reverse of what the names suggest. Both statics are
// initialized to 120 at the top of this file, so the defaults behave the
// same either way. Confirm the intended order before changing it.
michael@0 1096 mFlushTimer->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback,
michael@0 1097 static_cast<void*> (this),
michael@0 1098 mFlushTimerEverFired ?
michael@0 1099 sTimerInitialDelay :
michael@0 1100 sTimerSubsequentDelay,
michael@0 1101 nsITimer::TYPE_ONE_SHOT);
michael@0 1102 mFlushTimerArmed = true;
michael@0 1103 }
michael@0 1104
michael@0 1105 class nsHtml5DataAvailable : public nsRunnable
michael@0 1106 {
michael@0 1107 private:
michael@0 1108 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0 1109 nsAutoArrayPtr<uint8_t> mData;
michael@0 1110 uint32_t mLength;
michael@0 1111 public:
michael@0 1112 nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser,
michael@0 1113 uint8_t* aData,
michael@0 1114 uint32_t aLength)
michael@0 1115 : mStreamParser(aStreamParser)
michael@0 1116 , mData(aData)
michael@0 1117 , mLength(aLength)
michael@0 1118 {}
michael@0 1119 NS_IMETHODIMP Run()
michael@0 1120 {
michael@0 1121 mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
michael@0 1122 mStreamParser->DoDataAvailable(mData, mLength);
michael@0 1123 return NS_OK;
michael@0 1124 }
michael@0 1125 };
michael@0 1126
// Stream listener callback for incoming data. Handles delivery on either the
// main thread or the parser thread.
//
// On the main thread the data is copied out of aInStream into a freshly
// allocated array and posted to mThread in an nsHtml5DataAvailable runnable.
// On any other thread (asserted to be the parser thread) the tokenizer mutex
// is taken and the data is pushed straight into the parser through
// ReadSegments()/CopySegmentsToParser.
//
// aRequest      - the request delivering data; expected to equal mRequest.
// aContext      - unused here.
// aInStream     - stream to read the data from.
// aSourceOffset - unused here.
// aLength       - number of bytes to read from aInStream.
//
// Returns the executor's broken status if it is already broken, the result
// of MarkAsBroken(NS_ERROR_OUT_OF_MEMORY) if the copy buffer cannot be
// allocated, the failure code of the stream read if reading fails, and a
// success code otherwise. A failed dispatch to mThread only produces a
// warning.
michael@0 1127 nsresult
michael@0 1128 nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
michael@0 1129 nsISupports* aContext,
michael@0 1130 nsIInputStream* aInStream,
michael@0 1131 uint64_t aSourceOffset,
michael@0 1132 uint32_t aLength)
michael@0 1133 {
michael@0 1134 nsresult rv;
michael@0 1135 if (NS_FAILED(rv = mExecutor->IsBroken())) {
michael@0 1136 return rv;
michael@0 1137 }
michael@0 1138
michael@0 1139 NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream.");
michael@0 1140 uint32_t totalRead;
michael@0 1141 // Main thread to parser thread dispatch requires copying to buffer first.
michael@0 1142 if (NS_IsMainThread()) {
michael@0 1143 const mozilla::fallible_t fallible = mozilla::fallible_t();
michael@0 1144 nsAutoArrayPtr<uint8_t> data(new (fallible) uint8_t[aLength]);
michael@0 1145 if (!data) {
michael@0 1146 return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
michael@0 1147 }
michael@0 1148 rv = aInStream->Read(reinterpret_cast<char*>(data.get()),
michael@0 1149 aLength, &totalRead);
michael@0 1150 NS_ENSURE_SUCCESS(rv, rv);
michael@0 1151 NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?");
michael@0 1152
// data.forget() hands the array to the runnable; the runnable is given
// totalRead (the number of bytes actually read), not aLength.
michael@0 1153 nsCOMPtr<nsIRunnable> dataAvailable = new nsHtml5DataAvailable(this,
michael@0 1154 data.forget(),
michael@0 1155 totalRead);
michael@0 1156 if (NS_FAILED(mThread->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) {
michael@0 1157 NS_WARNING("Dispatching DataAvailable event failed.");
michael@0 1158 }
michael@0 1159 return rv;
michael@0 1160 } else {
michael@0 1161 NS_ASSERTION(IsParserThread(), "Wrong thread!");
// DoDataAvailable(), reached via CopySegmentsToParser, asserts that this
// mutex is held.
michael@0 1162 mozilla::MutexAutoLock autoLock(mTokenizerMutex);
michael@0 1163
michael@0 1164 // Read directly from response buffer.
michael@0 1165 rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength,
michael@0 1166 &totalRead);
michael@0 1167 if (NS_FAILED(rv)) {
michael@0 1168 NS_WARNING("Failed reading response data to parser");
michael@0 1169 return rv;
michael@0 1170 }
michael@0 1171 return NS_OK;
michael@0 1172 }
michael@0 1173 }
michael@0 1174
michael@0 1175 /* static */
michael@0 1176 NS_METHOD
michael@0 1177 nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream,
michael@0 1178 void *aClosure,
michael@0 1179 const char *aFromSegment,
michael@0 1180 uint32_t aToOffset,
michael@0 1181 uint32_t aCount,
michael@0 1182 uint32_t *aWriteCount)
michael@0 1183 {
michael@0 1184 nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure);
michael@0 1185
michael@0 1186 parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount);
michael@0 1187 // Assume DoDataAvailable consumed all available bytes.
michael@0 1188 *aWriteCount = aCount;
michael@0 1189 return NS_OK;
michael@0 1190 }
michael@0 1191
// Resolves the encoding label in aEncoding and decides whether it names an
// encoding that the parser should try to switch to.
//
// aEncoding - in: the encoding label to resolve; out (only when true is
//             returned): the resolved encoding name, after the
//             substitutions described below.
//
// Returns false when the label cannot be resolved (an "EncMetaUnsupported"
// complaint is raised) or when the resolved encoding equals mCharset. In the
// latter case mCharsetSource is raised to kCharsetFromMetaTag and chardet
// feeding is turned off. Returns true otherwise.
//
// Substitutions applied before the comparison: UTF-16BE / UTF-16LE become
// UTF-8 and x-user-defined becomes windows-1252, each with its own complaint.
michael@0 1192 bool
michael@0 1193 nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
michael@0 1194 {
michael@0 1195 nsAutoCString newEncoding;
michael@0 1196 if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) {
michael@0 1197 // the encoding name is bogus
michael@0 1198 mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported",
michael@0 1199 true,
michael@0 1200 mTokenizer->getLineNumber());
michael@0 1201 return false;
michael@0 1202 }
michael@0 1203
michael@0 1204 if (newEncoding.EqualsLiteral("UTF-16BE") ||
michael@0 1205 newEncoding.EqualsLiteral("UTF-16LE")) {
michael@0 1206 mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16",
michael@0 1207 true,
michael@0 1208 mTokenizer->getLineNumber());
michael@0 1209 newEncoding.Assign("UTF-8");
michael@0 1210 }
michael@0 1211
michael@0 1212 if (newEncoding.EqualsLiteral("x-user-defined")) {
michael@0 1213 // WebKit/Blink hack for Indian and Armenian legacy sites
michael@0 1214 mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined",
michael@0 1215 true,
michael@0 1216 mTokenizer->getLineNumber());
michael@0 1217 newEncoding.Assign("windows-1252");
michael@0 1218 }
michael@0 1219
// The declared encoding is the one already in use: no switch is needed, but
// a complaint is raised if the current charset source ranks below
// kCharsetFromMetaPrescan.
michael@0 1220 if (newEncoding.Equals(mCharset)) {
michael@0 1221 if (mCharsetSource < kCharsetFromMetaPrescan) {
michael@0 1222 if (mInitialEncodingWasFromParentFrame) {
michael@0 1223 mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame",
michael@0 1224 false,
michael@0 1225 mTokenizer->getLineNumber());
michael@0 1226 } else {
michael@0 1227 mTreeBuilder->MaybeComplainAboutCharset("EncLateMeta",
michael@0 1228 false,
michael@0 1229 mTokenizer->getLineNumber());
michael@0 1230 }
michael@0 1231 }
michael@0 1232 mCharsetSource = kCharsetFromMetaTag; // become confident
michael@0 1233 mFeedChardet = false; // don't feed chardet when confident
michael@0 1234 return false;
michael@0 1235 }
michael@0 1236
michael@0 1237 aEncoding.Assign(newEncoding);
michael@0 1238 return true;
michael@0 1239 }
michael@0 1240
// Handles an in-document encoding declaration on the parser thread.
//
// aEncoding - the declared encoding label as UTF-16; it is not modified.
//
// Returns true when a charset switch to the declared encoding has been
// requested from the tree builder (tree ops are then flushed and the parser
// is interrupted). Returns false when the current charset source is already
// at or above kCharsetFromMetaTag, when PreferredForInternalEncodingDecl()
// rejects the label, or when reparsing is forbidden.
michael@0 1241 bool
michael@0 1242 nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
michael@0 1243 {
michael@0 1244 // This code needs to stay in sync with
michael@0 1245 // nsHtml5MetaScanner::tryCharset. Unfortunately, the
michael@0 1246 // trickery with member fields there leads to some copy-paste reuse. :-(
michael@0 1247 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 1248 if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec
michael@0 1249 return false;
michael@0 1250 }
michael@0 1251
// Work on a UTF-8 copy of the label; PreferredForInternalEncodingDecl() may
// overwrite it with the resolved encoding name.
michael@0 1252 nsAutoCString newEncoding;
michael@0 1253 CopyUTF16toUTF8(*aEncoding, newEncoding);
michael@0 1254
michael@0 1255 if (!PreferredForInternalEncodingDecl(newEncoding)) {
michael@0 1256 return false;
michael@0 1257 }
michael@0 1258
michael@0 1259 if (mReparseForbidden) {
michael@0 1260 // This mReparseForbidden check happens after the call to
michael@0 1261 // PreferredForInternalEncodingDecl so that if that method calls
michael@0 1262 // MaybeComplainAboutCharset, its charset complaint wins over the one
michael@0 1263 // below.
michael@0 1264 mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaTooLate",
michael@0 1265 true,
michael@0 1266 mTokenizer->getLineNumber());
michael@0 1267 return false; // not reparsing even if we wanted to
michael@0 1268 }
michael@0 1269
michael@0 1270 // Avoid having the chardet ask for another restart after this restart
michael@0 1271 // request.
michael@0 1272 mFeedChardet = false;
michael@0 1273 mTreeBuilder->NeedsCharsetSwitchTo(newEncoding,
michael@0 1274 kCharsetFromMetaTag,
michael@0 1275 mTokenizer->getLineNumber());
michael@0 1276 FlushTreeOpsAndDisarmTimer();
michael@0 1277 Interrupt();
michael@0 1278 // the tree op executor will cause the stream parser to terminate
michael@0 1279 // if the charset switch request is accepted or it'll uninterrupt
michael@0 1280 // if the request failed. Note that if the restart request fails,
michael@0 1281 // we don't bother trying to make chardet resume. Might as well
michael@0 1282 // assume that chardet-requested restarts would fail, too.
michael@0 1283 return true;
michael@0 1284 }
michael@0 1285
michael@0 1286 void
michael@0 1287 nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer()
michael@0 1288 {
michael@0 1289 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 1290 if (mFlushTimerArmed) {
michael@0 1291 // avoid calling Cancel if the flush timer isn't armed to avoid acquiring
michael@0 1292 // a mutex
michael@0 1293 mFlushTimer->Cancel();
michael@0 1294 mFlushTimerArmed = false;
michael@0 1295 }
michael@0 1296 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
michael@0 1297 mTokenizer->FlushViewSource();
michael@0 1298 }
michael@0 1299 mTreeBuilder->Flush();
michael@0 1300 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
michael@0 1301 NS_WARNING("failed to dispatch executor flush event");
michael@0 1302 }
michael@0 1303 }
michael@0 1304
// Tokenizes everything currently available in the buffer chain starting at
// mFirstBuffer. Runs on the parser thread with mTokenizerMutex held.
//
// The loop ends when:
//  - the parser is terminated or interrupted;
//  - the chain is exhausted while the stream is still being read (speculative
//    loads are flushed and the function returns to wait for more data); or
//  - the chain is exhausted after the stream has ended (EOF is delivered to
//    the tokenizer and tree builder exactly once, guarded by mAtEOF).
//
// When the tree builder reports a script after tokenizing a buffer, a new
// speculation is started: tree ops are flushed, the tree builder's op sink is
// redirected to the speculation, and mSpeculating is set.
michael@0 1305 void
michael@0 1306 nsHtml5StreamParser::ParseAvailableData()
michael@0 1307 {
michael@0 1308 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 1309 mTokenizerMutex.AssertCurrentThreadOwns();
michael@0 1310
michael@0 1311 if (IsTerminatedOrInterrupted()) {
michael@0 1312 return;
michael@0 1313 }
michael@0 1314
michael@0 1315 for (;;) {
// The first buffer has been fully consumed: either this is the end of the
// chain (handled by stream state below) or move on to the next buffer.
michael@0 1316 if (!mFirstBuffer->hasMore()) {
michael@0 1317 if (mFirstBuffer == mLastBuffer) {
michael@0 1318 switch (mStreamState) {
michael@0 1319 case STREAM_BEING_READ:
michael@0 1320 // never release the last buffer.
michael@0 1321 if (!mSpeculating) {
michael@0 1322 // reuse buffer space if not speculating
michael@0 1323 mFirstBuffer->setStart(0);
michael@0 1324 mFirstBuffer->setEnd(0);
michael@0 1325 }
michael@0 1326 mTreeBuilder->FlushLoads();
michael@0 1327 // Dispatch this runnable unconditionally, because the loads
michael@0 1328 // that need flushing may have been flushed earlier even if the
michael@0 1329 // flush right above here did nothing.
michael@0 1330 if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher))) {
michael@0 1331 NS_WARNING("failed to dispatch load flush event");
michael@0 1332 }
michael@0 1333 return; // no more data for now but expecting more
michael@0 1334 case STREAM_ENDED:
// mAtEOF makes the end-of-file handling below run at most once.
michael@0 1335 if (mAtEOF) {
michael@0 1336 return;
michael@0 1337 }
michael@0 1338 mAtEOF = true;
// The document ended with a charset source below kCharsetFromMetaTag: raise
// the complaint that matches how the initial encoding was obtained / which
// mode is in use.
michael@0 1339 if (mCharsetSource < kCharsetFromMetaTag) {
michael@0 1340 if (mInitialEncodingWasFromParentFrame) {
michael@0 1341 // Unfortunately, this check doesn't take effect for
michael@0 1342 // cross-origin frames, so cross-origin ad frames that have
michael@0 1343 // no text and only an image or a Flash embed get the more
michael@0 1344 // severe message from the next if block. The message is
michael@0 1345 // technically accurate, though.
michael@0 1346 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationFrame",
michael@0 1347 false,
michael@0 1348 0);
michael@0 1349 } else if (mMode == NORMAL) {
michael@0 1350 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclaration",
michael@0 1351 true,
michael@0 1352 0);
michael@0 1353 } else if (mMode == PLAIN_TEXT) {
michael@0 1354 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationPlain",
michael@0 1355 true,
michael@0 1356 0);
michael@0 1357 }
michael@0 1358 }
michael@0 1359 mTokenizer->eof();
michael@0 1360 mTreeBuilder->StreamEnded();
michael@0 1361 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
michael@0 1362 mTokenizer->EndViewSource();
michael@0 1363 }
michael@0 1364 FlushTreeOpsAndDisarmTimer();
michael@0 1365 return; // no more data and not expecting more
michael@0 1366 default:
michael@0 1367 NS_NOTREACHED("It should be impossible to reach this.");
michael@0 1368 return;
michael@0 1369 }
michael@0 1370 }
michael@0 1371 mFirstBuffer = mFirstBuffer->next;
michael@0 1372 continue;
michael@0 1373 }
michael@0 1374
michael@0 1375 // now we have a non-empty buffer
// adjust() is told whether the previous tokenizer run ended on a CR; the
// buffer may be empty afterwards, hence the second hasMore() check.
michael@0 1376 mFirstBuffer->adjust(mLastWasCR);
michael@0 1377 mLastWasCR = false;
michael@0 1378 if (mFirstBuffer->hasMore()) {
michael@0 1379 mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
michael@0 1380 // At this point, internalEncodingDeclaration() may have called
michael@0 1381 // Terminate, but that never happens together with script.
michael@0 1382 // Can't assert that here, though, because it's possible that the main
michael@0 1383 // thread has called Terminate() while this thread was parsing.
michael@0 1384 if (mTreeBuilder->HasScript()) {
michael@0 1385 // HasScript() cannot return true if the tree builder is preventing
michael@0 1386 // script execution.
michael@0 1387 MOZ_ASSERT(mMode == NORMAL);
// mSpeculationMutex is taken here while mTokenizerMutex is already held.
michael@0 1388 mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
michael@0 1389 nsHtml5Speculation* speculation =
michael@0 1390 new nsHtml5Speculation(mFirstBuffer,
michael@0 1391 mFirstBuffer->getStart(),
michael@0 1392 mTokenizer->getLineNumber(),
michael@0 1393 mTreeBuilder->newSnapshot());
michael@0 1394 mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(),
michael@0 1395 speculation->GetStartLineNumber());
// Flush what has been built so far before redirecting further tree ops
// into the speculation.
michael@0 1396 FlushTreeOpsAndDisarmTimer();
michael@0 1397 mTreeBuilder->SetOpSink(speculation);
michael@0 1398 mSpeculations.AppendElement(speculation); // adopts the pointer
michael@0 1399 mSpeculating = true;
michael@0 1400 }
michael@0 1401 if (IsTerminatedOrInterrupted()) {
michael@0 1402 return;
michael@0 1403 }
michael@0 1404 }
michael@0 1405 continue;
michael@0 1406 }
michael@0 1407 }
michael@0 1408
michael@0 1409 class nsHtml5StreamParserContinuation : public nsRunnable
michael@0 1410 {
michael@0 1411 private:
michael@0 1412 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0 1413 public:
michael@0 1414 nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser)
michael@0 1415 : mStreamParser(aStreamParser)
michael@0 1416 {}
michael@0 1417 NS_IMETHODIMP Run()
michael@0 1418 {
michael@0 1419 mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
michael@0 1420 mStreamParser->Uninterrupt();
michael@0 1421 mStreamParser->ParseAvailableData();
michael@0 1422 return NS_OK;
michael@0 1423 }
michael@0 1424 };
michael@0 1425
michael@0 1426 void
michael@0 1427 nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer,
michael@0 1428 nsHtml5TreeBuilder* aTreeBuilder,
michael@0 1429 bool aLastWasCR)
michael@0 1430 {
michael@0 1431 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 1432 NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML),
michael@0 1433 "ContinueAfterScripts called in view source mode!");
michael@0 1434 if (NS_FAILED(mExecutor->IsBroken())) {
michael@0 1435 return;
michael@0 1436 }
michael@0 1437 #ifdef DEBUG
michael@0 1438 mExecutor->AssertStageEmpty();
michael@0 1439 #endif
michael@0 1440 bool speculationFailed = false;
michael@0 1441 {
michael@0 1442 mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
michael@0 1443 if (mSpeculations.IsEmpty()) {
michael@0 1444 NS_NOTREACHED("ContinueAfterScripts called without speculations.");
michael@0 1445 return;
michael@0 1446 }
michael@0 1447 nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
michael@0 1448 if (aLastWasCR ||
michael@0 1449 !aTokenizer->isInDataState() ||
michael@0 1450 !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) {
michael@0 1451 speculationFailed = true;
michael@0 1452 // We've got a failed speculation :-(
michael@0 1453 Interrupt(); // Make the parser thread release the tokenizer mutex sooner
michael@0 1454 // now fall out of the speculationAutoLock into the tokenizerAutoLock block
michael@0 1455 } else {
michael@0 1456 // We've got a successful speculation!
michael@0 1457 if (mSpeculations.Length() > 1) {
michael@0 1458 // the first speculation isn't the current speculation, so there's
michael@0 1459 // no need to bother the parser thread.
michael@0 1460 speculation->FlushToSink(mExecutor);
michael@0 1461 NS_ASSERTION(!mExecutor->IsScriptExecuting(),
michael@0 1462 "ParseUntilBlocked() was supposed to ensure we don't come "
michael@0 1463 "here when scripts are executing.");
michael@0 1464 NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if "
michael@0 1465 "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
michael@0 1466 "only caller of this method?");
michael@0 1467 mSpeculations.RemoveElementAt(0);
michael@0 1468 return;
michael@0 1469 }
michael@0 1470 // else
michael@0 1471 Interrupt(); // Make the parser thread release the tokenizer mutex sooner
michael@0 1472
michael@0 1473 // now fall through
michael@0 1474 // the first speculation is the current speculation. Need to
michael@0 1475 // release the the speculation mutex and acquire the tokenizer
michael@0 1476 // mutex. (Just acquiring the other mutex here would deadlock)
michael@0 1477 }
michael@0 1478 }
michael@0 1479 {
michael@0 1480 mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex);
michael@0 1481 #ifdef DEBUG
michael@0 1482 {
michael@0 1483 nsCOMPtr<nsIThread> mainThread;
michael@0 1484 NS_GetMainThread(getter_AddRefs(mainThread));
michael@0 1485 mAtomTable.SetPermittedLookupThread(mainThread);
michael@0 1486 }
michael@0 1487 #endif
michael@0 1488 // In principle, the speculation mutex should be acquired here,
michael@0 1489 // but there's no point, because the parser thread only acquires it
michael@0 1490 // when it has also acquired the tokenizer mutex and we are already
michael@0 1491 // holding the tokenizer mutex.
michael@0 1492 if (speculationFailed) {
michael@0 1493 // Rewind the stream
michael@0 1494 mAtEOF = false;
michael@0 1495 nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
michael@0 1496 mFirstBuffer = speculation->GetBuffer();
michael@0 1497 mFirstBuffer->setStart(speculation->GetStart());
michael@0 1498 mTokenizer->setLineNumber(speculation->GetStartLineNumber());
michael@0 1499
michael@0 1500 nsContentUtils::ReportToConsole(nsIScriptError::warningFlag,
michael@0 1501 NS_LITERAL_CSTRING("DOM Events"),
michael@0 1502 mExecutor->GetDocument(),
michael@0 1503 nsContentUtils::eDOM_PROPERTIES,
michael@0 1504 "SpeculationFailed",
michael@0 1505 nullptr, 0,
michael@0 1506 nullptr,
michael@0 1507 EmptyString(),
michael@0 1508 speculation->GetStartLineNumber());
michael@0 1509
michael@0 1510 nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
michael@0 1511 while (buffer) {
michael@0 1512 buffer->setStart(0);
michael@0 1513 buffer = buffer->next;
michael@0 1514 }
michael@0 1515
michael@0 1516 mSpeculations.Clear(); // potentially a huge number of destructors
michael@0 1517 // run here synchronously on the main thread...
michael@0 1518
michael@0 1519 mTreeBuilder->flushCharacters(); // empty the pending buffer
michael@0 1520 mTreeBuilder->ClearOps(); // now get rid of the failed ops
michael@0 1521
michael@0 1522 mTreeBuilder->SetOpSink(mExecutor->GetStage());
michael@0 1523 mExecutor->StartReadingFromStage();
michael@0 1524 mSpeculating = false;
michael@0 1525
michael@0 1526 // Copy state over
michael@0 1527 mLastWasCR = aLastWasCR;
michael@0 1528 mTokenizer->loadState(aTokenizer);
michael@0 1529 mTreeBuilder->loadState(aTreeBuilder, &mAtomTable);
michael@0 1530 } else {
michael@0 1531 // We've got a successful speculation and at least a moment ago it was
michael@0 1532 // the current speculation
michael@0 1533 mSpeculations.ElementAt(0)->FlushToSink(mExecutor);
michael@0 1534 NS_ASSERTION(!mExecutor->IsScriptExecuting(),
michael@0 1535 "ParseUntilBlocked() was supposed to ensure we don't come "
michael@0 1536 "here when scripts are executing.");
michael@0 1537 NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if "
michael@0 1538 "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
michael@0 1539 "only caller of this method?");
michael@0 1540 mSpeculations.RemoveElementAt(0);
michael@0 1541 if (mSpeculations.IsEmpty()) {
michael@0 1542 // yes, it was still the only speculation. Now stop speculating
michael@0 1543 // However, before telling the executor to read from stage, flush
michael@0 1544 // any pending ops straight to the executor, because otherwise
michael@0 1545 // they remain unflushed until we get more data from the network.
michael@0 1546 mTreeBuilder->SetOpSink(mExecutor);
michael@0 1547 mTreeBuilder->Flush(true);
michael@0 1548 mTreeBuilder->SetOpSink(mExecutor->GetStage());
michael@0 1549 mExecutor->StartReadingFromStage();
michael@0 1550 mSpeculating = false;
michael@0 1551 }
michael@0 1552 }
michael@0 1553 nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
michael@0 1554 if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
michael@0 1555 NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
michael@0 1556 }
michael@0 1557 // A stream event might run before this event runs, but that's harmless.
michael@0 1558 #ifdef DEBUG
michael@0 1559 mAtomTable.SetPermittedLookupThread(mThread);
michael@0 1560 #endif
michael@0 1561 }
michael@0 1562 }
michael@0 1563
michael@0 1564 void
michael@0 1565 nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch()
michael@0 1566 {
michael@0 1567 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 1568 nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
michael@0 1569 if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
michael@0 1570 NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
michael@0 1571 }
michael@0 1572 }
michael@0 1573
michael@0 1574 class nsHtml5TimerKungFu : public nsRunnable
michael@0 1575 {
michael@0 1576 private:
michael@0 1577 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser;
michael@0 1578 public:
michael@0 1579 nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser)
michael@0 1580 : mStreamParser(aStreamParser)
michael@0 1581 {}
michael@0 1582 NS_IMETHODIMP Run()
michael@0 1583 {
michael@0 1584 if (mStreamParser->mFlushTimer) {
michael@0 1585 mStreamParser->mFlushTimer->Cancel();
michael@0 1586 mStreamParser->mFlushTimer = nullptr;
michael@0 1587 }
michael@0 1588 return NS_OK;
michael@0 1589 }
michael@0 1590 };
michael@0 1591
michael@0 1592 void
michael@0 1593 nsHtml5StreamParser::DropTimer()
michael@0 1594 {
michael@0 1595 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
michael@0 1596 /*
michael@0 1597 * Simply nulling out the timer wouldn't work, because if the timer is
michael@0 1598 * armed, it needs to be canceled first. Simply canceling it first wouldn't
michael@0 1599 * work, because nsTimerImpl::Cancel is not safe for calling from outside
michael@0 1600 * the thread where nsTimerImpl::Fire would run. It's not safe to
michael@0 1601 * dispatch a runnable to cancel the timer from the destructor of this
michael@0 1602 * class, because the timer has a weak (void*) pointer back to this instance
michael@0 1603 * of the stream parser and having the timer fire before the runnable
michael@0 1604 * cancels it would make the timer access a deleted object.
michael@0 1605 *
michael@0 1606 * This DropTimer method addresses these issues. This method must be called
michael@0 1607 * on the main thread before the destructor of this class is reached.
michael@0 1608 * The nsHtml5TimerKungFu object has an nsHtml5RefPtr that addrefs this
michael@0 1609 * stream parser object to keep it alive until the runnable is done.
michael@0 1610 * The runnable cancels the timer on the parser thread, drops the timer
michael@0 1611 * and lets nsHtml5RefPtr send a runnable back to the main thread to
michael@0 1612 * release the stream parser.
michael@0 1613 */
michael@0 1614 if (mFlushTimer) {
michael@0 1615 nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this);
michael@0 1616 if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
michael@0 1617 NS_WARNING("Failed to dispatch TimerKungFu event");
michael@0 1618 }
michael@0 1619 }
michael@0 1620 }
michael@0 1621
michael@0 1622 // Using a static, because the method name Notify is taken by the chardet
michael@0 1623 // callback.
michael@0 1624 void
michael@0 1625 nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure)
michael@0 1626 {
michael@0 1627 (static_cast<nsHtml5StreamParser*> (aClosure))->TimerFlush();
michael@0 1628 }
michael@0 1629
michael@0 1630 void
michael@0 1631 nsHtml5StreamParser::TimerFlush()
michael@0 1632 {
michael@0 1633 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 1634 mozilla::MutexAutoLock autoLock(mTokenizerMutex);
michael@0 1635
michael@0 1636 NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating.");
michael@0 1637
michael@0 1638 // The timer fired if we got here. No need to cancel it. Mark it as
michael@0 1639 // not armed, though.
michael@0 1640 mFlushTimerArmed = false;
michael@0 1641
michael@0 1642 mFlushTimerEverFired = true;
michael@0 1643
michael@0 1644 if (IsTerminatedOrInterrupted()) {
michael@0 1645 return;
michael@0 1646 }
michael@0 1647
michael@0 1648 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
michael@0 1649 mTreeBuilder->Flush(); // delete useless ops
michael@0 1650 if (mTokenizer->FlushViewSource()) {
michael@0 1651 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
michael@0 1652 NS_WARNING("failed to dispatch executor flush event");
michael@0 1653 }
michael@0 1654 }
michael@0 1655 } else {
michael@0 1656 // we aren't speculating and we don't know when new data is
michael@0 1657 // going to arrive. Send data to the main thread.
michael@0 1658 if (mTreeBuilder->Flush(true)) {
michael@0 1659 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
michael@0 1660 NS_WARNING("failed to dispatch executor flush event");
michael@0 1661 }
michael@0 1662 }
michael@0 1663 }
michael@0 1664 }
michael@0 1665
michael@0 1666 void
michael@0 1667 nsHtml5StreamParser::MarkAsBroken(nsresult aRv)
michael@0 1668 {
michael@0 1669 NS_ASSERTION(IsParserThread(), "Wrong thread!");
michael@0 1670 mTokenizerMutex.AssertCurrentThreadOwns();
michael@0 1671
michael@0 1672 Terminate();
michael@0 1673 mTreeBuilder->MarkAsBroken(aRv);
michael@0 1674 mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false);
michael@0 1675 NS_ASSERTION(hadOps, "Should have had the markAsBroken op!");
michael@0 1676 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) {
michael@0 1677 NS_WARNING("failed to dispatch executor flush event");
michael@0 1678 }
michael@0 1679 }

mercurial