Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set sw=2 ts=2 et tw=79: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | #include "mozilla/DebugOnly.h" |
michael@0 | 8 | |
michael@0 | 9 | #include "nsHtml5StreamParser.h" |
michael@0 | 10 | #include "nsContentUtils.h" |
michael@0 | 11 | #include "nsHtml5Tokenizer.h" |
michael@0 | 12 | #include "nsIHttpChannel.h" |
michael@0 | 13 | #include "nsHtml5Parser.h" |
michael@0 | 14 | #include "nsHtml5TreeBuilder.h" |
michael@0 | 15 | #include "nsHtml5AtomTable.h" |
michael@0 | 16 | #include "nsHtml5Module.h" |
michael@0 | 17 | #include "nsHtml5RefPtr.h" |
michael@0 | 18 | #include "nsIScriptError.h" |
michael@0 | 19 | #include "mozilla/Preferences.h" |
michael@0 | 20 | #include "nsHtml5Highlighter.h" |
michael@0 | 21 | #include "expat_config.h" |
michael@0 | 22 | #include "expat.h" |
michael@0 | 23 | #include "nsINestedURI.h" |
michael@0 | 24 | #include "nsCharsetSource.h" |
michael@0 | 25 | #include "nsIWyciwygChannel.h" |
michael@0 | 26 | #include "nsIThreadRetargetableRequest.h" |
michael@0 | 27 | #include "nsPrintfCString.h" |
michael@0 | 28 | #include "nsNetUtil.h" |
michael@0 | 29 | |
michael@0 | 30 | #include "mozilla/dom/EncodingUtils.h" |
michael@0 | 31 | |
michael@0 | 32 | using namespace mozilla; |
michael@0 | 33 | using mozilla::dom::EncodingUtils; |
michael@0 | 34 | |
michael@0 | 35 | int32_t nsHtml5StreamParser::sTimerInitialDelay = 120; |
michael@0 | 36 | int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120; |
michael@0 | 37 | |
michael@0 | 38 | // static |
michael@0 | 39 | void |
michael@0 | 40 | nsHtml5StreamParser::InitializeStatics() |
michael@0 | 41 | { |
michael@0 | 42 | Preferences::AddIntVarCache(&sTimerInitialDelay, |
michael@0 | 43 | "html5.flushtimer.initialdelay"); |
michael@0 | 44 | Preferences::AddIntVarCache(&sTimerSubsequentDelay, |
michael@0 | 45 | "html5.flushtimer.subsequentdelay"); |
michael@0 | 46 | } |
michael@0 | 47 | |
michael@0 | 48 | /* |
michael@0 | 49 | * Note that nsHtml5StreamParser implements cycle collecting AddRef and |
michael@0 | 50 | * Release. Therefore, nsHtml5StreamParser must never be refcounted from |
michael@0 | 51 | * the parser thread! |
michael@0 | 52 | * |
michael@0 | 53 | * To work around this limitation, runnables posted by the main thread to the |
michael@0 | 54 | * parser thread hold their reference to the stream parser in an |
michael@0 | 55 | * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds |
michael@0 | 56 | * just like a regular nsRefPtr. This is OK, since the creation of the |
michael@0 | 57 | * runnable and the nsHtml5RefPtr happens on the main thread. |
michael@0 | 58 | * |
michael@0 | 59 | * When the runnable is done on the parser thread, the destructor of |
michael@0 | 60 | * nsHtml5RefPtr runs there. It doesn't call Release on the held object |
michael@0 | 61 | * directly. Instead, it posts another runnable back to the main thread where |
michael@0 | 62 | * that runnable calls Release on the wrapped object. |
michael@0 | 63 | * |
michael@0 | 64 | * When posting runnables in the other direction, the runnables have to be |
michael@0 | 65 | * created on the main thread when nsHtml5StreamParser is instantiated and |
michael@0 | 66 | * held for the lifetime of the nsHtml5StreamParser. This works, because the |
michael@0 | 67 | * same runnable can be dispatched multiple times and currently runnables |
michael@0 | 68 | * posted from the parser thread to main thread don't need to wrap any |
michael@0 | 69 | * runnable-specific data. (In the other direction, the runnables most notably |
michael@0 | 70 | * wrap the byte data of the stream.) |
michael@0 | 71 | */ |
michael@0 | 72 | NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser) |
michael@0 | 73 | NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser) |
michael@0 | 74 | |
michael@0 | 75 | NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser) |
michael@0 | 76 | NS_INTERFACE_TABLE(nsHtml5StreamParser, |
michael@0 | 77 | nsICharsetDetectionObserver) |
michael@0 | 78 | NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser) |
michael@0 | 79 | NS_INTERFACE_MAP_END |
michael@0 | 80 | |
michael@0 | 81 | NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser) |
michael@0 | 82 | |
michael@0 | 83 | NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser) |
michael@0 | 84 | tmp->DropTimer(); |
michael@0 | 85 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) |
michael@0 | 86 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest) |
michael@0 | 87 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner) |
michael@0 | 88 | tmp->mExecutorFlusher = nullptr; |
michael@0 | 89 | tmp->mLoadFlusher = nullptr; |
michael@0 | 90 | tmp->mExecutor = nullptr; |
michael@0 | 91 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet) |
michael@0 | 92 | NS_IMPL_CYCLE_COLLECTION_UNLINK_END |
michael@0 | 93 | |
michael@0 | 94 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser) |
michael@0 | 95 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) |
michael@0 | 96 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest) |
michael@0 | 97 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner) |
michael@0 | 98 | // hack: count the strongly owned edge wrapped in the runnable |
michael@0 | 99 | if (tmp->mExecutorFlusher) { |
michael@0 | 100 | NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor"); |
michael@0 | 101 | cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor)); |
michael@0 | 102 | } |
michael@0 | 103 | // hack: count the strongly owned edge wrapped in the runnable |
michael@0 | 104 | if (tmp->mLoadFlusher) { |
michael@0 | 105 | NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor"); |
michael@0 | 106 | cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor)); |
michael@0 | 107 | } |
michael@0 | 108 | // hack: count self if held by mChardet |
michael@0 | 109 | if (tmp->mChardet) { |
michael@0 | 110 | NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver"); |
michael@0 | 111 | cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp)); |
michael@0 | 112 | } |
michael@0 | 113 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END |
michael@0 | 114 | |
michael@0 | 115 | class nsHtml5ExecutorFlusher : public nsRunnable |
michael@0 | 116 | { |
michael@0 | 117 | private: |
michael@0 | 118 | nsRefPtr<nsHtml5TreeOpExecutor> mExecutor; |
michael@0 | 119 | public: |
michael@0 | 120 | nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor) |
michael@0 | 121 | : mExecutor(aExecutor) |
michael@0 | 122 | {} |
michael@0 | 123 | NS_IMETHODIMP Run() |
michael@0 | 124 | { |
michael@0 | 125 | if (!mExecutor->isInList()) { |
michael@0 | 126 | mExecutor->RunFlushLoop(); |
michael@0 | 127 | } |
michael@0 | 128 | return NS_OK; |
michael@0 | 129 | } |
michael@0 | 130 | }; |
michael@0 | 131 | |
michael@0 | 132 | class nsHtml5LoadFlusher : public nsRunnable |
michael@0 | 133 | { |
michael@0 | 134 | private: |
michael@0 | 135 | nsRefPtr<nsHtml5TreeOpExecutor> mExecutor; |
michael@0 | 136 | public: |
michael@0 | 137 | nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor) |
michael@0 | 138 | : mExecutor(aExecutor) |
michael@0 | 139 | {} |
michael@0 | 140 | NS_IMETHODIMP Run() |
michael@0 | 141 | { |
michael@0 | 142 | mExecutor->FlushSpeculativeLoads(); |
michael@0 | 143 | return NS_OK; |
michael@0 | 144 | } |
michael@0 | 145 | }; |
michael@0 | 146 | |
michael@0 | 147 | nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, |
michael@0 | 148 | nsHtml5Parser* aOwner, |
michael@0 | 149 | eParserMode aMode) |
michael@0 | 150 | : mFirstBuffer(nullptr) // Will be filled when starting |
michael@0 | 151 | , mLastBuffer(nullptr) // Will be filled when starting |
michael@0 | 152 | , mExecutor(aExecutor) |
michael@0 | 153 | , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML || |
michael@0 | 154 | aMode == VIEW_SOURCE_XML) ? |
michael@0 | 155 | nullptr : mExecutor->GetStage(), |
michael@0 | 156 | aMode == NORMAL ? |
michael@0 | 157 | mExecutor->GetStage() : nullptr)) |
michael@0 | 158 | , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML)) |
michael@0 | 159 | , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex") |
michael@0 | 160 | , mOwner(aOwner) |
michael@0 | 161 | , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex") |
michael@0 | 162 | , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex") |
michael@0 | 163 | , mThread(nsHtml5Module::GetStreamParserThread()) |
michael@0 | 164 | , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)) |
michael@0 | 165 | , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)) |
michael@0 | 166 | , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1")) |
michael@0 | 167 | , mMode(aMode) |
michael@0 | 168 | { |
michael@0 | 169 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 170 | mFlushTimer->SetTarget(mThread); |
michael@0 | 171 | #ifdef DEBUG |
michael@0 | 172 | mAtomTable.SetPermittedLookupThread(mThread); |
michael@0 | 173 | #endif |
michael@0 | 174 | mTokenizer->setInterner(&mAtomTable); |
michael@0 | 175 | mTokenizer->setEncodingDeclarationHandler(this); |
michael@0 | 176 | |
michael@0 | 177 | if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) { |
michael@0 | 178 | nsHtml5Highlighter* highlighter = |
michael@0 | 179 | new nsHtml5Highlighter(mExecutor->GetStage()); |
michael@0 | 180 | mTokenizer->EnableViewSource(highlighter); // takes ownership |
michael@0 | 181 | mTreeBuilder->EnableViewSource(highlighter); // doesn't own |
michael@0 | 182 | } |
michael@0 | 183 | |
michael@0 | 184 | // Chardet instantiation adapted from nsDOMFile. |
michael@0 | 185 | // Chardet is initialized here even if it turns out to be useless |
michael@0 | 186 | // to make the chardet refcount its observer (nsHtml5StreamParser) |
michael@0 | 187 | // on the main thread. |
michael@0 | 188 | const nsAdoptingCString& detectorName = |
michael@0 | 189 | Preferences::GetLocalizedCString("intl.charset.detector"); |
michael@0 | 190 | if (!detectorName.IsEmpty()) { |
michael@0 | 191 | nsAutoCString detectorContractID; |
michael@0 | 192 | detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE); |
michael@0 | 193 | detectorContractID += detectorName; |
michael@0 | 194 | if ((mChardet = do_CreateInstance(detectorContractID.get()))) { |
michael@0 | 195 | (void) mChardet->Init(this); |
michael@0 | 196 | mFeedChardet = true; |
michael@0 | 197 | } |
michael@0 | 198 | } |
michael@0 | 199 | |
michael@0 | 200 | // There's a zeroing operator new for everything else |
michael@0 | 201 | } |
michael@0 | 202 | |
michael@0 | 203 | nsHtml5StreamParser::~nsHtml5StreamParser() |
michael@0 | 204 | { |
michael@0 | 205 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 206 | mTokenizer->end(); |
michael@0 | 207 | NS_ASSERTION(!mFlushTimer, "Flush timer was not dropped before dtor!"); |
michael@0 | 208 | #ifdef DEBUG |
michael@0 | 209 | mRequest = nullptr; |
michael@0 | 210 | mObserver = nullptr; |
michael@0 | 211 | mUnicodeDecoder = nullptr; |
michael@0 | 212 | mSniffingBuffer = nullptr; |
michael@0 | 213 | mMetaScanner = nullptr; |
michael@0 | 214 | mFirstBuffer = nullptr; |
michael@0 | 215 | mExecutor = nullptr; |
michael@0 | 216 | mTreeBuilder = nullptr; |
michael@0 | 217 | mTokenizer = nullptr; |
michael@0 | 218 | mOwner = nullptr; |
michael@0 | 219 | #endif |
michael@0 | 220 | } |
michael@0 | 221 | |
michael@0 | 222 | nsresult |
michael@0 | 223 | nsHtml5StreamParser::GetChannel(nsIChannel** aChannel) |
michael@0 | 224 | { |
michael@0 | 225 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 226 | return mRequest ? CallQueryInterface(mRequest, aChannel) : |
michael@0 | 227 | NS_ERROR_NOT_AVAILABLE; |
michael@0 | 228 | } |
michael@0 | 229 | |
michael@0 | 230 | NS_IMETHODIMP |
michael@0 | 231 | nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) |
michael@0 | 232 | { |
michael@0 | 233 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 234 | if (aConf == eBestAnswer || aConf == eSureAnswer) { |
michael@0 | 235 | mFeedChardet = false; // just in case |
michael@0 | 236 | nsAutoCString encoding; |
michael@0 | 237 | if (!EncodingUtils::FindEncodingForLabel(nsDependentCString(aCharset), |
michael@0 | 238 | encoding)) { |
michael@0 | 239 | return NS_OK; |
michael@0 | 240 | } |
michael@0 | 241 | if (encoding.EqualsLiteral("replacement")) { |
michael@0 | 242 | return NS_OK; |
michael@0 | 243 | } |
michael@0 | 244 | if (HasDecoder()) { |
michael@0 | 245 | if (mCharset.Equals(encoding)) { |
michael@0 | 246 | NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection, |
michael@0 | 247 | "Why are we running chardet at all?"); |
michael@0 | 248 | mCharsetSource = kCharsetFromAutoDetection; |
michael@0 | 249 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 250 | } else { |
michael@0 | 251 | // We've already committed to a decoder. Request a reload from the |
michael@0 | 252 | // docshell. |
michael@0 | 253 | mTreeBuilder->NeedsCharsetSwitchTo(encoding, |
michael@0 | 254 | kCharsetFromAutoDetection, |
michael@0 | 255 | 0); |
michael@0 | 256 | FlushTreeOpsAndDisarmTimer(); |
michael@0 | 257 | Interrupt(); |
michael@0 | 258 | } |
michael@0 | 259 | } else { |
michael@0 | 260 | // Got a confident answer from the sniffing buffer. That code will |
michael@0 | 261 | // take care of setting up the decoder. |
michael@0 | 262 | mCharset.Assign(encoding); |
michael@0 | 263 | mCharsetSource = kCharsetFromAutoDetection; |
michael@0 | 264 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 265 | } |
michael@0 | 266 | } |
michael@0 | 267 | return NS_OK; |
michael@0 | 268 | } |
michael@0 | 269 | |
michael@0 | 270 | void |
michael@0 | 271 | nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) |
michael@0 | 272 | { |
michael@0 | 273 | if (aURL) { |
michael@0 | 274 | nsCOMPtr<nsIURI> temp; |
michael@0 | 275 | bool isViewSource; |
michael@0 | 276 | aURL->SchemeIs("view-source", &isViewSource); |
michael@0 | 277 | if (isViewSource) { |
michael@0 | 278 | nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL); |
michael@0 | 279 | nested->GetInnerURI(getter_AddRefs(temp)); |
michael@0 | 280 | } else { |
michael@0 | 281 | temp = aURL; |
michael@0 | 282 | } |
michael@0 | 283 | bool isData; |
michael@0 | 284 | temp->SchemeIs("data", &isData); |
michael@0 | 285 | if (isData) { |
michael@0 | 286 | // Avoid showing potentially huge data: URLs. The three last bytes are |
michael@0 | 287 | // UTF-8 for an ellipsis. |
michael@0 | 288 | mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6"); |
michael@0 | 289 | } else { |
michael@0 | 290 | temp->GetSpec(mViewSourceTitle); |
michael@0 | 291 | } |
michael@0 | 292 | } |
michael@0 | 293 | } |
michael@0 | 294 | |
michael@0 | 295 | nsresult |
michael@0 | 296 | nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null |
michael@0 | 297 | uint32_t aCount, |
michael@0 | 298 | uint32_t* aWriteCount) |
michael@0 | 299 | { |
michael@0 | 300 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 301 | nsresult rv = NS_OK; |
michael@0 | 302 | mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); |
michael@0 | 303 | if (mSniffingBuffer) { |
michael@0 | 304 | uint32_t writeCount; |
michael@0 | 305 | rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &writeCount); |
michael@0 | 306 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 307 | mSniffingBuffer = nullptr; |
michael@0 | 308 | } |
michael@0 | 309 | mMetaScanner = nullptr; |
michael@0 | 310 | if (aFromSegment) { |
michael@0 | 311 | rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount); |
michael@0 | 312 | } |
michael@0 | 313 | return rv; |
michael@0 | 314 | } |
michael@0 | 315 | |
michael@0 | 316 | nsresult |
michael@0 | 317 | nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName) |
michael@0 | 318 | { |
michael@0 | 319 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 320 | mCharset.Assign(aDecoderCharsetName); |
michael@0 | 321 | mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); |
michael@0 | 322 | mCharsetSource = kCharsetFromByteOrderMark; |
michael@0 | 323 | mFeedChardet = false; |
michael@0 | 324 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 325 | mSniffingBuffer = nullptr; |
michael@0 | 326 | mMetaScanner = nullptr; |
michael@0 | 327 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 328 | return NS_OK; |
michael@0 | 329 | } |
michael@0 | 330 | |
michael@0 | 331 | void |
michael@0 | 332 | nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment, |
michael@0 | 333 | uint32_t aCountToSniffingLimit) |
michael@0 | 334 | { |
michael@0 | 335 | // Avoid underspecified heuristic craziness for XHR |
michael@0 | 336 | if (mMode == LOAD_AS_DATA) { |
michael@0 | 337 | return; |
michael@0 | 338 | } |
michael@0 | 339 | // Make sure there's enough data. Require room for "<title></title>" |
michael@0 | 340 | if (mSniffingLength + aCountToSniffingLimit < 30) { |
michael@0 | 341 | return; |
michael@0 | 342 | } |
michael@0 | 343 | // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1 |
michael@0 | 344 | bool byteZero[2] = { false, false }; |
michael@0 | 345 | bool byteNonZero[2] = { false, false }; |
michael@0 | 346 | uint32_t i = 0; |
michael@0 | 347 | if (mSniffingBuffer) { |
michael@0 | 348 | for (; i < mSniffingLength; ++i) { |
michael@0 | 349 | if (mSniffingBuffer[i]) { |
michael@0 | 350 | if (byteNonZero[1 - (i % 2)]) { |
michael@0 | 351 | return; |
michael@0 | 352 | } |
michael@0 | 353 | byteNonZero[i % 2] = true; |
michael@0 | 354 | } else { |
michael@0 | 355 | if (byteZero[1 - (i % 2)]) { |
michael@0 | 356 | return; |
michael@0 | 357 | } |
michael@0 | 358 | byteZero[i % 2] = true; |
michael@0 | 359 | } |
michael@0 | 360 | } |
michael@0 | 361 | } |
michael@0 | 362 | if (aFromSegment) { |
michael@0 | 363 | for (uint32_t j = 0; j < aCountToSniffingLimit; ++j) { |
michael@0 | 364 | if (aFromSegment[j]) { |
michael@0 | 365 | if (byteNonZero[1 - ((i + j) % 2)]) { |
michael@0 | 366 | return; |
michael@0 | 367 | } |
michael@0 | 368 | byteNonZero[(i + j) % 2] = true; |
michael@0 | 369 | } else { |
michael@0 | 370 | if (byteZero[1 - ((i + j) % 2)]) { |
michael@0 | 371 | return; |
michael@0 | 372 | } |
michael@0 | 373 | byteZero[(i + j) % 2] = true; |
michael@0 | 374 | } |
michael@0 | 375 | } |
michael@0 | 376 | } |
michael@0 | 377 | |
michael@0 | 378 | if (byteNonZero[0]) { |
michael@0 | 379 | mCharset.Assign("UTF-16LE"); |
michael@0 | 380 | } else { |
michael@0 | 381 | mCharset.Assign("UTF-16BE"); |
michael@0 | 382 | } |
michael@0 | 383 | mCharsetSource = kCharsetFromIrreversibleAutoDetection; |
michael@0 | 384 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 385 | mFeedChardet = false; |
michael@0 | 386 | mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", |
michael@0 | 387 | true, |
michael@0 | 388 | 0); |
michael@0 | 389 | |
michael@0 | 390 | } |
michael@0 | 391 | |
michael@0 | 392 | void |
michael@0 | 393 | nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) |
michael@0 | 394 | { |
michael@0 | 395 | if (aEncoding) { |
michael@0 | 396 | nsDependentString utf16(aEncoding); |
michael@0 | 397 | nsAutoCString utf8; |
michael@0 | 398 | CopyUTF16toUTF8(utf16, utf8); |
michael@0 | 399 | if (PreferredForInternalEncodingDecl(utf8)) { |
michael@0 | 400 | mCharset.Assign(utf8); |
michael@0 | 401 | mCharsetSource = kCharsetFromMetaTag; // closest for XML |
michael@0 | 402 | return; |
michael@0 | 403 | } |
michael@0 | 404 | // else the page declared an encoding Gecko doesn't support and we'd |
michael@0 | 405 | // end up defaulting to UTF-8 anyway. Might as well fall through here |
michael@0 | 406 | // right away and let the encoding be set to UTF-8 which we'd default to |
michael@0 | 407 | // anyway. |
michael@0 | 408 | } |
michael@0 | 409 | mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM |
michael@0 | 410 | mCharsetSource = kCharsetFromMetaTag; // means confident |
michael@0 | 411 | } |
michael@0 | 412 | |
michael@0 | 413 | // A separate user data struct is used instead of passing the |
michael@0 | 414 | // nsHtml5StreamParser instance as user data in order to avoid including |
michael@0 | 415 | // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts. |
michael@0 | 416 | // Using a separate user data struct also avoids bloating nsHtml5StreamParser |
michael@0 | 417 | // by one pointer. |
michael@0 | 418 | struct UserData { |
michael@0 | 419 | XML_Parser mExpat; |
michael@0 | 420 | nsHtml5StreamParser* mStreamParser; |
michael@0 | 421 | }; |
michael@0 | 422 | |
michael@0 | 423 | // Using no-namespace handler callbacks to avoid including expat.h in |
michael@0 | 424 | // nsHtml5StreamParser.h, since doing so would cause naming conflicts. |
michael@0 | 425 | static void |
michael@0 | 426 | HandleXMLDeclaration(void* aUserData, |
michael@0 | 427 | const XML_Char* aVersion, |
michael@0 | 428 | const XML_Char* aEncoding, |
michael@0 | 429 | int aStandalone) |
michael@0 | 430 | { |
michael@0 | 431 | UserData* ud = static_cast<UserData*>(aUserData); |
michael@0 | 432 | ud->mStreamParser->SetEncodingFromExpat( |
michael@0 | 433 | reinterpret_cast<const char16_t*>(aEncoding)); |
michael@0 | 434 | XML_StopParser(ud->mExpat, false); |
michael@0 | 435 | } |
michael@0 | 436 | |
michael@0 | 437 | static void |
michael@0 | 438 | HandleStartElement(void* aUserData, |
michael@0 | 439 | const XML_Char* aName, |
michael@0 | 440 | const XML_Char **aAtts) |
michael@0 | 441 | { |
michael@0 | 442 | UserData* ud = static_cast<UserData*>(aUserData); |
michael@0 | 443 | XML_StopParser(ud->mExpat, false); |
michael@0 | 444 | } |
michael@0 | 445 | |
michael@0 | 446 | static void |
michael@0 | 447 | HandleEndElement(void* aUserData, |
michael@0 | 448 | const XML_Char* aName) |
michael@0 | 449 | { |
michael@0 | 450 | UserData* ud = static_cast<UserData*>(aUserData); |
michael@0 | 451 | XML_StopParser(ud->mExpat, false); |
michael@0 | 452 | } |
michael@0 | 453 | |
michael@0 | 454 | static void |
michael@0 | 455 | HandleComment(void* aUserData, |
michael@0 | 456 | const XML_Char* aName) |
michael@0 | 457 | { |
michael@0 | 458 | UserData* ud = static_cast<UserData*>(aUserData); |
michael@0 | 459 | XML_StopParser(ud->mExpat, false); |
michael@0 | 460 | } |
michael@0 | 461 | |
michael@0 | 462 | static void |
michael@0 | 463 | HandleProcessingInstruction(void* aUserData, |
michael@0 | 464 | const XML_Char* aTarget, |
michael@0 | 465 | const XML_Char* aData) |
michael@0 | 466 | { |
michael@0 | 467 | UserData* ud = static_cast<UserData*>(aUserData); |
michael@0 | 468 | XML_StopParser(ud->mExpat, false); |
michael@0 | 469 | } |
michael@0 | 470 | |
michael@0 | 471 | nsresult |
michael@0 | 472 | nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be null |
michael@0 | 473 | uint32_t aCount, |
michael@0 | 474 | uint32_t* aWriteCount, |
michael@0 | 475 | uint32_t aCountToSniffingLimit) |
michael@0 | 476 | { |
michael@0 | 477 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 478 | NS_ASSERTION(mCharsetSource < kCharsetFromParentForced, |
michael@0 | 479 | "Should not finalize sniffing when using forced charset."); |
michael@0 | 480 | if (mMode == VIEW_SOURCE_XML) { |
michael@0 | 481 | static const XML_Memory_Handling_Suite memsuite = |
michael@0 | 482 | { |
michael@0 | 483 | (void *(*)(size_t))moz_xmalloc, |
michael@0 | 484 | (void *(*)(void *, size_t))moz_xrealloc, |
michael@0 | 485 | moz_free |
michael@0 | 486 | }; |
michael@0 | 487 | |
michael@0 | 488 | static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' }; |
michael@0 | 489 | |
michael@0 | 490 | static const char16_t kISO88591[] = |
michael@0 | 491 | { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' }; |
michael@0 | 492 | |
michael@0 | 493 | UserData ud; |
michael@0 | 494 | ud.mStreamParser = this; |
michael@0 | 495 | |
michael@0 | 496 | // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML |
michael@0 | 497 | // documents MUST begin with a BOM. We don't support EBCDIC and such. |
michael@0 | 498 | // Thus, at this point, what we have is garbage or something encoded using |
michael@0 | 499 | // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes |
michael@0 | 500 | // without throwing errors when bytes have the most significant bit set |
michael@0 | 501 | // and without triggering expat's unknown encoding code paths. This is |
michael@0 | 502 | // enough to be able to use expat to parse the XML declaration in order |
michael@0 | 503 | // to extract the encoding name from it. |
michael@0 | 504 | ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator); |
michael@0 | 505 | XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration); |
michael@0 | 506 | XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement); |
michael@0 | 507 | XML_SetCommentHandler(ud.mExpat, HandleComment); |
michael@0 | 508 | XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction); |
michael@0 | 509 | XML_SetUserData(ud.mExpat, static_cast<void*>(&ud)); |
michael@0 | 510 | |
michael@0 | 511 | XML_Status status = XML_STATUS_OK; |
michael@0 | 512 | |
michael@0 | 513 | // aFromSegment points to the data obtained from the current network |
michael@0 | 514 | // event. mSniffingBuffer (if it exists) contains the data obtained before |
michael@0 | 515 | // the current event. Thus, mSniffingLenth bytes of mSniffingBuffer |
michael@0 | 516 | // followed by aCountToSniffingLimit bytes from aFromSegment are the |
michael@0 | 517 | // first 1024 bytes of the file (or the file as a whole if the file is |
michael@0 | 518 | // 1024 bytes long or shorter). Thus, we parse both buffers, but if the |
michael@0 | 519 | // first call succeeds already, we skip parsing the second buffer. |
michael@0 | 520 | if (mSniffingBuffer) { |
michael@0 | 521 | status = XML_Parse(ud.mExpat, |
michael@0 | 522 | reinterpret_cast<const char*>(mSniffingBuffer.get()), |
michael@0 | 523 | mSniffingLength, |
michael@0 | 524 | false); |
michael@0 | 525 | } |
michael@0 | 526 | if (status == XML_STATUS_OK && |
michael@0 | 527 | mCharsetSource < kCharsetFromMetaTag && |
michael@0 | 528 | aFromSegment) { |
michael@0 | 529 | status = XML_Parse(ud.mExpat, |
michael@0 | 530 | reinterpret_cast<const char*>(aFromSegment), |
michael@0 | 531 | aCountToSniffingLimit, |
michael@0 | 532 | false); |
michael@0 | 533 | } |
michael@0 | 534 | XML_ParserFree(ud.mExpat); |
michael@0 | 535 | |
michael@0 | 536 | if (mCharsetSource < kCharsetFromMetaTag) { |
michael@0 | 537 | // Failed to get an encoding from the XML declaration. XML defaults |
michael@0 | 538 | // confidently to UTF-8 in this case. |
michael@0 | 539 | // It is also possible that the document has an XML declaration that is |
michael@0 | 540 | // longer than 1024 bytes, but that case is not worth worrying about. |
michael@0 | 541 | mCharset.AssignLiteral("UTF-8"); |
michael@0 | 542 | mCharsetSource = kCharsetFromMetaTag; // means confident |
michael@0 | 543 | } |
michael@0 | 544 | |
michael@0 | 545 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
michael@0 | 546 | aCount, |
michael@0 | 547 | aWriteCount); |
michael@0 | 548 | } |
michael@0 | 549 | |
michael@0 | 550 | // meta scan failed. |
michael@0 | 551 | if (mCharsetSource >= kCharsetFromHintPrevDoc) { |
michael@0 | 552 | mFeedChardet = false; |
michael@0 | 553 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); |
michael@0 | 554 | } |
michael@0 | 555 | // Check for BOMless UTF-16 with Basic |
michael@0 | 556 | // Latin content for compat with IE. See bug 631751. |
michael@0 | 557 | SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit); |
michael@0 | 558 | // the charset may have been set now |
michael@0 | 559 | // maybe try chardet now; |
michael@0 | 560 | if (mFeedChardet) { |
michael@0 | 561 | bool dontFeed; |
michael@0 | 562 | nsresult rv; |
michael@0 | 563 | if (mSniffingBuffer) { |
michael@0 | 564 | rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed); |
michael@0 | 565 | mFeedChardet = !dontFeed; |
michael@0 | 566 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 567 | } |
michael@0 | 568 | if (mFeedChardet && aFromSegment) { |
michael@0 | 569 | rv = mChardet->DoIt((const char*)aFromSegment, |
michael@0 | 570 | // Avoid buffer boundary-dependent behavior when |
michael@0 | 571 | // reparsing is forbidden. If reparse is forbidden, |
michael@0 | 572 | // act as if we only saw the first 1024 bytes. |
michael@0 | 573 | // When reparsing isn't forbidden, buffer boundaries |
michael@0 | 574 | // can have an effect on whether the page is loaded |
michael@0 | 575 | // once or twice. :-( |
michael@0 | 576 | mReparseForbidden ? aCountToSniffingLimit : aCount, |
michael@0 | 577 | &dontFeed); |
michael@0 | 578 | mFeedChardet = !dontFeed; |
michael@0 | 579 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 580 | } |
michael@0 | 581 | if (mFeedChardet && (!aFromSegment || mReparseForbidden)) { |
michael@0 | 582 | // mReparseForbidden is checked so that we get to use the sniffing |
michael@0 | 583 | // buffer with the best guess so far if we aren't allowed to guess |
michael@0 | 584 | // better later. |
michael@0 | 585 | mFeedChardet = false; |
michael@0 | 586 | rv = mChardet->Done(); |
michael@0 | 587 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 588 | } |
michael@0 | 589 | // fall thru; callback may have changed charset |
michael@0 | 590 | } |
michael@0 | 591 | if (mCharsetSource == kCharsetUninitialized) { |
michael@0 | 592 | // Hopefully this case is never needed, but dealing with it anyway |
michael@0 | 593 | mCharset.AssignLiteral("windows-1252"); |
michael@0 | 594 | mCharsetSource = kCharsetFromFallback; |
michael@0 | 595 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 596 | } else if (mMode == LOAD_AS_DATA && |
michael@0 | 597 | mCharsetSource == kCharsetFromFallback) { |
michael@0 | 598 | NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR"); |
michael@0 | 599 | NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR"); |
michael@0 | 600 | NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"), |
michael@0 | 601 | "XHR should default to UTF-8"); |
michael@0 | 602 | // Now mark charset source as non-weak to signal that we have a decision |
michael@0 | 603 | mCharsetSource = kCharsetFromDocTypeDefault; |
michael@0 | 604 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 605 | } |
michael@0 | 606 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); |
michael@0 | 607 | } |
michael@0 | 608 | |
michael@0 | 609 | nsresult |
michael@0 | 610 | nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment, |
michael@0 | 611 | uint32_t aCount, |
michael@0 | 612 | uint32_t* aWriteCount) |
michael@0 | 613 | { |
michael@0 | 614 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 615 | nsresult rv = NS_OK; |
michael@0 | 616 | uint32_t writeCount; |
michael@0 | 617 | |
michael@0 | 618 | // mCharset and mCharsetSource potentially have come from channel or higher |
michael@0 | 619 | // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them. |
michael@0 | 620 | // If we don't find a BOM, the previously set values of mCharset and |
michael@0 | 621 | // mCharsetSource are not modified by the BOM sniffing here. |
michael@0 | 622 | for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) { |
michael@0 | 623 | switch (mBomState) { |
michael@0 | 624 | case BOM_SNIFFING_NOT_STARTED: |
michael@0 | 625 | NS_ASSERTION(i == 0, "Bad BOM sniffing state."); |
michael@0 | 626 | switch (*aFromSegment) { |
michael@0 | 627 | case 0xEF: |
michael@0 | 628 | mBomState = SEEN_UTF_8_FIRST_BYTE; |
michael@0 | 629 | break; |
michael@0 | 630 | case 0xFF: |
michael@0 | 631 | mBomState = SEEN_UTF_16_LE_FIRST_BYTE; |
michael@0 | 632 | break; |
michael@0 | 633 | case 0xFE: |
michael@0 | 634 | mBomState = SEEN_UTF_16_BE_FIRST_BYTE; |
michael@0 | 635 | break; |
michael@0 | 636 | default: |
michael@0 | 637 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 638 | break; |
michael@0 | 639 | } |
michael@0 | 640 | break; |
michael@0 | 641 | case SEEN_UTF_16_LE_FIRST_BYTE: |
michael@0 | 642 | if (aFromSegment[i] == 0xFE) { |
michael@0 | 643 | rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form |
michael@0 | 644 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 645 | uint32_t count = aCount - (i + 1); |
michael@0 | 646 | rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
michael@0 | 647 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 648 | *aWriteCount = writeCount + (i + 1); |
michael@0 | 649 | return rv; |
michael@0 | 650 | } |
michael@0 | 651 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 652 | break; |
michael@0 | 653 | case SEEN_UTF_16_BE_FIRST_BYTE: |
michael@0 | 654 | if (aFromSegment[i] == 0xFF) { |
michael@0 | 655 | rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form |
michael@0 | 656 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 657 | uint32_t count = aCount - (i + 1); |
michael@0 | 658 | rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
michael@0 | 659 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 660 | *aWriteCount = writeCount + (i + 1); |
michael@0 | 661 | return rv; |
michael@0 | 662 | } |
michael@0 | 663 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 664 | break; |
michael@0 | 665 | case SEEN_UTF_8_FIRST_BYTE: |
michael@0 | 666 | if (aFromSegment[i] == 0xBB) { |
michael@0 | 667 | mBomState = SEEN_UTF_8_SECOND_BYTE; |
michael@0 | 668 | } else { |
michael@0 | 669 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 670 | } |
michael@0 | 671 | break; |
michael@0 | 672 | case SEEN_UTF_8_SECOND_BYTE: |
michael@0 | 673 | if (aFromSegment[i] == 0xBF) { |
michael@0 | 674 | rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form |
michael@0 | 675 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 676 | uint32_t count = aCount - (i + 1); |
michael@0 | 677 | rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
michael@0 | 678 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 679 | *aWriteCount = writeCount + (i + 1); |
michael@0 | 680 | return rv; |
michael@0 | 681 | } |
michael@0 | 682 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 683 | break; |
michael@0 | 684 | default: |
michael@0 | 685 | mBomState = BOM_SNIFFING_OVER; |
michael@0 | 686 | break; |
michael@0 | 687 | } |
michael@0 | 688 | } |
michael@0 | 689 | // if we get here, there either was no BOM or the BOM sniffing isn't complete |
michael@0 | 690 | // yet |
michael@0 | 691 | |
michael@0 | 692 | MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark, |
michael@0 | 693 | "Should not come here if BOM was found."); |
michael@0 | 694 | MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent, |
michael@0 | 695 | "kCharsetFromOtherComponent is for XSLT."); |
michael@0 | 696 | |
michael@0 | 697 | if (mBomState == BOM_SNIFFING_OVER && |
michael@0 | 698 | mCharsetSource == kCharsetFromChannel) { |
michael@0 | 699 | // There was no BOM and the charset came from channel. mCharset |
michael@0 | 700 | // still contains the charset from the channel as set by an |
michael@0 | 701 | // earlier call to SetDocumentCharset(), since we didn't find a BOM and |
michael@0 | 702 | // overwrite mCharset. (Note that if the user has overridden the charset, |
michael@0 | 703 | // we don't come here but check <meta> for XSS-dangerous charsets first.) |
michael@0 | 704 | mFeedChardet = false; |
michael@0 | 705 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 706 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
michael@0 | 707 | aCount, aWriteCount); |
michael@0 | 708 | } |
michael@0 | 709 | |
michael@0 | 710 | if (!mMetaScanner && (mMode == NORMAL || |
michael@0 | 711 | mMode == VIEW_SOURCE_HTML || |
michael@0 | 712 | mMode == LOAD_AS_DATA)) { |
michael@0 | 713 | mMetaScanner = new nsHtml5MetaScanner(); |
michael@0 | 714 | } |
michael@0 | 715 | |
michael@0 | 716 | if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) { |
michael@0 | 717 | // this is the last buffer |
michael@0 | 718 | uint32_t countToSniffingLimit = |
michael@0 | 719 | NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength; |
michael@0 | 720 | if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { |
michael@0 | 721 | nsHtml5ByteReadable readable(aFromSegment, aFromSegment + |
michael@0 | 722 | countToSniffingLimit); |
michael@0 | 723 | nsAutoCString encoding; |
michael@0 | 724 | mMetaScanner->sniff(&readable, encoding); |
michael@0 | 725 | if (!encoding.IsEmpty()) { |
michael@0 | 726 | // meta scan successful; honor overrides unless meta is XSS-dangerous |
michael@0 | 727 | if ((mCharsetSource == kCharsetFromParentForced || |
michael@0 | 728 | mCharsetSource == kCharsetFromUserForced) && |
michael@0 | 729 | EncodingUtils::IsAsciiCompatible(encoding)) { |
michael@0 | 730 | // Honor override |
michael@0 | 731 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
michael@0 | 732 | aFromSegment, aCount, aWriteCount); |
michael@0 | 733 | } |
michael@0 | 734 | mCharset.Assign(encoding); |
michael@0 | 735 | mCharsetSource = kCharsetFromMetaPrescan; |
michael@0 | 736 | mFeedChardet = false; |
michael@0 | 737 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 738 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
michael@0 | 739 | aFromSegment, aCount, aWriteCount); |
michael@0 | 740 | } |
michael@0 | 741 | } |
michael@0 | 742 | if (mCharsetSource == kCharsetFromParentForced || |
michael@0 | 743 | mCharsetSource == kCharsetFromUserForced) { |
michael@0 | 744 | // meta not found, honor override |
michael@0 | 745 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
michael@0 | 746 | aFromSegment, aCount, aWriteCount); |
michael@0 | 747 | } |
michael@0 | 748 | return FinalizeSniffing(aFromSegment, aCount, aWriteCount, |
michael@0 | 749 | countToSniffingLimit); |
michael@0 | 750 | } |
michael@0 | 751 | |
michael@0 | 752 | // not the last buffer |
michael@0 | 753 | if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { |
michael@0 | 754 | nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount); |
michael@0 | 755 | nsAutoCString encoding; |
michael@0 | 756 | mMetaScanner->sniff(&readable, encoding); |
michael@0 | 757 | if (!encoding.IsEmpty()) { |
michael@0 | 758 | // meta scan successful; honor overrides unless meta is XSS-dangerous |
michael@0 | 759 | if ((mCharsetSource == kCharsetFromParentForced || |
michael@0 | 760 | mCharsetSource == kCharsetFromUserForced) && |
michael@0 | 761 | EncodingUtils::IsAsciiCompatible(encoding)) { |
michael@0 | 762 | // Honor override |
michael@0 | 763 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
michael@0 | 764 | aCount, aWriteCount); |
michael@0 | 765 | } |
michael@0 | 766 | mCharset.Assign(encoding); |
michael@0 | 767 | mCharsetSource = kCharsetFromMetaPrescan; |
michael@0 | 768 | mFeedChardet = false; |
michael@0 | 769 | mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
michael@0 | 770 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
michael@0 | 771 | aCount, aWriteCount); |
michael@0 | 772 | } |
michael@0 | 773 | } |
michael@0 | 774 | |
michael@0 | 775 | if (!mSniffingBuffer) { |
michael@0 | 776 | const mozilla::fallible_t fallible = mozilla::fallible_t(); |
michael@0 | 777 | mSniffingBuffer = new (fallible) |
michael@0 | 778 | uint8_t[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE]; |
michael@0 | 779 | if (!mSniffingBuffer) { |
michael@0 | 780 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 781 | } |
michael@0 | 782 | } |
michael@0 | 783 | memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount); |
michael@0 | 784 | mSniffingLength += aCount; |
michael@0 | 785 | *aWriteCount = aCount; |
michael@0 | 786 | return NS_OK; |
michael@0 | 787 | } |
michael@0 | 788 | |
michael@0 | 789 | nsresult |
michael@0 | 790 | nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment, |
michael@0 | 791 | uint32_t aCount, |
michael@0 | 792 | uint32_t* aWriteCount) |
michael@0 | 793 | { |
michael@0 | 794 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 795 | // mLastBuffer should always point to a buffer of the size |
michael@0 | 796 | // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE. |
michael@0 | 797 | if (!mLastBuffer) { |
michael@0 | 798 | NS_WARNING("mLastBuffer should not be null!"); |
michael@0 | 799 | MarkAsBroken(NS_ERROR_NULL_POINTER); |
michael@0 | 800 | return NS_ERROR_NULL_POINTER; |
michael@0 | 801 | } |
michael@0 | 802 | if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) { |
michael@0 | 803 | nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
michael@0 | 804 | nsHtml5OwningUTF16Buffer::FalliblyCreate( |
michael@0 | 805 | NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
michael@0 | 806 | if (!newBuf) { |
michael@0 | 807 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 808 | } |
michael@0 | 809 | mLastBuffer = (mLastBuffer->next = newBuf.forget()); |
michael@0 | 810 | } |
michael@0 | 811 | int32_t totalByteCount = 0; |
michael@0 | 812 | for (;;) { |
michael@0 | 813 | int32_t end = mLastBuffer->getEnd(); |
michael@0 | 814 | int32_t byteCount = aCount - totalByteCount; |
michael@0 | 815 | int32_t utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end; |
michael@0 | 816 | |
michael@0 | 817 | NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!"); |
michael@0 | 818 | // byteCount may be zero to force the decoder to output a pending surrogate |
michael@0 | 819 | // pair. |
michael@0 | 820 | |
michael@0 | 821 | nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count); |
michael@0 | 822 | MOZ_ASSERT(NS_SUCCEEDED(convResult)); |
michael@0 | 823 | |
michael@0 | 824 | end += utf16Count; |
michael@0 | 825 | mLastBuffer->setEnd(end); |
michael@0 | 826 | totalByteCount += byteCount; |
michael@0 | 827 | aFromSegment += byteCount; |
michael@0 | 828 | |
michael@0 | 829 | NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, |
michael@0 | 830 | "The Unicode decoder wrote too much data."); |
michael@0 | 831 | NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes."); |
michael@0 | 832 | |
michael@0 | 833 | if (convResult == NS_PARTIAL_MORE_OUTPUT) { |
michael@0 | 834 | nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
michael@0 | 835 | nsHtml5OwningUTF16Buffer::FalliblyCreate( |
michael@0 | 836 | NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
michael@0 | 837 | if (!newBuf) { |
michael@0 | 838 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 839 | } |
michael@0 | 840 | mLastBuffer = (mLastBuffer->next = newBuf.forget()); |
michael@0 | 841 | // All input may have been consumed if there is a pending surrogate pair |
michael@0 | 842 | // that doesn't fit in the output buffer. Loop back to push a zero-length |
michael@0 | 843 | // input to the decoder in that case. |
michael@0 | 844 | } else { |
michael@0 | 845 | NS_ASSERTION(totalByteCount == (int32_t)aCount, |
michael@0 | 846 | "The Unicode decoder consumed the wrong number of bytes."); |
michael@0 | 847 | *aWriteCount = (uint32_t)totalByteCount; |
michael@0 | 848 | return NS_OK; |
michael@0 | 849 | } |
michael@0 | 850 | } |
michael@0 | 851 | } |
michael@0 | 852 | |
michael@0 | 853 | nsresult |
michael@0 | 854 | nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext) |
michael@0 | 855 | { |
michael@0 | 856 | NS_PRECONDITION(STREAM_NOT_STARTED == mStreamState, |
michael@0 | 857 | "Got OnStartRequest when the stream had already started."); |
michael@0 | 858 | NS_PRECONDITION(!mExecutor->HasStarted(), |
michael@0 | 859 | "Got OnStartRequest at the wrong stage in the executor life cycle."); |
michael@0 | 860 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 861 | if (mObserver) { |
michael@0 | 862 | mObserver->OnStartRequest(aRequest, aContext); |
michael@0 | 863 | } |
michael@0 | 864 | mRequest = aRequest; |
michael@0 | 865 | |
michael@0 | 866 | mStreamState = STREAM_BEING_READ; |
michael@0 | 867 | |
michael@0 | 868 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
michael@0 | 869 | mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); |
michael@0 | 870 | } |
michael@0 | 871 | |
michael@0 | 872 | // For View Source, the parser should run with scripts "enabled" if a normal |
michael@0 | 873 | // load would have scripts enabled. |
michael@0 | 874 | bool scriptingEnabled = mMode == LOAD_AS_DATA ? |
michael@0 | 875 | false : mExecutor->IsScriptEnabled(); |
michael@0 | 876 | mOwner->StartTokenizer(scriptingEnabled); |
michael@0 | 877 | |
michael@0 | 878 | bool isSrcdoc = false; |
michael@0 | 879 | nsCOMPtr<nsIChannel> channel; |
michael@0 | 880 | nsresult rv = GetChannel(getter_AddRefs(channel)); |
michael@0 | 881 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 882 | isSrcdoc = NS_IsSrcdocChannel(channel); |
michael@0 | 883 | } |
michael@0 | 884 | mTreeBuilder->setIsSrcdocDocument(isSrcdoc); |
michael@0 | 885 | mTreeBuilder->setScriptingEnabled(scriptingEnabled); |
michael@0 | 886 | mTreeBuilder->SetPreventScriptExecution(!((mMode == NORMAL) && |
michael@0 | 887 | scriptingEnabled)); |
michael@0 | 888 | mTokenizer->start(); |
michael@0 | 889 | mExecutor->Start(); |
michael@0 | 890 | mExecutor->StartReadingFromStage(); |
michael@0 | 891 | |
michael@0 | 892 | if (mMode == PLAIN_TEXT) { |
michael@0 | 893 | mTreeBuilder->StartPlainText(); |
michael@0 | 894 | mTokenizer->StartPlainText(); |
michael@0 | 895 | } else if (mMode == VIEW_SOURCE_PLAIN) { |
michael@0 | 896 | mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); |
michael@0 | 897 | mTokenizer->StartPlainText(); |
michael@0 | 898 | } |
michael@0 | 899 | |
michael@0 | 900 | /* |
michael@0 | 901 | * If you move the following line, be very careful not to cause |
michael@0 | 902 | * WillBuildModel to be called before the document has had its |
michael@0 | 903 | * script global object set. |
michael@0 | 904 | */ |
michael@0 | 905 | rv = mExecutor->WillBuildModel(eDTDMode_unknown); |
michael@0 | 906 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 907 | |
michael@0 | 908 | nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
michael@0 | 909 | nsHtml5OwningUTF16Buffer::FalliblyCreate( |
michael@0 | 910 | NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
michael@0 | 911 | if (!newBuf) { |
michael@0 | 912 | // marks this stream parser as terminated, |
michael@0 | 913 | // which prevents entry to code paths that |
michael@0 | 914 | // would use mFirstBuffer or mLastBuffer. |
michael@0 | 915 | return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 916 | } |
michael@0 | 917 | NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?"); |
michael@0 | 918 | NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?"); |
michael@0 | 919 | mFirstBuffer = mLastBuffer = newBuf; |
michael@0 | 920 | |
michael@0 | 921 | rv = NS_OK; |
michael@0 | 922 | |
michael@0 | 923 | // The line below means that the encoding can end up being wrong if |
michael@0 | 924 | // a view-source URL is loaded without having the encoding hint from a |
michael@0 | 925 | // previous normal load in the history. |
michael@0 | 926 | mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT); |
michael@0 | 927 | |
michael@0 | 928 | nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv)); |
michael@0 | 929 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 930 | nsAutoCString method; |
michael@0 | 931 | httpChannel->GetRequestMethod(method); |
michael@0 | 932 | // XXX does Necko have a way to renavigate POST, etc. without hitting |
michael@0 | 933 | // the network? |
michael@0 | 934 | if (!method.EqualsLiteral("GET")) { |
michael@0 | 935 | // This is the old Gecko behavior but the HTML5 spec disagrees. |
michael@0 | 936 | // Don't reparse on POST. |
michael@0 | 937 | mReparseForbidden = true; |
michael@0 | 938 | mFeedChardet = false; // can't restart anyway |
michael@0 | 939 | } |
michael@0 | 940 | |
michael@0 | 941 | // Attempt to retarget delivery of data (via OnDataAvailable) to the parser |
michael@0 | 942 | // thread, rather than through the main thread. |
michael@0 | 943 | nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest = |
michael@0 | 944 | do_QueryInterface(mRequest); |
michael@0 | 945 | if (threadRetargetableRequest) { |
michael@0 | 946 | threadRetargetableRequest->RetargetDeliveryTo(mThread); |
michael@0 | 947 | } |
michael@0 | 948 | } |
michael@0 | 949 | |
michael@0 | 950 | if (mCharsetSource == kCharsetFromParentFrame) { |
michael@0 | 951 | // Remember this in case chardet overwrites mCharsetSource |
michael@0 | 952 | mInitialEncodingWasFromParentFrame = true; |
michael@0 | 953 | } |
michael@0 | 954 | |
michael@0 | 955 | if (mCharsetSource >= kCharsetFromAutoDetection) { |
michael@0 | 956 | mFeedChardet = false; |
michael@0 | 957 | } |
michael@0 | 958 | |
michael@0 | 959 | nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest)); |
michael@0 | 960 | if (!wyciwygChannel) { |
michael@0 | 961 | // we aren't ready to commit to an encoding yet |
michael@0 | 962 | // leave converter uninstantiated for now |
michael@0 | 963 | return NS_OK; |
michael@0 | 964 | } |
michael@0 | 965 | |
michael@0 | 966 | // We are reloading a document.open()ed doc. |
michael@0 | 967 | mReparseForbidden = true; |
michael@0 | 968 | mFeedChardet = false; |
michael@0 | 969 | |
michael@0 | 970 | // Instantiate the converter here to avoid BOM sniffing. |
michael@0 | 971 | mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); |
michael@0 | 972 | return NS_OK; |
michael@0 | 973 | } |
michael@0 | 974 | |
michael@0 | 975 | nsresult |
michael@0 | 976 | nsHtml5StreamParser::CheckListenerChain() |
michael@0 | 977 | { |
michael@0 | 978 | NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!"); |
michael@0 | 979 | if (!mObserver) { |
michael@0 | 980 | return NS_OK; |
michael@0 | 981 | } |
michael@0 | 982 | nsresult rv; |
michael@0 | 983 | nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable = |
michael@0 | 984 | do_QueryInterface(mObserver, &rv); |
michael@0 | 985 | if (NS_SUCCEEDED(rv) && retargetable) { |
michael@0 | 986 | rv = retargetable->CheckListenerChain(); |
michael@0 | 987 | } |
michael@0 | 988 | return rv; |
michael@0 | 989 | } |
michael@0 | 990 | |
michael@0 | 991 | void |
michael@0 | 992 | nsHtml5StreamParser::DoStopRequest() |
michael@0 | 993 | { |
michael@0 | 994 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 995 | NS_PRECONDITION(STREAM_BEING_READ == mStreamState, |
michael@0 | 996 | "Stream ended without being open."); |
michael@0 | 997 | mTokenizerMutex.AssertCurrentThreadOwns(); |
michael@0 | 998 | |
michael@0 | 999 | if (IsTerminated()) { |
michael@0 | 1000 | return; |
michael@0 | 1001 | } |
michael@0 | 1002 | |
michael@0 | 1003 | mStreamState = STREAM_ENDED; |
michael@0 | 1004 | |
michael@0 | 1005 | if (!mUnicodeDecoder) { |
michael@0 | 1006 | uint32_t writeCount; |
michael@0 | 1007 | nsresult rv; |
michael@0 | 1008 | if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) { |
michael@0 | 1009 | MarkAsBroken(rv); |
michael@0 | 1010 | return; |
michael@0 | 1011 | } |
michael@0 | 1012 | } else if (mFeedChardet) { |
michael@0 | 1013 | mChardet->Done(); |
michael@0 | 1014 | } |
michael@0 | 1015 | |
michael@0 | 1016 | if (IsTerminatedOrInterrupted()) { |
michael@0 | 1017 | return; |
michael@0 | 1018 | } |
michael@0 | 1019 | |
michael@0 | 1020 | ParseAvailableData(); |
michael@0 | 1021 | } |
michael@0 | 1022 | |
michael@0 | 1023 | class nsHtml5RequestStopper : public nsRunnable |
michael@0 | 1024 | { |
michael@0 | 1025 | private: |
michael@0 | 1026 | nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
michael@0 | 1027 | public: |
michael@0 | 1028 | nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser) |
michael@0 | 1029 | : mStreamParser(aStreamParser) |
michael@0 | 1030 | {} |
michael@0 | 1031 | NS_IMETHODIMP Run() |
michael@0 | 1032 | { |
michael@0 | 1033 | mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
michael@0 | 1034 | mStreamParser->DoStopRequest(); |
michael@0 | 1035 | return NS_OK; |
michael@0 | 1036 | } |
michael@0 | 1037 | }; |
michael@0 | 1038 | |
michael@0 | 1039 | nsresult |
michael@0 | 1040 | nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest, |
michael@0 | 1041 | nsISupports* aContext, |
michael@0 | 1042 | nsresult status) |
michael@0 | 1043 | { |
michael@0 | 1044 | NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream."); |
michael@0 | 1045 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 1046 | if (mObserver) { |
michael@0 | 1047 | mObserver->OnStopRequest(aRequest, aContext, status); |
michael@0 | 1048 | } |
michael@0 | 1049 | nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this); |
michael@0 | 1050 | if (NS_FAILED(mThread->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) { |
michael@0 | 1051 | NS_WARNING("Dispatching StopRequest event failed."); |
michael@0 | 1052 | } |
michael@0 | 1053 | return NS_OK; |
michael@0 | 1054 | } |
michael@0 | 1055 | |
michael@0 | 1056 | void |
michael@0 | 1057 | nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength) |
michael@0 | 1058 | { |
michael@0 | 1059 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1060 | NS_PRECONDITION(STREAM_BEING_READ == mStreamState, |
michael@0 | 1061 | "DoDataAvailable called when stream not open."); |
michael@0 | 1062 | mTokenizerMutex.AssertCurrentThreadOwns(); |
michael@0 | 1063 | |
michael@0 | 1064 | if (IsTerminated()) { |
michael@0 | 1065 | return; |
michael@0 | 1066 | } |
michael@0 | 1067 | |
michael@0 | 1068 | uint32_t writeCount; |
michael@0 | 1069 | nsresult rv; |
michael@0 | 1070 | if (HasDecoder()) { |
michael@0 | 1071 | if (mFeedChardet) { |
michael@0 | 1072 | bool dontFeed; |
michael@0 | 1073 | mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed); |
michael@0 | 1074 | mFeedChardet = !dontFeed; |
michael@0 | 1075 | } |
michael@0 | 1076 | rv = WriteStreamBytes(aBuffer, aLength, &writeCount); |
michael@0 | 1077 | } else { |
michael@0 | 1078 | rv = SniffStreamBytes(aBuffer, aLength, &writeCount); |
michael@0 | 1079 | } |
michael@0 | 1080 | if (NS_FAILED(rv)) { |
michael@0 | 1081 | MarkAsBroken(rv); |
michael@0 | 1082 | return; |
michael@0 | 1083 | } |
michael@0 | 1084 | NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed."); |
michael@0 | 1085 | |
michael@0 | 1086 | if (IsTerminatedOrInterrupted()) { |
michael@0 | 1087 | return; |
michael@0 | 1088 | } |
michael@0 | 1089 | |
michael@0 | 1090 | ParseAvailableData(); |
michael@0 | 1091 | |
michael@0 | 1092 | if (mFlushTimerArmed || mSpeculating) { |
michael@0 | 1093 | return; |
michael@0 | 1094 | } |
michael@0 | 1095 | |
michael@0 | 1096 | mFlushTimer->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback, |
michael@0 | 1097 | static_cast<void*> (this), |
michael@0 | 1098 | mFlushTimerEverFired ? |
michael@0 | 1099 | sTimerInitialDelay : |
michael@0 | 1100 | sTimerSubsequentDelay, |
michael@0 | 1101 | nsITimer::TYPE_ONE_SHOT); |
michael@0 | 1102 | mFlushTimerArmed = true; |
michael@0 | 1103 | } |
michael@0 | 1104 | |
michael@0 | 1105 | class nsHtml5DataAvailable : public nsRunnable |
michael@0 | 1106 | { |
michael@0 | 1107 | private: |
michael@0 | 1108 | nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
michael@0 | 1109 | nsAutoArrayPtr<uint8_t> mData; |
michael@0 | 1110 | uint32_t mLength; |
michael@0 | 1111 | public: |
michael@0 | 1112 | nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser, |
michael@0 | 1113 | uint8_t* aData, |
michael@0 | 1114 | uint32_t aLength) |
michael@0 | 1115 | : mStreamParser(aStreamParser) |
michael@0 | 1116 | , mData(aData) |
michael@0 | 1117 | , mLength(aLength) |
michael@0 | 1118 | {} |
michael@0 | 1119 | NS_IMETHODIMP Run() |
michael@0 | 1120 | { |
michael@0 | 1121 | mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
michael@0 | 1122 | mStreamParser->DoDataAvailable(mData, mLength); |
michael@0 | 1123 | return NS_OK; |
michael@0 | 1124 | } |
michael@0 | 1125 | }; |
michael@0 | 1126 | |
michael@0 | 1127 | nsresult |
michael@0 | 1128 | nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest, |
michael@0 | 1129 | nsISupports* aContext, |
michael@0 | 1130 | nsIInputStream* aInStream, |
michael@0 | 1131 | uint64_t aSourceOffset, |
michael@0 | 1132 | uint32_t aLength) |
michael@0 | 1133 | { |
michael@0 | 1134 | nsresult rv; |
michael@0 | 1135 | if (NS_FAILED(rv = mExecutor->IsBroken())) { |
michael@0 | 1136 | return rv; |
michael@0 | 1137 | } |
michael@0 | 1138 | |
michael@0 | 1139 | NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream."); |
michael@0 | 1140 | uint32_t totalRead; |
michael@0 | 1141 | // Main thread to parser thread dispatch requires copying to buffer first. |
michael@0 | 1142 | if (NS_IsMainThread()) { |
michael@0 | 1143 | const mozilla::fallible_t fallible = mozilla::fallible_t(); |
michael@0 | 1144 | nsAutoArrayPtr<uint8_t> data(new (fallible) uint8_t[aLength]); |
michael@0 | 1145 | if (!data) { |
michael@0 | 1146 | return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 1147 | } |
michael@0 | 1148 | rv = aInStream->Read(reinterpret_cast<char*>(data.get()), |
michael@0 | 1149 | aLength, &totalRead); |
michael@0 | 1150 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 1151 | NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?"); |
michael@0 | 1152 | |
michael@0 | 1153 | nsCOMPtr<nsIRunnable> dataAvailable = new nsHtml5DataAvailable(this, |
michael@0 | 1154 | data.forget(), |
michael@0 | 1155 | totalRead); |
michael@0 | 1156 | if (NS_FAILED(mThread->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) { |
michael@0 | 1157 | NS_WARNING("Dispatching DataAvailable event failed."); |
michael@0 | 1158 | } |
michael@0 | 1159 | return rv; |
michael@0 | 1160 | } else { |
michael@0 | 1161 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1162 | mozilla::MutexAutoLock autoLock(mTokenizerMutex); |
michael@0 | 1163 | |
michael@0 | 1164 | // Read directly from response buffer. |
michael@0 | 1165 | rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength, |
michael@0 | 1166 | &totalRead); |
michael@0 | 1167 | if (NS_FAILED(rv)) { |
michael@0 | 1168 | NS_WARNING("Failed reading response data to parser"); |
michael@0 | 1169 | return rv; |
michael@0 | 1170 | } |
michael@0 | 1171 | return NS_OK; |
michael@0 | 1172 | } |
michael@0 | 1173 | } |
michael@0 | 1174 | |
michael@0 | 1175 | /* static */ |
michael@0 | 1176 | NS_METHOD |
michael@0 | 1177 | nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream, |
michael@0 | 1178 | void *aClosure, |
michael@0 | 1179 | const char *aFromSegment, |
michael@0 | 1180 | uint32_t aToOffset, |
michael@0 | 1181 | uint32_t aCount, |
michael@0 | 1182 | uint32_t *aWriteCount) |
michael@0 | 1183 | { |
michael@0 | 1184 | nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure); |
michael@0 | 1185 | |
michael@0 | 1186 | parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount); |
michael@0 | 1187 | // Assume DoDataAvailable consumed all available bytes. |
michael@0 | 1188 | *aWriteCount = aCount; |
michael@0 | 1189 | return NS_OK; |
michael@0 | 1190 | } |
michael@0 | 1191 | |
michael@0 | 1192 | bool |
michael@0 | 1193 | nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) |
michael@0 | 1194 | { |
michael@0 | 1195 | nsAutoCString newEncoding; |
michael@0 | 1196 | if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) { |
michael@0 | 1197 | // the encoding name is bogus |
michael@0 | 1198 | mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", |
michael@0 | 1199 | true, |
michael@0 | 1200 | mTokenizer->getLineNumber()); |
michael@0 | 1201 | return false; |
michael@0 | 1202 | } |
michael@0 | 1203 | |
michael@0 | 1204 | if (newEncoding.EqualsLiteral("UTF-16BE") || |
michael@0 | 1205 | newEncoding.EqualsLiteral("UTF-16LE")) { |
michael@0 | 1206 | mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16", |
michael@0 | 1207 | true, |
michael@0 | 1208 | mTokenizer->getLineNumber()); |
michael@0 | 1209 | newEncoding.Assign("UTF-8"); |
michael@0 | 1210 | } |
michael@0 | 1211 | |
michael@0 | 1212 | if (newEncoding.EqualsLiteral("x-user-defined")) { |
michael@0 | 1213 | // WebKit/Blink hack for Indian and Armenian legacy sites |
michael@0 | 1214 | mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined", |
michael@0 | 1215 | true, |
michael@0 | 1216 | mTokenizer->getLineNumber()); |
michael@0 | 1217 | newEncoding.Assign("windows-1252"); |
michael@0 | 1218 | } |
michael@0 | 1219 | |
michael@0 | 1220 | if (newEncoding.Equals(mCharset)) { |
michael@0 | 1221 | if (mCharsetSource < kCharsetFromMetaPrescan) { |
michael@0 | 1222 | if (mInitialEncodingWasFromParentFrame) { |
michael@0 | 1223 | mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame", |
michael@0 | 1224 | false, |
michael@0 | 1225 | mTokenizer->getLineNumber()); |
michael@0 | 1226 | } else { |
michael@0 | 1227 | mTreeBuilder->MaybeComplainAboutCharset("EncLateMeta", |
michael@0 | 1228 | false, |
michael@0 | 1229 | mTokenizer->getLineNumber()); |
michael@0 | 1230 | } |
michael@0 | 1231 | } |
michael@0 | 1232 | mCharsetSource = kCharsetFromMetaTag; // become confident |
michael@0 | 1233 | mFeedChardet = false; // don't feed chardet when confident |
michael@0 | 1234 | return false; |
michael@0 | 1235 | } |
michael@0 | 1236 | |
michael@0 | 1237 | aEncoding.Assign(newEncoding); |
michael@0 | 1238 | return true; |
michael@0 | 1239 | } |
michael@0 | 1240 | |
michael@0 | 1241 | bool |
michael@0 | 1242 | nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding) |
michael@0 | 1243 | { |
michael@0 | 1244 | // This code needs to stay in sync with |
michael@0 | 1245 | // nsHtml5MetaScanner::tryCharset. Unfortunately, the |
michael@0 | 1246 | // trickery with member fields there leads to some copy-paste reuse. :-( |
michael@0 | 1247 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1248 | if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec |
michael@0 | 1249 | return false; |
michael@0 | 1250 | } |
michael@0 | 1251 | |
michael@0 | 1252 | nsAutoCString newEncoding; |
michael@0 | 1253 | CopyUTF16toUTF8(*aEncoding, newEncoding); |
michael@0 | 1254 | |
michael@0 | 1255 | if (!PreferredForInternalEncodingDecl(newEncoding)) { |
michael@0 | 1256 | return false; |
michael@0 | 1257 | } |
michael@0 | 1258 | |
michael@0 | 1259 | if (mReparseForbidden) { |
michael@0 | 1260 | // This mReparseForbidden check happens after the call to |
michael@0 | 1261 | // PreferredForInternalEncodingDecl so that if that method calls |
michael@0 | 1262 | // MaybeComplainAboutCharset, its charset complaint wins over the one |
michael@0 | 1263 | // below. |
michael@0 | 1264 | mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaTooLate", |
michael@0 | 1265 | true, |
michael@0 | 1266 | mTokenizer->getLineNumber()); |
michael@0 | 1267 | return false; // not reparsing even if we wanted to |
michael@0 | 1268 | } |
michael@0 | 1269 | |
michael@0 | 1270 | // Avoid having the chardet ask for another restart after this restart |
michael@0 | 1271 | // request. |
michael@0 | 1272 | mFeedChardet = false; |
michael@0 | 1273 | mTreeBuilder->NeedsCharsetSwitchTo(newEncoding, |
michael@0 | 1274 | kCharsetFromMetaTag, |
michael@0 | 1275 | mTokenizer->getLineNumber()); |
michael@0 | 1276 | FlushTreeOpsAndDisarmTimer(); |
michael@0 | 1277 | Interrupt(); |
michael@0 | 1278 | // the tree op executor will cause the stream parser to terminate |
michael@0 | 1279 | // if the charset switch request is accepted or it'll uninterrupt |
michael@0 | 1280 | // if the request failed. Note that if the restart request fails, |
michael@0 | 1281 | // we don't bother trying to make chardet resume. Might as well |
michael@0 | 1282 | // assume that chardet-requested restarts would fail, too. |
michael@0 | 1283 | return true; |
michael@0 | 1284 | } |
michael@0 | 1285 | |
michael@0 | 1286 | void |
michael@0 | 1287 | nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() |
michael@0 | 1288 | { |
michael@0 | 1289 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1290 | if (mFlushTimerArmed) { |
michael@0 | 1291 | // avoid calling Cancel if the flush timer isn't armed to avoid acquiring |
michael@0 | 1292 | // a mutex |
michael@0 | 1293 | mFlushTimer->Cancel(); |
michael@0 | 1294 | mFlushTimerArmed = false; |
michael@0 | 1295 | } |
michael@0 | 1296 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
michael@0 | 1297 | mTokenizer->FlushViewSource(); |
michael@0 | 1298 | } |
michael@0 | 1299 | mTreeBuilder->Flush(); |
michael@0 | 1300 | if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
michael@0 | 1301 | NS_WARNING("failed to dispatch executor flush event"); |
michael@0 | 1302 | } |
michael@0 | 1303 | } |
michael@0 | 1304 | |
michael@0 | 1305 | void |
michael@0 | 1306 | nsHtml5StreamParser::ParseAvailableData() |
michael@0 | 1307 | { |
michael@0 | 1308 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1309 | mTokenizerMutex.AssertCurrentThreadOwns(); |
michael@0 | 1310 | |
michael@0 | 1311 | if (IsTerminatedOrInterrupted()) { |
michael@0 | 1312 | return; |
michael@0 | 1313 | } |
michael@0 | 1314 | |
michael@0 | 1315 | for (;;) { |
michael@0 | 1316 | if (!mFirstBuffer->hasMore()) { |
michael@0 | 1317 | if (mFirstBuffer == mLastBuffer) { |
michael@0 | 1318 | switch (mStreamState) { |
michael@0 | 1319 | case STREAM_BEING_READ: |
michael@0 | 1320 | // never release the last buffer. |
michael@0 | 1321 | if (!mSpeculating) { |
michael@0 | 1322 | // reuse buffer space if not speculating |
michael@0 | 1323 | mFirstBuffer->setStart(0); |
michael@0 | 1324 | mFirstBuffer->setEnd(0); |
michael@0 | 1325 | } |
michael@0 | 1326 | mTreeBuilder->FlushLoads(); |
michael@0 | 1327 | // Dispatch this runnable unconditionally, because the loads |
michael@0 | 1328 | // that need flushing may have been flushed earlier even if the |
michael@0 | 1329 | // flush right above here did nothing. |
michael@0 | 1330 | if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher))) { |
michael@0 | 1331 | NS_WARNING("failed to dispatch load flush event"); |
michael@0 | 1332 | } |
michael@0 | 1333 | return; // no more data for now but expecting more |
michael@0 | 1334 | case STREAM_ENDED: |
michael@0 | 1335 | if (mAtEOF) { |
michael@0 | 1336 | return; |
michael@0 | 1337 | } |
michael@0 | 1338 | mAtEOF = true; |
michael@0 | 1339 | if (mCharsetSource < kCharsetFromMetaTag) { |
michael@0 | 1340 | if (mInitialEncodingWasFromParentFrame) { |
michael@0 | 1341 | // Unfortunately, this check doesn't take effect for |
michael@0 | 1342 | // cross-origin frames, so cross-origin ad frames that have |
michael@0 | 1343 | // no text and only an image or a Flash embed get the more |
michael@0 | 1344 | // severe message from the next if block. The message is |
michael@0 | 1345 | // technically accurate, though. |
michael@0 | 1346 | mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationFrame", |
michael@0 | 1347 | false, |
michael@0 | 1348 | 0); |
michael@0 | 1349 | } else if (mMode == NORMAL) { |
michael@0 | 1350 | mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclaration", |
michael@0 | 1351 | true, |
michael@0 | 1352 | 0); |
michael@0 | 1353 | } else if (mMode == PLAIN_TEXT) { |
michael@0 | 1354 | mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationPlain", |
michael@0 | 1355 | true, |
michael@0 | 1356 | 0); |
michael@0 | 1357 | } |
michael@0 | 1358 | } |
michael@0 | 1359 | mTokenizer->eof(); |
michael@0 | 1360 | mTreeBuilder->StreamEnded(); |
michael@0 | 1361 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
michael@0 | 1362 | mTokenizer->EndViewSource(); |
michael@0 | 1363 | } |
michael@0 | 1364 | FlushTreeOpsAndDisarmTimer(); |
michael@0 | 1365 | return; // no more data and not expecting more |
michael@0 | 1366 | default: |
michael@0 | 1367 | NS_NOTREACHED("It should be impossible to reach this."); |
michael@0 | 1368 | return; |
michael@0 | 1369 | } |
michael@0 | 1370 | } |
michael@0 | 1371 | mFirstBuffer = mFirstBuffer->next; |
michael@0 | 1372 | continue; |
michael@0 | 1373 | } |
michael@0 | 1374 | |
michael@0 | 1375 | // now we have a non-empty buffer |
michael@0 | 1376 | mFirstBuffer->adjust(mLastWasCR); |
michael@0 | 1377 | mLastWasCR = false; |
michael@0 | 1378 | if (mFirstBuffer->hasMore()) { |
michael@0 | 1379 | mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer); |
michael@0 | 1380 | // At this point, internalEncodingDeclaration() may have called |
michael@0 | 1381 | // Terminate, but that never happens together with script. |
michael@0 | 1382 | // Can't assert that here, though, because it's possible that the main |
michael@0 | 1383 | // thread has called Terminate() while this thread was parsing. |
michael@0 | 1384 | if (mTreeBuilder->HasScript()) { |
michael@0 | 1385 | // HasScript() cannot return true if the tree builder is preventing |
michael@0 | 1386 | // script execution. |
michael@0 | 1387 | MOZ_ASSERT(mMode == NORMAL); |
michael@0 | 1388 | mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex); |
michael@0 | 1389 | nsHtml5Speculation* speculation = |
michael@0 | 1390 | new nsHtml5Speculation(mFirstBuffer, |
michael@0 | 1391 | mFirstBuffer->getStart(), |
michael@0 | 1392 | mTokenizer->getLineNumber(), |
michael@0 | 1393 | mTreeBuilder->newSnapshot()); |
michael@0 | 1394 | mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(), |
michael@0 | 1395 | speculation->GetStartLineNumber()); |
michael@0 | 1396 | FlushTreeOpsAndDisarmTimer(); |
michael@0 | 1397 | mTreeBuilder->SetOpSink(speculation); |
michael@0 | 1398 | mSpeculations.AppendElement(speculation); // adopts the pointer |
michael@0 | 1399 | mSpeculating = true; |
michael@0 | 1400 | } |
michael@0 | 1401 | if (IsTerminatedOrInterrupted()) { |
michael@0 | 1402 | return; |
michael@0 | 1403 | } |
michael@0 | 1404 | } |
michael@0 | 1405 | continue; |
michael@0 | 1406 | } |
michael@0 | 1407 | } |
michael@0 | 1408 | |
michael@0 | 1409 | class nsHtml5StreamParserContinuation : public nsRunnable |
michael@0 | 1410 | { |
michael@0 | 1411 | private: |
michael@0 | 1412 | nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
michael@0 | 1413 | public: |
michael@0 | 1414 | nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser) |
michael@0 | 1415 | : mStreamParser(aStreamParser) |
michael@0 | 1416 | {} |
michael@0 | 1417 | NS_IMETHODIMP Run() |
michael@0 | 1418 | { |
michael@0 | 1419 | mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
michael@0 | 1420 | mStreamParser->Uninterrupt(); |
michael@0 | 1421 | mStreamParser->ParseAvailableData(); |
michael@0 | 1422 | return NS_OK; |
michael@0 | 1423 | } |
michael@0 | 1424 | }; |
michael@0 | 1425 | |
michael@0 | 1426 | void |
michael@0 | 1427 | nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer, |
michael@0 | 1428 | nsHtml5TreeBuilder* aTreeBuilder, |
michael@0 | 1429 | bool aLastWasCR) |
michael@0 | 1430 | { |
michael@0 | 1431 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 1432 | NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML), |
michael@0 | 1433 | "ContinueAfterScripts called in view source mode!"); |
michael@0 | 1434 | if (NS_FAILED(mExecutor->IsBroken())) { |
michael@0 | 1435 | return; |
michael@0 | 1436 | } |
michael@0 | 1437 | #ifdef DEBUG |
michael@0 | 1438 | mExecutor->AssertStageEmpty(); |
michael@0 | 1439 | #endif |
michael@0 | 1440 | bool speculationFailed = false; |
michael@0 | 1441 | { |
michael@0 | 1442 | mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex); |
michael@0 | 1443 | if (mSpeculations.IsEmpty()) { |
michael@0 | 1444 | NS_NOTREACHED("ContinueAfterScripts called without speculations."); |
michael@0 | 1445 | return; |
michael@0 | 1446 | } |
michael@0 | 1447 | nsHtml5Speculation* speculation = mSpeculations.ElementAt(0); |
michael@0 | 1448 | if (aLastWasCR || |
michael@0 | 1449 | !aTokenizer->isInDataState() || |
michael@0 | 1450 | !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) { |
michael@0 | 1451 | speculationFailed = true; |
michael@0 | 1452 | // We've got a failed speculation :-( |
michael@0 | 1453 | Interrupt(); // Make the parser thread release the tokenizer mutex sooner |
michael@0 | 1454 | // now fall out of the speculationAutoLock into the tokenizerAutoLock block |
michael@0 | 1455 | } else { |
michael@0 | 1456 | // We've got a successful speculation! |
michael@0 | 1457 | if (mSpeculations.Length() > 1) { |
michael@0 | 1458 | // the first speculation isn't the current speculation, so there's |
michael@0 | 1459 | // no need to bother the parser thread. |
michael@0 | 1460 | speculation->FlushToSink(mExecutor); |
michael@0 | 1461 | NS_ASSERTION(!mExecutor->IsScriptExecuting(), |
michael@0 | 1462 | "ParseUntilBlocked() was supposed to ensure we don't come " |
michael@0 | 1463 | "here when scripts are executing."); |
michael@0 | 1464 | NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if " |
michael@0 | 1465 | "RunFlushLoop() didn't call ParseUntilBlocked() which is the " |
michael@0 | 1466 | "only caller of this method?"); |
michael@0 | 1467 | mSpeculations.RemoveElementAt(0); |
michael@0 | 1468 | return; |
michael@0 | 1469 | } |
michael@0 | 1470 | // else |
michael@0 | 1471 | Interrupt(); // Make the parser thread release the tokenizer mutex sooner |
michael@0 | 1472 | |
michael@0 | 1473 | // now fall through |
michael@0 | 1474 | // the first speculation is the current speculation. Need to |
michael@0 | 1475 | // release the the speculation mutex and acquire the tokenizer |
michael@0 | 1476 | // mutex. (Just acquiring the other mutex here would deadlock) |
michael@0 | 1477 | } |
michael@0 | 1478 | } |
michael@0 | 1479 | { |
michael@0 | 1480 | mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex); |
michael@0 | 1481 | #ifdef DEBUG |
michael@0 | 1482 | { |
michael@0 | 1483 | nsCOMPtr<nsIThread> mainThread; |
michael@0 | 1484 | NS_GetMainThread(getter_AddRefs(mainThread)); |
michael@0 | 1485 | mAtomTable.SetPermittedLookupThread(mainThread); |
michael@0 | 1486 | } |
michael@0 | 1487 | #endif |
michael@0 | 1488 | // In principle, the speculation mutex should be acquired here, |
michael@0 | 1489 | // but there's no point, because the parser thread only acquires it |
michael@0 | 1490 | // when it has also acquired the tokenizer mutex and we are already |
michael@0 | 1491 | // holding the tokenizer mutex. |
michael@0 | 1492 | if (speculationFailed) { |
michael@0 | 1493 | // Rewind the stream |
michael@0 | 1494 | mAtEOF = false; |
michael@0 | 1495 | nsHtml5Speculation* speculation = mSpeculations.ElementAt(0); |
michael@0 | 1496 | mFirstBuffer = speculation->GetBuffer(); |
michael@0 | 1497 | mFirstBuffer->setStart(speculation->GetStart()); |
michael@0 | 1498 | mTokenizer->setLineNumber(speculation->GetStartLineNumber()); |
michael@0 | 1499 | |
michael@0 | 1500 | nsContentUtils::ReportToConsole(nsIScriptError::warningFlag, |
michael@0 | 1501 | NS_LITERAL_CSTRING("DOM Events"), |
michael@0 | 1502 | mExecutor->GetDocument(), |
michael@0 | 1503 | nsContentUtils::eDOM_PROPERTIES, |
michael@0 | 1504 | "SpeculationFailed", |
michael@0 | 1505 | nullptr, 0, |
michael@0 | 1506 | nullptr, |
michael@0 | 1507 | EmptyString(), |
michael@0 | 1508 | speculation->GetStartLineNumber()); |
michael@0 | 1509 | |
michael@0 | 1510 | nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next; |
michael@0 | 1511 | while (buffer) { |
michael@0 | 1512 | buffer->setStart(0); |
michael@0 | 1513 | buffer = buffer->next; |
michael@0 | 1514 | } |
michael@0 | 1515 | |
michael@0 | 1516 | mSpeculations.Clear(); // potentially a huge number of destructors |
michael@0 | 1517 | // run here synchronously on the main thread... |
michael@0 | 1518 | |
michael@0 | 1519 | mTreeBuilder->flushCharacters(); // empty the pending buffer |
michael@0 | 1520 | mTreeBuilder->ClearOps(); // now get rid of the failed ops |
michael@0 | 1521 | |
michael@0 | 1522 | mTreeBuilder->SetOpSink(mExecutor->GetStage()); |
michael@0 | 1523 | mExecutor->StartReadingFromStage(); |
michael@0 | 1524 | mSpeculating = false; |
michael@0 | 1525 | |
michael@0 | 1526 | // Copy state over |
michael@0 | 1527 | mLastWasCR = aLastWasCR; |
michael@0 | 1528 | mTokenizer->loadState(aTokenizer); |
michael@0 | 1529 | mTreeBuilder->loadState(aTreeBuilder, &mAtomTable); |
michael@0 | 1530 | } else { |
michael@0 | 1531 | // We've got a successful speculation and at least a moment ago it was |
michael@0 | 1532 | // the current speculation |
michael@0 | 1533 | mSpeculations.ElementAt(0)->FlushToSink(mExecutor); |
michael@0 | 1534 | NS_ASSERTION(!mExecutor->IsScriptExecuting(), |
michael@0 | 1535 | "ParseUntilBlocked() was supposed to ensure we don't come " |
michael@0 | 1536 | "here when scripts are executing."); |
michael@0 | 1537 | NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if " |
michael@0 | 1538 | "RunFlushLoop() didn't call ParseUntilBlocked() which is the " |
michael@0 | 1539 | "only caller of this method?"); |
michael@0 | 1540 | mSpeculations.RemoveElementAt(0); |
michael@0 | 1541 | if (mSpeculations.IsEmpty()) { |
michael@0 | 1542 | // yes, it was still the only speculation. Now stop speculating |
michael@0 | 1543 | // However, before telling the executor to read from stage, flush |
michael@0 | 1544 | // any pending ops straight to the executor, because otherwise |
michael@0 | 1545 | // they remain unflushed until we get more data from the network. |
michael@0 | 1546 | mTreeBuilder->SetOpSink(mExecutor); |
michael@0 | 1547 | mTreeBuilder->Flush(true); |
michael@0 | 1548 | mTreeBuilder->SetOpSink(mExecutor->GetStage()); |
michael@0 | 1549 | mExecutor->StartReadingFromStage(); |
michael@0 | 1550 | mSpeculating = false; |
michael@0 | 1551 | } |
michael@0 | 1552 | } |
michael@0 | 1553 | nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this); |
michael@0 | 1554 | if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
michael@0 | 1555 | NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation"); |
michael@0 | 1556 | } |
michael@0 | 1557 | // A stream event might run before this event runs, but that's harmless. |
michael@0 | 1558 | #ifdef DEBUG |
michael@0 | 1559 | mAtomTable.SetPermittedLookupThread(mThread); |
michael@0 | 1560 | #endif |
michael@0 | 1561 | } |
michael@0 | 1562 | } |
michael@0 | 1563 | |
michael@0 | 1564 | void |
michael@0 | 1565 | nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch() |
michael@0 | 1566 | { |
michael@0 | 1567 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 1568 | nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this); |
michael@0 | 1569 | if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
michael@0 | 1570 | NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation"); |
michael@0 | 1571 | } |
michael@0 | 1572 | } |
michael@0 | 1573 | |
michael@0 | 1574 | class nsHtml5TimerKungFu : public nsRunnable |
michael@0 | 1575 | { |
michael@0 | 1576 | private: |
michael@0 | 1577 | nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
michael@0 | 1578 | public: |
michael@0 | 1579 | nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser) |
michael@0 | 1580 | : mStreamParser(aStreamParser) |
michael@0 | 1581 | {} |
michael@0 | 1582 | NS_IMETHODIMP Run() |
michael@0 | 1583 | { |
michael@0 | 1584 | if (mStreamParser->mFlushTimer) { |
michael@0 | 1585 | mStreamParser->mFlushTimer->Cancel(); |
michael@0 | 1586 | mStreamParser->mFlushTimer = nullptr; |
michael@0 | 1587 | } |
michael@0 | 1588 | return NS_OK; |
michael@0 | 1589 | } |
michael@0 | 1590 | }; |
michael@0 | 1591 | |
michael@0 | 1592 | void |
michael@0 | 1593 | nsHtml5StreamParser::DropTimer() |
michael@0 | 1594 | { |
michael@0 | 1595 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
michael@0 | 1596 | /* |
michael@0 | 1597 | * Simply nulling out the timer wouldn't work, because if the timer is |
michael@0 | 1598 | * armed, it needs to be canceled first. Simply canceling it first wouldn't |
michael@0 | 1599 | * work, because nsTimerImpl::Cancel is not safe for calling from outside |
michael@0 | 1600 | * the thread where nsTimerImpl::Fire would run. It's not safe to |
michael@0 | 1601 | * dispatch a runnable to cancel the timer from the destructor of this |
michael@0 | 1602 | * class, because the timer has a weak (void*) pointer back to this instance |
michael@0 | 1603 | * of the stream parser and having the timer fire before the runnable |
michael@0 | 1604 | * cancels it would make the timer access a deleted object. |
michael@0 | 1605 | * |
michael@0 | 1606 | * This DropTimer method addresses these issues. This method must be called |
michael@0 | 1607 | * on the main thread before the destructor of this class is reached. |
michael@0 | 1608 | * The nsHtml5TimerKungFu object has an nsHtml5RefPtr that addrefs this |
michael@0 | 1609 | * stream parser object to keep it alive until the runnable is done. |
michael@0 | 1610 | * The runnable cancels the timer on the parser thread, drops the timer |
michael@0 | 1611 | * and lets nsHtml5RefPtr send a runnable back to the main thread to |
michael@0 | 1612 | * release the stream parser. |
michael@0 | 1613 | */ |
michael@0 | 1614 | if (mFlushTimer) { |
michael@0 | 1615 | nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this); |
michael@0 | 1616 | if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
michael@0 | 1617 | NS_WARNING("Failed to dispatch TimerKungFu event"); |
michael@0 | 1618 | } |
michael@0 | 1619 | } |
michael@0 | 1620 | } |
michael@0 | 1621 | |
michael@0 | 1622 | // Using a static, because the method name Notify is taken by the chardet |
michael@0 | 1623 | // callback. |
michael@0 | 1624 | void |
michael@0 | 1625 | nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure) |
michael@0 | 1626 | { |
michael@0 | 1627 | (static_cast<nsHtml5StreamParser*> (aClosure))->TimerFlush(); |
michael@0 | 1628 | } |
michael@0 | 1629 | |
michael@0 | 1630 | void |
michael@0 | 1631 | nsHtml5StreamParser::TimerFlush() |
michael@0 | 1632 | { |
michael@0 | 1633 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1634 | mozilla::MutexAutoLock autoLock(mTokenizerMutex); |
michael@0 | 1635 | |
michael@0 | 1636 | NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating."); |
michael@0 | 1637 | |
michael@0 | 1638 | // The timer fired if we got here. No need to cancel it. Mark it as |
michael@0 | 1639 | // not armed, though. |
michael@0 | 1640 | mFlushTimerArmed = false; |
michael@0 | 1641 | |
michael@0 | 1642 | mFlushTimerEverFired = true; |
michael@0 | 1643 | |
michael@0 | 1644 | if (IsTerminatedOrInterrupted()) { |
michael@0 | 1645 | return; |
michael@0 | 1646 | } |
michael@0 | 1647 | |
michael@0 | 1648 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
michael@0 | 1649 | mTreeBuilder->Flush(); // delete useless ops |
michael@0 | 1650 | if (mTokenizer->FlushViewSource()) { |
michael@0 | 1651 | if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
michael@0 | 1652 | NS_WARNING("failed to dispatch executor flush event"); |
michael@0 | 1653 | } |
michael@0 | 1654 | } |
michael@0 | 1655 | } else { |
michael@0 | 1656 | // we aren't speculating and we don't know when new data is |
michael@0 | 1657 | // going to arrive. Send data to the main thread. |
michael@0 | 1658 | if (mTreeBuilder->Flush(true)) { |
michael@0 | 1659 | if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
michael@0 | 1660 | NS_WARNING("failed to dispatch executor flush event"); |
michael@0 | 1661 | } |
michael@0 | 1662 | } |
michael@0 | 1663 | } |
michael@0 | 1664 | } |
michael@0 | 1665 | |
michael@0 | 1666 | void |
michael@0 | 1667 | nsHtml5StreamParser::MarkAsBroken(nsresult aRv) |
michael@0 | 1668 | { |
michael@0 | 1669 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
michael@0 | 1670 | mTokenizerMutex.AssertCurrentThreadOwns(); |
michael@0 | 1671 | |
michael@0 | 1672 | Terminate(); |
michael@0 | 1673 | mTreeBuilder->MarkAsBroken(aRv); |
michael@0 | 1674 | mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false); |
michael@0 | 1675 | NS_ASSERTION(hadOps, "Should have had the markAsBroken op!"); |
michael@0 | 1676 | if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
michael@0 | 1677 | NS_WARNING("failed to dispatch executor flush event"); |
michael@0 | 1678 | } |
michael@0 | 1679 | } |