|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set sw=2 ts=2 et tw=79: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "mozilla/DebugOnly.h" |
|
8 |
|
9 #include "nsHtml5StreamParser.h" |
|
10 #include "nsContentUtils.h" |
|
11 #include "nsHtml5Tokenizer.h" |
|
12 #include "nsIHttpChannel.h" |
|
13 #include "nsHtml5Parser.h" |
|
14 #include "nsHtml5TreeBuilder.h" |
|
15 #include "nsHtml5AtomTable.h" |
|
16 #include "nsHtml5Module.h" |
|
17 #include "nsHtml5RefPtr.h" |
|
18 #include "nsIScriptError.h" |
|
19 #include "mozilla/Preferences.h" |
|
20 #include "nsHtml5Highlighter.h" |
|
21 #include "expat_config.h" |
|
22 #include "expat.h" |
|
23 #include "nsINestedURI.h" |
|
24 #include "nsCharsetSource.h" |
|
25 #include "nsIWyciwygChannel.h" |
|
26 #include "nsIThreadRetargetableRequest.h" |
|
27 #include "nsPrintfCString.h" |
|
28 #include "nsNetUtil.h" |
|
29 |
|
30 #include "mozilla/dom/EncodingUtils.h" |
|
31 |
|
32 using namespace mozilla; |
|
33 using mozilla::dom::EncodingUtils; |
|
34 |
|
35 int32_t nsHtml5StreamParser::sTimerInitialDelay = 120; |
|
36 int32_t nsHtml5StreamParser::sTimerSubsequentDelay = 120; |
|
37 |
|
38 // static |
|
39 void |
|
40 nsHtml5StreamParser::InitializeStatics() |
|
41 { |
|
42 Preferences::AddIntVarCache(&sTimerInitialDelay, |
|
43 "html5.flushtimer.initialdelay"); |
|
44 Preferences::AddIntVarCache(&sTimerSubsequentDelay, |
|
45 "html5.flushtimer.subsequentdelay"); |
|
46 } |
|
47 |
|
48 /* |
|
49 * Note that nsHtml5StreamParser implements cycle collecting AddRef and |
|
50 * Release. Therefore, nsHtml5StreamParser must never be refcounted from |
|
51 * the parser thread! |
|
52 * |
|
53 * To work around this limitation, runnables posted by the main thread to the |
|
54 * parser thread hold their reference to the stream parser in an |
|
55 * nsHtml5RefPtr. Upon creation, nsHtml5RefPtr addrefs the object it holds |
|
56 * just like a regular nsRefPtr. This is OK, since the creation of the |
|
57 * runnable and the nsHtml5RefPtr happens on the main thread. |
|
58 * |
|
59 * When the runnable is done on the parser thread, the destructor of |
|
60 * nsHtml5RefPtr runs there. It doesn't call Release on the held object |
|
61 * directly. Instead, it posts another runnable back to the main thread where |
|
62 * that runnable calls Release on the wrapped object. |
|
63 * |
|
64 * When posting runnables in the other direction, the runnables have to be |
|
65 * created on the main thread when nsHtml5StreamParser is instantiated and |
|
66 * held for the lifetime of the nsHtml5StreamParser. This works, because the |
|
67 * same runnable can be dispatched multiple times and currently runnables |
|
68 * posted from the parser thread to main thread don't need to wrap any |
|
69 * runnable-specific data. (In the other direction, the runnables most notably |
|
70 * wrap the byte data of the stream.) |
|
71 */ |
|
72 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser) |
|
73 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser) |
|
74 |
|
75 NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser) |
|
76 NS_INTERFACE_TABLE(nsHtml5StreamParser, |
|
77 nsICharsetDetectionObserver) |
|
78 NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser) |
|
79 NS_INTERFACE_MAP_END |
|
80 |
|
81 NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser) |
|
82 |
|
83 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser) |
|
84 tmp->DropTimer(); |
|
85 NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) |
|
86 NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest) |
|
87 NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner) |
|
88 tmp->mExecutorFlusher = nullptr; |
|
89 tmp->mLoadFlusher = nullptr; |
|
90 tmp->mExecutor = nullptr; |
|
91 NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet) |
|
92 NS_IMPL_CYCLE_COLLECTION_UNLINK_END |
|
93 |
|
94 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser) |
|
95 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) |
|
96 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest) |
|
97 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner) |
|
98 // hack: count the strongly owned edge wrapped in the runnable |
|
99 if (tmp->mExecutorFlusher) { |
|
100 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor"); |
|
101 cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor)); |
|
102 } |
|
103 // hack: count the strongly owned edge wrapped in the runnable |
|
104 if (tmp->mLoadFlusher) { |
|
105 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor"); |
|
106 cb.NoteXPCOMChild(static_cast<nsIContentSink*> (tmp->mExecutor)); |
|
107 } |
|
108 // hack: count self if held by mChardet |
|
109 if (tmp->mChardet) { |
|
110 NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver"); |
|
111 cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp)); |
|
112 } |
|
113 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END |
|
114 |
|
115 class nsHtml5ExecutorFlusher : public nsRunnable |
|
116 { |
|
117 private: |
|
118 nsRefPtr<nsHtml5TreeOpExecutor> mExecutor; |
|
119 public: |
|
120 nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor) |
|
121 : mExecutor(aExecutor) |
|
122 {} |
|
123 NS_IMETHODIMP Run() |
|
124 { |
|
125 if (!mExecutor->isInList()) { |
|
126 mExecutor->RunFlushLoop(); |
|
127 } |
|
128 return NS_OK; |
|
129 } |
|
130 }; |
|
131 |
|
132 class nsHtml5LoadFlusher : public nsRunnable |
|
133 { |
|
134 private: |
|
135 nsRefPtr<nsHtml5TreeOpExecutor> mExecutor; |
|
136 public: |
|
137 nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor) |
|
138 : mExecutor(aExecutor) |
|
139 {} |
|
140 NS_IMETHODIMP Run() |
|
141 { |
|
142 mExecutor->FlushSpeculativeLoads(); |
|
143 return NS_OK; |
|
144 } |
|
145 }; |
|
146 |
|
147 nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, |
|
148 nsHtml5Parser* aOwner, |
|
149 eParserMode aMode) |
|
150 : mFirstBuffer(nullptr) // Will be filled when starting |
|
151 , mLastBuffer(nullptr) // Will be filled when starting |
|
152 , mExecutor(aExecutor) |
|
153 , mTreeBuilder(new nsHtml5TreeBuilder((aMode == VIEW_SOURCE_HTML || |
|
154 aMode == VIEW_SOURCE_XML) ? |
|
155 nullptr : mExecutor->GetStage(), |
|
156 aMode == NORMAL ? |
|
157 mExecutor->GetStage() : nullptr)) |
|
158 , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML)) |
|
159 , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex") |
|
160 , mOwner(aOwner) |
|
161 , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex") |
|
162 , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex") |
|
163 , mThread(nsHtml5Module::GetStreamParserThread()) |
|
164 , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)) |
|
165 , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)) |
|
166 , mFlushTimer(do_CreateInstance("@mozilla.org/timer;1")) |
|
167 , mMode(aMode) |
|
168 { |
|
169 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
170 mFlushTimer->SetTarget(mThread); |
|
171 #ifdef DEBUG |
|
172 mAtomTable.SetPermittedLookupThread(mThread); |
|
173 #endif |
|
174 mTokenizer->setInterner(&mAtomTable); |
|
175 mTokenizer->setEncodingDeclarationHandler(this); |
|
176 |
|
177 if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) { |
|
178 nsHtml5Highlighter* highlighter = |
|
179 new nsHtml5Highlighter(mExecutor->GetStage()); |
|
180 mTokenizer->EnableViewSource(highlighter); // takes ownership |
|
181 mTreeBuilder->EnableViewSource(highlighter); // doesn't own |
|
182 } |
|
183 |
|
184 // Chardet instantiation adapted from nsDOMFile. |
|
185 // Chardet is initialized here even if it turns out to be useless |
|
186 // to make the chardet refcount its observer (nsHtml5StreamParser) |
|
187 // on the main thread. |
|
188 const nsAdoptingCString& detectorName = |
|
189 Preferences::GetLocalizedCString("intl.charset.detector"); |
|
190 if (!detectorName.IsEmpty()) { |
|
191 nsAutoCString detectorContractID; |
|
192 detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE); |
|
193 detectorContractID += detectorName; |
|
194 if ((mChardet = do_CreateInstance(detectorContractID.get()))) { |
|
195 (void) mChardet->Init(this); |
|
196 mFeedChardet = true; |
|
197 } |
|
198 } |
|
199 |
|
200 // There's a zeroing operator new for everything else |
|
201 } |
|
202 |
|
203 nsHtml5StreamParser::~nsHtml5StreamParser() |
|
204 { |
|
205 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
206 mTokenizer->end(); |
|
207 NS_ASSERTION(!mFlushTimer, "Flush timer was not dropped before dtor!"); |
|
208 #ifdef DEBUG |
|
209 mRequest = nullptr; |
|
210 mObserver = nullptr; |
|
211 mUnicodeDecoder = nullptr; |
|
212 mSniffingBuffer = nullptr; |
|
213 mMetaScanner = nullptr; |
|
214 mFirstBuffer = nullptr; |
|
215 mExecutor = nullptr; |
|
216 mTreeBuilder = nullptr; |
|
217 mTokenizer = nullptr; |
|
218 mOwner = nullptr; |
|
219 #endif |
|
220 } |
|
221 |
|
222 nsresult |
|
223 nsHtml5StreamParser::GetChannel(nsIChannel** aChannel) |
|
224 { |
|
225 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
226 return mRequest ? CallQueryInterface(mRequest, aChannel) : |
|
227 NS_ERROR_NOT_AVAILABLE; |
|
228 } |
|
229 |
|
230 NS_IMETHODIMP |
|
231 nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) |
|
232 { |
|
233 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
234 if (aConf == eBestAnswer || aConf == eSureAnswer) { |
|
235 mFeedChardet = false; // just in case |
|
236 nsAutoCString encoding; |
|
237 if (!EncodingUtils::FindEncodingForLabel(nsDependentCString(aCharset), |
|
238 encoding)) { |
|
239 return NS_OK; |
|
240 } |
|
241 if (encoding.EqualsLiteral("replacement")) { |
|
242 return NS_OK; |
|
243 } |
|
244 if (HasDecoder()) { |
|
245 if (mCharset.Equals(encoding)) { |
|
246 NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection, |
|
247 "Why are we running chardet at all?"); |
|
248 mCharsetSource = kCharsetFromAutoDetection; |
|
249 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
250 } else { |
|
251 // We've already committed to a decoder. Request a reload from the |
|
252 // docshell. |
|
253 mTreeBuilder->NeedsCharsetSwitchTo(encoding, |
|
254 kCharsetFromAutoDetection, |
|
255 0); |
|
256 FlushTreeOpsAndDisarmTimer(); |
|
257 Interrupt(); |
|
258 } |
|
259 } else { |
|
260 // Got a confident answer from the sniffing buffer. That code will |
|
261 // take care of setting up the decoder. |
|
262 mCharset.Assign(encoding); |
|
263 mCharsetSource = kCharsetFromAutoDetection; |
|
264 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
265 } |
|
266 } |
|
267 return NS_OK; |
|
268 } |
|
269 |
|
270 void |
|
271 nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) |
|
272 { |
|
273 if (aURL) { |
|
274 nsCOMPtr<nsIURI> temp; |
|
275 bool isViewSource; |
|
276 aURL->SchemeIs("view-source", &isViewSource); |
|
277 if (isViewSource) { |
|
278 nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL); |
|
279 nested->GetInnerURI(getter_AddRefs(temp)); |
|
280 } else { |
|
281 temp = aURL; |
|
282 } |
|
283 bool isData; |
|
284 temp->SchemeIs("data", &isData); |
|
285 if (isData) { |
|
286 // Avoid showing potentially huge data: URLs. The three last bytes are |
|
287 // UTF-8 for an ellipsis. |
|
288 mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6"); |
|
289 } else { |
|
290 temp->GetSpec(mViewSourceTitle); |
|
291 } |
|
292 } |
|
293 } |
|
294 |
|
295 nsresult |
|
296 nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const uint8_t* aFromSegment, // can be null |
|
297 uint32_t aCount, |
|
298 uint32_t* aWriteCount) |
|
299 { |
|
300 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
301 nsresult rv = NS_OK; |
|
302 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); |
|
303 if (mSniffingBuffer) { |
|
304 uint32_t writeCount; |
|
305 rv = WriteStreamBytes(mSniffingBuffer, mSniffingLength, &writeCount); |
|
306 NS_ENSURE_SUCCESS(rv, rv); |
|
307 mSniffingBuffer = nullptr; |
|
308 } |
|
309 mMetaScanner = nullptr; |
|
310 if (aFromSegment) { |
|
311 rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount); |
|
312 } |
|
313 return rv; |
|
314 } |
|
315 |
|
316 nsresult |
|
317 nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName) |
|
318 { |
|
319 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
320 mCharset.Assign(aDecoderCharsetName); |
|
321 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); |
|
322 mCharsetSource = kCharsetFromByteOrderMark; |
|
323 mFeedChardet = false; |
|
324 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
325 mSniffingBuffer = nullptr; |
|
326 mMetaScanner = nullptr; |
|
327 mBomState = BOM_SNIFFING_OVER; |
|
328 return NS_OK; |
|
329 } |
|
330 |
|
331 void |
|
332 nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment, |
|
333 uint32_t aCountToSniffingLimit) |
|
334 { |
|
335 // Avoid underspecified heuristic craziness for XHR |
|
336 if (mMode == LOAD_AS_DATA) { |
|
337 return; |
|
338 } |
|
339 // Make sure there's enough data. Require room for "<title></title>" |
|
340 if (mSniffingLength + aCountToSniffingLimit < 30) { |
|
341 return; |
|
342 } |
|
343 // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1 |
|
344 bool byteZero[2] = { false, false }; |
|
345 bool byteNonZero[2] = { false, false }; |
|
346 uint32_t i = 0; |
|
347 if (mSniffingBuffer) { |
|
348 for (; i < mSniffingLength; ++i) { |
|
349 if (mSniffingBuffer[i]) { |
|
350 if (byteNonZero[1 - (i % 2)]) { |
|
351 return; |
|
352 } |
|
353 byteNonZero[i % 2] = true; |
|
354 } else { |
|
355 if (byteZero[1 - (i % 2)]) { |
|
356 return; |
|
357 } |
|
358 byteZero[i % 2] = true; |
|
359 } |
|
360 } |
|
361 } |
|
362 if (aFromSegment) { |
|
363 for (uint32_t j = 0; j < aCountToSniffingLimit; ++j) { |
|
364 if (aFromSegment[j]) { |
|
365 if (byteNonZero[1 - ((i + j) % 2)]) { |
|
366 return; |
|
367 } |
|
368 byteNonZero[(i + j) % 2] = true; |
|
369 } else { |
|
370 if (byteZero[1 - ((i + j) % 2)]) { |
|
371 return; |
|
372 } |
|
373 byteZero[(i + j) % 2] = true; |
|
374 } |
|
375 } |
|
376 } |
|
377 |
|
378 if (byteNonZero[0]) { |
|
379 mCharset.Assign("UTF-16LE"); |
|
380 } else { |
|
381 mCharset.Assign("UTF-16BE"); |
|
382 } |
|
383 mCharsetSource = kCharsetFromIrreversibleAutoDetection; |
|
384 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
385 mFeedChardet = false; |
|
386 mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", |
|
387 true, |
|
388 0); |
|
389 |
|
390 } |
|
391 |
|
392 void |
|
393 nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) |
|
394 { |
|
395 if (aEncoding) { |
|
396 nsDependentString utf16(aEncoding); |
|
397 nsAutoCString utf8; |
|
398 CopyUTF16toUTF8(utf16, utf8); |
|
399 if (PreferredForInternalEncodingDecl(utf8)) { |
|
400 mCharset.Assign(utf8); |
|
401 mCharsetSource = kCharsetFromMetaTag; // closest for XML |
|
402 return; |
|
403 } |
|
404 // else the page declared an encoding Gecko doesn't support and we'd |
|
405 // end up defaulting to UTF-8 anyway. Might as well fall through here |
|
406 // right away and let the encoding be set to UTF-8 which we'd default to |
|
407 // anyway. |
|
408 } |
|
409 mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM |
|
410 mCharsetSource = kCharsetFromMetaTag; // means confident |
|
411 } |
|
412 |
|
413 // A separate user data struct is used instead of passing the |
|
414 // nsHtml5StreamParser instance as user data in order to avoid including |
|
415 // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts. |
|
416 // Using a separate user data struct also avoids bloating nsHtml5StreamParser |
|
417 // by one pointer. |
|
418 struct UserData { |
|
419 XML_Parser mExpat; |
|
420 nsHtml5StreamParser* mStreamParser; |
|
421 }; |
|
422 |
|
423 // Using no-namespace handler callbacks to avoid including expat.h in |
|
424 // nsHtml5StreamParser.h, since doing so would cause naming conclicts. |
|
425 static void |
|
426 HandleXMLDeclaration(void* aUserData, |
|
427 const XML_Char* aVersion, |
|
428 const XML_Char* aEncoding, |
|
429 int aStandalone) |
|
430 { |
|
431 UserData* ud = static_cast<UserData*>(aUserData); |
|
432 ud->mStreamParser->SetEncodingFromExpat( |
|
433 reinterpret_cast<const char16_t*>(aEncoding)); |
|
434 XML_StopParser(ud->mExpat, false); |
|
435 } |
|
436 |
|
437 static void |
|
438 HandleStartElement(void* aUserData, |
|
439 const XML_Char* aName, |
|
440 const XML_Char **aAtts) |
|
441 { |
|
442 UserData* ud = static_cast<UserData*>(aUserData); |
|
443 XML_StopParser(ud->mExpat, false); |
|
444 } |
|
445 |
|
446 static void |
|
447 HandleEndElement(void* aUserData, |
|
448 const XML_Char* aName) |
|
449 { |
|
450 UserData* ud = static_cast<UserData*>(aUserData); |
|
451 XML_StopParser(ud->mExpat, false); |
|
452 } |
|
453 |
|
454 static void |
|
455 HandleComment(void* aUserData, |
|
456 const XML_Char* aName) |
|
457 { |
|
458 UserData* ud = static_cast<UserData*>(aUserData); |
|
459 XML_StopParser(ud->mExpat, false); |
|
460 } |
|
461 |
|
462 static void |
|
463 HandleProcessingInstruction(void* aUserData, |
|
464 const XML_Char* aTarget, |
|
465 const XML_Char* aData) |
|
466 { |
|
467 UserData* ud = static_cast<UserData*>(aUserData); |
|
468 XML_StopParser(ud->mExpat, false); |
|
469 } |
|
470 |
|
471 nsresult |
|
472 nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be null |
|
473 uint32_t aCount, |
|
474 uint32_t* aWriteCount, |
|
475 uint32_t aCountToSniffingLimit) |
|
476 { |
|
477 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
478 NS_ASSERTION(mCharsetSource < kCharsetFromParentForced, |
|
479 "Should not finalize sniffing when using forced charset."); |
|
480 if (mMode == VIEW_SOURCE_XML) { |
|
481 static const XML_Memory_Handling_Suite memsuite = |
|
482 { |
|
483 (void *(*)(size_t))moz_xmalloc, |
|
484 (void *(*)(void *, size_t))moz_xrealloc, |
|
485 moz_free |
|
486 }; |
|
487 |
|
488 static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' }; |
|
489 |
|
490 static const char16_t kISO88591[] = |
|
491 { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' }; |
|
492 |
|
493 UserData ud; |
|
494 ud.mStreamParser = this; |
|
495 |
|
496 // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML |
|
497 // documents MUST begin with a BOM. We don't support EBCDIC and such. |
|
498 // Thus, at this point, what we have is garbage or something encoded using |
|
499 // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes |
|
500 // without throwing errors when bytes have the most significant bit set |
|
501 // and without triggering expat's unknown encoding code paths. This is |
|
502 // enough to be able to use expat to parse the XML declaration in order |
|
503 // to extract the encoding name from it. |
|
504 ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator); |
|
505 XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration); |
|
506 XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement); |
|
507 XML_SetCommentHandler(ud.mExpat, HandleComment); |
|
508 XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction); |
|
509 XML_SetUserData(ud.mExpat, static_cast<void*>(&ud)); |
|
510 |
|
511 XML_Status status = XML_STATUS_OK; |
|
512 |
|
513 // aFromSegment points to the data obtained from the current network |
|
514 // event. mSniffingBuffer (if it exists) contains the data obtained before |
|
515 // the current event. Thus, mSniffingLenth bytes of mSniffingBuffer |
|
516 // followed by aCountToSniffingLimit bytes from aFromSegment are the |
|
517 // first 1024 bytes of the file (or the file as a whole if the file is |
|
518 // 1024 bytes long or shorter). Thus, we parse both buffers, but if the |
|
519 // first call succeeds already, we skip parsing the second buffer. |
|
520 if (mSniffingBuffer) { |
|
521 status = XML_Parse(ud.mExpat, |
|
522 reinterpret_cast<const char*>(mSniffingBuffer.get()), |
|
523 mSniffingLength, |
|
524 false); |
|
525 } |
|
526 if (status == XML_STATUS_OK && |
|
527 mCharsetSource < kCharsetFromMetaTag && |
|
528 aFromSegment) { |
|
529 status = XML_Parse(ud.mExpat, |
|
530 reinterpret_cast<const char*>(aFromSegment), |
|
531 aCountToSniffingLimit, |
|
532 false); |
|
533 } |
|
534 XML_ParserFree(ud.mExpat); |
|
535 |
|
536 if (mCharsetSource < kCharsetFromMetaTag) { |
|
537 // Failed to get an encoding from the XML declaration. XML defaults |
|
538 // confidently to UTF-8 in this case. |
|
539 // It is also possible that the document has an XML declaration that is |
|
540 // longer than 1024 bytes, but that case is not worth worrying about. |
|
541 mCharset.AssignLiteral("UTF-8"); |
|
542 mCharsetSource = kCharsetFromMetaTag; // means confident |
|
543 } |
|
544 |
|
545 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
|
546 aCount, |
|
547 aWriteCount); |
|
548 } |
|
549 |
|
550 // meta scan failed. |
|
551 if (mCharsetSource >= kCharsetFromHintPrevDoc) { |
|
552 mFeedChardet = false; |
|
553 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); |
|
554 } |
|
555 // Check for BOMless UTF-16 with Basic |
|
556 // Latin content for compat with IE. See bug 631751. |
|
557 SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit); |
|
558 // the charset may have been set now |
|
559 // maybe try chardet now; |
|
560 if (mFeedChardet) { |
|
561 bool dontFeed; |
|
562 nsresult rv; |
|
563 if (mSniffingBuffer) { |
|
564 rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed); |
|
565 mFeedChardet = !dontFeed; |
|
566 NS_ENSURE_SUCCESS(rv, rv); |
|
567 } |
|
568 if (mFeedChardet && aFromSegment) { |
|
569 rv = mChardet->DoIt((const char*)aFromSegment, |
|
570 // Avoid buffer boundary-dependent behavior when |
|
571 // reparsing is forbidden. If reparse is forbidden, |
|
572 // act as if we only saw the first 1024 bytes. |
|
573 // When reparsing isn't forbidden, buffer boundaries |
|
574 // can have an effect on whether the page is loaded |
|
575 // once or twice. :-( |
|
576 mReparseForbidden ? aCountToSniffingLimit : aCount, |
|
577 &dontFeed); |
|
578 mFeedChardet = !dontFeed; |
|
579 NS_ENSURE_SUCCESS(rv, rv); |
|
580 } |
|
581 if (mFeedChardet && (!aFromSegment || mReparseForbidden)) { |
|
582 // mReparseForbidden is checked so that we get to use the sniffing |
|
583 // buffer with the best guess so far if we aren't allowed to guess |
|
584 // better later. |
|
585 mFeedChardet = false; |
|
586 rv = mChardet->Done(); |
|
587 NS_ENSURE_SUCCESS(rv, rv); |
|
588 } |
|
589 // fall thru; callback may have changed charset |
|
590 } |
|
591 if (mCharsetSource == kCharsetUninitialized) { |
|
592 // Hopefully this case is never needed, but dealing with it anyway |
|
593 mCharset.AssignLiteral("windows-1252"); |
|
594 mCharsetSource = kCharsetFromFallback; |
|
595 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
596 } else if (mMode == LOAD_AS_DATA && |
|
597 mCharsetSource == kCharsetFromFallback) { |
|
598 NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR"); |
|
599 NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR"); |
|
600 NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"), |
|
601 "XHR should default to UTF-8"); |
|
602 // Now mark charset source as non-weak to signal that we have a decision |
|
603 mCharsetSource = kCharsetFromDocTypeDefault; |
|
604 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
605 } |
|
606 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount); |
|
607 } |
|
608 |
|
609 nsresult |
|
610 nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment, |
|
611 uint32_t aCount, |
|
612 uint32_t* aWriteCount) |
|
613 { |
|
614 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
615 nsresult rv = NS_OK; |
|
616 uint32_t writeCount; |
|
617 |
|
618 // mCharset and mCharsetSource potentially have come from channel or higher |
|
619 // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them. |
|
620 // If we don't find a BOM, the previously set values of mCharset and |
|
621 // mCharsetSource are not modified by the BOM sniffing here. |
|
622 for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) { |
|
623 switch (mBomState) { |
|
624 case BOM_SNIFFING_NOT_STARTED: |
|
625 NS_ASSERTION(i == 0, "Bad BOM sniffing state."); |
|
626 switch (*aFromSegment) { |
|
627 case 0xEF: |
|
628 mBomState = SEEN_UTF_8_FIRST_BYTE; |
|
629 break; |
|
630 case 0xFF: |
|
631 mBomState = SEEN_UTF_16_LE_FIRST_BYTE; |
|
632 break; |
|
633 case 0xFE: |
|
634 mBomState = SEEN_UTF_16_BE_FIRST_BYTE; |
|
635 break; |
|
636 default: |
|
637 mBomState = BOM_SNIFFING_OVER; |
|
638 break; |
|
639 } |
|
640 break; |
|
641 case SEEN_UTF_16_LE_FIRST_BYTE: |
|
642 if (aFromSegment[i] == 0xFE) { |
|
643 rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form |
|
644 NS_ENSURE_SUCCESS(rv, rv); |
|
645 uint32_t count = aCount - (i + 1); |
|
646 rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
|
647 NS_ENSURE_SUCCESS(rv, rv); |
|
648 *aWriteCount = writeCount + (i + 1); |
|
649 return rv; |
|
650 } |
|
651 mBomState = BOM_SNIFFING_OVER; |
|
652 break; |
|
653 case SEEN_UTF_16_BE_FIRST_BYTE: |
|
654 if (aFromSegment[i] == 0xFF) { |
|
655 rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form |
|
656 NS_ENSURE_SUCCESS(rv, rv); |
|
657 uint32_t count = aCount - (i + 1); |
|
658 rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
|
659 NS_ENSURE_SUCCESS(rv, rv); |
|
660 *aWriteCount = writeCount + (i + 1); |
|
661 return rv; |
|
662 } |
|
663 mBomState = BOM_SNIFFING_OVER; |
|
664 break; |
|
665 case SEEN_UTF_8_FIRST_BYTE: |
|
666 if (aFromSegment[i] == 0xBB) { |
|
667 mBomState = SEEN_UTF_8_SECOND_BYTE; |
|
668 } else { |
|
669 mBomState = BOM_SNIFFING_OVER; |
|
670 } |
|
671 break; |
|
672 case SEEN_UTF_8_SECOND_BYTE: |
|
673 if (aFromSegment[i] == 0xBF) { |
|
674 rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form |
|
675 NS_ENSURE_SUCCESS(rv, rv); |
|
676 uint32_t count = aCount - (i + 1); |
|
677 rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
|
678 NS_ENSURE_SUCCESS(rv, rv); |
|
679 *aWriteCount = writeCount + (i + 1); |
|
680 return rv; |
|
681 } |
|
682 mBomState = BOM_SNIFFING_OVER; |
|
683 break; |
|
684 default: |
|
685 mBomState = BOM_SNIFFING_OVER; |
|
686 break; |
|
687 } |
|
688 } |
|
689 // if we get here, there either was no BOM or the BOM sniffing isn't complete |
|
690 // yet |
|
691 |
|
692 MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark, |
|
693 "Should not come here if BOM was found."); |
|
694 MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent, |
|
695 "kCharsetFromOtherComponent is for XSLT."); |
|
696 |
|
697 if (mBomState == BOM_SNIFFING_OVER && |
|
698 mCharsetSource == kCharsetFromChannel) { |
|
699 // There was no BOM and the charset came from channel. mCharset |
|
700 // still contains the charset from the channel as set by an |
|
701 // earlier call to SetDocumentCharset(), since we didn't find a BOM and |
|
702 // overwrite mCharset. (Note that if the user has overridden the charset, |
|
703 // we don't come here but check <meta> for XSS-dangerous charsets first.) |
|
704 mFeedChardet = false; |
|
705 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
706 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
|
707 aCount, aWriteCount); |
|
708 } |
|
709 |
|
710 if (!mMetaScanner && (mMode == NORMAL || |
|
711 mMode == VIEW_SOURCE_HTML || |
|
712 mMode == LOAD_AS_DATA)) { |
|
713 mMetaScanner = new nsHtml5MetaScanner(); |
|
714 } |
|
715 |
|
716 if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) { |
|
717 // this is the last buffer |
|
718 uint32_t countToSniffingLimit = |
|
719 NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength; |
|
720 if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { |
|
721 nsHtml5ByteReadable readable(aFromSegment, aFromSegment + |
|
722 countToSniffingLimit); |
|
723 nsAutoCString encoding; |
|
724 mMetaScanner->sniff(&readable, encoding); |
|
725 if (!encoding.IsEmpty()) { |
|
726 // meta scan successful; honor overrides unless meta is XSS-dangerous |
|
727 if ((mCharsetSource == kCharsetFromParentForced || |
|
728 mCharsetSource == kCharsetFromUserForced) && |
|
729 EncodingUtils::IsAsciiCompatible(encoding)) { |
|
730 // Honor override |
|
731 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
|
732 aFromSegment, aCount, aWriteCount); |
|
733 } |
|
734 mCharset.Assign(encoding); |
|
735 mCharsetSource = kCharsetFromMetaPrescan; |
|
736 mFeedChardet = false; |
|
737 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
738 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
|
739 aFromSegment, aCount, aWriteCount); |
|
740 } |
|
741 } |
|
742 if (mCharsetSource == kCharsetFromParentForced || |
|
743 mCharsetSource == kCharsetFromUserForced) { |
|
744 // meta not found, honor override |
|
745 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
|
746 aFromSegment, aCount, aWriteCount); |
|
747 } |
|
748 return FinalizeSniffing(aFromSegment, aCount, aWriteCount, |
|
749 countToSniffingLimit); |
|
750 } |
|
751 |
|
752 // not the last buffer |
|
753 if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { |
|
754 nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount); |
|
755 nsAutoCString encoding; |
|
756 mMetaScanner->sniff(&readable, encoding); |
|
757 if (!encoding.IsEmpty()) { |
|
758 // meta scan successful; honor overrides unless meta is XSS-dangerous |
|
759 if ((mCharsetSource == kCharsetFromParentForced || |
|
760 mCharsetSource == kCharsetFromUserForced) && |
|
761 EncodingUtils::IsAsciiCompatible(encoding)) { |
|
762 // Honor override |
|
763 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
|
764 aCount, aWriteCount); |
|
765 } |
|
766 mCharset.Assign(encoding); |
|
767 mCharsetSource = kCharsetFromMetaPrescan; |
|
768 mFeedChardet = false; |
|
769 mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource); |
|
770 return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, |
|
771 aCount, aWriteCount); |
|
772 } |
|
773 } |
|
774 |
|
775 if (!mSniffingBuffer) { |
|
776 const mozilla::fallible_t fallible = mozilla::fallible_t(); |
|
777 mSniffingBuffer = new (fallible) |
|
778 uint8_t[NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE]; |
|
779 if (!mSniffingBuffer) { |
|
780 return NS_ERROR_OUT_OF_MEMORY; |
|
781 } |
|
782 } |
|
783 memcpy(mSniffingBuffer + mSniffingLength, aFromSegment, aCount); |
|
784 mSniffingLength += aCount; |
|
785 *aWriteCount = aCount; |
|
786 return NS_OK; |
|
787 } |
|
788 |
|
789 nsresult |
|
790 nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment, |
|
791 uint32_t aCount, |
|
792 uint32_t* aWriteCount) |
|
793 { |
|
794 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
795 // mLastBuffer should always point to a buffer of the size |
|
796 // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE. |
|
797 if (!mLastBuffer) { |
|
798 NS_WARNING("mLastBuffer should not be null!"); |
|
799 MarkAsBroken(NS_ERROR_NULL_POINTER); |
|
800 return NS_ERROR_NULL_POINTER; |
|
801 } |
|
802 if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) { |
|
803 nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
|
804 nsHtml5OwningUTF16Buffer::FalliblyCreate( |
|
805 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
|
806 if (!newBuf) { |
|
807 return NS_ERROR_OUT_OF_MEMORY; |
|
808 } |
|
809 mLastBuffer = (mLastBuffer->next = newBuf.forget()); |
|
810 } |
|
811 int32_t totalByteCount = 0; |
|
812 for (;;) { |
|
813 int32_t end = mLastBuffer->getEnd(); |
|
814 int32_t byteCount = aCount - totalByteCount; |
|
815 int32_t utf16Count = NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE - end; |
|
816 |
|
817 NS_ASSERTION(utf16Count, "Trying to convert into a buffer with no free space!"); |
|
818 // byteCount may be zero to force the decoder to output a pending surrogate |
|
819 // pair. |
|
820 |
|
821 nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count); |
|
822 MOZ_ASSERT(NS_SUCCEEDED(convResult)); |
|
823 |
|
824 end += utf16Count; |
|
825 mLastBuffer->setEnd(end); |
|
826 totalByteCount += byteCount; |
|
827 aFromSegment += byteCount; |
|
828 |
|
829 NS_ASSERTION(end <= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE, |
|
830 "The Unicode decoder wrote too much data."); |
|
831 NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes."); |
|
832 |
|
833 if (convResult == NS_PARTIAL_MORE_OUTPUT) { |
|
834 nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
|
835 nsHtml5OwningUTF16Buffer::FalliblyCreate( |
|
836 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
|
837 if (!newBuf) { |
|
838 return NS_ERROR_OUT_OF_MEMORY; |
|
839 } |
|
840 mLastBuffer = (mLastBuffer->next = newBuf.forget()); |
|
841 // All input may have been consumed if there is a pending surrogate pair |
|
842 // that doesn't fit in the output buffer. Loop back to push a zero-length |
|
843 // input to the decoder in that case. |
|
844 } else { |
|
845 NS_ASSERTION(totalByteCount == (int32_t)aCount, |
|
846 "The Unicode decoder consumed the wrong number of bytes."); |
|
847 *aWriteCount = (uint32_t)totalByteCount; |
|
848 return NS_OK; |
|
849 } |
|
850 } |
|
851 } |
|
852 |
|
853 nsresult |
|
854 nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext) |
|
855 { |
|
856 NS_PRECONDITION(STREAM_NOT_STARTED == mStreamState, |
|
857 "Got OnStartRequest when the stream had already started."); |
|
858 NS_PRECONDITION(!mExecutor->HasStarted(), |
|
859 "Got OnStartRequest at the wrong stage in the executor life cycle."); |
|
860 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
861 if (mObserver) { |
|
862 mObserver->OnStartRequest(aRequest, aContext); |
|
863 } |
|
864 mRequest = aRequest; |
|
865 |
|
866 mStreamState = STREAM_BEING_READ; |
|
867 |
|
868 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
|
869 mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); |
|
870 } |
|
871 |
|
872 // For View Source, the parser should run with scripts "enabled" if a normal |
|
873 // load would have scripts enabled. |
|
874 bool scriptingEnabled = mMode == LOAD_AS_DATA ? |
|
875 false : mExecutor->IsScriptEnabled(); |
|
876 mOwner->StartTokenizer(scriptingEnabled); |
|
877 |
|
878 bool isSrcdoc = false; |
|
879 nsCOMPtr<nsIChannel> channel; |
|
880 nsresult rv = GetChannel(getter_AddRefs(channel)); |
|
881 if (NS_SUCCEEDED(rv)) { |
|
882 isSrcdoc = NS_IsSrcdocChannel(channel); |
|
883 } |
|
884 mTreeBuilder->setIsSrcdocDocument(isSrcdoc); |
|
885 mTreeBuilder->setScriptingEnabled(scriptingEnabled); |
|
886 mTreeBuilder->SetPreventScriptExecution(!((mMode == NORMAL) && |
|
887 scriptingEnabled)); |
|
888 mTokenizer->start(); |
|
889 mExecutor->Start(); |
|
890 mExecutor->StartReadingFromStage(); |
|
891 |
|
892 if (mMode == PLAIN_TEXT) { |
|
893 mTreeBuilder->StartPlainText(); |
|
894 mTokenizer->StartPlainText(); |
|
895 } else if (mMode == VIEW_SOURCE_PLAIN) { |
|
896 mTreeBuilder->StartPlainTextViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle)); |
|
897 mTokenizer->StartPlainText(); |
|
898 } |
|
899 |
|
900 /* |
|
901 * If you move the following line, be very careful not to cause |
|
902 * WillBuildModel to be called before the document has had its |
|
903 * script global object set. |
|
904 */ |
|
905 rv = mExecutor->WillBuildModel(eDTDMode_unknown); |
|
906 NS_ENSURE_SUCCESS(rv, rv); |
|
907 |
|
908 nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
|
909 nsHtml5OwningUTF16Buffer::FalliblyCreate( |
|
910 NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
|
911 if (!newBuf) { |
|
912 // marks this stream parser as terminated, |
|
913 // which prevents entry to code paths that |
|
914 // would use mFirstBuffer or mLastBuffer. |
|
915 return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
|
916 } |
|
917 NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?"); |
|
918 NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?"); |
|
919 mFirstBuffer = mLastBuffer = newBuf; |
|
920 |
|
921 rv = NS_OK; |
|
922 |
|
923 // The line below means that the encoding can end up being wrong if |
|
924 // a view-source URL is loaded without having the encoding hint from a |
|
925 // previous normal load in the history. |
|
926 mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT); |
|
927 |
|
928 nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv)); |
|
929 if (NS_SUCCEEDED(rv)) { |
|
930 nsAutoCString method; |
|
931 httpChannel->GetRequestMethod(method); |
|
932 // XXX does Necko have a way to renavigate POST, etc. without hitting |
|
933 // the network? |
|
934 if (!method.EqualsLiteral("GET")) { |
|
935 // This is the old Gecko behavior but the HTML5 spec disagrees. |
|
936 // Don't reparse on POST. |
|
937 mReparseForbidden = true; |
|
938 mFeedChardet = false; // can't restart anyway |
|
939 } |
|
940 |
|
941 // Attempt to retarget delivery of data (via OnDataAvailable) to the parser |
|
942 // thread, rather than through the main thread. |
|
943 nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest = |
|
944 do_QueryInterface(mRequest); |
|
945 if (threadRetargetableRequest) { |
|
946 threadRetargetableRequest->RetargetDeliveryTo(mThread); |
|
947 } |
|
948 } |
|
949 |
|
950 if (mCharsetSource == kCharsetFromParentFrame) { |
|
951 // Remember this in case chardet overwrites mCharsetSource |
|
952 mInitialEncodingWasFromParentFrame = true; |
|
953 } |
|
954 |
|
955 if (mCharsetSource >= kCharsetFromAutoDetection) { |
|
956 mFeedChardet = false; |
|
957 } |
|
958 |
|
959 nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest)); |
|
960 if (!wyciwygChannel) { |
|
961 // we aren't ready to commit to an encoding yet |
|
962 // leave converter uninstantiated for now |
|
963 return NS_OK; |
|
964 } |
|
965 |
|
966 // We are reloading a document.open()ed doc. |
|
967 mReparseForbidden = true; |
|
968 mFeedChardet = false; |
|
969 |
|
970 // Instantiate the converter here to avoid BOM sniffing. |
|
971 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); |
|
972 return NS_OK; |
|
973 } |
|
974 |
|
975 nsresult |
|
976 nsHtml5StreamParser::CheckListenerChain() |
|
977 { |
|
978 NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!"); |
|
979 if (!mObserver) { |
|
980 return NS_OK; |
|
981 } |
|
982 nsresult rv; |
|
983 nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable = |
|
984 do_QueryInterface(mObserver, &rv); |
|
985 if (NS_SUCCEEDED(rv) && retargetable) { |
|
986 rv = retargetable->CheckListenerChain(); |
|
987 } |
|
988 return rv; |
|
989 } |
|
990 |
|
991 void |
|
992 nsHtml5StreamParser::DoStopRequest() |
|
993 { |
|
994 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
995 NS_PRECONDITION(STREAM_BEING_READ == mStreamState, |
|
996 "Stream ended without being open."); |
|
997 mTokenizerMutex.AssertCurrentThreadOwns(); |
|
998 |
|
999 if (IsTerminated()) { |
|
1000 return; |
|
1001 } |
|
1002 |
|
1003 mStreamState = STREAM_ENDED; |
|
1004 |
|
1005 if (!mUnicodeDecoder) { |
|
1006 uint32_t writeCount; |
|
1007 nsresult rv; |
|
1008 if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) { |
|
1009 MarkAsBroken(rv); |
|
1010 return; |
|
1011 } |
|
1012 } else if (mFeedChardet) { |
|
1013 mChardet->Done(); |
|
1014 } |
|
1015 |
|
1016 if (IsTerminatedOrInterrupted()) { |
|
1017 return; |
|
1018 } |
|
1019 |
|
1020 ParseAvailableData(); |
|
1021 } |
|
1022 |
|
1023 class nsHtml5RequestStopper : public nsRunnable |
|
1024 { |
|
1025 private: |
|
1026 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
|
1027 public: |
|
1028 nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser) |
|
1029 : mStreamParser(aStreamParser) |
|
1030 {} |
|
1031 NS_IMETHODIMP Run() |
|
1032 { |
|
1033 mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
|
1034 mStreamParser->DoStopRequest(); |
|
1035 return NS_OK; |
|
1036 } |
|
1037 }; |
|
1038 |
|
1039 nsresult |
|
1040 nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest, |
|
1041 nsISupports* aContext, |
|
1042 nsresult status) |
|
1043 { |
|
1044 NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream."); |
|
1045 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
1046 if (mObserver) { |
|
1047 mObserver->OnStopRequest(aRequest, aContext, status); |
|
1048 } |
|
1049 nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this); |
|
1050 if (NS_FAILED(mThread->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) { |
|
1051 NS_WARNING("Dispatching StopRequest event failed."); |
|
1052 } |
|
1053 return NS_OK; |
|
1054 } |
|
1055 |
|
1056 void |
|
1057 nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength) |
|
1058 { |
|
1059 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1060 NS_PRECONDITION(STREAM_BEING_READ == mStreamState, |
|
1061 "DoDataAvailable called when stream not open."); |
|
1062 mTokenizerMutex.AssertCurrentThreadOwns(); |
|
1063 |
|
1064 if (IsTerminated()) { |
|
1065 return; |
|
1066 } |
|
1067 |
|
1068 uint32_t writeCount; |
|
1069 nsresult rv; |
|
1070 if (HasDecoder()) { |
|
1071 if (mFeedChardet) { |
|
1072 bool dontFeed; |
|
1073 mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed); |
|
1074 mFeedChardet = !dontFeed; |
|
1075 } |
|
1076 rv = WriteStreamBytes(aBuffer, aLength, &writeCount); |
|
1077 } else { |
|
1078 rv = SniffStreamBytes(aBuffer, aLength, &writeCount); |
|
1079 } |
|
1080 if (NS_FAILED(rv)) { |
|
1081 MarkAsBroken(rv); |
|
1082 return; |
|
1083 } |
|
1084 NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed."); |
|
1085 |
|
1086 if (IsTerminatedOrInterrupted()) { |
|
1087 return; |
|
1088 } |
|
1089 |
|
1090 ParseAvailableData(); |
|
1091 |
|
1092 if (mFlushTimerArmed || mSpeculating) { |
|
1093 return; |
|
1094 } |
|
1095 |
|
1096 mFlushTimer->InitWithFuncCallback(nsHtml5StreamParser::TimerCallback, |
|
1097 static_cast<void*> (this), |
|
1098 mFlushTimerEverFired ? |
|
1099 sTimerInitialDelay : |
|
1100 sTimerSubsequentDelay, |
|
1101 nsITimer::TYPE_ONE_SHOT); |
|
1102 mFlushTimerArmed = true; |
|
1103 } |
|
1104 |
|
1105 class nsHtml5DataAvailable : public nsRunnable |
|
1106 { |
|
1107 private: |
|
1108 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
|
1109 nsAutoArrayPtr<uint8_t> mData; |
|
1110 uint32_t mLength; |
|
1111 public: |
|
1112 nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser, |
|
1113 uint8_t* aData, |
|
1114 uint32_t aLength) |
|
1115 : mStreamParser(aStreamParser) |
|
1116 , mData(aData) |
|
1117 , mLength(aLength) |
|
1118 {} |
|
1119 NS_IMETHODIMP Run() |
|
1120 { |
|
1121 mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
|
1122 mStreamParser->DoDataAvailable(mData, mLength); |
|
1123 return NS_OK; |
|
1124 } |
|
1125 }; |
|
1126 |
|
1127 nsresult |
|
1128 nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest, |
|
1129 nsISupports* aContext, |
|
1130 nsIInputStream* aInStream, |
|
1131 uint64_t aSourceOffset, |
|
1132 uint32_t aLength) |
|
1133 { |
|
1134 nsresult rv; |
|
1135 if (NS_FAILED(rv = mExecutor->IsBroken())) { |
|
1136 return rv; |
|
1137 } |
|
1138 |
|
1139 NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream."); |
|
1140 uint32_t totalRead; |
|
1141 // Main thread to parser thread dispatch requires copying to buffer first. |
|
1142 if (NS_IsMainThread()) { |
|
1143 const mozilla::fallible_t fallible = mozilla::fallible_t(); |
|
1144 nsAutoArrayPtr<uint8_t> data(new (fallible) uint8_t[aLength]); |
|
1145 if (!data) { |
|
1146 return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
|
1147 } |
|
1148 rv = aInStream->Read(reinterpret_cast<char*>(data.get()), |
|
1149 aLength, &totalRead); |
|
1150 NS_ENSURE_SUCCESS(rv, rv); |
|
1151 NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?"); |
|
1152 |
|
1153 nsCOMPtr<nsIRunnable> dataAvailable = new nsHtml5DataAvailable(this, |
|
1154 data.forget(), |
|
1155 totalRead); |
|
1156 if (NS_FAILED(mThread->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) { |
|
1157 NS_WARNING("Dispatching DataAvailable event failed."); |
|
1158 } |
|
1159 return rv; |
|
1160 } else { |
|
1161 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1162 mozilla::MutexAutoLock autoLock(mTokenizerMutex); |
|
1163 |
|
1164 // Read directly from response buffer. |
|
1165 rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength, |
|
1166 &totalRead); |
|
1167 if (NS_FAILED(rv)) { |
|
1168 NS_WARNING("Failed reading response data to parser"); |
|
1169 return rv; |
|
1170 } |
|
1171 return NS_OK; |
|
1172 } |
|
1173 } |
|
1174 |
|
1175 /* static */ |
|
1176 NS_METHOD |
|
1177 nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream, |
|
1178 void *aClosure, |
|
1179 const char *aFromSegment, |
|
1180 uint32_t aToOffset, |
|
1181 uint32_t aCount, |
|
1182 uint32_t *aWriteCount) |
|
1183 { |
|
1184 nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure); |
|
1185 |
|
1186 parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount); |
|
1187 // Assume DoDataAvailable consumed all available bytes. |
|
1188 *aWriteCount = aCount; |
|
1189 return NS_OK; |
|
1190 } |
|
1191 |
|
1192 bool |
|
1193 nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding) |
|
1194 { |
|
1195 nsAutoCString newEncoding; |
|
1196 if (!EncodingUtils::FindEncodingForLabel(aEncoding, newEncoding)) { |
|
1197 // the encoding name is bogus |
|
1198 mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", |
|
1199 true, |
|
1200 mTokenizer->getLineNumber()); |
|
1201 return false; |
|
1202 } |
|
1203 |
|
1204 if (newEncoding.EqualsLiteral("UTF-16BE") || |
|
1205 newEncoding.EqualsLiteral("UTF-16LE")) { |
|
1206 mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16", |
|
1207 true, |
|
1208 mTokenizer->getLineNumber()); |
|
1209 newEncoding.Assign("UTF-8"); |
|
1210 } |
|
1211 |
|
1212 if (newEncoding.EqualsLiteral("x-user-defined")) { |
|
1213 // WebKit/Blink hack for Indian and Armenian legacy sites |
|
1214 mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined", |
|
1215 true, |
|
1216 mTokenizer->getLineNumber()); |
|
1217 newEncoding.Assign("windows-1252"); |
|
1218 } |
|
1219 |
|
1220 if (newEncoding.Equals(mCharset)) { |
|
1221 if (mCharsetSource < kCharsetFromMetaPrescan) { |
|
1222 if (mInitialEncodingWasFromParentFrame) { |
|
1223 mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame", |
|
1224 false, |
|
1225 mTokenizer->getLineNumber()); |
|
1226 } else { |
|
1227 mTreeBuilder->MaybeComplainAboutCharset("EncLateMeta", |
|
1228 false, |
|
1229 mTokenizer->getLineNumber()); |
|
1230 } |
|
1231 } |
|
1232 mCharsetSource = kCharsetFromMetaTag; // become confident |
|
1233 mFeedChardet = false; // don't feed chardet when confident |
|
1234 return false; |
|
1235 } |
|
1236 |
|
1237 aEncoding.Assign(newEncoding); |
|
1238 return true; |
|
1239 } |
|
1240 |
|
1241 bool |
|
1242 nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding) |
|
1243 { |
|
1244 // This code needs to stay in sync with |
|
1245 // nsHtml5MetaScanner::tryCharset. Unfortunately, the |
|
1246 // trickery with member fields there leads to some copy-paste reuse. :-( |
|
1247 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1248 if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to "confident" in the HTML5 spec |
|
1249 return false; |
|
1250 } |
|
1251 |
|
1252 nsAutoCString newEncoding; |
|
1253 CopyUTF16toUTF8(*aEncoding, newEncoding); |
|
1254 |
|
1255 if (!PreferredForInternalEncodingDecl(newEncoding)) { |
|
1256 return false; |
|
1257 } |
|
1258 |
|
1259 if (mReparseForbidden) { |
|
1260 // This mReparseForbidden check happens after the call to |
|
1261 // PreferredForInternalEncodingDecl so that if that method calls |
|
1262 // MaybeComplainAboutCharset, its charset complaint wins over the one |
|
1263 // below. |
|
1264 mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaTooLate", |
|
1265 true, |
|
1266 mTokenizer->getLineNumber()); |
|
1267 return false; // not reparsing even if we wanted to |
|
1268 } |
|
1269 |
|
1270 // Avoid having the chardet ask for another restart after this restart |
|
1271 // request. |
|
1272 mFeedChardet = false; |
|
1273 mTreeBuilder->NeedsCharsetSwitchTo(newEncoding, |
|
1274 kCharsetFromMetaTag, |
|
1275 mTokenizer->getLineNumber()); |
|
1276 FlushTreeOpsAndDisarmTimer(); |
|
1277 Interrupt(); |
|
1278 // the tree op executor will cause the stream parser to terminate |
|
1279 // if the charset switch request is accepted or it'll uninterrupt |
|
1280 // if the request failed. Note that if the restart request fails, |
|
1281 // we don't bother trying to make chardet resume. Might as well |
|
1282 // assume that chardet-requested restarts would fail, too. |
|
1283 return true; |
|
1284 } |
|
1285 |
|
1286 void |
|
1287 nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() |
|
1288 { |
|
1289 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1290 if (mFlushTimerArmed) { |
|
1291 // avoid calling Cancel if the flush timer isn't armed to avoid acquiring |
|
1292 // a mutex |
|
1293 mFlushTimer->Cancel(); |
|
1294 mFlushTimerArmed = false; |
|
1295 } |
|
1296 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
|
1297 mTokenizer->FlushViewSource(); |
|
1298 } |
|
1299 mTreeBuilder->Flush(); |
|
1300 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
|
1301 NS_WARNING("failed to dispatch executor flush event"); |
|
1302 } |
|
1303 } |
|
1304 |
|
1305 void |
|
1306 nsHtml5StreamParser::ParseAvailableData() |
|
1307 { |
|
1308 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1309 mTokenizerMutex.AssertCurrentThreadOwns(); |
|
1310 |
|
1311 if (IsTerminatedOrInterrupted()) { |
|
1312 return; |
|
1313 } |
|
1314 |
|
1315 for (;;) { |
|
1316 if (!mFirstBuffer->hasMore()) { |
|
1317 if (mFirstBuffer == mLastBuffer) { |
|
1318 switch (mStreamState) { |
|
1319 case STREAM_BEING_READ: |
|
1320 // never release the last buffer. |
|
1321 if (!mSpeculating) { |
|
1322 // reuse buffer space if not speculating |
|
1323 mFirstBuffer->setStart(0); |
|
1324 mFirstBuffer->setEnd(0); |
|
1325 } |
|
1326 mTreeBuilder->FlushLoads(); |
|
1327 // Dispatch this runnable unconditionally, because the loads |
|
1328 // that need flushing may have been flushed earlier even if the |
|
1329 // flush right above here did nothing. |
|
1330 if (NS_FAILED(NS_DispatchToMainThread(mLoadFlusher))) { |
|
1331 NS_WARNING("failed to dispatch load flush event"); |
|
1332 } |
|
1333 return; // no more data for now but expecting more |
|
1334 case STREAM_ENDED: |
|
1335 if (mAtEOF) { |
|
1336 return; |
|
1337 } |
|
1338 mAtEOF = true; |
|
1339 if (mCharsetSource < kCharsetFromMetaTag) { |
|
1340 if (mInitialEncodingWasFromParentFrame) { |
|
1341 // Unfortunately, this check doesn't take effect for |
|
1342 // cross-origin frames, so cross-origin ad frames that have |
|
1343 // no text and only an image or a Flash embed get the more |
|
1344 // severe message from the next if block. The message is |
|
1345 // technically accurate, though. |
|
1346 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationFrame", |
|
1347 false, |
|
1348 0); |
|
1349 } else if (mMode == NORMAL) { |
|
1350 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclaration", |
|
1351 true, |
|
1352 0); |
|
1353 } else if (mMode == PLAIN_TEXT) { |
|
1354 mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationPlain", |
|
1355 true, |
|
1356 0); |
|
1357 } |
|
1358 } |
|
1359 mTokenizer->eof(); |
|
1360 mTreeBuilder->StreamEnded(); |
|
1361 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
|
1362 mTokenizer->EndViewSource(); |
|
1363 } |
|
1364 FlushTreeOpsAndDisarmTimer(); |
|
1365 return; // no more data and not expecting more |
|
1366 default: |
|
1367 NS_NOTREACHED("It should be impossible to reach this."); |
|
1368 return; |
|
1369 } |
|
1370 } |
|
1371 mFirstBuffer = mFirstBuffer->next; |
|
1372 continue; |
|
1373 } |
|
1374 |
|
1375 // now we have a non-empty buffer |
|
1376 mFirstBuffer->adjust(mLastWasCR); |
|
1377 mLastWasCR = false; |
|
1378 if (mFirstBuffer->hasMore()) { |
|
1379 mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer); |
|
1380 // At this point, internalEncodingDeclaration() may have called |
|
1381 // Terminate, but that never happens together with script. |
|
1382 // Can't assert that here, though, because it's possible that the main |
|
1383 // thread has called Terminate() while this thread was parsing. |
|
1384 if (mTreeBuilder->HasScript()) { |
|
1385 // HasScript() cannot return true if the tree builder is preventing |
|
1386 // script execution. |
|
1387 MOZ_ASSERT(mMode == NORMAL); |
|
1388 mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex); |
|
1389 nsHtml5Speculation* speculation = |
|
1390 new nsHtml5Speculation(mFirstBuffer, |
|
1391 mFirstBuffer->getStart(), |
|
1392 mTokenizer->getLineNumber(), |
|
1393 mTreeBuilder->newSnapshot()); |
|
1394 mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(), |
|
1395 speculation->GetStartLineNumber()); |
|
1396 FlushTreeOpsAndDisarmTimer(); |
|
1397 mTreeBuilder->SetOpSink(speculation); |
|
1398 mSpeculations.AppendElement(speculation); // adopts the pointer |
|
1399 mSpeculating = true; |
|
1400 } |
|
1401 if (IsTerminatedOrInterrupted()) { |
|
1402 return; |
|
1403 } |
|
1404 } |
|
1405 continue; |
|
1406 } |
|
1407 } |
|
1408 |
|
1409 class nsHtml5StreamParserContinuation : public nsRunnable |
|
1410 { |
|
1411 private: |
|
1412 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
|
1413 public: |
|
1414 nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser) |
|
1415 : mStreamParser(aStreamParser) |
|
1416 {} |
|
1417 NS_IMETHODIMP Run() |
|
1418 { |
|
1419 mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
|
1420 mStreamParser->Uninterrupt(); |
|
1421 mStreamParser->ParseAvailableData(); |
|
1422 return NS_OK; |
|
1423 } |
|
1424 }; |
|
1425 |
|
1426 void |
|
1427 nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer, |
|
1428 nsHtml5TreeBuilder* aTreeBuilder, |
|
1429 bool aLastWasCR) |
|
1430 { |
|
1431 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
1432 NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML), |
|
1433 "ContinueAfterScripts called in view source mode!"); |
|
1434 if (NS_FAILED(mExecutor->IsBroken())) { |
|
1435 return; |
|
1436 } |
|
1437 #ifdef DEBUG |
|
1438 mExecutor->AssertStageEmpty(); |
|
1439 #endif |
|
1440 bool speculationFailed = false; |
|
1441 { |
|
1442 mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex); |
|
1443 if (mSpeculations.IsEmpty()) { |
|
1444 NS_NOTREACHED("ContinueAfterScripts called without speculations."); |
|
1445 return; |
|
1446 } |
|
1447 nsHtml5Speculation* speculation = mSpeculations.ElementAt(0); |
|
1448 if (aLastWasCR || |
|
1449 !aTokenizer->isInDataState() || |
|
1450 !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) { |
|
1451 speculationFailed = true; |
|
1452 // We've got a failed speculation :-( |
|
1453 Interrupt(); // Make the parser thread release the tokenizer mutex sooner |
|
1454 // now fall out of the speculationAutoLock into the tokenizerAutoLock block |
|
1455 } else { |
|
1456 // We've got a successful speculation! |
|
1457 if (mSpeculations.Length() > 1) { |
|
1458 // the first speculation isn't the current speculation, so there's |
|
1459 // no need to bother the parser thread. |
|
1460 speculation->FlushToSink(mExecutor); |
|
1461 NS_ASSERTION(!mExecutor->IsScriptExecuting(), |
|
1462 "ParseUntilBlocked() was supposed to ensure we don't come " |
|
1463 "here when scripts are executing."); |
|
1464 NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if " |
|
1465 "RunFlushLoop() didn't call ParseUntilBlocked() which is the " |
|
1466 "only caller of this method?"); |
|
1467 mSpeculations.RemoveElementAt(0); |
|
1468 return; |
|
1469 } |
|
1470 // else |
|
1471 Interrupt(); // Make the parser thread release the tokenizer mutex sooner |
|
1472 |
|
1473 // now fall through |
|
1474 // the first speculation is the current speculation. Need to |
|
1475 // release the the speculation mutex and acquire the tokenizer |
|
1476 // mutex. (Just acquiring the other mutex here would deadlock) |
|
1477 } |
|
1478 } |
|
1479 { |
|
1480 mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex); |
|
1481 #ifdef DEBUG |
|
1482 { |
|
1483 nsCOMPtr<nsIThread> mainThread; |
|
1484 NS_GetMainThread(getter_AddRefs(mainThread)); |
|
1485 mAtomTable.SetPermittedLookupThread(mainThread); |
|
1486 } |
|
1487 #endif |
|
1488 // In principle, the speculation mutex should be acquired here, |
|
1489 // but there's no point, because the parser thread only acquires it |
|
1490 // when it has also acquired the tokenizer mutex and we are already |
|
1491 // holding the tokenizer mutex. |
|
1492 if (speculationFailed) { |
|
1493 // Rewind the stream |
|
1494 mAtEOF = false; |
|
1495 nsHtml5Speculation* speculation = mSpeculations.ElementAt(0); |
|
1496 mFirstBuffer = speculation->GetBuffer(); |
|
1497 mFirstBuffer->setStart(speculation->GetStart()); |
|
1498 mTokenizer->setLineNumber(speculation->GetStartLineNumber()); |
|
1499 |
|
1500 nsContentUtils::ReportToConsole(nsIScriptError::warningFlag, |
|
1501 NS_LITERAL_CSTRING("DOM Events"), |
|
1502 mExecutor->GetDocument(), |
|
1503 nsContentUtils::eDOM_PROPERTIES, |
|
1504 "SpeculationFailed", |
|
1505 nullptr, 0, |
|
1506 nullptr, |
|
1507 EmptyString(), |
|
1508 speculation->GetStartLineNumber()); |
|
1509 |
|
1510 nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next; |
|
1511 while (buffer) { |
|
1512 buffer->setStart(0); |
|
1513 buffer = buffer->next; |
|
1514 } |
|
1515 |
|
1516 mSpeculations.Clear(); // potentially a huge number of destructors |
|
1517 // run here synchronously on the main thread... |
|
1518 |
|
1519 mTreeBuilder->flushCharacters(); // empty the pending buffer |
|
1520 mTreeBuilder->ClearOps(); // now get rid of the failed ops |
|
1521 |
|
1522 mTreeBuilder->SetOpSink(mExecutor->GetStage()); |
|
1523 mExecutor->StartReadingFromStage(); |
|
1524 mSpeculating = false; |
|
1525 |
|
1526 // Copy state over |
|
1527 mLastWasCR = aLastWasCR; |
|
1528 mTokenizer->loadState(aTokenizer); |
|
1529 mTreeBuilder->loadState(aTreeBuilder, &mAtomTable); |
|
1530 } else { |
|
1531 // We've got a successful speculation and at least a moment ago it was |
|
1532 // the current speculation |
|
1533 mSpeculations.ElementAt(0)->FlushToSink(mExecutor); |
|
1534 NS_ASSERTION(!mExecutor->IsScriptExecuting(), |
|
1535 "ParseUntilBlocked() was supposed to ensure we don't come " |
|
1536 "here when scripts are executing."); |
|
1537 NS_ASSERTION(mExecutor->IsInFlushLoop(), "How are we here if " |
|
1538 "RunFlushLoop() didn't call ParseUntilBlocked() which is the " |
|
1539 "only caller of this method?"); |
|
1540 mSpeculations.RemoveElementAt(0); |
|
1541 if (mSpeculations.IsEmpty()) { |
|
1542 // yes, it was still the only speculation. Now stop speculating |
|
1543 // However, before telling the executor to read from stage, flush |
|
1544 // any pending ops straight to the executor, because otherwise |
|
1545 // they remain unflushed until we get more data from the network. |
|
1546 mTreeBuilder->SetOpSink(mExecutor); |
|
1547 mTreeBuilder->Flush(true); |
|
1548 mTreeBuilder->SetOpSink(mExecutor->GetStage()); |
|
1549 mExecutor->StartReadingFromStage(); |
|
1550 mSpeculating = false; |
|
1551 } |
|
1552 } |
|
1553 nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this); |
|
1554 if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
|
1555 NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation"); |
|
1556 } |
|
1557 // A stream event might run before this event runs, but that's harmless. |
|
1558 #ifdef DEBUG |
|
1559 mAtomTable.SetPermittedLookupThread(mThread); |
|
1560 #endif |
|
1561 } |
|
1562 } |
|
1563 |
|
1564 void |
|
1565 nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch() |
|
1566 { |
|
1567 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
1568 nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this); |
|
1569 if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
|
1570 NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation"); |
|
1571 } |
|
1572 } |
|
1573 |
|
1574 class nsHtml5TimerKungFu : public nsRunnable |
|
1575 { |
|
1576 private: |
|
1577 nsHtml5RefPtr<nsHtml5StreamParser> mStreamParser; |
|
1578 public: |
|
1579 nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser) |
|
1580 : mStreamParser(aStreamParser) |
|
1581 {} |
|
1582 NS_IMETHODIMP Run() |
|
1583 { |
|
1584 if (mStreamParser->mFlushTimer) { |
|
1585 mStreamParser->mFlushTimer->Cancel(); |
|
1586 mStreamParser->mFlushTimer = nullptr; |
|
1587 } |
|
1588 return NS_OK; |
|
1589 } |
|
1590 }; |
|
1591 |
|
1592 void |
|
1593 nsHtml5StreamParser::DropTimer() |
|
1594 { |
|
1595 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
|
1596 /* |
|
1597 * Simply nulling out the timer wouldn't work, because if the timer is |
|
1598 * armed, it needs to be canceled first. Simply canceling it first wouldn't |
|
1599 * work, because nsTimerImpl::Cancel is not safe for calling from outside |
|
1600 * the thread where nsTimerImpl::Fire would run. It's not safe to |
|
1601 * dispatch a runnable to cancel the timer from the destructor of this |
|
1602 * class, because the timer has a weak (void*) pointer back to this instance |
|
1603 * of the stream parser and having the timer fire before the runnable |
|
1604 * cancels it would make the timer access a deleted object. |
|
1605 * |
|
1606 * This DropTimer method addresses these issues. This method must be called |
|
1607 * on the main thread before the destructor of this class is reached. |
|
1608 * The nsHtml5TimerKungFu object has an nsHtml5RefPtr that addrefs this |
|
1609 * stream parser object to keep it alive until the runnable is done. |
|
1610 * The runnable cancels the timer on the parser thread, drops the timer |
|
1611 * and lets nsHtml5RefPtr send a runnable back to the main thread to |
|
1612 * release the stream parser. |
|
1613 */ |
|
1614 if (mFlushTimer) { |
|
1615 nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this); |
|
1616 if (NS_FAILED(mThread->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
|
1617 NS_WARNING("Failed to dispatch TimerKungFu event"); |
|
1618 } |
|
1619 } |
|
1620 } |
|
1621 |
|
1622 // Using a static, because the method name Notify is taken by the chardet |
|
1623 // callback. |
|
1624 void |
|
1625 nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure) |
|
1626 { |
|
1627 (static_cast<nsHtml5StreamParser*> (aClosure))->TimerFlush(); |
|
1628 } |
|
1629 |
|
1630 void |
|
1631 nsHtml5StreamParser::TimerFlush() |
|
1632 { |
|
1633 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1634 mozilla::MutexAutoLock autoLock(mTokenizerMutex); |
|
1635 |
|
1636 NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating."); |
|
1637 |
|
1638 // The timer fired if we got here. No need to cancel it. Mark it as |
|
1639 // not armed, though. |
|
1640 mFlushTimerArmed = false; |
|
1641 |
|
1642 mFlushTimerEverFired = true; |
|
1643 |
|
1644 if (IsTerminatedOrInterrupted()) { |
|
1645 return; |
|
1646 } |
|
1647 |
|
1648 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
|
1649 mTreeBuilder->Flush(); // delete useless ops |
|
1650 if (mTokenizer->FlushViewSource()) { |
|
1651 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
|
1652 NS_WARNING("failed to dispatch executor flush event"); |
|
1653 } |
|
1654 } |
|
1655 } else { |
|
1656 // we aren't speculating and we don't know when new data is |
|
1657 // going to arrive. Send data to the main thread. |
|
1658 if (mTreeBuilder->Flush(true)) { |
|
1659 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
|
1660 NS_WARNING("failed to dispatch executor flush event"); |
|
1661 } |
|
1662 } |
|
1663 } |
|
1664 } |
|
1665 |
|
1666 void |
|
1667 nsHtml5StreamParser::MarkAsBroken(nsresult aRv) |
|
1668 { |
|
1669 NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
|
1670 mTokenizerMutex.AssertCurrentThreadOwns(); |
|
1671 |
|
1672 Terminate(); |
|
1673 mTreeBuilder->MarkAsBroken(aRv); |
|
1674 mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false); |
|
1675 NS_ASSERTION(hadOps, "Should have had the markAsBroken op!"); |
|
1676 if (NS_FAILED(NS_DispatchToMainThread(mExecutorFlusher))) { |
|
1677 NS_WARNING("failed to dispatch executor flush event"); |
|
1678 } |
|
1679 } |