|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set sw=2 ts=2 et tw=79: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "nsIAtom.h" |
|
8 #include "nsParser.h" |
|
9 #include "nsString.h" |
|
10 #include "nsCRT.h" |
|
11 #include "nsScanner.h" |
|
12 #include "plstr.h" |
|
13 #include "nsIStringStream.h" |
|
14 #include "nsIChannel.h" |
|
15 #include "nsICachingChannel.h" |
|
16 #include "nsICacheEntryDescriptor.h" |
|
17 #include "nsIInputStream.h" |
|
18 #include "CNavDTD.h" |
|
19 #include "prenv.h" |
|
20 #include "prlock.h" |
|
21 #include "prcvar.h" |
|
22 #include "nsParserCIID.h" |
|
23 #include "nsReadableUtils.h" |
|
24 #include "nsCOMPtr.h" |
|
25 #include "nsExpatDriver.h" |
|
26 #include "nsIServiceManager.h" |
|
27 #include "nsICategoryManager.h" |
|
28 #include "nsISupportsPrimitives.h" |
|
29 #include "nsIFragmentContentSink.h" |
|
30 #include "nsStreamUtils.h" |
|
31 #include "nsHTMLTokenizer.h" |
|
32 #include "nsNetUtil.h" |
|
33 #include "nsScriptLoader.h" |
|
34 #include "nsDataHashtable.h" |
|
35 #include "nsXPCOMCIDInternal.h" |
|
36 #include "nsMimeTypes.h" |
|
37 #include "mozilla/CondVar.h" |
|
38 #include "mozilla/Mutex.h" |
|
39 #include "nsParserConstants.h" |
|
40 #include "nsCharsetSource.h" |
|
41 #include "nsContentUtils.h" |
|
42 #include "nsThreadUtils.h" |
|
43 #include "nsIHTMLContentSink.h" |
|
44 |
|
45 #include "mozilla/dom/EncodingUtils.h" |
|
46 |
|
47 using namespace mozilla; |
|
48 using mozilla::dom::EncodingUtils; |
|
49 |
|
// Bit flags stored in nsParser::mFlags.
// PARSER_ENABLED, OBSERVERS_ENABLED and CAN_TOKENIZE are the bits that
// nsParser::Initialize() turns on by default.
#define NS_PARSER_FLAG_PARSER_ENABLED         0x00000002
#define NS_PARSER_FLAG_OBSERVERS_ENABLED      0x00000004
// Set by nsParser::PostContinueEvent() once a continue event was dispatched.
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
#define NS_PARSER_FLAG_FLUSH_TOKENS           0x00000020
#define NS_PARSER_FLAG_CAN_TOKENIZE           0x00000040
|
55 |
|
56 //-------------- Begin ParseContinue Event Definition ------------------------ |
|
57 /* |
|
58 The parser can be explicitly interrupted by passing a return value of |
|
59 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause |
|
60 the parser to stop processing and allow the application to return to the event |
|
61 loop. The data which was left at the time of interruption will be processed |
|
62 the next time OnDataAvailable is called. If the parser has received its final |
|
63 chunk of data then OnDataAvailable will no longer be called by the networking |
|
64 module, so the parser will schedule a nsParserContinueEvent which will call |
|
65 the parser to process the remaining data after returning to the event loop. |
|
66 If the parser is interrupted while processing the remaining data it will |
|
67 schedule another ParseContinueEvent. The processing of data followed by |
|
68 scheduling of the continue events will proceed until either: |
|
69 |
|
70 1) All of the remaining data can be processed without interrupting |
|
71 2) The parser has been cancelled. |
|
72 |
|
73 |
|
74 This capability is currently used in CNavDTD and nsHTMLContentSink. The |
|
75 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be |
|
76 processed and when each token is processed. The nsHTML content sink records |
|
77 the time when the chunk has started processing and will return |
|
78 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a |
|
79 threshold called max tokenizing processing time. This allows the content sink |
|
80 to limit how much data is processed in a single chunk which in turn gates how |
|
81 much time is spent away from the event loop. Processing smaller chunks of data |
|
82 also reduces the time spent in subsequent reflows. |
|
83 |
|
84 This capability is most apparent when loading large documents. If the maximum |
|
85 token processing time is set small enough the application will remain |
|
86 responsive during document load. |
|
87 |
|
88 A side-effect of this capability is that document load is not complete when |
|
89 the last chunk of data is passed to OnDataAvailable since the parser may have |
|
90 been interrupted when the last chunk of data arrived. The document is complete |
|
91 when all of the document has been tokenized and there aren't any pending |
|
92 nsParserContinueEvents. This can cause problems if the application assumes |
|
93 that it can monitor the load requests to determine when the document load has |
|
94 been completed. This is what happens in Mozilla. The document is considered |
|
95 completely loaded when all of the load requests have been satisfied. To delay |
|
96 the document load until all of the parsing has been completed the |
|
97 nsHTMLContentSink adds a dummy parser load request which is not removed until |
|
98 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call |
|
99 DidBuildModel until the final chunk of data has been passed to the parser |
|
100 through the OnDataAvailable and there aren't any pending |
|
101 nsParserContinueEvents. |
|
102 |
|
103 Currently the parser ignores requests to be interrupted during the |
|
104 processing of script. This is because a document.write followed by JavaScript |
|
105 calls to manipulate the DOM may fail if the parser was interrupted during the |
|
106 document.write. |
|
107 |
|
108 For more details @see bugzilla bug 76722 |
|
109 */ |
|
110 |
|
111 |
|
112 class nsParserContinueEvent : public nsRunnable |
|
113 { |
|
114 public: |
|
115 nsRefPtr<nsParser> mParser; |
|
116 |
|
117 nsParserContinueEvent(nsParser* aParser) |
|
118 : mParser(aParser) |
|
119 {} |
|
120 |
|
121 NS_IMETHOD Run() |
|
122 { |
|
123 mParser->HandleParserContinueEvent(this); |
|
124 return NS_OK; |
|
125 } |
|
126 }; |
|
127 |
|
128 //-------------- End ParseContinue Event Definition ------------------------ |
|
129 |
|
130 /** |
|
131 * default constructor |
|
132 */ |
|
133 nsParser::nsParser() |
|
134 { |
|
135 Initialize(true); |
|
136 } |
|
137 |
|
138 nsParser::~nsParser() |
|
139 { |
|
140 Cleanup(); |
|
141 } |
|
142 |
|
143 void |
|
144 nsParser::Initialize(bool aConstructor) |
|
145 { |
|
146 if (aConstructor) { |
|
147 // Raw pointer |
|
148 mParserContext = 0; |
|
149 } |
|
150 else { |
|
151 // nsCOMPtrs |
|
152 mObserver = nullptr; |
|
153 mUnusedInput.Truncate(); |
|
154 } |
|
155 |
|
156 mContinueEvent = nullptr; |
|
157 mCharsetSource = kCharsetUninitialized; |
|
158 mCharset.AssignLiteral("ISO-8859-1"); |
|
159 mInternalState = NS_OK; |
|
160 mStreamStatus = NS_OK; |
|
161 mCommand = eViewNormal; |
|
162 mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED | |
|
163 NS_PARSER_FLAG_PARSER_ENABLED | |
|
164 NS_PARSER_FLAG_CAN_TOKENIZE; |
|
165 |
|
166 mProcessingNetworkData = false; |
|
167 mIsAboutBlank = false; |
|
168 } |
|
169 |
|
170 void |
|
171 nsParser::Cleanup() |
|
172 { |
|
173 #ifdef DEBUG |
|
174 if (mParserContext && mParserContext->mPrevContext) { |
|
175 NS_WARNING("Extra parser contexts still on the parser stack"); |
|
176 } |
|
177 #endif |
|
178 |
|
179 while (mParserContext) { |
|
180 CParserContext *pc = mParserContext->mPrevContext; |
|
181 delete mParserContext; |
|
182 mParserContext = pc; |
|
183 } |
|
184 |
|
185 // It should not be possible for this flag to be set when we are getting |
|
186 // destroyed since this flag implies a pending nsParserContinueEvent, which |
|
187 // has an owning reference to |this|. |
|
188 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad"); |
|
189 } |
|
190 |
|
191 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) |
|
192 |
|
193 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) |
|
194 NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD) |
|
195 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink) |
|
196 NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) |
|
197 NS_IMPL_CYCLE_COLLECTION_UNLINK_END |
|
198 |
|
199 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) |
|
200 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD) |
|
201 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink) |
|
202 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) |
|
203 CParserContext *pc = tmp->mParserContext; |
|
204 while (pc) { |
|
205 cb.NoteXPCOMChild(pc->mTokenizer); |
|
206 pc = pc->mPrevContext; |
|
207 } |
|
208 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END |
|
209 |
|
210 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser) |
|
211 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser) |
|
212 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) |
|
213 NS_INTERFACE_MAP_ENTRY(nsIStreamListener) |
|
214 NS_INTERFACE_MAP_ENTRY(nsIParser) |
|
215 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) |
|
216 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) |
|
217 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser) |
|
218 NS_INTERFACE_MAP_END |
|
219 |
|
220 // The parser continue event is posted only if |
|
221 // all of the data to parse has been passed to ::OnDataAvailable |
|
222 // and the parser has been interrupted by the content sink |
|
223 // because the processing of tokens took too long. |
|
224 |
|
225 nsresult |
|
226 nsParser::PostContinueEvent() |
|
227 { |
|
228 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) { |
|
229 // If this flag isn't set, then there shouldn't be a live continue event! |
|
230 NS_ASSERTION(!mContinueEvent, "bad"); |
|
231 |
|
232 // This creates a reference cycle between this and the event that is |
|
233 // broken when the event fires. |
|
234 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this); |
|
235 if (NS_FAILED(NS_DispatchToCurrentThread(event))) { |
|
236 NS_WARNING("failed to dispatch parser continuation event"); |
|
237 } else { |
|
238 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; |
|
239 mContinueEvent = event; |
|
240 } |
|
241 } |
|
242 return NS_OK; |
|
243 } |
|
244 |
|
245 NS_IMETHODIMP_(void) |
|
246 nsParser::GetCommand(nsCString& aCommand) |
|
247 { |
|
248 aCommand = mCommandStr; |
|
249 } |
|
250 |
|
251 /** |
|
252 * Call this method once you've created a parser, and want to instruct it |
|
253 * about the command which caused the parser to be constructed. For example, |
|
254 * this allows us to select a DTD which can do, say, view-source. |
|
255 * |
|
256 * @param aCommand the command string to set |
|
257 */ |
|
258 NS_IMETHODIMP_(void) |
|
259 nsParser::SetCommand(const char* aCommand) |
|
260 { |
|
261 mCommandStr.Assign(aCommand); |
|
262 if (mCommandStr.Equals("view-source")) { |
|
263 mCommand = eViewSource; |
|
264 } else if (mCommandStr.Equals("view-fragment")) { |
|
265 mCommand = eViewFragment; |
|
266 } else { |
|
267 mCommand = eViewNormal; |
|
268 } |
|
269 } |
|
270 |
|
271 /** |
|
272 * Call this method once you've created a parser, and want to instruct it |
|
273 * about the command which caused the parser to be constructed. For example, |
|
274 * this allows us to select a DTD which can do, say, view-source. |
|
275 * |
|
276 * @param aParserCommand the command to set |
|
277 */ |
|
278 NS_IMETHODIMP_(void) |
|
279 nsParser::SetCommand(eParserCommands aParserCommand) |
|
280 { |
|
281 mCommand = aParserCommand; |
|
282 } |
|
283 |
|
284 /** |
|
285 * Call this method once you've created a parser, and want to instruct it |
|
286 * about what charset to load |
|
287 * |
|
288 * @param aCharset- the charset of a document |
|
289 * @param aCharsetSource- the source of the charset |
|
290 */ |
|
291 NS_IMETHODIMP_(void) |
|
292 nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource) |
|
293 { |
|
294 mCharset = aCharset; |
|
295 mCharsetSource = aCharsetSource; |
|
296 if (mParserContext && mParserContext->mScanner) { |
|
297 mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource); |
|
298 } |
|
299 } |
|
300 |
|
301 void |
|
302 nsParser::SetSinkCharset(nsACString& aCharset) |
|
303 { |
|
304 if (mSink) { |
|
305 mSink->SetDocumentCharset(aCharset); |
|
306 } |
|
307 } |
|
308 |
|
309 /** |
|
310 * This method gets called in order to set the content |
|
311 * sink for this parser to dump nodes to. |
|
312 * |
|
313 * @param nsIContentSink interface for node receiver |
|
314 */ |
|
315 NS_IMETHODIMP_(void) |
|
316 nsParser::SetContentSink(nsIContentSink* aSink) |
|
317 { |
|
318 NS_PRECONDITION(aSink, "sink cannot be null!"); |
|
319 mSink = aSink; |
|
320 |
|
321 if (mSink) { |
|
322 mSink->SetParser(this); |
|
323 nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink); |
|
324 if (htmlSink) { |
|
325 mIsAboutBlank = true; |
|
326 } |
|
327 } |
|
328 } |
|
329 |
|
330 /** |
|
331 * retrieve the sink set into the parser |
|
332 * @return current sink |
|
333 */ |
|
334 NS_IMETHODIMP_(nsIContentSink*) |
|
335 nsParser::GetContentSink() |
|
336 { |
|
337 return mSink; |
|
338 } |
|
339 |
|
340 /** |
|
341 * Determine what DTD mode (and thus what layout nsCompatibility mode) |
|
342 * to use for this document based on the first chunk of data received |
|
343 * from the network (each parsercontext can have its own mode). (No, |
|
344 * this is not an optimal solution -- we really don't need to know until |
|
345 * after we've received the DOCTYPE, and this could easily be part of |
|
346 * the regular parsing process if the parser were designed in a way that |
|
347 * made such modifications easy.) |
|
348 */ |
|
349 |
|
350 // Parse the PS production in the SGML spec (excluding the part dealing |
|
351 // with entity references) starting at theIndex into theBuffer, and |
|
352 // return the first index after the end of the production. |
|
353 static int32_t |
|
354 ParsePS(const nsString& aBuffer, int32_t aIndex) |
|
355 { |
|
356 for (;;) { |
|
357 char16_t ch = aBuffer.CharAt(aIndex); |
|
358 if ((ch == char16_t(' ')) || (ch == char16_t('\t')) || |
|
359 (ch == char16_t('\n')) || (ch == char16_t('\r'))) { |
|
360 ++aIndex; |
|
361 } else if (ch == char16_t('-')) { |
|
362 int32_t tmpIndex; |
|
363 if (aBuffer.CharAt(aIndex+1) == char16_t('-') && |
|
364 kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) { |
|
365 aIndex = tmpIndex + 2; |
|
366 } else { |
|
367 return aIndex; |
|
368 } |
|
369 } else { |
|
370 return aIndex; |
|
371 } |
|
372 } |
|
373 } |
|
374 |
|
// Result bits reported by ParseDocTypeDecl() through *aResultFlags.
#define PARSE_DTD_HAVE_DOCTYPE          (1 << 0)  // a DOCTYPE was found
#define PARSE_DTD_HAVE_PUBLIC_ID        (1 << 1)  // public identifier parsed
#define PARSE_DTD_HAVE_SYSTEM_ID        (1 << 2)  // system identifier seen
#define PARSE_DTD_HAVE_INTERNAL_SUBSET  (1 << 3)  // '[' follows the ids
|
379 |
|
380 // return true on success (includes not present), false on failure |
|
381 static bool |
|
382 ParseDocTypeDecl(const nsString &aBuffer, |
|
383 int32_t *aResultFlags, |
|
384 nsString &aPublicID, |
|
385 nsString &aSystemID) |
|
386 { |
|
387 bool haveDoctype = false; |
|
388 *aResultFlags = 0; |
|
389 |
|
390 // Skip through any comments and processing instructions |
|
391 // The PI-skipping is a bit of a hack. |
|
392 int32_t theIndex = 0; |
|
393 do { |
|
394 theIndex = aBuffer.FindChar('<', theIndex); |
|
395 if (theIndex == kNotFound) break; |
|
396 char16_t nextChar = aBuffer.CharAt(theIndex+1); |
|
397 if (nextChar == char16_t('!')) { |
|
398 int32_t tmpIndex = theIndex + 2; |
|
399 if (kNotFound != |
|
400 (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) { |
|
401 haveDoctype = true; |
|
402 theIndex += 7; // skip "DOCTYPE" |
|
403 break; |
|
404 } |
|
405 theIndex = ParsePS(aBuffer, tmpIndex); |
|
406 theIndex = aBuffer.FindChar('>', theIndex); |
|
407 } else if (nextChar == char16_t('?')) { |
|
408 theIndex = aBuffer.FindChar('>', theIndex); |
|
409 } else { |
|
410 break; |
|
411 } |
|
412 } while (theIndex != kNotFound); |
|
413 |
|
414 if (!haveDoctype) |
|
415 return true; |
|
416 *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE; |
|
417 |
|
418 theIndex = ParsePS(aBuffer, theIndex); |
|
419 theIndex = aBuffer.Find("HTML", true, theIndex, 0); |
|
420 if (kNotFound == theIndex) |
|
421 return false; |
|
422 theIndex = ParsePS(aBuffer, theIndex+4); |
|
423 int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0); |
|
424 |
|
425 if (kNotFound != tmpIndex) { |
|
426 theIndex = ParsePS(aBuffer, tmpIndex+6); |
|
427 |
|
428 // We get here only if we've read <!DOCTYPE HTML PUBLIC |
|
429 // (not case sensitive) possibly with comments within. |
|
430 |
|
431 // Now find the beginning and end of the public identifier |
|
432 // and the system identifier (if present). |
|
433 |
|
434 char16_t lit = aBuffer.CharAt(theIndex); |
|
435 if ((lit != char16_t('\"')) && (lit != char16_t('\''))) |
|
436 return false; |
|
437 |
|
438 // Start is the first character, excluding the quote, and End is |
|
439 // the final quote, so there are (end-start) characters. |
|
440 |
|
441 int32_t PublicIDStart = theIndex + 1; |
|
442 int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart); |
|
443 if (kNotFound == PublicIDEnd) |
|
444 return false; |
|
445 theIndex = ParsePS(aBuffer, PublicIDEnd + 1); |
|
446 char16_t next = aBuffer.CharAt(theIndex); |
|
447 if (next == char16_t('>')) { |
|
448 // There was a public identifier, but no system |
|
449 // identifier, |
|
450 // so do nothing. |
|
451 // This is needed to avoid the else at the end, and it's |
|
452 // also the most common case. |
|
453 } else if ((next == char16_t('\"')) || |
|
454 (next == char16_t('\''))) { |
|
455 // We found a system identifier. |
|
456 *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; |
|
457 int32_t SystemIDStart = theIndex + 1; |
|
458 int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); |
|
459 if (kNotFound == SystemIDEnd) |
|
460 return false; |
|
461 aSystemID = |
|
462 Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); |
|
463 } else if (next == char16_t('[')) { |
|
464 // We found an internal subset. |
|
465 *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; |
|
466 } else { |
|
467 // Something's wrong. |
|
468 return false; |
|
469 } |
|
470 |
|
471 // Since a public ID is a minimum literal, we must trim |
|
472 // and collapse whitespace |
|
473 aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart); |
|
474 aPublicID.CompressWhitespace(true, true); |
|
475 *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID; |
|
476 } else { |
|
477 tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0); |
|
478 if (kNotFound != tmpIndex) { |
|
479 // DOCTYPES with system ID but no Public ID |
|
480 *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID; |
|
481 |
|
482 theIndex = ParsePS(aBuffer, tmpIndex+6); |
|
483 char16_t next = aBuffer.CharAt(theIndex); |
|
484 if (next != char16_t('\"') && next != char16_t('\'')) |
|
485 return false; |
|
486 |
|
487 int32_t SystemIDStart = theIndex + 1; |
|
488 int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart); |
|
489 |
|
490 if (kNotFound == SystemIDEnd) |
|
491 return false; |
|
492 aSystemID = |
|
493 Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart); |
|
494 theIndex = ParsePS(aBuffer, SystemIDEnd + 1); |
|
495 } |
|
496 |
|
497 char16_t nextChar = aBuffer.CharAt(theIndex); |
|
498 if (nextChar == char16_t('[')) |
|
499 *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET; |
|
500 else if (nextChar != char16_t('>')) |
|
501 return false; |
|
502 } |
|
503 return true; |
|
504 } |
|
505 |
|
// One row of the public-identifier table: a (lower-cased) public ID and the
// mode it selects, depending on whether a system identifier accompanied it.
struct PubIDInfo
{
  // Public IDs that should trigger strict mode are deliberately not listed
  // in the table, since all future public IDs should trigger strict mode
  // as well.
  enum eMode {
    eQuirks,          // always quirks mode, unless there's an internal subset
    eAlmostStandards, // eCompatibility_AlmostStandards
    eFullStandards    // eCompatibility_FullStandards
  };

  const char* name;       // public identifier text
  eMode mode_if_no_sysid; // mode when no system identifier is present
  eMode mode_if_sysid;    // mode when a system identifier is present
};
|
523 |
|
// Number of elements in a true array (not a pointer).
#define ELEMENTS_OF(array_) (sizeof(array_) / sizeof((array_)[0]))
|
525 |
|
526 // These must be in nsCRT::strcmp order so binary-search can be used. |
|
527 // This is verified, |#ifdef DEBUG|, below. |
|
528 |
|
529 // Even though public identifiers should be case sensitive, we will do |
|
530 // all comparisons after converting to lower case in order to do |
|
531 // case-insensitive comparison since there are a number of existing web |
|
532 // sites that use the incorrect case. Therefore all of the public |
|
533 // identifiers below are in lower case (with the correct case following, |
|
534 // in comments). The case is verified, |#ifdef DEBUG|, below. |
|
535 static const PubIDInfo kPublicIDs[] = { |
|
536 {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
537 {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
538 {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
539 {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
540 {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
541 {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
542 {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
543 {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
544 {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
545 {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
546 {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
547 {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
548 {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
549 {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
550 {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
551 {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
552 {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
553 {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
554 {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
555 {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
556 {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
557 {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
558 {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
559 {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
560 {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
561 {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
562 {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
563 {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
564 {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
565 {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
566 {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
567 {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
568 {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
569 {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
570 {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
571 {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
572 {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
573 {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
574 {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
575 {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
576 {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
577 {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
578 {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
579 {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
580 {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
581 {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
582 {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
583 {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
584 {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
585 {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
586 {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
587 {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
588 {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
589 {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
590 {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
591 {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
592 {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
593 {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
594 {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
595 {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
596 {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
597 {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
598 {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, |
|
599 {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards}, |
|
600 {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
601 {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
602 {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
603 {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, |
|
604 {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards}, |
|
605 {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
606 {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
607 {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
608 {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
609 {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
610 {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
611 {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks}, |
|
612 }; |
|
613 |
|
#ifdef DEBUG
// Debug-only sanity check of kPublicIDs, performed once per process:
//  - neighbouring entries must be in strictly increasing nsCRT::strcmp order
//  - every name must already be lower case.
// Violations trigger NS_NOTREACHED and are printed to stdout.
static void
VerifyPublicIDs()
{
  static bool gVerified = false;
  if (gVerified) {
    return;
  }
  gVerified = true;

  const uint32_t count = ELEMENTS_OF(kPublicIDs);

  // Ordering: compare each entry with the one before it.
  for (uint32_t i = 1; i < count; ++i) {
    const char* previous = kPublicIDs[i - 1].name;
    const char* current = kPublicIDs[i].name;
    if (nsCRT::strcmp(previous, current) >= 0) {
      NS_NOTREACHED("doctypes out of order");
      printf("Doctypes %s and %s out of order.\n", previous, current);
    }
  }

  // Case: lower-casing a name must leave it unchanged.
  for (uint32_t i = 0; i < count; ++i) {
    const char* name = kPublicIDs[i].name;
    nsAutoCString lowered(name);
    ToLowerCase(lowered);
    if (nsCRT::strcmp(name, lowered.get()) != 0) {
      NS_NOTREACHED("doctype not lower case");
      printf("Doctype %s not lower case.\n", name);
    }
  }
}
#endif
|
640 |
|
641 static void |
|
642 DetermineHTMLParseMode(const nsString& aBuffer, |
|
643 nsDTDMode& aParseMode, |
|
644 eParserDocType& aDocType) |
|
645 { |
|
646 #ifdef DEBUG |
|
647 VerifyPublicIDs(); |
|
648 #endif |
|
649 int32_t resultFlags; |
|
650 nsAutoString publicIDUCS2, sysIDUCS2; |
|
651 if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) { |
|
652 if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) { |
|
653 // no DOCTYPE |
|
654 aParseMode = eDTDMode_quirks; |
|
655 aDocType = eHTML_Quirks; |
|
656 } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) || |
|
657 !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) { |
|
658 // A doctype with an internal subset is always full_standards. |
|
659 // A doctype without a public ID is always full_standards. |
|
660 aDocType = eHTML_Strict; |
|
661 aParseMode = eDTDMode_full_standards; |
|
662 |
|
663 // Special hack for IBM's custom DOCTYPE. |
|
664 if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) && |
|
665 sysIDUCS2 == NS_LITERAL_STRING( |
|
666 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) { |
|
667 aParseMode = eDTDMode_quirks; |
|
668 aDocType = eHTML_Quirks; |
|
669 } |
|
670 |
|
671 } else { |
|
672 // We have to check our list of public IDs to see what to do. |
|
673 // Yes, we want UCS2 to ASCII lossy conversion. |
|
674 nsAutoCString publicID; |
|
675 publicID.AssignWithConversion(publicIDUCS2); |
|
676 |
|
677 // See comment above definition of kPublicIDs about case |
|
678 // sensitivity. |
|
679 ToLowerCase(publicID); |
|
680 |
|
681 // Binary search to see if we can find the correct public ID |
|
682 // These must be signed since maximum can go below zero and we'll |
|
683 // crash if it's unsigned. |
|
684 int32_t minimum = 0; |
|
685 int32_t maximum = ELEMENTS_OF(kPublicIDs) - 1; |
|
686 int32_t index; |
|
687 for (;;) { |
|
688 index = (minimum + maximum) / 2; |
|
689 int32_t comparison = |
|
690 nsCRT::strcmp(publicID.get(), kPublicIDs[index].name); |
|
691 if (comparison == 0) |
|
692 break; |
|
693 if (comparison < 0) |
|
694 maximum = index - 1; |
|
695 else |
|
696 minimum = index + 1; |
|
697 |
|
698 if (maximum < minimum) { |
|
699 // The DOCTYPE is not in our list, so it must be full_standards. |
|
700 aParseMode = eDTDMode_full_standards; |
|
701 aDocType = eHTML_Strict; |
|
702 return; |
|
703 } |
|
704 } |
|
705 |
|
706 switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) |
|
707 ? kPublicIDs[index].mode_if_sysid |
|
708 : kPublicIDs[index].mode_if_no_sysid) |
|
709 { |
|
710 case PubIDInfo::eQuirks: |
|
711 aParseMode = eDTDMode_quirks; |
|
712 aDocType = eHTML_Quirks; |
|
713 break; |
|
714 case PubIDInfo::eAlmostStandards: |
|
715 aParseMode = eDTDMode_almost_standards; |
|
716 aDocType = eHTML_Strict; |
|
717 break; |
|
718 case PubIDInfo::eFullStandards: |
|
719 aParseMode = eDTDMode_full_standards; |
|
720 aDocType = eHTML_Strict; |
|
721 break; |
|
722 default: |
|
723 NS_NOTREACHED("no other cases!"); |
|
724 } |
|
725 } |
|
726 } else { |
|
727 // badly formed DOCTYPE -> quirks |
|
728 aParseMode = eDTDMode_quirks; |
|
729 aDocType = eHTML_Quirks; |
|
730 } |
|
731 } |
|
732 |
|
733 static void |
|
734 DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode, |
|
735 eParserDocType& aDocType, const nsACString& aMimeType) |
|
736 { |
|
737 if (aMimeType.EqualsLiteral(TEXT_HTML)) { |
|
738 DetermineHTMLParseMode(aBuffer, aParseMode, aDocType); |
|
739 } else if (nsContentUtils::IsPlainTextType(aMimeType)) { |
|
740 aDocType = ePlainText; |
|
741 aParseMode = eDTDMode_quirks; |
|
742 } else { // Some form of XML |
|
743 aDocType = eXML; |
|
744 aParseMode = eDTDMode_full_standards; |
|
745 } |
|
746 } |
|
747 |
|
748 static nsIDTD* |
|
749 FindSuitableDTD(CParserContext& aParserContext) |
|
750 { |
|
751 // We always find a DTD. |
|
752 aParserContext.mAutoDetectStatus = ePrimaryDetect; |
|
753 |
|
754 // Quick check for view source. |
|
755 NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource, |
|
756 "The old parser is not supposed to be used for View Source anymore."); |
|
757 |
|
758 // Now see if we're parsing HTML (which, as far as we're concerned, simply |
|
759 // means "not XML"). |
|
760 if (aParserContext.mDocType != eXML) { |
|
761 return new CNavDTD(); |
|
762 } |
|
763 |
|
764 // If we're here, then we'd better be parsing XML. |
|
765 NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?"); |
|
766 return new nsExpatDriver(); |
|
767 } |
|
768 |
|
769 NS_IMETHODIMP |
|
770 nsParser::CancelParsingEvents() |
|
771 { |
|
772 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { |
|
773 NS_ASSERTION(mContinueEvent, "mContinueEvent is null"); |
|
774 // Revoke the pending continue parsing event |
|
775 mContinueEvent = nullptr; |
|
776 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; |
|
777 } |
|
778 return NS_OK; |
|
779 } |
|
780 |
|
781 //////////////////////////////////////////////////////////////////////// |
|
782 |
|
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the
 * value of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of
 * EXPR1 (which could be success or failure).
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * Implementation note: RV first receives EXPR2's result and is overwritten
 * with EXPR1's result only when EXPR2 succeeded, so a failure of the latter
 * expression always wins.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
  nsresult RV##__temp = EXPR1;                                                \
  RV = EXPR2;                                                                 \
  if (NS_SUCCEEDED(RV)) {                                                     \
    RV = RV##__temp;                                                          \
  }                                                                           \
}
|
834 |
|
835 /** |
|
836 * This gets called just prior to the model actually |
|
837 * being constructed. It's important to make this the |
|
838 * last thing that happens right before parsing, so we |
|
839 * can delay until the last moment the resolution of |
|
840 * which DTD to use (unless of course we're assigned one). |
|
841 */ |
|
842 nsresult |
|
843 nsParser::WillBuildModel(nsString& aFilename) |
|
844 { |
|
845 if (!mParserContext) |
|
846 return kInvalidParserContext; |
|
847 |
|
848 if (eUnknownDetect != mParserContext->mAutoDetectStatus) |
|
849 return NS_OK; |
|
850 |
|
851 if (eDTDMode_unknown == mParserContext->mDTDMode || |
|
852 eDTDMode_autodetect == mParserContext->mDTDMode) { |
|
853 char16_t buf[1025]; |
|
854 nsFixedString theBuffer(buf, 1024, 0); |
|
855 |
|
856 // Grab 1024 characters, starting at the first non-whitespace |
|
857 // character, to look for the doctype in. |
|
858 mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition()); |
|
859 DetermineParseMode(theBuffer, mParserContext->mDTDMode, |
|
860 mParserContext->mDocType, mParserContext->mMimeType); |
|
861 } |
|
862 |
|
863 NS_ASSERTION(!mDTD || !mParserContext->mPrevContext, |
|
864 "Clobbering DTD for non-root parser context!"); |
|
865 mDTD = FindSuitableDTD(*mParserContext); |
|
866 NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY); |
|
867 |
|
868 nsITokenizer* tokenizer; |
|
869 nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer); |
|
870 NS_ENSURE_SUCCESS(rv, rv); |
|
871 |
|
872 rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink); |
|
873 nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode()); |
|
874 // nsIDTD::WillBuildModel used to be responsible for calling |
|
875 // nsIContentSink::WillBuildModel, but that obligation isn't expressible |
|
876 // in the nsIDTD interface itself, so it's sounder and simpler to give that |
|
877 // responsibility back to the parser. The former behavior of the DTD was to |
|
878 // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns |
|
879 // failure we should use sinkResult instead of rv, to preserve the old error |
|
880 // handling behavior of the DTD: |
|
881 return NS_FAILED(sinkResult) ? sinkResult : rv; |
|
882 } |
|
883 |
|
884 /** |
|
885 * This gets called when the parser is done with its input. |
|
886 * Note that the parser may have been called recursively, so we |
|
887 * have to check for a prev. context before closing out the DTD/sink. |
|
888 */ |
|
889 nsresult |
|
890 nsParser::DidBuildModel(nsresult anErrorCode) |
|
891 { |
|
892 nsresult result = anErrorCode; |
|
893 |
|
894 if (IsComplete()) { |
|
895 if (mParserContext && !mParserContext->mPrevContext) { |
|
896 // Let sink know if we're about to end load because we've been terminated. |
|
897 // In that case we don't want it to run deferred scripts. |
|
898 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING; |
|
899 if (mDTD && mSink) { |
|
900 nsresult dtdResult = mDTD->DidBuildModel(anErrorCode), |
|
901 sinkResult = mSink->DidBuildModel(terminated); |
|
902 // nsIDTD::DidBuildModel used to be responsible for calling |
|
903 // nsIContentSink::DidBuildModel, but that obligation isn't expressible |
|
904 // in the nsIDTD interface itself, so it's sounder and simpler to give |
|
905 // that responsibility back to the parser. The former behavior of the |
|
906 // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the |
|
907 // sink returns failure we should use sinkResult instead of dtdResult, |
|
908 // to preserve the old error handling behavior of the DTD: |
|
909 result = NS_FAILED(sinkResult) ? sinkResult : dtdResult; |
|
910 } |
|
911 |
|
912 //Ref. to bug 61462. |
|
913 mParserContext->mRequest = 0; |
|
914 } |
|
915 } |
|
916 |
|
917 return result; |
|
918 } |
|
919 |
|
920 /** |
|
921 * This method adds a new parser context to the list, |
|
922 * pushing the current one to the next position. |
|
923 * |
|
924 * @param ptr to new context |
|
925 */ |
|
926 void |
|
927 nsParser::PushContext(CParserContext& aContext) |
|
928 { |
|
929 NS_ASSERTION(aContext.mPrevContext == mParserContext, |
|
930 "Trying to push a context whose previous context differs from " |
|
931 "the current parser context."); |
|
932 mParserContext = &aContext; |
|
933 } |
|
934 |
|
935 /** |
|
936 * This method pops the topmost context off the stack, |
|
937 * returning it to the user. The next context (if any) |
|
938 * becomes the current context. |
|
939 * @update gess7/22/98 |
|
940 * @return prev. context |
|
941 */ |
|
942 CParserContext* |
|
943 nsParser::PopContext() |
|
944 { |
|
945 CParserContext* oldContext = mParserContext; |
|
946 if (oldContext) { |
|
947 mParserContext = oldContext->mPrevContext; |
|
948 if (mParserContext) { |
|
949 // If the old context was blocked, propagate the blocked state |
|
950 // back to the new one. Also, propagate the stream listener state |
|
951 // but don't override onStop state to guarantee the call to DidBuildModel(). |
|
952 if (mParserContext->mStreamListenerState != eOnStop) { |
|
953 mParserContext->mStreamListenerState = oldContext->mStreamListenerState; |
|
954 } |
|
955 } |
|
956 } |
|
957 return oldContext; |
|
958 } |
|
959 |
|
960 /** |
|
961 * Call this when you want control whether or not the parser will parse |
|
962 * and tokenize input (TRUE), or whether it just caches input to be |
|
963 * parsed later (FALSE). |
|
964 * |
|
965 * @param aState determines whether we parse/tokenize or just cache. |
|
966 * @return current state |
|
967 */ |
|
968 void |
|
969 nsParser::SetUnusedInput(nsString& aBuffer) |
|
970 { |
|
971 mUnusedInput = aBuffer; |
|
972 } |
|
973 |
|
974 /** |
|
975 * Call this when you want to *force* the parser to terminate the |
|
976 * parsing process altogether. This is binary -- so once you terminate |
|
977 * you can't resume without restarting altogether. |
|
978 */ |
|
979 NS_IMETHODIMP |
|
980 nsParser::Terminate(void) |
|
981 { |
|
982 // We should only call DidBuildModel once, so don't do anything if this is |
|
983 // the second time that Terminate has been called. |
|
984 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { |
|
985 return NS_OK; |
|
986 } |
|
987 |
|
988 nsresult result = NS_OK; |
|
989 // XXX - [ until we figure out a way to break parser-sink circularity ] |
|
990 // Hack - Hold a reference until we are completely done... |
|
991 nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
|
992 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; |
|
993 |
|
994 // CancelParsingEvents must be called to avoid leaking the nsParser object |
|
995 // @see bug 108049 |
|
996 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents |
|
997 // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note: |
|
998 // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag. |
|
999 CancelParsingEvents(); |
|
1000 |
|
1001 // If we got interrupted in the middle of a document.write, then we might |
|
1002 // have more than one parser context on our parsercontext stack. This has |
|
1003 // the effect of making DidBuildModel a no-op, meaning that we never call |
|
1004 // our sink's DidBuildModel and break the reference cycle, causing a leak. |
|
1005 // Since we're getting terminated, we manually clean up our context stack. |
|
1006 while (mParserContext && mParserContext->mPrevContext) { |
|
1007 CParserContext *prev = mParserContext->mPrevContext; |
|
1008 delete mParserContext; |
|
1009 mParserContext = prev; |
|
1010 } |
|
1011 |
|
1012 if (mDTD) { |
|
1013 mDTD->Terminate(); |
|
1014 DidBuildModel(result); |
|
1015 } else if (mSink) { |
|
1016 // We have no parser context or no DTD yet (so we got terminated before we |
|
1017 // got any data). Manually break the reference cycle with the sink. |
|
1018 result = mSink->DidBuildModel(true); |
|
1019 NS_ENSURE_SUCCESS(result, result); |
|
1020 } |
|
1021 |
|
1022 return NS_OK; |
|
1023 } |
|
1024 |
|
1025 NS_IMETHODIMP |
|
1026 nsParser::ContinueInterruptedParsing() |
|
1027 { |
|
1028 // If there are scripts executing, then the content sink is jumping the gun |
|
1029 // (probably due to a synchronous XMLHttpRequest) and will re-enable us |
|
1030 // later, see bug 460706. |
|
1031 if (!IsOkToProcessNetworkData()) { |
|
1032 return NS_OK; |
|
1033 } |
|
1034 |
|
1035 // If the stream has already finished, there's a good chance |
|
1036 // that we might start closing things down when the parser |
|
1037 // is reenabled. To make sure that we're not deleted across |
|
1038 // the reenabling process, hold a reference to ourselves. |
|
1039 nsresult result=NS_OK; |
|
1040 nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
|
1041 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); |
|
1042 |
|
1043 #ifdef DEBUG |
|
1044 if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { |
|
1045 NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser."); |
|
1046 } |
|
1047 #endif |
|
1048 |
|
1049 bool isFinalChunk = mParserContext && |
|
1050 mParserContext->mStreamListenerState == eOnStop; |
|
1051 |
|
1052 mProcessingNetworkData = true; |
|
1053 if (mSink) { |
|
1054 mSink->WillParse(); |
|
1055 } |
|
1056 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999 |
|
1057 mProcessingNetworkData = false; |
|
1058 |
|
1059 if (result != NS_OK) { |
|
1060 result=mInternalState; |
|
1061 } |
|
1062 |
|
1063 return result; |
|
1064 } |
|
1065 |
|
1066 /** |
|
1067 * Stops parsing temporarily. That's it will prevent the |
|
1068 * parser from building up content model. |
|
1069 */ |
|
1070 NS_IMETHODIMP_(void) |
|
1071 nsParser::BlockParser() |
|
1072 { |
|
1073 mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED; |
|
1074 } |
|
1075 |
|
1076 /** |
|
1077 * Open up the parser for tokenization, building up content |
|
1078 * model..etc. However, this method does not resume parsing |
|
1079 * automatically. It's the callers' responsibility to restart |
|
1080 * the parsing engine. |
|
1081 */ |
|
1082 NS_IMETHODIMP_(void) |
|
1083 nsParser::UnblockParser() |
|
1084 { |
|
1085 if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) { |
|
1086 mFlags |= NS_PARSER_FLAG_PARSER_ENABLED; |
|
1087 } else { |
|
1088 NS_WARNING("Trying to unblock an unblocked parser."); |
|
1089 } |
|
1090 } |
|
1091 |
|
1092 NS_IMETHODIMP_(void) |
|
1093 nsParser::ContinueInterruptedParsingAsync() |
|
1094 { |
|
1095 mSink->ContinueInterruptedParsingAsync(); |
|
1096 } |
|
1097 |
|
1098 /** |
|
1099 * Call this to query whether the parser is enabled or not. |
|
1100 */ |
|
1101 NS_IMETHODIMP_(bool) |
|
1102 nsParser::IsParserEnabled() |
|
1103 { |
|
1104 return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0; |
|
1105 } |
|
1106 |
|
1107 /** |
|
1108 * Call this to query whether the parser thinks it's done with parsing. |
|
1109 */ |
|
1110 NS_IMETHODIMP_(bool) |
|
1111 nsParser::IsComplete() |
|
1112 { |
|
1113 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); |
|
1114 } |
|
1115 |
|
1116 |
|
1117 void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev) |
|
1118 { |
|
1119 // Ignore any revoked continue events... |
|
1120 if (mContinueEvent != ev) |
|
1121 return; |
|
1122 |
|
1123 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; |
|
1124 mContinueEvent = nullptr; |
|
1125 |
|
1126 NS_ASSERTION(IsOkToProcessNetworkData(), |
|
1127 "Interrupted in the middle of a script?"); |
|
1128 ContinueInterruptedParsing(); |
|
1129 } |
|
1130 |
|
1131 bool |
|
1132 nsParser::IsInsertionPointDefined() |
|
1133 { |
|
1134 return false; |
|
1135 } |
|
1136 |
|
1137 void |
|
1138 nsParser::BeginEvaluatingParserInsertedScript() |
|
1139 { |
|
1140 } |
|
1141 |
|
1142 void |
|
1143 nsParser::EndEvaluatingParserInsertedScript() |
|
1144 { |
|
1145 } |
|
1146 |
|
1147 void |
|
1148 nsParser::MarkAsNotScriptCreated(const char* aCommand) |
|
1149 { |
|
1150 } |
|
1151 |
|
1152 bool |
|
1153 nsParser::IsScriptCreated() |
|
1154 { |
|
1155 return false; |
|
1156 } |
|
1157 |
|
1158 /** |
|
1159 * This is the main controlling routine in the parsing process. |
|
1160 * Note that it may get called multiple times for the same scanner, |
|
1161 * since this is a pushed based system, and all the tokens may |
|
1162 * not have been consumed by the scanner during a given invocation |
|
1163 * of this method. |
|
1164 */ |
|
1165 NS_IMETHODIMP |
|
1166 nsParser::Parse(nsIURI* aURL, |
|
1167 nsIRequestObserver* aListener, |
|
1168 void* aKey, |
|
1169 nsDTDMode aMode) |
|
1170 { |
|
1171 |
|
1172 NS_PRECONDITION(aURL, "Error: Null URL given"); |
|
1173 |
|
1174 nsresult result=kBadURL; |
|
1175 mObserver = aListener; |
|
1176 |
|
1177 if (aURL) { |
|
1178 nsAutoCString spec; |
|
1179 nsresult rv = aURL->GetSpec(spec); |
|
1180 if (rv != NS_OK) { |
|
1181 return rv; |
|
1182 } |
|
1183 NS_ConvertUTF8toUTF16 theName(spec); |
|
1184 |
|
1185 nsScanner* theScanner = new nsScanner(theName, false); |
|
1186 CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey, |
|
1187 mCommand, aListener); |
|
1188 if (pc && theScanner) { |
|
1189 pc->mMultipart = true; |
|
1190 pc->mContextType = CParserContext::eCTURL; |
|
1191 pc->mDTDMode = aMode; |
|
1192 PushContext(*pc); |
|
1193 |
|
1194 result = NS_OK; |
|
1195 } else { |
|
1196 result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT; |
|
1197 } |
|
1198 } |
|
1199 return result; |
|
1200 } |
|
1201 |
|
1202 /** |
|
1203 * Used by XML fragment parsing below. |
|
1204 * |
|
1205 * @param aSourceBuffer contains a string-full of real content |
|
1206 */ |
|
1207 nsresult |
|
1208 nsParser::Parse(const nsAString& aSourceBuffer, |
|
1209 void* aKey, |
|
1210 bool aLastCall) |
|
1211 { |
|
1212 nsresult result = NS_OK; |
|
1213 |
|
1214 // Don't bother if we're never going to parse this. |
|
1215 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { |
|
1216 return result; |
|
1217 } |
|
1218 |
|
1219 if (!aLastCall && aSourceBuffer.IsEmpty()) { |
|
1220 // Nothing is being passed to the parser so return |
|
1221 // immediately. mUnusedInput will get processed when |
|
1222 // some data is actually passed in. |
|
1223 // But if this is the last call, make sure to finish up |
|
1224 // stuff correctly. |
|
1225 return result; |
|
1226 } |
|
1227 |
|
1228 // Maintain a reference to ourselves so we don't go away |
|
1229 // till we're completely done. |
|
1230 nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
|
1231 |
|
1232 if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) { |
|
1233 // Note: The following code will always find the parser context associated |
|
1234 // with the given key, even if that context has been suspended (e.g., for |
|
1235 // another document.write call). This doesn't appear to be exactly what IE |
|
1236 // does in the case where this happens, but this makes more sense. |
|
1237 CParserContext* pc = mParserContext; |
|
1238 while (pc && pc->mKey != aKey) { |
|
1239 pc = pc->mPrevContext; |
|
1240 } |
|
1241 |
|
1242 if (!pc) { |
|
1243 // Only make a new context if we don't have one, OR if we do, but has a |
|
1244 // different context key. |
|
1245 nsScanner* theScanner = new nsScanner(mUnusedInput); |
|
1246 NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY); |
|
1247 |
|
1248 eAutoDetectResult theStatus = eUnknownDetect; |
|
1249 |
|
1250 if (mParserContext && |
|
1251 mParserContext->mMimeType.EqualsLiteral("application/xml")) { |
|
1252 // Ref. Bug 90379 |
|
1253 NS_ASSERTION(mDTD, "How come the DTD is null?"); |
|
1254 |
|
1255 if (mParserContext) { |
|
1256 theStatus = mParserContext->mAutoDetectStatus; |
|
1257 // Added this to fix bug 32022. |
|
1258 } |
|
1259 } |
|
1260 |
|
1261 pc = new CParserContext(mParserContext, theScanner, aKey, mCommand, |
|
1262 0, theStatus, aLastCall); |
|
1263 NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY); |
|
1264 |
|
1265 PushContext(*pc); |
|
1266 |
|
1267 pc->mMultipart = !aLastCall; // By default |
|
1268 if (pc->mPrevContext) { |
|
1269 pc->mMultipart |= pc->mPrevContext->mMultipart; |
|
1270 } |
|
1271 |
|
1272 // Start fix bug 40143 |
|
1273 if (pc->mMultipart) { |
|
1274 pc->mStreamListenerState = eOnDataAvail; |
|
1275 if (pc->mScanner) { |
|
1276 pc->mScanner->SetIncremental(true); |
|
1277 } |
|
1278 } else { |
|
1279 pc->mStreamListenerState = eOnStop; |
|
1280 if (pc->mScanner) { |
|
1281 pc->mScanner->SetIncremental(false); |
|
1282 } |
|
1283 } |
|
1284 // end fix for 40143 |
|
1285 |
|
1286 pc->mContextType=CParserContext::eCTString; |
|
1287 pc->SetMimeType(NS_LITERAL_CSTRING("application/xml")); |
|
1288 pc->mDTDMode = eDTDMode_full_standards; |
|
1289 |
|
1290 mUnusedInput.Truncate(); |
|
1291 |
|
1292 pc->mScanner->Append(aSourceBuffer); |
|
1293 // Do not interrupt document.write() - bug 95487 |
|
1294 result = ResumeParse(false, false, false); |
|
1295 } else { |
|
1296 pc->mScanner->Append(aSourceBuffer); |
|
1297 if (!pc->mPrevContext) { |
|
1298 // Set stream listener state to eOnStop, on the final context - Fix 68160, |
|
1299 // to guarantee DidBuildModel() call - Fix 36148 |
|
1300 if (aLastCall) { |
|
1301 pc->mStreamListenerState = eOnStop; |
|
1302 pc->mScanner->SetIncremental(false); |
|
1303 } |
|
1304 |
|
1305 if (pc == mParserContext) { |
|
1306 // If pc is not mParserContext, then this call to ResumeParse would |
|
1307 // do the wrong thing and try to continue parsing using |
|
1308 // mParserContext. We need to wait to actually resume parsing on pc. |
|
1309 ResumeParse(false, false, false); |
|
1310 } |
|
1311 } |
|
1312 } |
|
1313 } |
|
1314 |
|
1315 return result; |
|
1316 } |
|
1317 |
|
1318 NS_IMETHODIMP |
|
1319 nsParser::ParseFragment(const nsAString& aSourceBuffer, |
|
1320 nsTArray<nsString>& aTagStack) |
|
1321 { |
|
1322 nsresult result = NS_OK; |
|
1323 nsAutoString theContext; |
|
1324 uint32_t theCount = aTagStack.Length(); |
|
1325 uint32_t theIndex = 0; |
|
1326 |
|
1327 // Disable observers for fragments |
|
1328 mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED; |
|
1329 |
|
1330 for (theIndex = 0; theIndex < theCount; theIndex++) { |
|
1331 theContext.AppendLiteral("<"); |
|
1332 theContext.Append(aTagStack[theCount - theIndex - 1]); |
|
1333 theContext.AppendLiteral(">"); |
|
1334 } |
|
1335 |
|
1336 if (theCount == 0) { |
|
1337 // Ensure that the buffer is not empty. Because none of the DTDs care |
|
1338 // about leading whitespace, this doesn't change the result. |
|
1339 theContext.AssignLiteral(" "); |
|
1340 } |
|
1341 |
|
1342 // First, parse the context to build up the DTD's tag stack. Note that we |
|
1343 // pass false for the aLastCall parameter. |
|
1344 result = Parse(theContext, |
|
1345 (void*)&theContext, |
|
1346 false); |
|
1347 if (NS_FAILED(result)) { |
|
1348 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; |
|
1349 return result; |
|
1350 } |
|
1351 |
|
1352 if (!mSink) { |
|
1353 // Parse must have failed in the XML case and so the sink was killed. |
|
1354 return NS_ERROR_HTMLPARSER_STOPPARSING; |
|
1355 } |
|
1356 |
|
1357 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink); |
|
1358 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink"); |
|
1359 |
|
1360 fragSink->WillBuildContent(); |
|
1361 // Now, parse the actual content. Note that this is the last call |
|
1362 // for HTML content, but for XML, we will want to build and parse |
|
1363 // the end tags. However, if tagStack is empty, it's the last call |
|
1364 // for XML as well. |
|
1365 if (theCount == 0) { |
|
1366 result = Parse(aSourceBuffer, |
|
1367 &theContext, |
|
1368 true); |
|
1369 fragSink->DidBuildContent(); |
|
1370 } else { |
|
1371 // Add an end tag chunk, so expat will read the whole source buffer, |
|
1372 // and not worry about ']]' etc. |
|
1373 result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"), |
|
1374 &theContext, |
|
1375 false); |
|
1376 fragSink->DidBuildContent(); |
|
1377 |
|
1378 if (NS_SUCCEEDED(result)) { |
|
1379 nsAutoString endContext; |
|
1380 for (theIndex = 0; theIndex < theCount; theIndex++) { |
|
1381 // we already added an end tag chunk above |
|
1382 if (theIndex > 0) { |
|
1383 endContext.AppendLiteral("</"); |
|
1384 } |
|
1385 |
|
1386 nsString& thisTag = aTagStack[theIndex]; |
|
1387 // was there an xmlns=? |
|
1388 int32_t endOfTag = thisTag.FindChar(char16_t(' ')); |
|
1389 if (endOfTag == -1) { |
|
1390 endContext.Append(thisTag); |
|
1391 } else { |
|
1392 endContext.Append(Substring(thisTag,0,endOfTag)); |
|
1393 } |
|
1394 |
|
1395 endContext.AppendLiteral(">"); |
|
1396 } |
|
1397 |
|
1398 result = Parse(endContext, |
|
1399 &theContext, |
|
1400 true); |
|
1401 } |
|
1402 } |
|
1403 |
|
1404 mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED; |
|
1405 |
|
1406 return result; |
|
1407 } |
|
1408 |
|
1409 /** |
|
1410 * This routine is called to cause the parser to continue parsing its |
|
1411 * underlying stream. This call allows the parse process to happen in |
|
1412 * chunks, such as when the content is push based, and we need to parse in |
|
1413 * pieces. |
|
1414 * |
|
1415 * An interesting change in how the parser gets used has led us to add extra |
|
1416 * processing to this method. The case occurs when the parser is blocked in |
|
1417 * one context, and gets a parse(string) call in another context. In this |
|
1418 * case, the parserContexts are linked. No problem. |
|
1419 * |
|
1420 * The problem is that Parse(string) assumes that it can proceed unabated, |
|
1421 * but if the parser is already blocked that assumption is false. So we |
|
1422 * needed to add a mechanism here to allow the parser to continue to process |
|
1423 * (the pop and free) contexts until 1) it get's blocked again; 2) it runs |
|
1424 * out of contexts. |
|
1425 * |
|
1426 * |
|
1427 * @param allowItertion : set to true if non-script resumption is requested |
|
1428 * @param aIsFinalChunk : tells us when the last chunk of data is provided. |
|
1429 * @return error code -- 0 if ok, non-zero if error. |
|
1430 */ |
|
1431 nsresult |
|
1432 nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk, |
|
1433 bool aCanInterrupt) |
|
1434 { |
|
1435 nsresult result = NS_OK; |
|
1436 |
|
1437 if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) && |
|
1438 mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { |
|
1439 |
|
1440 result = WillBuildModel(mParserContext->mScanner->GetFilename()); |
|
1441 if (NS_FAILED(result)) { |
|
1442 mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE; |
|
1443 return result; |
|
1444 } |
|
1445 |
|
1446 if (mDTD) { |
|
1447 mSink->WillResume(); |
|
1448 bool theIterationIsOk = true; |
|
1449 |
|
1450 while (result == NS_OK && theIterationIsOk) { |
|
1451 if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) { |
|
1452 // -- Ref: Bug# 22485 -- |
|
1453 // Insert the unused input into the source buffer |
|
1454 // as if it was read from the input stream. |
|
1455 // Adding UngetReadable() per vidur!! |
|
1456 mParserContext->mScanner->UngetReadable(mUnusedInput); |
|
1457 mUnusedInput.Truncate(0); |
|
1458 } |
|
1459 |
|
1460 // Only allow parsing to be interrupted in the subsequent call to |
|
1461 // build model. |
|
1462 nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) |
|
1463 ? Tokenize(aIsFinalChunk) |
|
1464 : NS_OK; |
|
1465 result = BuildModel(); |
|
1466 |
|
1467 if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) { |
|
1468 PostContinueEvent(); |
|
1469 } |
|
1470 |
|
1471 theIterationIsOk = theTokenizerResult != kEOF && |
|
1472 result != NS_ERROR_HTMLPARSER_INTERRUPTED; |
|
1473 |
|
1474 // Make sure not to stop parsing too early. Therefore, before shutting |
|
1475 // down the parser, it's important to check whether the input buffer |
|
1476 // has been scanned to completion (theTokenizerResult should be kEOF). |
|
1477 // kEOF -> End of buffer. |
|
1478 |
|
1479 // If we're told to block the parser, we disable all further parsing |
|
1480 // (and cache any data coming in) until the parser is re-enabled. |
|
1481 if (NS_ERROR_HTMLPARSER_BLOCK == result) { |
|
1482 mSink->WillInterrupt(); |
|
1483 if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) { |
|
1484 // If we were blocked by a recursive invocation, don't re-block. |
|
1485 BlockParser(); |
|
1486 } |
|
1487 return NS_OK; |
|
1488 } |
|
1489 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { |
|
1490 // Note: Parser Terminate() calls DidBuildModel. |
|
1491 if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { |
|
1492 DidBuildModel(mStreamStatus); |
|
1493 mInternalState = result; |
|
1494 } |
|
1495 |
|
1496 return NS_OK; |
|
1497 } |
|
1498 if ((NS_OK == result && theTokenizerResult == kEOF) || |
|
1499 result == NS_ERROR_HTMLPARSER_INTERRUPTED) { |
|
1500 bool theContextIsStringBased = |
|
1501 CParserContext::eCTString == mParserContext->mContextType; |
|
1502 |
|
1503 if (mParserContext->mStreamListenerState == eOnStop || |
|
1504 !mParserContext->mMultipart || theContextIsStringBased) { |
|
1505 if (!mParserContext->mPrevContext) { |
|
1506 if (mParserContext->mStreamListenerState == eOnStop) { |
|
1507 DidBuildModel(mStreamStatus); |
|
1508 return NS_OK; |
|
1509 } |
|
1510 } else { |
|
1511 CParserContext* theContext = PopContext(); |
|
1512 if (theContext) { |
|
1513 theIterationIsOk = allowIteration && theContextIsStringBased; |
|
1514 if (theContext->mCopyUnused) { |
|
1515 theContext->mScanner->CopyUnusedData(mUnusedInput); |
|
1516 } |
|
1517 |
|
1518 delete theContext; |
|
1519 } |
|
1520 |
|
1521 result = mInternalState; |
|
1522 aIsFinalChunk = mParserContext && |
|
1523 mParserContext->mStreamListenerState == eOnStop; |
|
1524 // ...then intentionally fall through to mSink->WillInterrupt()... |
|
1525 } |
|
1526 } |
|
1527 } |
|
1528 |
|
1529 if (theTokenizerResult == kEOF || |
|
1530 result == NS_ERROR_HTMLPARSER_INTERRUPTED) { |
|
1531 result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; |
|
1532 mSink->WillInterrupt(); |
|
1533 } |
|
1534 } |
|
1535 } else { |
|
1536 mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD; |
|
1537 } |
|
1538 } |
|
1539 |
|
1540 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; |
|
1541 } |
|
1542 |
|
1543 /** |
|
1544 * This is where we loop over the tokens created in the |
|
1545 * tokenization phase, and try to make sense out of them. |
|
1546 */ |
|
1547 nsresult |
|
1548 nsParser::BuildModel() |
|
1549 { |
|
1550 nsITokenizer* theTokenizer = nullptr; |
|
1551 |
|
1552 nsresult result = NS_OK; |
|
1553 if (mParserContext) { |
|
1554 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); |
|
1555 } |
|
1556 |
|
1557 if (NS_SUCCEEDED(result)) { |
|
1558 if (mDTD) { |
|
1559 result = mDTD->BuildModel(theTokenizer, mSink); |
|
1560 } |
|
1561 } else { |
|
1562 mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER; |
|
1563 } |
|
1564 return result; |
|
1565 } |
|
1566 |
|
1567 /******************************************************************* |
|
1568 These methods are used to talk to the netlib system... |
|
1569 *******************************************************************/ |
|
1570 |
|
1571 nsresult |
|
1572 nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext) |
|
1573 { |
|
1574 NS_PRECONDITION(eNone == mParserContext->mStreamListenerState, |
|
1575 "Parser's nsIStreamListener API was not setup " |
|
1576 "correctly in constructor."); |
|
1577 if (mObserver) { |
|
1578 mObserver->OnStartRequest(request, aContext); |
|
1579 } |
|
1580 mParserContext->mStreamListenerState = eOnStart; |
|
1581 mParserContext->mAutoDetectStatus = eUnknownDetect; |
|
1582 mParserContext->mRequest = request; |
|
1583 |
|
1584 NS_ASSERTION(!mParserContext->mPrevContext, |
|
1585 "Clobbering DTD for non-root parser context!"); |
|
1586 mDTD = nullptr; |
|
1587 |
|
1588 nsresult rv; |
|
1589 nsAutoCString contentType; |
|
1590 nsCOMPtr<nsIChannel> channel = do_QueryInterface(request); |
|
1591 if (channel) { |
|
1592 rv = channel->GetContentType(contentType); |
|
1593 if (NS_SUCCEEDED(rv)) { |
|
1594 mParserContext->SetMimeType(contentType); |
|
1595 } |
|
1596 } |
|
1597 |
|
1598 rv = NS_OK; |
|
1599 |
|
1600 return rv; |
|
1601 } |
|
1602 |
|
1603 static bool |
|
1604 ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen, |
|
1605 nsCString& oCharset) |
|
1606 { |
|
1607 // This code is rather pointless to have. Might as well reuse expat as |
|
1608 // seen in nsHtml5StreamParser. -- hsivonen |
|
1609 oCharset.Truncate(); |
|
1610 if ((aLen >= 5) && |
|
1611 ('<' == aBytes[0]) && |
|
1612 ('?' == aBytes[1]) && |
|
1613 ('x' == aBytes[2]) && |
|
1614 ('m' == aBytes[3]) && |
|
1615 ('l' == aBytes[4])) { |
|
1616 int32_t i; |
|
1617 bool versionFound = false, encodingFound = false; |
|
1618 for (i = 6; i < aLen && !encodingFound; ++i) { |
|
1619 // end of XML declaration? |
|
1620 if ((((char*) aBytes)[i] == '?') && |
|
1621 ((i + 1) < aLen) && |
|
1622 (((char*) aBytes)[i + 1] == '>')) { |
|
1623 break; |
|
1624 } |
|
1625 // Version is required. |
|
1626 if (!versionFound) { |
|
1627 // Want to avoid string comparisons, hence looking for 'n' |
|
1628 // and only if found check the string leading to it. Not |
|
1629 // foolproof, but fast. |
|
1630 // The shortest string allowed before this is (strlen==13): |
|
1631 // <?xml version |
|
1632 if ((((char*) aBytes)[i] == 'n') && |
|
1633 (i >= 12) && |
|
1634 (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) { |
|
1635 // Fast forward through version |
|
1636 char q = 0; |
|
1637 for (++i; i < aLen; ++i) { |
|
1638 char qi = ((char*) aBytes)[i]; |
|
1639 if (qi == '\'' || qi == '"') { |
|
1640 if (q && q == qi) { |
|
1641 // ending quote |
|
1642 versionFound = true; |
|
1643 break; |
|
1644 } else { |
|
1645 // Starting quote |
|
1646 q = qi; |
|
1647 } |
|
1648 } |
|
1649 } |
|
1650 } |
|
1651 } else { |
|
1652 // encoding must follow version |
|
1653 // Want to avoid string comparisons, hence looking for 'g' |
|
1654 // and only if found check the string leading to it. Not |
|
1655 // foolproof, but fast. |
|
1656 // The shortest allowed string before this (strlen==26): |
|
1657 // <?xml version="1" encoding |
|
1658 if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp( |
|
1659 "encodin", (char*) (aBytes + i - 7), 7))) { |
|
1660 int32_t encStart = 0; |
|
1661 char q = 0; |
|
1662 for (++i; i < aLen; ++i) { |
|
1663 char qi = ((char*) aBytes)[i]; |
|
1664 if (qi == '\'' || qi == '"') { |
|
1665 if (q && q == qi) { |
|
1666 int32_t count = i - encStart; |
|
1667 // encoding value is invalid if it is UTF-16 |
|
1668 if (count > 0 && PL_strncasecmp("UTF-16", |
|
1669 (char*) (aBytes + encStart), count)) { |
|
1670 oCharset.Assign((char*) (aBytes + encStart), count); |
|
1671 } |
|
1672 encodingFound = true; |
|
1673 break; |
|
1674 } else { |
|
1675 encStart = i + 1; |
|
1676 q = qi; |
|
1677 } |
|
1678 } |
|
1679 } |
|
1680 } |
|
1681 } // if (!versionFound) |
|
1682 } // for |
|
1683 } |
|
1684 return !oCharset.IsEmpty(); |
|
1685 } |
|
1686 |
|
1687 inline const char |
|
1688 GetNextChar(nsACString::const_iterator& aStart, |
|
1689 nsACString::const_iterator& aEnd) |
|
1690 { |
|
1691 NS_ASSERTION(aStart != aEnd, "end of buffer"); |
|
1692 return (++aStart != aEnd) ? *aStart : '\0'; |
|
1693 } |
|
1694 |
|
1695 static NS_METHOD |
|
1696 NoOpParserWriteFunc(nsIInputStream* in, |
|
1697 void* closure, |
|
1698 const char* fromRawSegment, |
|
1699 uint32_t toOffset, |
|
1700 uint32_t count, |
|
1701 uint32_t *writeCount) |
|
1702 { |
|
1703 *writeCount = count; |
|
1704 return NS_OK; |
|
1705 } |
|
1706 |
|
1707 typedef struct { |
|
1708 bool mNeedCharsetCheck; |
|
1709 nsParser* mParser; |
|
1710 nsScanner* mScanner; |
|
1711 nsIRequest* mRequest; |
|
1712 } ParserWriteStruct; |
|
1713 |
|
1714 /* |
|
1715 * This function is invoked as a result of a call to a stream's |
|
1716 * ReadSegments() method. It is called for each contiguous buffer |
|
1717 * of data in the underlying stream or pipe. Using ReadSegments |
|
1718 * allows us to avoid copying data to read out of the stream. |
|
1719 */ |
|
1720 static NS_METHOD |
|
1721 ParserWriteFunc(nsIInputStream* in, |
|
1722 void* closure, |
|
1723 const char* fromRawSegment, |
|
1724 uint32_t toOffset, |
|
1725 uint32_t count, |
|
1726 uint32_t *writeCount) |
|
1727 { |
|
1728 nsresult result; |
|
1729 ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure); |
|
1730 const unsigned char* buf = |
|
1731 reinterpret_cast<const unsigned char*> (fromRawSegment); |
|
1732 uint32_t theNumRead = count; |
|
1733 |
|
1734 if (!pws) { |
|
1735 return NS_ERROR_FAILURE; |
|
1736 } |
|
1737 |
|
1738 if (pws->mNeedCharsetCheck) { |
|
1739 pws->mNeedCharsetCheck = false; |
|
1740 int32_t source; |
|
1741 nsAutoCString preferred; |
|
1742 nsAutoCString maybePrefer; |
|
1743 pws->mParser->GetDocumentCharset(preferred, source); |
|
1744 |
|
1745 // This code was bogus when I found it. It expects the BOM or the XML |
|
1746 // declaration to be entirely in the first network buffer. -- hsivonen |
|
1747 if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) { |
|
1748 // The decoder will swallow the BOM. The UTF-16 will re-sniff for |
|
1749 // endianness. The value of preferred is now either "UTF-8" or "UTF-16". |
|
1750 preferred.Assign(maybePrefer); |
|
1751 source = kCharsetFromByteOrderMark; |
|
1752 } else if (source < kCharsetFromChannel) { |
|
1753 nsAutoCString declCharset; |
|
1754 |
|
1755 if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) { |
|
1756 if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) { |
|
1757 preferred.Assign(maybePrefer); |
|
1758 source = kCharsetFromMetaTag; |
|
1759 } |
|
1760 } |
|
1761 } |
|
1762 |
|
1763 pws->mParser->SetDocumentCharset(preferred, source); |
|
1764 pws->mParser->SetSinkCharset(preferred); |
|
1765 |
|
1766 } |
|
1767 |
|
1768 result = pws->mScanner->Append(fromRawSegment, theNumRead, pws->mRequest); |
|
1769 if (NS_SUCCEEDED(result)) { |
|
1770 *writeCount = count; |
|
1771 } |
|
1772 |
|
1773 return result; |
|
1774 } |
|
1775 |
|
1776 nsresult |
|
1777 nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext, |
|
1778 nsIInputStream *pIStream, uint64_t sourceOffset, |
|
1779 uint32_t aLength) |
|
1780 { |
|
1781 NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState || |
|
1782 eOnDataAvail == mParserContext->mStreamListenerState), |
|
1783 "Error: OnStartRequest() must be called before OnDataAvailable()"); |
|
1784 NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream), |
|
1785 "Must have a buffered input stream"); |
|
1786 |
|
1787 nsresult rv = NS_OK; |
|
1788 |
|
1789 if (mIsAboutBlank) { |
|
1790 MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank"); |
|
1791 // ... but if an extension tries to feed us data for about:blank in a |
|
1792 // release build, silently ignore the data. |
|
1793 uint32_t totalRead; |
|
1794 rv = pIStream->ReadSegments(NoOpParserWriteFunc, |
|
1795 nullptr, |
|
1796 aLength, |
|
1797 &totalRead); |
|
1798 return rv; |
|
1799 } |
|
1800 |
|
1801 CParserContext *theContext = mParserContext; |
|
1802 |
|
1803 while (theContext && theContext->mRequest != request) { |
|
1804 theContext = theContext->mPrevContext; |
|
1805 } |
|
1806 |
|
1807 if (theContext) { |
|
1808 theContext->mStreamListenerState = eOnDataAvail; |
|
1809 |
|
1810 if (eInvalidDetect == theContext->mAutoDetectStatus) { |
|
1811 if (theContext->mScanner) { |
|
1812 nsScannerIterator iter; |
|
1813 theContext->mScanner->EndReading(iter); |
|
1814 theContext->mScanner->SetPosition(iter, true); |
|
1815 } |
|
1816 } |
|
1817 |
|
1818 uint32_t totalRead; |
|
1819 ParserWriteStruct pws; |
|
1820 pws.mNeedCharsetCheck = true; |
|
1821 pws.mParser = this; |
|
1822 pws.mScanner = theContext->mScanner; |
|
1823 pws.mRequest = request; |
|
1824 |
|
1825 rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead); |
|
1826 if (NS_FAILED(rv)) { |
|
1827 return rv; |
|
1828 } |
|
1829 |
|
1830 // Don't bother to start parsing until we've seen some |
|
1831 // non-whitespace data |
|
1832 if (IsOkToProcessNetworkData() && |
|
1833 theContext->mScanner->FirstNonWhitespacePosition() >= 0) { |
|
1834 nsCOMPtr<nsIParser> kungFuDeathGrip(this); |
|
1835 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); |
|
1836 mProcessingNetworkData = true; |
|
1837 if (mSink) { |
|
1838 mSink->WillParse(); |
|
1839 } |
|
1840 rv = ResumeParse(); |
|
1841 mProcessingNetworkData = false; |
|
1842 } |
|
1843 } else { |
|
1844 rv = NS_ERROR_UNEXPECTED; |
|
1845 } |
|
1846 |
|
1847 return rv; |
|
1848 } |
|
1849 |
|
1850 /** |
|
1851 * This is called by the networking library once the last block of data |
|
1852 * has been collected from the net. |
|
1853 */ |
|
1854 nsresult |
|
1855 nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext, |
|
1856 nsresult status) |
|
1857 { |
|
1858 nsresult rv = NS_OK; |
|
1859 |
|
1860 CParserContext *pc = mParserContext; |
|
1861 while (pc) { |
|
1862 if (pc->mRequest == request) { |
|
1863 pc->mStreamListenerState = eOnStop; |
|
1864 pc->mScanner->SetIncremental(false); |
|
1865 break; |
|
1866 } |
|
1867 |
|
1868 pc = pc->mPrevContext; |
|
1869 } |
|
1870 |
|
1871 mStreamStatus = status; |
|
1872 |
|
1873 if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) { |
|
1874 mProcessingNetworkData = true; |
|
1875 if (mSink) { |
|
1876 mSink->WillParse(); |
|
1877 } |
|
1878 rv = ResumeParse(true, true); |
|
1879 mProcessingNetworkData = false; |
|
1880 } |
|
1881 |
|
1882 // If the parser isn't enabled, we don't finish parsing till |
|
1883 // it is reenabled. |
|
1884 |
|
1885 |
|
1886 // XXX Should we wait to notify our observers as well if the |
|
1887 // parser isn't yet enabled? |
|
1888 if (mObserver) { |
|
1889 mObserver->OnStopRequest(request, aContext, status); |
|
1890 } |
|
1891 |
|
1892 return rv; |
|
1893 } |
|
1894 |
|
1895 |
|
1896 /******************************************************************* |
|
1897 Here come the tokenization methods... |
|
1898 *******************************************************************/ |
|
1899 |
|
1900 |
|
1901 /** |
|
1902 * Part of the code sandwich, this gets called right before |
|
1903 * the tokenization process begins. The main reason for |
|
1904 * this call is to allow the delegate to do initialization. |
|
1905 */ |
|
1906 bool |
|
1907 nsParser::WillTokenize(bool aIsFinalChunk) |
|
1908 { |
|
1909 if (!mParserContext) { |
|
1910 return true; |
|
1911 } |
|
1912 |
|
1913 nsITokenizer* theTokenizer; |
|
1914 nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); |
|
1915 NS_ENSURE_SUCCESS(result, false); |
|
1916 return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk)); |
|
1917 } |
|
1918 |
|
1919 |
|
1920 /** |
|
1921 * This is the primary control routine to consume tokens. |
|
1922 * It iteratively consumes tokens until an error occurs or |
|
1923 * you run out of data. |
|
1924 */ |
|
1925 nsresult nsParser::Tokenize(bool aIsFinalChunk) |
|
1926 { |
|
1927 nsITokenizer* theTokenizer; |
|
1928 |
|
1929 nsresult result = NS_ERROR_NOT_AVAILABLE; |
|
1930 if (mParserContext) { |
|
1931 result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer); |
|
1932 } |
|
1933 |
|
1934 if (NS_SUCCEEDED(result)) { |
|
1935 bool flushTokens = false; |
|
1936 |
|
1937 bool killSink = false; |
|
1938 |
|
1939 WillTokenize(aIsFinalChunk); |
|
1940 while (NS_SUCCEEDED(result)) { |
|
1941 mParserContext->mScanner->Mark(); |
|
1942 result = theTokenizer->ConsumeToken(*mParserContext->mScanner, |
|
1943 flushTokens); |
|
1944 if (NS_FAILED(result)) { |
|
1945 mParserContext->mScanner->RewindToMark(); |
|
1946 if (kEOF == result){ |
|
1947 break; |
|
1948 } |
|
1949 if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { |
|
1950 killSink = true; |
|
1951 result = Terminate(); |
|
1952 break; |
|
1953 } |
|
1954 } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) { |
|
1955 // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931. |
|
1956 // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 -- |
|
1957 // Also remember to update the marked position. |
|
1958 mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS; |
|
1959 mParserContext->mScanner->Mark(); |
|
1960 break; |
|
1961 } |
|
1962 } |
|
1963 |
|
1964 if (killSink) { |
|
1965 mSink = nullptr; |
|
1966 } |
|
1967 } else { |
|
1968 result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER; |
|
1969 } |
|
1970 |
|
1971 return result; |
|
1972 } |
|
1973 |
|
1974 /** |
|
1975 * Get the channel associated with this parser |
|
1976 * |
|
1977 * @param aChannel out param that will contain the result |
|
1978 * @return NS_OK if successful |
|
1979 */ |
|
1980 NS_IMETHODIMP |
|
1981 nsParser::GetChannel(nsIChannel** aChannel) |
|
1982 { |
|
1983 nsresult result = NS_ERROR_NOT_AVAILABLE; |
|
1984 if (mParserContext && mParserContext->mRequest) { |
|
1985 result = CallQueryInterface(mParserContext->mRequest, aChannel); |
|
1986 } |
|
1987 return result; |
|
1988 } |
|
1989 |
|
1990 /** |
|
1991 * Get the DTD associated with this parser |
|
1992 */ |
|
1993 NS_IMETHODIMP |
|
1994 nsParser::GetDTD(nsIDTD** aDTD) |
|
1995 { |
|
1996 if (mParserContext) { |
|
1997 NS_IF_ADDREF(*aDTD = mDTD); |
|
1998 } |
|
1999 |
|
2000 return NS_OK; |
|
2001 } |
|
2002 |
|
2003 /** |
|
2004 * Get this as nsIStreamListener |
|
2005 */ |
|
2006 nsIStreamListener* |
|
2007 nsParser::GetStreamListener() |
|
2008 { |
|
2009 return this; |
|
2010 } |