The Tor Browser: parser/htmlparser/src/nsParser.cpp@6474c204b198

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

     2 /* vim: set sw=2 ts=2 et tw=79: */

     3 /* This Source Code Form is subject to the terms of the Mozilla Public

     4  * License, v. 2.0. If a copy of the MPL was not distributed with this

     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     7 #include "nsIAtom.h"

     8 #include "nsParser.h"

     9 #include "nsString.h"

    10 #include "nsCRT.h"

    11 #include "nsScanner.h"

    12 #include "plstr.h"

    13 #include "nsIStringStream.h"

    14 #include "nsIChannel.h"

    15 #include "nsICachingChannel.h"

    16 #include "nsICacheEntryDescriptor.h"

    17 #include "nsIInputStream.h"

    18 #include "CNavDTD.h"

    19 #include "prenv.h"

    20 #include "prlock.h"

    21 #include "prcvar.h"

    22 #include "nsParserCIID.h"

    23 #include "nsReadableUtils.h"

    24 #include "nsCOMPtr.h"

    25 #include "nsExpatDriver.h"

    26 #include "nsIServiceManager.h"

    27 #include "nsICategoryManager.h"

    28 #include "nsISupportsPrimitives.h"

    29 #include "nsIFragmentContentSink.h"

    30 #include "nsStreamUtils.h"

    31 #include "nsHTMLTokenizer.h"

    32 #include "nsNetUtil.h"

    33 #include "nsScriptLoader.h"

    34 #include "nsDataHashtable.h"

    35 #include "nsXPCOMCIDInternal.h"

    36 #include "nsMimeTypes.h"

    37 #include "mozilla/CondVar.h"

    38 #include "mozilla/Mutex.h"

    39 #include "nsParserConstants.h"

    40 #include "nsCharsetSource.h"

    41 #include "nsContentUtils.h"

    42 #include "nsThreadUtils.h"

    43 #include "nsIHTMLContentSink.h"

    45 #include "mozilla/dom/EncodingUtils.h"

    47 using namespace mozilla;

    48 using mozilla::dom::EncodingUtils;

    // Bit flags kept in nsParser::mFlags.
    //
    // Initialize() switches on PARSER_ENABLED, OBSERVERS_ENABLED and
    // CAN_TOKENIZE.  PostContinueEvent() sets PENDING_CONTINUE_EVENT after a
    // continue event has been dispatched successfully.
    #define NS_PARSER_FLAG_PARSER_ENABLED 0x00000002
    #define NS_PARSER_FLAG_OBSERVERS_ENABLED 0x00000004
    // A nsParserContinueEvent has been dispatched and has not fired yet.
    #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
    // NOTE(review): not referenced in the visible part of this file.
    #define NS_PARSER_FLAG_FLUSH_TOKENS 0x00000020
    #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000040

    56 //-------------- Begin ParseContinue Event Definition ------------------------

    57 /*

    58 The parser can be explicitly interrupted by passing a return value of

    59 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause

    60 the parser to stop processing and allow the application to return to the event

    61 loop. The data which was left at the time of interruption will be processed

    62 the next time OnDataAvailable is called. If the parser has received its final

    63 chunk of data then OnDataAvailable will no longer be called by the networking

    64 module, so the parser will schedule a nsParserContinueEvent which will call

    65 the parser to process the remaining data after returning to the event loop.

    66 If the parser is interrupted while processing the remaining data it will

    67 schedule another ParseContinueEvent. The processing of data followed by

    68 scheduling of the continue events will proceed until either:

    70   1) All of the remaining data can be processed without interrupting

    71   2) The parser has been cancelled.

    74 This capability is currently used in CNavDTD and nsHTMLContentSink. The

    75 nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be

    76 processed and when each token is processed. The nsHTML content sink records

    77 the time when the chunk has started processing and will return

    78 NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a

    79 threshold called max tokenizing processing time. This allows the content sink

    80 to limit how much data is processed in a single chunk which in turn gates how

    81 much time is spent away from the event loop. Processing smaller chunks of data

    82 also reduces the time spent in subsequent reflows.

    84 This capability is most apparent when loading large documents. If the maximum

    85 token processing time is set small enough the application will remain

    86 responsive during document load.

    88 A side-effect of this capability is that document load is not complete when

    89 the last chunk of data is passed to OnDataAvailable since  the parser may have

    90 been interrupted when the last chunk of data arrived. The document is complete

    91 when all of the document has been tokenized and there aren't any pending

    92 nsParserContinueEvents. This can cause problems if the application assumes

    93 that it can monitor the load requests to determine when the document load has

    94 been completed. This is what happens in Mozilla. The document is considered

    95 completely loaded when all of the load requests have been satisfied. To delay

    96 the document load until all of the parsing has been completed the

    97 nsHTMLContentSink adds a dummy parser load request which is not removed until

    98 the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call

    99 DidBuildModel until the final chunk of data has been passed to the parser

   100 through the OnDataAvailable and there aren't any pending

   101 nsParserContinueEvents.

   103 Currently the parser ignores requests to be interrupted during the

   104 processing of script.  This is because a document.write followed by JavaScript

   105 calls to manipulate the DOM may fail if the parser was interrupted during the

   106 document.write.

   108 For more details @see bugzilla bug 76722

   109 */

   112 class nsParserContinueEvent : public nsRunnable

   113 {

   114 public:

   115   nsRefPtr<nsParser> mParser;

   117   nsParserContinueEvent(nsParser* aParser)

   118     : mParser(aParser)

   119   {}

   121   NS_IMETHOD Run()

   122   {

   123     mParser->HandleParserContinueEvent(this);

   124     return NS_OK;

   125   }

   126 };

   128 //-------------- End ParseContinue Event Definition ------------------------

   130 /**

   131  *  default constructor

   132  */

   133 nsParser::nsParser()

   134 {

   135   Initialize(true);

   136 }

   138 nsParser::~nsParser()

   139 {

   140   Cleanup();

   141 }

   143 void

   144 nsParser::Initialize(bool aConstructor)

   145 {

   146   if (aConstructor) {

   147     // Raw pointer

   148     mParserContext = 0;

   149   }

   150   else {

   151     // nsCOMPtrs

   152     mObserver = nullptr;

   153     mUnusedInput.Truncate();

   154   }

   156   mContinueEvent = nullptr;

   157   mCharsetSource = kCharsetUninitialized;

   158   mCharset.AssignLiteral("ISO-8859-1");

   159   mInternalState = NS_OK;

   160   mStreamStatus = NS_OK;

   161   mCommand = eViewNormal;

   162   mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |

   163            NS_PARSER_FLAG_PARSER_ENABLED |

   164            NS_PARSER_FLAG_CAN_TOKENIZE;

   166   mProcessingNetworkData = false;

   167   mIsAboutBlank = false;

   168 }

   170 void

   171 nsParser::Cleanup()

   172 {

   173 #ifdef DEBUG

   174   if (mParserContext && mParserContext->mPrevContext) {

   175     NS_WARNING("Extra parser contexts still on the parser stack");

   176   }

   177 #endif

   179   while (mParserContext) {

   180     CParserContext *pc = mParserContext->mPrevContext;

   181     delete mParserContext;

   182     mParserContext = pc;

   183   }

   185   // It should not be possible for this flag to be set when we are getting

   186   // destroyed since this flag implies a pending nsParserContinueEvent, which

   187   // has an owning reference to |this|.

   188   NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");

   189 }

   // Cycle-collection and QueryInterface glue for nsParser.
   //
   // Unlink and traverse both cover mDTD, mSink and mObserver.  Traverse
   // additionally walks the mParserContext / mPrevContext chain and reports
   // each context's mTokenizer to the traversal callback.
   // NOTE(review): the tokenizers reported during traverse are not unlinked
   // here - presumably they go away when the contexts are deleted; confirm.
   191 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)

   193 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)

   194   NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)

   195   NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)

   196   NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)

   197 NS_IMPL_CYCLE_COLLECTION_UNLINK_END

   199 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)

   200   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)

   201   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)

   202   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)

   // Report the tokenizer of every context on the stack, newest first.
   203   CParserContext *pc = tmp->mParserContext;

   204   while (pc) {

   205     cb.NoteXPCOMChild(pc->mTokenizer);

   206     pc = pc->mPrevContext;

   207   }

   208 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

   // Reference counting and the interface map: nsIStreamListener, nsIParser,
   // nsIRequestObserver and nsISupportsWeakReference are listed, and
   // nsISupports is disambiguated through nsIParser.
   210 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)

   211 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)

   212 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)

   213   NS_INTERFACE_MAP_ENTRY(nsIStreamListener)

   214   NS_INTERFACE_MAP_ENTRY(nsIParser)

   215   NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)

   216   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)

   217   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)

   218 NS_INTERFACE_MAP_END

   220 // The parser continue event is posted only if

   221 // all of the data to parse has been passed to ::OnDataAvailable

   222 // and the parser has been interrupted by the content sink

   223 // because the processing of tokens took too long.

   225 nsresult

   226 nsParser::PostContinueEvent()

   227 {

   228   if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {

   229     // If this flag isn't set, then there shouldn't be a live continue event!

   230     NS_ASSERTION(!mContinueEvent, "bad");

   232     // This creates a reference cycle between this and the event that is

   233     // broken when the event fires.

   234     nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);

   235     if (NS_FAILED(NS_DispatchToCurrentThread(event))) {

   236         NS_WARNING("failed to dispatch parser continuation event");

   237     } else {

   238         mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;

   239         mContinueEvent = event;

   240     }

   241   }

   242   return NS_OK;

   243 }

   245 NS_IMETHODIMP_(void)

   246 nsParser::GetCommand(nsCString& aCommand)

   247 {

   248   aCommand = mCommandStr;

   249 }

   251 /**

   252  *  Call this method once you've created a parser, and want to instruct it

   253  *  about the command which caused the parser to be constructed. For example,

   254  *  this allows us to select a DTD which can do, say, view-source.

   255  *

   256  *  @param   aCommand the command string to set

   257  */

   258 NS_IMETHODIMP_(void)

   259 nsParser::SetCommand(const char* aCommand)

   260 {

   261   mCommandStr.Assign(aCommand);

   262   if (mCommandStr.Equals("view-source")) {

   263     mCommand = eViewSource;

   264   } else if (mCommandStr.Equals("view-fragment")) {

   265     mCommand = eViewFragment;

   266   } else {

   267     mCommand = eViewNormal;

   268   }

   269 }

   271 /**

   272  *  Call this method once you've created a parser, and want to instruct it

   273  *  about the command which caused the parser to be constructed. For example,

   274  *  this allows us to select a DTD which can do, say, view-source.

   275  *

   276  *  @param   aParserCommand the command to set

   277  */

   278 NS_IMETHODIMP_(void)

   279 nsParser::SetCommand(eParserCommands aParserCommand)

   280 {

   281   mCommand = aParserCommand;

   282 }

   284 /**

   285  *  Call this method once you've created a parser, and want to instruct it

   286  *  about what charset to load

   287  *

   288  *  @param   aCharset- the charset of a document

   289  *  @param   aCharsetSource- the source of the charset

   290  */

   291 NS_IMETHODIMP_(void)

   292 nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)

   293 {

   294   mCharset = aCharset;

   295   mCharsetSource = aCharsetSource;

   296   if (mParserContext && mParserContext->mScanner) {

   297      mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);

   298   }

   299 }

   301 void

   302 nsParser::SetSinkCharset(nsACString& aCharset)

   303 {

   304   if (mSink) {

   305     mSink->SetDocumentCharset(aCharset);

   306   }

   307 }

   309 /**

   310  *  This method gets called in order to set the content

   311  *  sink for this parser to dump nodes to.

   312  *

   313  *  @param   nsIContentSink interface for node receiver

   314  */

   315 NS_IMETHODIMP_(void)

   316 nsParser::SetContentSink(nsIContentSink* aSink)

   317 {

   318   NS_PRECONDITION(aSink, "sink cannot be null!");

   319   mSink = aSink;

   321   if (mSink) {

   322     mSink->SetParser(this);

   323     nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);

   324     if (htmlSink) {

   325       mIsAboutBlank = true;

   326     }

   327   }

   328 }

   330 /**

   331  * retrieve the sink set into the parser

   332  * @return  current sink

   333  */

   334 NS_IMETHODIMP_(nsIContentSink*)

   335 nsParser::GetContentSink()

   336 {

   337   return mSink;

   338 }

   340 /**

   341  * Determine what DTD mode (and thus what layout nsCompatibility mode)

   342  * to use for this document based on the first chunk of data received

   343  * from the network (each parsercontext can have its own mode).  (No,

   344  * this is not an optimal solution -- we really don't need to know until

   345  * after we've received the DOCTYPE, and this could easily be part of

   346  * the regular parsing process if the parser were designed in a way that

   347  * made such modifications easy.)

   348  */

   350 // Parse the PS production in the SGML spec (excluding the part dealing

   351 // with entity references) starting at theIndex into theBuffer, and

   352 // return the first index after the end of the production.

   353 static int32_t

   354 ParsePS(const nsString& aBuffer, int32_t aIndex)

   355 {

   356   for (;;) {

   357     char16_t ch = aBuffer.CharAt(aIndex);

   358     if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||

   359         (ch == char16_t('\n')) || (ch == char16_t('\r'))) {

   360       ++aIndex;

   361     } else if (ch == char16_t('-')) {

   362       int32_t tmpIndex;

   363       if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&

   364           kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {

   365         aIndex = tmpIndex + 2;

   366       } else {

   367         return aIndex;

   368       }

   369     } else {

   370       return aIndex;

   371     }

   372   }

   373 }

   // Result bits reported by ParseDocTypeDecl through its aResultFlags
   // out-parameter; each one records a part of the declaration that was seen.
   #define PARSE_DTD_HAVE_DOCTYPE (1<<0)
   #define PARSE_DTD_HAVE_PUBLIC_ID (1<<1)
   #define PARSE_DTD_HAVE_SYSTEM_ID (1<<2)
   #define PARSE_DTD_HAVE_INTERNAL_SUBSET (1<<3)

   380 // return true on success (includes not present), false on failure

   381 static bool

   382 ParseDocTypeDecl(const nsString &aBuffer,

   383                  int32_t *aResultFlags,

   384                  nsString &aPublicID,

   385                  nsString &aSystemID)

   386 {

   387   bool haveDoctype = false;

   388   *aResultFlags = 0;

   390   // Skip through any comments and processing instructions

   391   // The PI-skipping is a bit of a hack.

   392   int32_t theIndex = 0;

   393   do {

   394     theIndex = aBuffer.FindChar('<', theIndex);

   395     if (theIndex == kNotFound) break;

   396     char16_t nextChar = aBuffer.CharAt(theIndex+1);

   397     if (nextChar == char16_t('!')) {

   398       int32_t tmpIndex = theIndex + 2;

   399       if (kNotFound !=

   400           (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {

   401         haveDoctype = true;

   402         theIndex += 7; // skip "DOCTYPE"

   403         break;

   404       }

   405       theIndex = ParsePS(aBuffer, tmpIndex);

   406       theIndex = aBuffer.FindChar('>', theIndex);

   407     } else if (nextChar == char16_t('?')) {

   408       theIndex = aBuffer.FindChar('>', theIndex);

   409     } else {

   410       break;

   411     }

   412   } while (theIndex != kNotFound);

   414   if (!haveDoctype)

   415     return true;

   416   *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;

   418   theIndex = ParsePS(aBuffer, theIndex);

   419   theIndex = aBuffer.Find("HTML", true, theIndex, 0);

   420   if (kNotFound == theIndex)

   421     return false;

   422   theIndex = ParsePS(aBuffer, theIndex+4);

   423   int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);

   425   if (kNotFound != tmpIndex) {

   426     theIndex = ParsePS(aBuffer, tmpIndex+6);

   428     // We get here only if we've read <!DOCTYPE HTML PUBLIC

   429     // (not case sensitive) possibly with comments within.

   431     // Now find the beginning and end of the public identifier

   432     // and the system identifier (if present).

   434     char16_t lit = aBuffer.CharAt(theIndex);

   435     if ((lit != char16_t('\"')) && (lit != char16_t('\'')))

   436       return false;

   438     // Start is the first character, excluding the quote, and End is

   439     // the final quote, so there are (end-start) characters.

   441     int32_t PublicIDStart = theIndex + 1;

   442     int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);

   443     if (kNotFound == PublicIDEnd)

   444       return false;

   445     theIndex = ParsePS(aBuffer, PublicIDEnd + 1);

   446     char16_t next = aBuffer.CharAt(theIndex);

   447     if (next == char16_t('>')) {

   448       // There was a public identifier, but no system

   449       // identifier,

   450       // so do nothing.

   451       // This is needed to avoid the else at the end, and it's

   452       // also the most common case.

   453     } else if ((next == char16_t('\"')) ||

   454                (next == char16_t('\''))) {

   455       // We found a system identifier.

   456       *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;

   457       int32_t SystemIDStart = theIndex + 1;

   458       int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);

   459       if (kNotFound == SystemIDEnd)

   460         return false;

   461       aSystemID =

   462         Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);

   463     } else if (next == char16_t('[')) {

   464       // We found an internal subset.

   465       *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;

   466     } else {

   467       // Something's wrong.

   468       return false;

   469     }

   471     // Since a public ID is a minimum literal, we must trim

   472     // and collapse whitespace

   473     aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);

   474     aPublicID.CompressWhitespace(true, true);

   475     *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;

   476   } else {

   477     tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);

   478     if (kNotFound != tmpIndex) {

   479       // DOCTYPES with system ID but no Public ID

   480       *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;

   482       theIndex = ParsePS(aBuffer, tmpIndex+6);

   483       char16_t next = aBuffer.CharAt(theIndex);

   484       if (next != char16_t('\"') && next != char16_t('\''))

   485         return false;

   487       int32_t SystemIDStart = theIndex + 1;

   488       int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);

   490       if (kNotFound == SystemIDEnd)

   491         return false;

   492       aSystemID =

   493         Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);

   494       theIndex = ParsePS(aBuffer, SystemIDEnd + 1);

   495     }

   497     char16_t nextChar = aBuffer.CharAt(theIndex);

   498     if (nextChar == char16_t('['))

   499       *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;

   500     else if (nextChar != char16_t('>'))

   501       return false;

   502   }

   503   return true;

   504 }

   // One row of the public-identifier table: a lower-cased public ID plus the
   // mode to use depending on whether the DOCTYPE also had a system ID.
   struct PubIDInfo
   {
     // Public IDs that should trigger strict mode are deliberately not listed
     // in the table, so that all future public IDs trigger strict mode too.
     enum eMode {
       // always quirks mode, unless there's an internal subset
       eQuirks,
       // eCompatibility_AlmostStandards
       eAlmostStandards,
       // eCompatibility_FullStandards
       eFullStandards
     };

     // Public identifier, in lower case.
     const char* name;
     // Mode when the DOCTYPE carries no system identifier.
     eMode mode_if_no_sysid;
     // Mode when the DOCTYPE carries a system identifier.
     eMode mode_if_sysid;
   };

   // Number of elements in a true array (not a pointer).
   //
   // The argument and the whole expansion are parenthesized so the macro can be
   // used inside larger expressions: without the outer parentheses,
   // `x % ELEMENTS_OF(a)` would parse as `(x % sizeof(a)) / sizeof(a[0])`.
   #define ELEMENTS_OF(array_) (sizeof(array_) / sizeof((array_)[0]))

   526 // These must be in nsCRT::strcmp order so binary-search can be used.

   527 // This is verified, |#ifdef DEBUG|, below.

   529 // Even though public identifiers should be case sensitive, we will do

   530 // all comparisons after converting to lower case in order to do

   531 // case-insensitive comparison since there are a number of existing web

   532 // sites that use the incorrect case.  Therefore all of the public

   533 // identifiers below are in lower case (with the correct case following,

   534 // in comments).  The case is verified, |#ifdef DEBUG|, below.

   535 static const PubIDInfo kPublicIDs[] = {

   536   {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   537   {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   538   {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   539   {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   540   {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   541   {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   542   {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   543   {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   544   {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   545   {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   546   {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   547   {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   548   {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   549   {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   550   {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   551   {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   552   {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   553   {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   554   {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   555   {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   556   {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   557   {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   558   {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   559   {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   560   {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   561   {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   562   {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   563   {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   564   {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   565   {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   566   {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   567   {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   568   {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   569   {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   570   {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   571   {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   572   {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   573   {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   574   {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   575   {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   576   {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   577   {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   578   {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   579   {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   580   {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   581   {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   582   {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   583   {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   584   {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   585   {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   586   {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   587   {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   588   {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   589   {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   590   {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   591   {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   592   {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   593   {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   594   {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   595   {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   596   {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   597   {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   598   {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},

   599   {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},

   600   {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   601   {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   602   {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   603   {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},

   604   {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},

   605   {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   606   {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   607   {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   608   {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   609   {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   610   {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   611   {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},

   612 };

   #ifdef DEBUG
   /**
    * Debug-only sanity check of the kPublicIDs table.  The checks run on the
    * first call only; a function-local static flag turns every later call into
    * a no-op.
    *
    * Two properties are verified.  Each violation triggers NS_NOTREACHED and a
    * printf that names the offending entry or entries:
    *  - every name compares strictly less (nsCRT::strcmp) than the name that
    *    follows it, i.e. the table is sorted and free of duplicates, which is
    *    what the binary search in DetermineHTMLParseMode relies on;
    *  - every name is left unchanged by ToLowerCase.
    */
   static void
   VerifyPublicIDs()
   {
     static bool gVerified = false;
     if (gVerified) {
       return;
     }
     gVerified = true;

     // Pass 1: every adjacent pair must be in strictly ascending order.
     for (uint32_t next = 1; next < ELEMENTS_OF(kPublicIDs); ++next) {
       const uint32_t prev = next - 1;
       const bool ascending =
         nsCRT::strcmp(kPublicIDs[prev].name, kPublicIDs[next].name) < 0;
       if (!ascending) {
         NS_NOTREACHED("doctypes out of order");
         printf("Doctypes %s and %s out of order.\n",
                kPublicIDs[prev].name, kPublicIDs[next].name);
       }
     }

     // Pass 2: every entry must already be spelled in lower case.
     for (uint32_t entry = 0; entry < ELEMENTS_OF(kPublicIDs); ++entry) {
       nsAutoCString lcPubID(kPublicIDs[entry].name);
       ToLowerCase(lcPubID);
       const bool alreadyLower =
         nsCRT::strcmp(kPublicIDs[entry].name, lcPubID.get()) == 0;
       if (!alreadyLower) {
         NS_NOTREACHED("doctype not lower case");
         printf("Doctype %s not lower case.\n", kPublicIDs[entry].name);
       }
     }
   }
   #endif

   641 static void

   642 DetermineHTMLParseMode(const nsString& aBuffer,

   643                        nsDTDMode& aParseMode,

   644                        eParserDocType& aDocType)

   645 {

   646 #ifdef DEBUG

   647   VerifyPublicIDs();

   648 #endif

   649   int32_t resultFlags;

   650   nsAutoString publicIDUCS2, sysIDUCS2;

   651   if (ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2)) {

   652     if (!(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {

   653       // no DOCTYPE

   654       aParseMode = eDTDMode_quirks;

   655       aDocType = eHTML_Quirks;

   656     } else if ((resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) ||

   657                !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {

   658       // A doctype with an internal subset is always full_standards.

   659       // A doctype without a public ID is always full_standards.

   660       aDocType = eHTML_Strict;

   661       aParseMode = eDTDMode_full_standards;

   663       // Special hack for IBM's custom DOCTYPE.

   664       if (!(resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) &&

   665           sysIDUCS2 == NS_LITERAL_STRING(

   666                "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {

   667         aParseMode = eDTDMode_quirks;

   668         aDocType = eHTML_Quirks;

   669       }

   671     } else {

   672       // We have to check our list of public IDs to see what to do.

   673       // Yes, we want UCS2 to ASCII lossy conversion.

   674       nsAutoCString publicID;

   675       publicID.AssignWithConversion(publicIDUCS2);

   677       // See comment above definition of kPublicIDs about case

   678       // sensitivity.

   679       ToLowerCase(publicID);

   681       // Binary search to see if we can find the correct public ID

   682       // These must be signed since maximum can go below zero and we'll

   683       // crash if it's unsigned.

   684       int32_t minimum = 0;

   685       int32_t maximum = ELEMENTS_OF(kPublicIDs) - 1;

   686       int32_t index;

   687       for (;;) {

   688         index = (minimum + maximum) / 2;

   689         int32_t comparison =

   690             nsCRT::strcmp(publicID.get(), kPublicIDs[index].name);

   691         if (comparison == 0)

   692           break;

   693         if (comparison < 0)

   694           maximum = index - 1;

   695         else

   696           minimum = index + 1;

   698         if (maximum < minimum) {

   699           // The DOCTYPE is not in our list, so it must be full_standards.

   700           aParseMode = eDTDMode_full_standards;

   701           aDocType = eHTML_Strict;

   702           return;

   703         }

   704       }

   706       switch ((resultFlags & PARSE_DTD_HAVE_SYSTEM_ID)

   707                 ? kPublicIDs[index].mode_if_sysid

   708                 : kPublicIDs[index].mode_if_no_sysid)

   709       {

   710         case PubIDInfo::eQuirks:

   711           aParseMode = eDTDMode_quirks;

   712           aDocType = eHTML_Quirks;

   713           break;

   714         case PubIDInfo::eAlmostStandards:

   715           aParseMode = eDTDMode_almost_standards;

   716           aDocType = eHTML_Strict;

   717           break;

   718         case PubIDInfo::eFullStandards:

   719           aParseMode = eDTDMode_full_standards;

   720           aDocType = eHTML_Strict;

   721           break;

   722         default:

   723           NS_NOTREACHED("no other cases!");

   724       }

   725     }

   726   } else {

   727     // badly formed DOCTYPE -> quirks

   728     aParseMode = eDTDMode_quirks;

   729     aDocType = eHTML_Quirks;

   730   }

   731 }

   733 static void

   734 DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,

   735                    eParserDocType& aDocType, const nsACString& aMimeType)

   736 {

   737   if (aMimeType.EqualsLiteral(TEXT_HTML)) {

   738     DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);

   739   } else if (nsContentUtils::IsPlainTextType(aMimeType)) {

   740     aDocType = ePlainText;

   741     aParseMode = eDTDMode_quirks;

   742   } else { // Some form of XML

   743     aDocType = eXML;

   744     aParseMode = eDTDMode_full_standards;

   745   }

   746 }

   748 static nsIDTD*

   749 FindSuitableDTD(CParserContext& aParserContext)

   750 {

   751   // We always find a DTD.

   752   aParserContext.mAutoDetectStatus = ePrimaryDetect;

   754   // Quick check for view source.

   755   NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource,

   756     "The old parser is not supposed to be used for View Source anymore.");

   758   // Now see if we're parsing HTML (which, as far as we're concerned, simply

   759   // means "not XML").

   760   if (aParserContext.mDocType != eXML) {

   761     return new CNavDTD();

   762   }

   764   // If we're here, then we'd better be parsing XML.

   765   NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");

   766   return new nsExpatDriver();

   767 }

   769 NS_IMETHODIMP

   770 nsParser::CancelParsingEvents()

   771 {

   772   if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {

   773     NS_ASSERTION(mContinueEvent, "mContinueEvent is null");

   774     // Revoke the pending continue parsing event

   775     mContinueEvent = nullptr;

   776     mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;

   777   }

   778   return NS_OK;

   779 }

   781 ////////////////////////////////////////////////////////////////////////

   /**
    * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the
    * value of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of
    * EXPR1 (which could be success or failure).
    *
    * To understand the motivation for this construct, consider these example
    * methods:
    *
    *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
    *     nsresult rv = NS_OK;
    *     ...
    *     rv = obj->DoThatThing();
    *     NS_ENSURE_SUCCESS(rv, rv);
    *     ...
    *     return rv;
    *   }
    *
    *   nsresult nsCaller::MakeThingsHappen() {
    *     return mSomething->DoThatThing(mWhatever);
    *   }
    *
    * Suppose, for whatever reason*, we want to shift responsibility for calling
    * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
    * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
    *
    *   nsresult nsSomething::DoThatThing() {
    *     nsresult rv = NS_OK;
    *     ...
    *     ...
    *     return rv;
    *   }
    *
    *   nsresult nsCaller::MakeThingsHappen() {
    *     nsresult rv;
    *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
    *                              mWhatever->DoThatThing(),
    *                              rv);
    *     return rv;
    *   }
    *
    * *Possible reasons include: nsCaller doesn't want to give mSomething access
    * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
    * be called regardless of how nsSomething::DoThatThing behaves, &c.
    *
    * Implementation notes: EXPR1's result is parked in a block-local temporary
    * while EXPR2 runs.  RV is overwritten with that temporary only when EXPR2
    * succeeded, so a failure from EXPR2 always wins.  Both expressions are
    * parenthesized so that arbitrary argument expressions expand safely.
    */
   #define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                       \
     nsresult RV##__temp = (EXPR1);                                           \
     RV = (EXPR2);                                                            \
     if (NS_SUCCEEDED(RV)) {                                                  \
       RV = RV##__temp;                                                       \
     }                                                                        \
   }

   835 /**

   836  * This gets called just prior to the model actually

   837  * being constructed. It's important to make this the

   838  * last thing that happens right before parsing, so we

   839  * can delay until the last moment the resolution of

   840  * which DTD to use (unless of course we're assigned one).

   841  */

   842 nsresult

   843 nsParser::WillBuildModel(nsString& aFilename)

   844 {

   845   if (!mParserContext)

   846     return kInvalidParserContext;

   848   if (eUnknownDetect != mParserContext->mAutoDetectStatus)

   849     return NS_OK;

   851   if (eDTDMode_unknown == mParserContext->mDTDMode ||

   852       eDTDMode_autodetect == mParserContext->mDTDMode) {

   853     char16_t buf[1025];

   854     nsFixedString theBuffer(buf, 1024, 0);

   856     // Grab 1024 characters, starting at the first non-whitespace

   857     // character, to look for the doctype in.

   858     mParserContext->mScanner->Peek(theBuffer, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());

   859     DetermineParseMode(theBuffer, mParserContext->mDTDMode,

   860                        mParserContext->mDocType, mParserContext->mMimeType);

   861   }

   863   NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,

   864                "Clobbering DTD for non-root parser context!");

   865   mDTD = FindSuitableDTD(*mParserContext);

   866   NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);

   868   nsITokenizer* tokenizer;

   869   nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);

   870   NS_ENSURE_SUCCESS(rv, rv);

   872   rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);

   873   nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());

   874   // nsIDTD::WillBuildModel used to be responsible for calling

   875   // nsIContentSink::WillBuildModel, but that obligation isn't expressible

   876   // in the nsIDTD interface itself, so it's sounder and simpler to give that

   877   // responsibility back to the parser. The former behavior of the DTD was to

   878   // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns

   879   // failure we should use sinkResult instead of rv, to preserve the old error

   880   // handling behavior of the DTD:

   881   return NS_FAILED(sinkResult) ? sinkResult : rv;

   882 }

   884 /**

   885  * This gets called when the parser is done with its input.

   886  * Note that the parser may have been called recursively, so we

   887  * have to check for a prev. context before closing out the DTD/sink.

   888  */

   889 nsresult

   890 nsParser::DidBuildModel(nsresult anErrorCode)

   891 {

   892   nsresult result = anErrorCode;

   894   if (IsComplete()) {

   895     if (mParserContext && !mParserContext->mPrevContext) {

   896       // Let sink know if we're about to end load because we've been terminated.

   897       // In that case we don't want it to run deferred scripts.

   898       bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;

   899       if (mDTD && mSink) {

   900         nsresult dtdResult =  mDTD->DidBuildModel(anErrorCode),

   901                 sinkResult = mSink->DidBuildModel(terminated);

   902         // nsIDTD::DidBuildModel used to be responsible for calling

   903         // nsIContentSink::DidBuildModel, but that obligation isn't expressible

   904         // in the nsIDTD interface itself, so it's sounder and simpler to give

   905         // that responsibility back to the parser. The former behavior of the

   906         // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the

   907         // sink returns failure we should use sinkResult instead of dtdResult,

   908         // to preserve the old error handling behavior of the DTD:

   909         result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;

   910       }

   912       //Ref. to bug 61462.

   913       mParserContext->mRequest = 0;

   914     }

   915   }

   917   return result;

   918 }

   920 /**

   921  * This method adds a new parser context to the list,

   922  * pushing the current one to the next position.

   923  *

   924  * @param   ptr to new context

   925  */

   926 void

   927 nsParser::PushContext(CParserContext& aContext)

   928 {

   929   NS_ASSERTION(aContext.mPrevContext == mParserContext,

   930                "Trying to push a context whose previous context differs from "

   931                "the current parser context.");

   932   mParserContext = &aContext;

   933 }

   935 /**

   936  * This method pops the topmost context off the stack,

   937  * returning it to the user. The next context  (if any)

   938  * becomes the current context.

   939  * @update	gess7/22/98

   940  * @return  prev. context

   941  */

   942 CParserContext*

   943 nsParser::PopContext()

   944 {

   945   CParserContext* oldContext = mParserContext;

   946   if (oldContext) {

   947     mParserContext = oldContext->mPrevContext;

   948     if (mParserContext) {

   949       // If the old context was blocked, propagate the blocked state

   950       // back to the new one. Also, propagate the stream listener state

   951       // but don't override onStop state to guarantee the call to DidBuildModel().

   952       if (mParserContext->mStreamListenerState != eOnStop) {

   953         mParserContext->mStreamListenerState = oldContext->mStreamListenerState;

   954       }

   955     }

   956   }

   957   return oldContext;

   958 }

   960 /**

   961  *  Call this when you want control whether or not the parser will parse

   962  *  and tokenize input (TRUE), or whether it just caches input to be

   963  *  parsed later (FALSE).

   964  *

   965  *  @param   aState determines whether we parse/tokenize or just cache.

   966  *  @return  current state

   967  */

   968 void

   969 nsParser::SetUnusedInput(nsString& aBuffer)

   970 {

   971   mUnusedInput = aBuffer;

   972 }

   974 /**

   975  *  Call this when you want to *force* the parser to terminate the

   976  *  parsing process altogether. This is binary -- so once you terminate

   977  *  you can't resume without restarting altogether.

   978  */

   979 NS_IMETHODIMP

   980 nsParser::Terminate(void)

   981 {

   982   // We should only call DidBuildModel once, so don't do anything if this is

   983   // the second time that Terminate has been called.

   984   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {

   985     return NS_OK;

   986   }

   988   nsresult result = NS_OK;

   989   // XXX - [ until we figure out a way to break parser-sink circularity ]

   990   // Hack - Hold a reference until we are completely done...

   991   nsCOMPtr<nsIParser> kungFuDeathGrip(this);

   992   mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;

   994   // CancelParsingEvents must be called to avoid leaking the nsParser object

   995   // @see bug 108049

   996   // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents

   997   // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:

   998   // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.

   999   CancelParsingEvents();

  1001   // If we got interrupted in the middle of a document.write, then we might

  1002   // have more than one parser context on our parsercontext stack. This has

  1003   // the effect of making DidBuildModel a no-op, meaning that we never call

  1004   // our sink's DidBuildModel and break the reference cycle, causing a leak.

  1005   // Since we're getting terminated, we manually clean up our context stack.

  1006   while (mParserContext && mParserContext->mPrevContext) {

  1007     CParserContext *prev = mParserContext->mPrevContext;

  1008     delete mParserContext;

  1009     mParserContext = prev;

  1010   }

  1012   if (mDTD) {

  1013     mDTD->Terminate();

  1014     DidBuildModel(result);

  1015   } else if (mSink) {

  1016     // We have no parser context or no DTD yet (so we got terminated before we

  1017     // got any data).  Manually break the reference cycle with the sink.

  1018     result = mSink->DidBuildModel(true);

  1019     NS_ENSURE_SUCCESS(result, result);

  1020   }

  1022   return NS_OK;

  1023 }

  1025 NS_IMETHODIMP

  1026 nsParser::ContinueInterruptedParsing()

  1027 {

  1028   // If there are scripts executing, then the content sink is jumping the gun

  1029   // (probably due to a synchronous XMLHttpRequest) and will re-enable us

  1030   // later, see bug 460706.

  1031   if (!IsOkToProcessNetworkData()) {

  1032     return NS_OK;

  1033   }

  1035   // If the stream has already finished, there's a good chance

  1036   // that we might start closing things down when the parser

  1037   // is reenabled. To make sure that we're not deleted across

  1038   // the reenabling process, hold a reference to ourselves.

  1039   nsresult result=NS_OK;

  1040   nsCOMPtr<nsIParser> kungFuDeathGrip(this);

  1041   nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);

  1043 #ifdef DEBUG

  1044   if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {

  1045     NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");

  1046   }

  1047 #endif

  1049   bool isFinalChunk = mParserContext &&

  1050                         mParserContext->mStreamListenerState == eOnStop;

  1052   mProcessingNetworkData = true;

  1053   if (mSink) {

  1054     mSink->WillParse();

  1055   }

  1056   result = ResumeParse(true, isFinalChunk); // Ref. bug 57999

  1057   mProcessingNetworkData = false;

  1059   if (result != NS_OK) {

  1060     result=mInternalState;

  1061   }

  1063   return result;

  1064 }

  1066 /**

  1067  *  Stops parsing temporarily. That's it will prevent the

  1068  *  parser from building up content model.

  1069  */

  1070 NS_IMETHODIMP_(void)

  1071 nsParser::BlockParser()

  1072 {

  1073   mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;

  1074 }

  1076 /**

  1077  *  Open up the parser for tokenization, building up content

  1078  *  model..etc. However, this method does not resume parsing

  1079  *  automatically. It's the callers' responsibility to restart

  1080  *  the parsing engine.

  1081  */

  1082 NS_IMETHODIMP_(void)

  1083 nsParser::UnblockParser()

  1084 {

  1085   if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {

  1086     mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;

  1087   } else {

  1088     NS_WARNING("Trying to unblock an unblocked parser.");

  1089   }

  1090 }

  1092 NS_IMETHODIMP_(void)

  1093 nsParser::ContinueInterruptedParsingAsync()

  1094 {

  1095   mSink->ContinueInterruptedParsingAsync();

  1096 }

  1098 /**

  1099  * Call this to query whether the parser is enabled or not.

  1100  */

  1101 NS_IMETHODIMP_(bool)

  1102 nsParser::IsParserEnabled()

  1103 {

  1104   return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;

  1105 }

  1107 /**

  1108  * Call this to query whether the parser thinks it's done with parsing.

  1109  */

  1110 NS_IMETHODIMP_(bool)

  1111 nsParser::IsComplete()

  1112 {

  1113   return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);

  1114 }

  1117 void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)

  1118 {

  1119   // Ignore any revoked continue events...

  1120   if (mContinueEvent != ev)

  1121     return;

  1123   mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;

  1124   mContinueEvent = nullptr;

  1126   NS_ASSERTION(IsOkToProcessNetworkData(),

  1127                "Interrupted in the middle of a script?");

  1128   ContinueInterruptedParsing();

  1129 }

  1131 bool

  1132 nsParser::IsInsertionPointDefined()

  1133 {

  1134   return false;

  1135 }

  1137 void

  1138 nsParser::BeginEvaluatingParserInsertedScript()

  1139 {

  1140 }

  1142 void

  1143 nsParser::EndEvaluatingParserInsertedScript()

  1144 {

  1145 }

  1147 void

  1148 nsParser::MarkAsNotScriptCreated(const char* aCommand)

  1149 {

  1150 }

  1152 bool

  1153 nsParser::IsScriptCreated()

  1154 {

  1155   return false;

  1156 }

  1158 /**

  1159  *  This is the main controlling routine in the parsing process.

  1160  *  Note that it may get called multiple times for the same scanner,

  1161  *  since this is a pushed based system, and all the tokens may

  1162  *  not have been consumed by the scanner during a given invocation

  1163  *  of this method.

  1164  */

  1165 NS_IMETHODIMP

  1166 nsParser::Parse(nsIURI* aURL,

  1167                 nsIRequestObserver* aListener,

  1168                 void* aKey,

  1169                 nsDTDMode aMode)

  1170 {

  1172   NS_PRECONDITION(aURL, "Error: Null URL given");

  1174   nsresult result=kBadURL;

  1175   mObserver = aListener;

  1177   if (aURL) {

  1178     nsAutoCString spec;

  1179     nsresult rv = aURL->GetSpec(spec);

  1180     if (rv != NS_OK) {

  1181       return rv;

  1182     }

  1183     NS_ConvertUTF8toUTF16 theName(spec);

  1185     nsScanner* theScanner = new nsScanner(theName, false);

  1186     CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,

  1187                                             mCommand, aListener);

  1188     if (pc && theScanner) {

  1189       pc->mMultipart = true;

  1190       pc->mContextType = CParserContext::eCTURL;

  1191       pc->mDTDMode = aMode;

  1192       PushContext(*pc);

  1194       result = NS_OK;

  1195     } else {

  1196       result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;

  1197     }

  1198   }

  1199   return result;

  1200 }

  1202 /**

  1203  * Used by XML fragment parsing below.

  1204  *

  1205  * @param   aSourceBuffer contains a string-full of real content

  1206  */

  1207 nsresult

  1208 nsParser::Parse(const nsAString& aSourceBuffer,

  1209                 void* aKey,

  1210                 bool aLastCall)

  1211 {

  1212   nsresult result = NS_OK;

  1214   // Don't bother if we're never going to parse this.

  1215   if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {

  1216     return result;

  1217   }

  1219   if (!aLastCall && aSourceBuffer.IsEmpty()) {

  1220     // Nothing is being passed to the parser so return

  1221     // immediately. mUnusedInput will get processed when

  1222     // some data is actually passed in.

  1223     // But if this is the last call, make sure to finish up

  1224     // stuff correctly.

  1225     return result;

  1226   }

  1228   // Maintain a reference to ourselves so we don't go away

  1229   // till we're completely done.

  1230   nsCOMPtr<nsIParser> kungFuDeathGrip(this);

  1232   if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {

  1233     // Note: The following code will always find the parser context associated

  1234     // with the given key, even if that context has been suspended (e.g., for

  1235     // another document.write call). This doesn't appear to be exactly what IE

  1236     // does in the case where this happens, but this makes more sense.

  1237     CParserContext* pc = mParserContext;

  1238     while (pc && pc->mKey != aKey) {

  1239       pc = pc->mPrevContext;

  1240     }

  1242     if (!pc) {

  1243       // Only make a new context if we don't have one, OR if we do, but has a

  1244       // different context key.

  1245       nsScanner* theScanner = new nsScanner(mUnusedInput);

  1246       NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);

  1248       eAutoDetectResult theStatus = eUnknownDetect;

  1250       if (mParserContext &&

  1251           mParserContext->mMimeType.EqualsLiteral("application/xml")) {

  1252         // Ref. Bug 90379

  1253         NS_ASSERTION(mDTD, "How come the DTD is null?");

  1255         if (mParserContext) {

  1256           theStatus = mParserContext->mAutoDetectStatus;

  1257           // Added this to fix bug 32022.

  1258         }

  1259       }

  1261       pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,

  1262                               0, theStatus, aLastCall);

  1263       NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);

  1265       PushContext(*pc);

  1267       pc->mMultipart = !aLastCall; // By default

  1268       if (pc->mPrevContext) {

  1269         pc->mMultipart |= pc->mPrevContext->mMultipart;

  1270       }

  1272       // Start fix bug 40143

  1273       if (pc->mMultipart) {

  1274         pc->mStreamListenerState = eOnDataAvail;

  1275         if (pc->mScanner) {

  1276           pc->mScanner->SetIncremental(true);

  1277         }

  1278       } else {

  1279         pc->mStreamListenerState = eOnStop;

  1280         if (pc->mScanner) {

  1281           pc->mScanner->SetIncremental(false);

  1282         }

  1283       }

  1284       // end fix for 40143

  1286       pc->mContextType=CParserContext::eCTString;

  1287       pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));

  1288       pc->mDTDMode = eDTDMode_full_standards;

  1290       mUnusedInput.Truncate();

  1292       pc->mScanner->Append(aSourceBuffer);

  1293       // Do not interrupt document.write() - bug 95487

  1294       result = ResumeParse(false, false, false);

  1295     } else {

  1296       pc->mScanner->Append(aSourceBuffer);

  1297       if (!pc->mPrevContext) {

  1298         // Set stream listener state to eOnStop, on the final context - Fix 68160,

  1299         // to guarantee DidBuildModel() call - Fix 36148

  1300         if (aLastCall) {

  1301           pc->mStreamListenerState = eOnStop;

  1302           pc->mScanner->SetIncremental(false);

  1303         }

  1305         if (pc == mParserContext) {

  1306           // If pc is not mParserContext, then this call to ResumeParse would

  1307           // do the wrong thing and try to continue parsing using

  1308           // mParserContext. We need to wait to actually resume parsing on pc.

  1309           ResumeParse(false, false, false);

  1310         }

  1311       }

  1312     }

  1313   }

  1315   return result;

  1316 }

  1318 NS_IMETHODIMP

  1319 nsParser::ParseFragment(const nsAString& aSourceBuffer,

  1320                         nsTArray<nsString>& aTagStack)

  1321 {

  1322   nsresult result = NS_OK;

  1323   nsAutoString  theContext;

  1324   uint32_t theCount = aTagStack.Length();

  1325   uint32_t theIndex = 0;

  1327   // Disable observers for fragments

  1328   mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;

  1330   for (theIndex = 0; theIndex < theCount; theIndex++) {

  1331     theContext.AppendLiteral("<");

  1332     theContext.Append(aTagStack[theCount - theIndex - 1]);

  1333     theContext.AppendLiteral(">");

  1334   }

  1336   if (theCount == 0) {

  1337     // Ensure that the buffer is not empty. Because none of the DTDs care

  1338     // about leading whitespace, this doesn't change the result.

  1339     theContext.AssignLiteral(" ");

  1340   }

  1342   // First, parse the context to build up the DTD's tag stack. Note that we

  1343   // pass false for the aLastCall parameter.

  1344   result = Parse(theContext,

  1345                  (void*)&theContext,

  1346                  false);

  1347   if (NS_FAILED(result)) {

  1348     mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;

  1349     return result;

  1350   }

  1352   if (!mSink) {

  1353     // Parse must have failed in the XML case and so the sink was killed.

  1354     return NS_ERROR_HTMLPARSER_STOPPARSING;

  1355   }

  1357   nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);

  1358   NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");

  1360   fragSink->WillBuildContent();

  1361   // Now, parse the actual content. Note that this is the last call

  1362   // for HTML content, but for XML, we will want to build and parse

  1363   // the end tags.  However, if tagStack is empty, it's the last call

  1364   // for XML as well.

  1365   if (theCount == 0) {

  1366     result = Parse(aSourceBuffer,

  1367                    &theContext,

  1368                    true);

  1369     fragSink->DidBuildContent();

  1370   } else {

  1371     // Add an end tag chunk, so expat will read the whole source buffer,

  1372     // and not worry about ']]' etc.

  1373     result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),

  1374                    &theContext,

  1375                    false);

  1376     fragSink->DidBuildContent();

  1378     if (NS_SUCCEEDED(result)) {

  1379       nsAutoString endContext;

  1380       for (theIndex = 0; theIndex < theCount; theIndex++) {

  1381          // we already added an end tag chunk above

  1382         if (theIndex > 0) {

  1383           endContext.AppendLiteral("</");

  1384         }

  1386         nsString& thisTag = aTagStack[theIndex];

  1387         // was there an xmlns=?

  1388         int32_t endOfTag = thisTag.FindChar(char16_t(' '));

  1389         if (endOfTag == -1) {

  1390           endContext.Append(thisTag);

  1391         } else {

  1392           endContext.Append(Substring(thisTag,0,endOfTag));

  1393         }

  1395         endContext.AppendLiteral(">");

  1396       }

  1398       result = Parse(endContext,

  1399                      &theContext,

  1400                      true);

  1401     }

  1402   }

  1404   mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;

  1406   return result;

  1407 }

  1409 /**

  1410  *  This routine is called to cause the parser to continue parsing its

  1411  *  underlying stream.  This call allows the parse process to happen in

  1412  *  chunks, such as when the content is push based, and we need to parse in

  1413  *  pieces.

  1414  *

  1415  *  An interesting change in how the parser gets used has led us to add extra

  1416  *  processing to this method.  The case occurs when the parser is blocked in

  1417  *  one context, and gets a parse(string) call in another context.  In this

  1418  *  case, the parserContexts are linked. No problem.

  1419  *

  1420  *  The problem is that Parse(string) assumes that it can proceed unabated,

  1421  *  but if the parser is already blocked that assumption is false. So we

  1422  *  needed to add a mechanism here to allow the parser to continue to process

  1423  *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs

  1424  *  out of contexts.

  1425  *

  1426  *

  1427  *  @param   allowIteration : set to true if non-script resumption is requested

  1428  *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.

  1429  *  @return  error code -- 0 if ok, non-zero if error.

  1430  */

  1431 nsresult

  1432 nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,

  1433                       bool aCanInterrupt)

  1434 {

  1435   nsresult result = NS_OK;

  1437   if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&

  1438       mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {

  1440     result = WillBuildModel(mParserContext->mScanner->GetFilename());

  1441     if (NS_FAILED(result)) {

  1442       mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;

  1443       return result;

  1444     }

  1446     if (mDTD) {

  1447       mSink->WillResume();

  1448       bool theIterationIsOk = true;

  1450       while (result == NS_OK && theIterationIsOk) {

  1451         if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {

  1452           // -- Ref: Bug# 22485 --

  1453           // Insert the unused input into the source buffer

  1454           // as if it was read from the input stream.

  1455           // Adding UngetReadable() per vidur!!

  1456           mParserContext->mScanner->UngetReadable(mUnusedInput);

  1457           mUnusedInput.Truncate(0);

  1458         }

  1460         // Only allow parsing to be interrupted in the subsequent call to

  1461         // build model.

  1462         nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)

  1463                                       ? Tokenize(aIsFinalChunk)

  1464                                       : NS_OK;

  1465         result = BuildModel();

  1467         if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {

  1468           PostContinueEvent();

  1469         }

  1471         theIterationIsOk = theTokenizerResult != kEOF &&

  1472                            result != NS_ERROR_HTMLPARSER_INTERRUPTED;

  1474         // Make sure not to stop parsing too early. Therefore, before shutting

  1475         // down the parser, it's important to check whether the input buffer

  1476         // has been scanned to completion (theTokenizerResult should be kEOF).

  1477         // kEOF -> End of buffer.

  1479         // If we're told to block the parser, we disable all further parsing

  1480         // (and cache any data coming in) until the parser is re-enabled.

  1481         if (NS_ERROR_HTMLPARSER_BLOCK == result) {

  1482           mSink->WillInterrupt();

  1483           if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {

  1484             // If we were blocked by a recursive invocation, don't re-block.

  1485             BlockParser();

  1486           }

  1487           return NS_OK;

  1488         }

  1489         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {

  1490           // Note: Parser Terminate() calls DidBuildModel.

  1491           if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {

  1492             DidBuildModel(mStreamStatus);

  1493             mInternalState = result;

  1494           }

  1496           return NS_OK;

  1497         }

  1498         if ((NS_OK == result && theTokenizerResult == kEOF) ||

  1499              result == NS_ERROR_HTMLPARSER_INTERRUPTED) {

  1500           bool theContextIsStringBased =

  1501             CParserContext::eCTString == mParserContext->mContextType;

  1503           if (mParserContext->mStreamListenerState == eOnStop ||

  1504               !mParserContext->mMultipart || theContextIsStringBased) {

  1505             if (!mParserContext->mPrevContext) {

  1506               if (mParserContext->mStreamListenerState == eOnStop) {

  1507                 DidBuildModel(mStreamStatus);

  1508                 return NS_OK;

  1509               }

  1510             } else {

  1511               CParserContext* theContext = PopContext();

  1512               if (theContext) {

  1513                 theIterationIsOk = allowIteration && theContextIsStringBased;

  1514                 if (theContext->mCopyUnused) {

  1515                   theContext->mScanner->CopyUnusedData(mUnusedInput);

  1516                 }

  1518                 delete theContext;

  1519               }

  1521               result = mInternalState;

  1522               aIsFinalChunk = mParserContext &&

  1523                               mParserContext->mStreamListenerState == eOnStop;

  1524               // ...then intentionally fall through to mSink->WillInterrupt()...

  1525             }

  1526           }

  1527         }

  1529         if (theTokenizerResult == kEOF ||

  1530             result == NS_ERROR_HTMLPARSER_INTERRUPTED) {

  1531           result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;

  1532           mSink->WillInterrupt();

  1533         }

  1534       }

  1535     } else {

  1536       mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;

  1537     }

  1538   }

  1540   return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;

  1541 }

  1543 /**

  1544  *  This is where we loop over the tokens created in the

  1545  *  tokenization phase, and try to make sense out of them.

  1546  */

  1547 nsresult

  1548 nsParser::BuildModel()

  1549 {

  1550   nsITokenizer* theTokenizer = nullptr;

  1552   nsresult result = NS_OK;

  1553   if (mParserContext) {

  1554     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);

  1555   }

  1557   if (NS_SUCCEEDED(result)) {

  1558     if (mDTD) {

  1559       result = mDTD->BuildModel(theTokenizer, mSink);

  1560     }

  1561   } else {

  1562     mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;

  1563   }

  1564   return result;

  1565 }

  1567 /*******************************************************************

  1568   These methods are used to talk to the netlib system...

  1569  *******************************************************************/

  1571 nsresult

  1572 nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)

  1573 {

  1574   NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,

  1575                   "Parser's nsIStreamListener API was not setup "

  1576                   "correctly in constructor.");

  1577   if (mObserver) {

  1578     mObserver->OnStartRequest(request, aContext);

  1579   }

  1580   mParserContext->mStreamListenerState = eOnStart;

  1581   mParserContext->mAutoDetectStatus = eUnknownDetect;

  1582   mParserContext->mRequest = request;

  1584   NS_ASSERTION(!mParserContext->mPrevContext,

  1585                "Clobbering DTD for non-root parser context!");

  1586   mDTD = nullptr;

  1588   nsresult rv;

  1589   nsAutoCString contentType;

  1590   nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);

  1591   if (channel) {

  1592     rv = channel->GetContentType(contentType);

  1593     if (NS_SUCCEEDED(rv)) {

  1594       mParserContext->SetMimeType(contentType);

  1595     }

  1596   }

  1598   rv = NS_OK;

  1600   return rv;

  1601 }

  1603 static bool

  1604 ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,

  1605                                  nsCString& oCharset)

  1606 {

  1607   // This code is rather pointless to have. Might as well reuse expat as

  1608   // seen in nsHtml5StreamParser. -- hsivonen

  1609   oCharset.Truncate();

  1610   if ((aLen >= 5) &&

  1611       ('<' == aBytes[0]) &&

  1612       ('?' == aBytes[1]) &&

  1613       ('x' == aBytes[2]) &&

  1614       ('m' == aBytes[3]) &&

  1615       ('l' == aBytes[4])) {

  1616     int32_t i;

  1617     bool versionFound = false, encodingFound = false;

  1618     for (i = 6; i < aLen && !encodingFound; ++i) {

  1619       // end of XML declaration?

  1620       if ((((char*) aBytes)[i] == '?') &&

  1621           ((i + 1) < aLen) &&

  1622           (((char*) aBytes)[i + 1] == '>')) {

  1623         break;

  1624       }

  1625       // Version is required.

  1626       if (!versionFound) {

  1627         // Want to avoid string comparisons, hence looking for 'n'

  1628         // and only if found check the string leading to it. Not

  1629         // foolproof, but fast.

  1630         // The shortest string allowed before this is  (strlen==13):

  1631         // <?xml version

  1632         if ((((char*) aBytes)[i] == 'n') &&

  1633             (i >= 12) &&

  1634             (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) {

  1635           // Fast forward through version

  1636           char q = 0;

  1637           for (++i; i < aLen; ++i) {

  1638             char qi = ((char*) aBytes)[i];

  1639             if (qi == '\'' || qi == '"') {

  1640               if (q && q == qi) {

  1641                 //  ending quote

  1642                 versionFound = true;

  1643                 break;

  1644               } else {

  1645                 // Starting quote

  1646                 q = qi;

  1647               }

  1648             }

  1649           }

  1650         }

  1651       } else {

  1652         // encoding must follow version

  1653         // Want to avoid string comparisons, hence looking for 'g'

  1654         // and only if found check the string leading to it. Not

  1655         // foolproof, but fast.

  1656         // The shortest allowed string before this (strlen==26):

  1657         // <?xml version="1" encoding

  1658         if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp(

  1659             "encodin", (char*) (aBytes + i - 7), 7))) {

  1660           int32_t encStart = 0;

  1661           char q = 0;

  1662           for (++i; i < aLen; ++i) {

  1663             char qi = ((char*) aBytes)[i];

  1664             if (qi == '\'' || qi == '"') {

  1665               if (q && q == qi) {

  1666                 int32_t count = i - encStart;

  1667                 // encoding value is invalid if it is UTF-16

  1668                 if (count > 0 && PL_strncasecmp("UTF-16",

  1669                     (char*) (aBytes + encStart), count)) {

  1670                   oCharset.Assign((char*) (aBytes + encStart), count);

  1671                 }

  1672                 encodingFound = true;

  1673                 break;

  1674               } else {

  1675                 encStart = i + 1;

  1676                 q = qi;

  1677               }

  1678             }

  1679           }

  1680         }

  1681       } // if (!versionFound)

  1682     } // for

  1683   }

  1684   return !oCharset.IsEmpty();

  1685 }

  1687 inline const char

  1688 GetNextChar(nsACString::const_iterator& aStart,

  1689             nsACString::const_iterator& aEnd)

  1690 {

  1691   NS_ASSERTION(aStart != aEnd, "end of buffer");

  1692   return (++aStart != aEnd) ? *aStart : '\0';

  1693 }

  1695 static NS_METHOD

  1696 NoOpParserWriteFunc(nsIInputStream* in,

  1697                 void* closure,

  1698                 const char* fromRawSegment,

  1699                 uint32_t toOffset,

  1700                 uint32_t count,

  1701                 uint32_t *writeCount)

  1702 {

  1703   *writeCount = count;

  1704   return NS_OK;

  1705 }

  1707 typedef struct {

  1708   bool mNeedCharsetCheck;

  1709   nsParser* mParser;

  1710   nsScanner* mScanner;

  1711   nsIRequest* mRequest;

  1712 } ParserWriteStruct;

  1714 /*

  1715  * This function is invoked as a result of a call to a stream's

  1716  * ReadSegments() method. It is called for each contiguous buffer

  1717  * of data in the underlying stream or pipe. Using ReadSegments

  1718  * allows us to avoid copying data to read out of the stream.

  1719  */

  1720 static NS_METHOD

  1721 ParserWriteFunc(nsIInputStream* in,

  1722                 void* closure,

  1723                 const char* fromRawSegment,

  1724                 uint32_t toOffset,

  1725                 uint32_t count,

  1726                 uint32_t *writeCount)

  1727 {

  1728   nsresult result;

  1729   ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);

  1730   const unsigned char* buf =

  1731     reinterpret_cast<const unsigned char*> (fromRawSegment);

  1732   uint32_t theNumRead = count;

  1734   if (!pws) {

  1735     return NS_ERROR_FAILURE;

  1736   }

  1738   if (pws->mNeedCharsetCheck) {

  1739     pws->mNeedCharsetCheck = false;

  1740     int32_t source;

  1741     nsAutoCString preferred;

  1742     nsAutoCString maybePrefer;

  1743     pws->mParser->GetDocumentCharset(preferred, source);

  1745     // This code was bogus when I found it. It expects the BOM or the XML

  1746     // declaration to be entirely in the first network buffer. -- hsivonen

  1747     if (nsContentUtils::CheckForBOM(buf, count, maybePrefer)) {

  1748       // The decoder will swallow the BOM. The UTF-16 will re-sniff for

  1749       // endianness. The value of preferred is now either "UTF-8" or "UTF-16".

  1750       preferred.Assign(maybePrefer);

  1751       source = kCharsetFromByteOrderMark;

  1752     } else if (source < kCharsetFromChannel) {

  1753       nsAutoCString declCharset;

  1755       if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {

  1756         if (EncodingUtils::FindEncodingForLabel(declCharset, maybePrefer)) {

  1757           preferred.Assign(maybePrefer);

  1758           source = kCharsetFromMetaTag;

  1759         }

  1760       }

  1761     }

  1763     pws->mParser->SetDocumentCharset(preferred, source);

  1764     pws->mParser->SetSinkCharset(preferred);

  1766   }

  1768   result = pws->mScanner->Append(fromRawSegment, theNumRead, pws->mRequest);

  1769   if (NS_SUCCEEDED(result)) {

  1770     *writeCount = count;

  1771   }

  1773   return result;

  1774 }

  1776 nsresult

  1777 nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,

  1778                           nsIInputStream *pIStream, uint64_t sourceOffset,

  1779                           uint32_t aLength)

  1780 {

  1781   NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||

  1782                    eOnDataAvail == mParserContext->mStreamListenerState),

  1783             "Error: OnStartRequest() must be called before OnDataAvailable()");

  1784   NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),

  1785                   "Must have a buffered input stream");

  1787   nsresult rv = NS_OK;

  1789   if (mIsAboutBlank) {

  1790     MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");

  1791     // ... but if an extension tries to feed us data for about:blank in a

  1792     // release build, silently ignore the data.

  1793     uint32_t totalRead;

  1794     rv = pIStream->ReadSegments(NoOpParserWriteFunc,

  1795                                 nullptr,

  1796                                 aLength,

  1797                                 &totalRead);

  1798     return rv;

  1799   }

  1801   CParserContext *theContext = mParserContext;

  1803   while (theContext && theContext->mRequest != request) {

  1804     theContext = theContext->mPrevContext;

  1805   }

  1807   if (theContext) {

  1808     theContext->mStreamListenerState = eOnDataAvail;

  1810     if (eInvalidDetect == theContext->mAutoDetectStatus) {

  1811       if (theContext->mScanner) {

  1812         nsScannerIterator iter;

  1813         theContext->mScanner->EndReading(iter);

  1814         theContext->mScanner->SetPosition(iter, true);

  1815       }

  1816     }

  1818     uint32_t totalRead;

  1819     ParserWriteStruct pws;

  1820     pws.mNeedCharsetCheck = true;

  1821     pws.mParser = this;

  1822     pws.mScanner = theContext->mScanner;

  1823     pws.mRequest = request;

  1825     rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);

  1826     if (NS_FAILED(rv)) {

  1827       return rv;

  1828     }

  1830     // Don't bother to start parsing until we've seen some

  1831     // non-whitespace data

  1832     if (IsOkToProcessNetworkData() &&

  1833         theContext->mScanner->FirstNonWhitespacePosition() >= 0) {

  1834       nsCOMPtr<nsIParser> kungFuDeathGrip(this);

  1835       nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);

  1836       mProcessingNetworkData = true;

  1837       if (mSink) {

  1838         mSink->WillParse();

  1839       }

  1840       rv = ResumeParse();

  1841       mProcessingNetworkData = false;

  1842     }

  1843   } else {

  1844     rv = NS_ERROR_UNEXPECTED;

  1845   }

  1847   return rv;

  1848 }

  1850 /**

  1851  *  This is called by the networking library once the last block of data

  1852  *  has been collected from the net.

  1853  */

  1854 nsresult

  1855 nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,

  1856                         nsresult status)

  1857 {

  1858   nsresult rv = NS_OK;

  1860   CParserContext *pc = mParserContext;

  1861   while (pc) {

  1862     if (pc->mRequest == request) {

  1863       pc->mStreamListenerState = eOnStop;

  1864       pc->mScanner->SetIncremental(false);

  1865       break;

  1866     }

  1868     pc = pc->mPrevContext;

  1869   }

  1871   mStreamStatus = status;

  1873   if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {

  1874     mProcessingNetworkData = true;

  1875     if (mSink) {

  1876       mSink->WillParse();

  1877     }

  1878     rv = ResumeParse(true, true);

  1879     mProcessingNetworkData = false;

  1880   }

  1882   // If the parser isn't enabled, we don't finish parsing till

  1883   // it is reenabled.

  1886   // XXX Should we wait to notify our observers as well if the

  1887   // parser isn't yet enabled?

  1888   if (mObserver) {

  1889     mObserver->OnStopRequest(request, aContext, status);

  1890   }

  1892   return rv;

  1893 }

  1896 /*******************************************************************

  1897   Here come the tokenization methods...

  1898  *******************************************************************/

  1901 /**

  1902  *  Part of the code sandwich, this gets called right before

  1903  *  the tokenization process begins. The main reason for

  1904  *  this call is to allow the delegate to do initialization.

  1905  */

  1906 bool

  1907 nsParser::WillTokenize(bool aIsFinalChunk)

  1908 {

  1909   if (!mParserContext) {

  1910     return true;

  1911   }

  1913   nsITokenizer* theTokenizer;

  1914   nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);

  1915   NS_ENSURE_SUCCESS(result, false);

  1916   return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));

  1917 }

  1920 /**

  1921  * This is the primary control routine to consume tokens.

  1922  * It iteratively consumes tokens until an error occurs or

  1923  * you run out of data.

  1924  */

  1925 nsresult nsParser::Tokenize(bool aIsFinalChunk)

  1926 {

  1927   nsITokenizer* theTokenizer;

  1929   nsresult result = NS_ERROR_NOT_AVAILABLE;

  1930   if (mParserContext) {

  1931     result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);

  1932   }

  1934   if (NS_SUCCEEDED(result)) {

  1935     bool flushTokens = false;

  1937     bool killSink = false;

  1939     WillTokenize(aIsFinalChunk);

  1940     while (NS_SUCCEEDED(result)) {

  1941       mParserContext->mScanner->Mark();

  1942       result = theTokenizer->ConsumeToken(*mParserContext->mScanner,

  1943                                           flushTokens);

  1944       if (NS_FAILED(result)) {

  1945         mParserContext->mScanner->RewindToMark();

  1946         if (kEOF == result){

  1947           break;

  1948         }

  1949         if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {

  1950           killSink = true;

  1951           result = Terminate();

  1952           break;

  1953         }

  1954       } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {

  1955         // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.

  1956         // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --

  1957         // Also remember to update the marked position.

  1958         mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;

  1959         mParserContext->mScanner->Mark();

  1960         break;

  1961       }

  1962     }

  1964     if (killSink) {

  1965       mSink = nullptr;

  1966     }

  1967   } else {

  1968     result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;

  1969   }

  1971   return result;

  1972 }

  1974 /**

  1975  * Get the channel associated with this parser

  1976  *

  1977  * @param aChannel out param that will contain the result

  1978  * @return NS_OK if successful

  1979  */

  1980 NS_IMETHODIMP

  1981 nsParser::GetChannel(nsIChannel** aChannel)

  1982 {

  1983   nsresult result = NS_ERROR_NOT_AVAILABLE;

  1984   if (mParserContext && mParserContext->mRequest) {

  1985     result = CallQueryInterface(mParserContext->mRequest, aChannel);

  1986   }

  1987   return result;

  1988 }

  1990 /**

  1991  * Get the DTD associated with this parser

  1992  */

  1993 NS_IMETHODIMP

  1994 nsParser::GetDTD(nsIDTD** aDTD)

  1995 {

  1996   if (mParserContext) {

  1997     NS_IF_ADDREF(*aDTD = mDTD);

  1998   }

  2000   return NS_OK;

  2001 }

  2003 /**

  2004  * Get this as nsIStreamListener

  2005  */

  2006 nsIStreamListener*

  2007 nsParser::GetStreamListener()

  2008 {

  2009   return this;

  2010 }

The Tor Browser / file revision

parser/htmlparser/src/nsParser.cpp@6474c204b198

parser/htmlparser/src/nsParser.cpp