The Tor Browser: diff parser/htmlparser/src/nsParser.cpp

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/htmlparser/src/nsParser.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2010 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set sw=2 ts=2 et tw=79: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include "nsIAtom.h"
    1.11 +#include "nsParser.h"
    1.12 +#include "nsString.h"
    1.13 +#include "nsCRT.h"
    1.14 +#include "nsScanner.h"
    1.15 +#include "plstr.h"
    1.16 +#include "nsIStringStream.h"
    1.17 +#include "nsIChannel.h"
    1.18 +#include "nsICachingChannel.h"
    1.19 +#include "nsICacheEntryDescriptor.h"
    1.20 +#include "nsIInputStream.h"
    1.21 +#include "CNavDTD.h"
    1.22 +#include "prenv.h"
    1.23 +#include "prlock.h"
    1.24 +#include "prcvar.h"
    1.25 +#include "nsParserCIID.h"
    1.26 +#include "nsReadableUtils.h"
    1.27 +#include "nsCOMPtr.h"
    1.28 +#include "nsExpatDriver.h"
    1.29 +#include "nsIServiceManager.h"
    1.30 +#include "nsICategoryManager.h"
    1.31 +#include "nsISupportsPrimitives.h"
    1.32 +#include "nsIFragmentContentSink.h"
    1.33 +#include "nsStreamUtils.h"
    1.34 +#include "nsHTMLTokenizer.h"
    1.35 +#include "nsNetUtil.h"
    1.36 +#include "nsScriptLoader.h"
    1.37 +#include "nsDataHashtable.h"
    1.38 +#include "nsXPCOMCIDInternal.h"
    1.39 +#include "nsMimeTypes.h"
    1.40 +#include "mozilla/CondVar.h"
    1.41 +#include "mozilla/Mutex.h"
    1.42 +#include "nsParserConstants.h"
    1.43 +#include "nsCharsetSource.h"
    1.44 +#include "nsContentUtils.h"
    1.45 +#include "nsThreadUtils.h"
    1.46 +#include "nsIHTMLContentSink.h"
    1.47 +
    1.48 +#include "mozilla/dom/EncodingUtils.h"
    1.49 +
    1.50 +using namespace mozilla;
    1.51 +using mozilla::dom::EncodingUtils;
    1.52 +
    // Bit values that are OR'ed together in nsParser::mFlags.  Bits 0 and 4
    // have no flag defined here.
    #define NS_PARSER_FLAG_PARSER_ENABLED         (1 << 1)
    #define NS_PARSER_FLAG_OBSERVERS_ENABLED      (1 << 2)
    #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT (1 << 3)
    #define NS_PARSER_FLAG_FLUSH_TOKENS           (1 << 5)
    #define NS_PARSER_FLAG_CAN_TOKENIZE           (1 << 6)
    1.58 +
    1.59 +//-------------- Begin ParseContinue Event Definition ------------------------
    1.60 +/*
    1.61 +The parser can be explicitly interrupted by passing a return value of
    1.62 +NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
    1.63 +the parser to stop processing and allow the application to return to the event
    1.64 +loop. The data which was left at the time of interruption will be processed
    1.65 +the next time OnDataAvailable is called. If the parser has received its final
    1.66 +chunk of data then OnDataAvailable will no longer be called by the networking
    1.67 +module, so the parser will schedule a nsParserContinueEvent which will call
    1.68 +the parser to process the remaining data after returning to the event loop.
    1.69 +If the parser is interrupted while processing the remaining data it will
    1.70 +schedule another ParseContinueEvent. The processing of data followed by
    1.71 +scheduling of the continue events will proceed until either:
    1.72 +
    1.73 +  1) All of the remaining data can be processed without interrupting
    1.74 +  2) The parser has been cancelled.
    1.75 +
    1.76 +
    1.77 +This capability is currently used in CNavDTD and nsHTMLContentSink. The
    1.78 +nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
    1.79 +processed and when each token is processed. The nsHTML content sink records
    1.80 +the time when the chunk has started processing and will return
    1.81 +NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
    1.82 +threshold called max tokenizing processing time. This allows the content sink
    1.83 +to limit how much data is processed in a single chunk which in turn gates how
    1.84 +much time is spent away from the event loop. Processing smaller chunks of data
    1.85 +also reduces the time spent in subsequent reflows.
    1.86 +
    1.87 +This capability is most apparent when loading large documents. If the maximum
    1.88 +token processing time is set small enough the application will remain
    1.89 +responsive during document load.
    1.90 +
    1.91 +A side-effect of this capability is that document load is not complete when
     1.92 +the last chunk of data is passed to OnDataAvailable since the parser may have
    1.93 +been interrupted when the last chunk of data arrived. The document is complete
    1.94 +when all of the document has been tokenized and there aren't any pending
    1.95 +nsParserContinueEvents. This can cause problems if the application assumes
    1.96 +that it can monitor the load requests to determine when the document load has
    1.97 +been completed. This is what happens in Mozilla. The document is considered
    1.98 +completely loaded when all of the load requests have been satisfied. To delay
    1.99 +the document load until all of the parsing has been completed the
   1.100 +nsHTMLContentSink adds a dummy parser load request which is not removed until
   1.101 +the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
   1.102 +DidBuildModel until the final chunk of data has been passed to the parser
   1.103 +through the OnDataAvailable and there aren't any pending
   1.104 +nsParserContinueEvents.
   1.105 +
   1.106 +Currently the parser ignores requests to be interrupted during the
   1.107 +processing of script.  This is because a document.write followed by JavaScript
   1.108 +calls to manipulate the DOM may fail if the parser was interrupted during the
   1.109 +document.write.
   1.110 +
   1.111 +For more details @see bugzilla bug 76722
   1.112 +*/
   1.113 +
   1.114 +
   1.115 +class nsParserContinueEvent : public nsRunnable
   1.116 +{
   1.117 +public:
   1.118 +  nsRefPtr<nsParser> mParser;
   1.119 +
   1.120 +  nsParserContinueEvent(nsParser* aParser)
   1.121 +    : mParser(aParser)
   1.122 +  {}
   1.123 +
   1.124 +  NS_IMETHOD Run()
   1.125 +  {
   1.126 +    mParser->HandleParserContinueEvent(this);
   1.127 +    return NS_OK;
   1.128 +  }
   1.129 +};
   1.130 +
   1.131 +//-------------- End ParseContinue Event Definition ------------------------
   1.132 +
   1.133 +/**
   1.134 + *  default constructor
   1.135 + */
   1.136 +nsParser::nsParser()
   1.137 +{
   1.138 +  Initialize(true);
   1.139 +}
   1.140 +
   1.141 +nsParser::~nsParser()
   1.142 +{
   1.143 +  Cleanup();
   1.144 +}
   1.145 +
   1.146 +void
   1.147 +nsParser::Initialize(bool aConstructor)
   1.148 +{
   1.149 +  if (aConstructor) {
   1.150 +    // Raw pointer
   1.151 +    mParserContext = 0;
   1.152 +  }
   1.153 +  else {
   1.154 +    // nsCOMPtrs
   1.155 +    mObserver = nullptr;
   1.156 +    mUnusedInput.Truncate();
   1.157 +  }
   1.158 +
   1.159 +  mContinueEvent = nullptr;
   1.160 +  mCharsetSource = kCharsetUninitialized;
   1.161 +  mCharset.AssignLiteral("ISO-8859-1");
   1.162 +  mInternalState = NS_OK;
   1.163 +  mStreamStatus = NS_OK;
   1.164 +  mCommand = eViewNormal;
   1.165 +  mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
   1.166 +           NS_PARSER_FLAG_PARSER_ENABLED |
   1.167 +           NS_PARSER_FLAG_CAN_TOKENIZE;
   1.168 +
   1.169 +  mProcessingNetworkData = false;
   1.170 +  mIsAboutBlank = false;
   1.171 +}
   1.172 +
   1.173 +void
   1.174 +nsParser::Cleanup()
   1.175 +{
   1.176 +#ifdef DEBUG
   1.177 +  if (mParserContext && mParserContext->mPrevContext) {
   1.178 +    NS_WARNING("Extra parser contexts still on the parser stack");
   1.179 +  }
   1.180 +#endif
   1.181 +
   1.182 +  while (mParserContext) {
   1.183 +    CParserContext *pc = mParserContext->mPrevContext;
   1.184 +    delete mParserContext;
   1.185 +    mParserContext = pc;
   1.186 +  }
   1.187 +
   1.188 +  // It should not be possible for this flag to be set when we are getting
   1.189 +  // destroyed since this flag implies a pending nsParserContinueEvent, which
   1.190 +  // has an owning reference to |this|.
   1.191 +  NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
   1.192 +}
   1.193 +
   1.194 +NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
   1.195 +
   1.196 +NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
   1.197 +  NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
   1.198 +  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
   1.199 +  NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
   1.200 +NS_IMPL_CYCLE_COLLECTION_UNLINK_END
   1.201 +
   1.202 +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
   1.203 +  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
   1.204 +  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
   1.205 +  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
   1.206 +  CParserContext *pc = tmp->mParserContext;
   1.207 +  while (pc) {
   1.208 +    cb.NoteXPCOMChild(pc->mTokenizer);
   1.209 +    pc = pc->mPrevContext;
   1.210 +  }
   1.211 +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
   1.212 +
   1.213 +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
   1.214 +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
   1.215 +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
   1.216 +  NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
   1.217 +  NS_INTERFACE_MAP_ENTRY(nsIParser)
   1.218 +  NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
   1.219 +  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
   1.220 +  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
   1.221 +NS_INTERFACE_MAP_END
   1.222 +
   1.223 +// The parser continue event is posted only if
   1.224 +// all of the data to parse has been passed to ::OnDataAvailable
   1.225 +// and the parser has been interrupted by the content sink
   1.226 +// because the processing of tokens took too long.
   1.227 +
   1.228 +nsresult
   1.229 +nsParser::PostContinueEvent()
   1.230 +{
   1.231 +  if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
   1.232 +    // If this flag isn't set, then there shouldn't be a live continue event!
   1.233 +    NS_ASSERTION(!mContinueEvent, "bad");
   1.234 +
   1.235 +    // This creates a reference cycle between this and the event that is
   1.236 +    // broken when the event fires.
   1.237 +    nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
   1.238 +    if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
   1.239 +        NS_WARNING("failed to dispatch parser continuation event");
   1.240 +    } else {
   1.241 +        mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
   1.242 +        mContinueEvent = event;
   1.243 +    }
   1.244 +  }
   1.245 +  return NS_OK;
   1.246 +}
   1.247 +
   1.248 +NS_IMETHODIMP_(void)
   1.249 +nsParser::GetCommand(nsCString& aCommand)
   1.250 +{
   1.251 +  aCommand = mCommandStr;
   1.252 +}
   1.253 +
   1.254 +/**
   1.255 + *  Call this method once you've created a parser, and want to instruct it
   1.256 + *  about the command which caused the parser to be constructed. For example,
   1.257 + *  this allows us to select a DTD which can do, say, view-source.
   1.258 + *
   1.259 + *  @param   aCommand the command string to set
   1.260 + */
   1.261 +NS_IMETHODIMP_(void)
   1.262 +nsParser::SetCommand(const char* aCommand)
   1.263 +{
   1.264 +  mCommandStr.Assign(aCommand);
   1.265 +  if (mCommandStr.Equals("view-source")) {
   1.266 +    mCommand = eViewSource;
   1.267 +  } else if (mCommandStr.Equals("view-fragment")) {
   1.268 +    mCommand = eViewFragment;
   1.269 +  } else {
   1.270 +    mCommand = eViewNormal;
   1.271 +  }
   1.272 +}
   1.273 +
   1.274 +/**
   1.275 + *  Call this method once you've created a parser, and want to instruct it
   1.276 + *  about the command which caused the parser to be constructed. For example,
   1.277 + *  this allows us to select a DTD which can do, say, view-source.
   1.278 + *
   1.279 + *  @param   aParserCommand the command to set
   1.280 + */
   1.281 +NS_IMETHODIMP_(void)
   1.282 +nsParser::SetCommand(eParserCommands aParserCommand)
   1.283 +{
   1.284 +  mCommand = aParserCommand;
   1.285 +}
   1.286 +
   1.287 +/**
   1.288 + *  Call this method once you've created a parser, and want to instruct it
   1.289 + *  about what charset to load
   1.290 + *
   1.291 + *  @param   aCharset- the charset of a document
   1.292 + *  @param   aCharsetSource- the source of the charset
   1.293 + */
   1.294 +NS_IMETHODIMP_(void)
   1.295 +nsParser::SetDocumentCharset(const nsACString& aCharset, int32_t aCharsetSource)
   1.296 +{
   1.297 +  mCharset = aCharset;
   1.298 +  mCharsetSource = aCharsetSource;
   1.299 +  if (mParserContext && mParserContext->mScanner) {
   1.300 +     mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
   1.301 +  }
   1.302 +}
   1.303 +
   1.304 +void
   1.305 +nsParser::SetSinkCharset(nsACString& aCharset)
   1.306 +{
   1.307 +  if (mSink) {
   1.308 +    mSink->SetDocumentCharset(aCharset);
   1.309 +  }
   1.310 +}
   1.311 +
   1.312 +/**
   1.313 + *  This method gets called in order to set the content
   1.314 + *  sink for this parser to dump nodes to.
   1.315 + *
   1.316 + *  @param   nsIContentSink interface for node receiver
   1.317 + */
   1.318 +NS_IMETHODIMP_(void)
   1.319 +nsParser::SetContentSink(nsIContentSink* aSink)
   1.320 +{
   1.321 +  NS_PRECONDITION(aSink, "sink cannot be null!");
   1.322 +  mSink = aSink;
   1.323 +
   1.324 +  if (mSink) {
   1.325 +    mSink->SetParser(this);
   1.326 +    nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
   1.327 +    if (htmlSink) {
   1.328 +      mIsAboutBlank = true;
   1.329 +    }
   1.330 +  }
   1.331 +}
   1.332 +
   1.333 +/**
   1.334 + * retrieve the sink set into the parser
   1.335 + * @return  current sink
   1.336 + */
   1.337 +NS_IMETHODIMP_(nsIContentSink*)
   1.338 +nsParser::GetContentSink()
   1.339 +{
   1.340 +  return mSink;
   1.341 +}
   1.342 +
   1.343 +/**
   1.344 + * Determine what DTD mode (and thus what layout nsCompatibility mode)
   1.345 + * to use for this document based on the first chunk of data received
   1.346 + * from the network (each parsercontext can have its own mode).  (No,
   1.347 + * this is not an optimal solution -- we really don't need to know until
   1.348 + * after we've received the DOCTYPE, and this could easily be part of
   1.349 + * the regular parsing process if the parser were designed in a way that
   1.350 + * made such modifications easy.)
   1.351 + */
   1.352 +
   1.353 +// Parse the PS production in the SGML spec (excluding the part dealing
   1.354 +// with entity references) starting at theIndex into theBuffer, and
   1.355 +// return the first index after the end of the production.
   1.356 +static int32_t
   1.357 +ParsePS(const nsString& aBuffer, int32_t aIndex)
   1.358 +{
   1.359 +  for (;;) {
   1.360 +    char16_t ch = aBuffer.CharAt(aIndex);
   1.361 +    if ((ch == char16_t(' ')) || (ch == char16_t('\t')) ||
   1.362 +        (ch == char16_t('\n')) || (ch == char16_t('\r'))) {
   1.363 +      ++aIndex;
   1.364 +    } else if (ch == char16_t('-')) {
   1.365 +      int32_t tmpIndex;
   1.366 +      if (aBuffer.CharAt(aIndex+1) == char16_t('-') &&
   1.367 +          kNotFound != (tmpIndex=aBuffer.Find("--",false,aIndex+2,-1))) {
   1.368 +        aIndex = tmpIndex + 2;
   1.369 +      } else {
   1.370 +        return aIndex;
   1.371 +      }
   1.372 +    } else {
   1.373 +      return aIndex;
   1.374 +    }
   1.375 +  }
   1.376 +}
   1.377 +
   // Result bits reported by ParseDocTypeDecl through its aResultFlags
   // out-parameter.
   #define PARSE_DTD_HAVE_DOCTYPE          (0x1)
   #define PARSE_DTD_HAVE_PUBLIC_ID        (0x2)
   #define PARSE_DTD_HAVE_SYSTEM_ID        (0x4)
   #define PARSE_DTD_HAVE_INTERNAL_SUBSET  (0x8)
   1.382 +
   1.383 +// return true on success (includes not present), false on failure
   1.384 +static bool
   1.385 +ParseDocTypeDecl(const nsString &aBuffer,
   1.386 +                 int32_t *aResultFlags,
   1.387 +                 nsString &aPublicID,
   1.388 +                 nsString &aSystemID)
   1.389 +{
   1.390 +  bool haveDoctype = false;
   1.391 +  *aResultFlags = 0;
   1.392 +
   1.393 +  // Skip through any comments and processing instructions
   1.394 +  // The PI-skipping is a bit of a hack.
   1.395 +  int32_t theIndex = 0;
   1.396 +  do {
   1.397 +    theIndex = aBuffer.FindChar('<', theIndex);
   1.398 +    if (theIndex == kNotFound) break;
   1.399 +    char16_t nextChar = aBuffer.CharAt(theIndex+1);
   1.400 +    if (nextChar == char16_t('!')) {
   1.401 +      int32_t tmpIndex = theIndex + 2;
   1.402 +      if (kNotFound !=
   1.403 +          (theIndex=aBuffer.Find("DOCTYPE", true, tmpIndex, 0))) {
   1.404 +        haveDoctype = true;
   1.405 +        theIndex += 7; // skip "DOCTYPE"
   1.406 +        break;
   1.407 +      }
   1.408 +      theIndex = ParsePS(aBuffer, tmpIndex);
   1.409 +      theIndex = aBuffer.FindChar('>', theIndex);
   1.410 +    } else if (nextChar == char16_t('?')) {
   1.411 +      theIndex = aBuffer.FindChar('>', theIndex);
   1.412 +    } else {
   1.413 +      break;
   1.414 +    }
   1.415 +  } while (theIndex != kNotFound);
   1.416 +
   1.417 +  if (!haveDoctype)
   1.418 +    return true;
   1.419 +  *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;
   1.420 +
   1.421 +  theIndex = ParsePS(aBuffer, theIndex);
   1.422 +  theIndex = aBuffer.Find("HTML", true, theIndex, 0);
   1.423 +  if (kNotFound == theIndex)
   1.424 +    return false;
   1.425 +  theIndex = ParsePS(aBuffer, theIndex+4);
   1.426 +  int32_t tmpIndex = aBuffer.Find("PUBLIC", true, theIndex, 0);
   1.427 +
   1.428 +  if (kNotFound != tmpIndex) {
   1.429 +    theIndex = ParsePS(aBuffer, tmpIndex+6);
   1.430 +
   1.431 +    // We get here only if we've read <!DOCTYPE HTML PUBLIC
   1.432 +    // (not case sensitive) possibly with comments within.
   1.433 +
   1.434 +    // Now find the beginning and end of the public identifier
   1.435 +    // and the system identifier (if present).
   1.436 +
   1.437 +    char16_t lit = aBuffer.CharAt(theIndex);
   1.438 +    if ((lit != char16_t('\"')) && (lit != char16_t('\'')))
   1.439 +      return false;
   1.440 +
   1.441 +    // Start is the first character, excluding the quote, and End is
   1.442 +    // the final quote, so there are (end-start) characters.
   1.443 +
   1.444 +    int32_t PublicIDStart = theIndex + 1;
   1.445 +    int32_t PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
   1.446 +    if (kNotFound == PublicIDEnd)
   1.447 +      return false;
   1.448 +    theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
   1.449 +    char16_t next = aBuffer.CharAt(theIndex);
   1.450 +    if (next == char16_t('>')) {
   1.451 +      // There was a public identifier, but no system
   1.452 +      // identifier,
   1.453 +      // so do nothing.
   1.454 +      // This is needed to avoid the else at the end, and it's
   1.455 +      // also the most common case.
   1.456 +    } else if ((next == char16_t('\"')) ||
   1.457 +               (next == char16_t('\''))) {
   1.458 +      // We found a system identifier.
   1.459 +      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
   1.460 +      int32_t SystemIDStart = theIndex + 1;
   1.461 +      int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
   1.462 +      if (kNotFound == SystemIDEnd)
   1.463 +        return false;
   1.464 +      aSystemID =
   1.465 +        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
   1.466 +    } else if (next == char16_t('[')) {
   1.467 +      // We found an internal subset.
   1.468 +      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
   1.469 +    } else {
   1.470 +      // Something's wrong.
   1.471 +      return false;
   1.472 +    }
   1.473 +
   1.474 +    // Since a public ID is a minimum literal, we must trim
   1.475 +    // and collapse whitespace
   1.476 +    aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
   1.477 +    aPublicID.CompressWhitespace(true, true);
   1.478 +    *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
   1.479 +  } else {
   1.480 +    tmpIndex=aBuffer.Find("SYSTEM", true, theIndex, 0);
   1.481 +    if (kNotFound != tmpIndex) {
   1.482 +      // DOCTYPES with system ID but no Public ID
   1.483 +      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
   1.484 +
   1.485 +      theIndex = ParsePS(aBuffer, tmpIndex+6);
   1.486 +      char16_t next = aBuffer.CharAt(theIndex);
   1.487 +      if (next != char16_t('\"') && next != char16_t('\''))
   1.488 +        return false;
   1.489 +
   1.490 +      int32_t SystemIDStart = theIndex + 1;
   1.491 +      int32_t SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
   1.492 +
   1.493 +      if (kNotFound == SystemIDEnd)
   1.494 +        return false;
   1.495 +      aSystemID =
   1.496 +        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
   1.497 +      theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
   1.498 +    }
   1.499 +
   1.500 +    char16_t nextChar = aBuffer.CharAt(theIndex);
   1.501 +    if (nextChar == char16_t('['))
   1.502 +      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
   1.503 +    else if (nextChar != char16_t('>'))
   1.504 +      return false;
   1.505 +  }
   1.506 +  return true;
   1.507 +}
   1.508 +
   // One row of the public-identifier table: a public ID plus the mode to
   // use for it depending on whether a system ID is present.
   struct PubIDInfo
   {
     // Public IDs that should trigger strict mode are not listed in the
     // table, since we want all future public IDs to trigger strict mode as
     // well.
     enum eMode {
       // always quirks mode, unless there's an internal subset
       eQuirks,
       // eCompatibility_AlmostStandards
       eAlmostStandards,
       // eCompatibility_FullStandards
       eFullStandards
     };

     // The public identifier this row describes.
     const char* name;
     // Mode when the DOCTYPE carries no system identifier.
     eMode mode_if_no_sysid;
     // Mode when the DOCTYPE carries a system identifier.
     eMode mode_if_sysid;
   };
   1.526 +
   // Number of elements in a true array (must not be used on a pointer).
   #define ELEMENTS_OF(array_) (sizeof(array_) / sizeof((array_)[0]))
   1.528 +
   1.529 +// These must be in nsCRT::strcmp order so binary-search can be used.
   1.530 +// This is verified, |#ifdef DEBUG|, below.
   1.531 +
   1.532 +// Even though public identifiers should be case sensitive, we will do
   1.533 +// all comparisons after converting to lower case in order to do
   1.534 +// case-insensitive comparison since there are a number of existing web
   1.535 +// sites that use the incorrect case.  Therefore all of the public
   1.536 +// identifiers below are in lower case (with the correct case following,
   1.537 +// in comments).  The case is verified, |#ifdef DEBUG|, below.
   1.538 +static const PubIDInfo kPublicIDs[] = {
   1.539 +  {"+//silmaril//dtd html pro v0r11 19970101//en" /* "+//Silmaril//dtd html Pro v0r11 19970101//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.540 +  {"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" /* "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.541 +  {"-//as//dtd html 3.0 aswedit + extensions//en" /* "-//AS//DTD HTML 3.0 asWedit + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.542 +  {"-//ietf//dtd html 2.0 level 1//en" /* "-//IETF//DTD HTML 2.0 Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.543 +  {"-//ietf//dtd html 2.0 level 2//en" /* "-//IETF//DTD HTML 2.0 Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.544 +  {"-//ietf//dtd html 2.0 strict level 1//en" /* "-//IETF//DTD HTML 2.0 Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.545 +  {"-//ietf//dtd html 2.0 strict level 2//en" /* "-//IETF//DTD HTML 2.0 Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.546 +  {"-//ietf//dtd html 2.0 strict//en" /* "-//IETF//DTD HTML 2.0 Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.547 +  {"-//ietf//dtd html 2.0//en" /* "-//IETF//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.548 +  {"-//ietf//dtd html 2.1e//en" /* "-//IETF//DTD HTML 2.1E//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.549 +  {"-//ietf//dtd html 3.0//en" /* "-//IETF//DTD HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.550 +  {"-//ietf//dtd html 3.0//en//" /* "-//IETF//DTD HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.551 +  {"-//ietf//dtd html 3.2 final//en" /* "-//IETF//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.552 +  {"-//ietf//dtd html 3.2//en" /* "-//IETF//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.553 +  {"-//ietf//dtd html 3//en" /* "-//IETF//DTD HTML 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.554 +  {"-//ietf//dtd html level 0//en" /* "-//IETF//DTD HTML Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.555 +  {"-//ietf//dtd html level 0//en//2.0" /* "-//IETF//DTD HTML Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.556 +  {"-//ietf//dtd html level 1//en" /* "-//IETF//DTD HTML Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.557 +  {"-//ietf//dtd html level 1//en//2.0" /* "-//IETF//DTD HTML Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.558 +  {"-//ietf//dtd html level 2//en" /* "-//IETF//DTD HTML Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.559 +  {"-//ietf//dtd html level 2//en//2.0" /* "-//IETF//DTD HTML Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.560 +  {"-//ietf//dtd html level 3//en" /* "-//IETF//DTD HTML Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.561 +  {"-//ietf//dtd html level 3//en//3.0" /* "-//IETF//DTD HTML Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.562 +  {"-//ietf//dtd html strict level 0//en" /* "-//IETF//DTD HTML Strict Level 0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.563 +  {"-//ietf//dtd html strict level 0//en//2.0" /* "-//IETF//DTD HTML Strict Level 0//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.564 +  {"-//ietf//dtd html strict level 1//en" /* "-//IETF//DTD HTML Strict Level 1//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.565 +  {"-//ietf//dtd html strict level 1//en//2.0" /* "-//IETF//DTD HTML Strict Level 1//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.566 +  {"-//ietf//dtd html strict level 2//en" /* "-//IETF//DTD HTML Strict Level 2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.567 +  {"-//ietf//dtd html strict level 2//en//2.0" /* "-//IETF//DTD HTML Strict Level 2//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.568 +  {"-//ietf//dtd html strict level 3//en" /* "-//IETF//DTD HTML Strict Level 3//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.569 +  {"-//ietf//dtd html strict level 3//en//3.0" /* "-//IETF//DTD HTML Strict Level 3//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.570 +  {"-//ietf//dtd html strict//en" /* "-//IETF//DTD HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.571 +  {"-//ietf//dtd html strict//en//2.0" /* "-//IETF//DTD HTML Strict//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.572 +  {"-//ietf//dtd html strict//en//3.0" /* "-//IETF//DTD HTML Strict//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.573 +  {"-//ietf//dtd html//en" /* "-//IETF//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.574 +  {"-//ietf//dtd html//en//2.0" /* "-//IETF//DTD HTML//EN//2.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.575 +  {"-//ietf//dtd html//en//3.0" /* "-//IETF//DTD HTML//EN//3.0" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.576 +  {"-//metrius//dtd metrius presentational//en" /* "-//Metrius//DTD Metrius Presentational//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.577 +  {"-//microsoft//dtd internet explorer 2.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.578 +  {"-//microsoft//dtd internet explorer 2.0 html//en" /* "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.579 +  {"-//microsoft//dtd internet explorer 2.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.580 +  {"-//microsoft//dtd internet explorer 3.0 html strict//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.581 +  {"-//microsoft//dtd internet explorer 3.0 html//en" /* "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.582 +  {"-//microsoft//dtd internet explorer 3.0 tables//en" /* "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.583 +  {"-//netscape comm. corp.//dtd html//en" /* "-//Netscape Comm. Corp.//DTD HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.584 +  {"-//netscape comm. corp.//dtd strict html//en" /* "-//Netscape Comm. Corp.//DTD Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.585 +  {"-//o'reilly and associates//dtd html 2.0//en" /* "-//O'Reilly and Associates//DTD HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.586 +  {"-//o'reilly and associates//dtd html extended 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.587 +  {"-//o'reilly and associates//dtd html extended relaxed 1.0//en" /* "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.588 +  {"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//en" /* "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.589 +  {"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//en" /* "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.590 +  {"-//spyglass//dtd html 2.0 extended//en" /* "-//Spyglass//DTD HTML 2.0 Extended//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.591 +  {"-//sq//dtd html 2.0 hotmetal + extensions//en" /* "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.592 +  {"-//sun microsystems corp.//dtd hotjava html//en" /* "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.593 +  {"-//sun microsystems corp.//dtd hotjava strict html//en" /* "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.594 +  {"-//w3c//dtd html 3 1995-03-24//en" /* "-//W3C//DTD HTML 3 1995-03-24//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.595 +  {"-//w3c//dtd html 3.2 draft//en" /* "-//W3C//DTD HTML 3.2 Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.596 +  {"-//w3c//dtd html 3.2 final//en" /* "-//W3C//DTD HTML 3.2 Final//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.597 +  {"-//w3c//dtd html 3.2//en" /* "-//W3C//DTD HTML 3.2//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.598 +  {"-//w3c//dtd html 3.2s draft//en" /* "-//W3C//DTD HTML 3.2S Draft//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.599 +  {"-//w3c//dtd html 4.0 frameset//en" /* "-//W3C//DTD HTML 4.0 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.600 +  {"-//w3c//dtd html 4.0 transitional//en" /* "-//W3C//DTD HTML 4.0 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.601 +  {"-//w3c//dtd html 4.01 frameset//en" /* "-//W3C//DTD HTML 4.01 Frameset//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
   1.602 +  {"-//w3c//dtd html 4.01 transitional//en" /* "-//W3C//DTD HTML 4.01 Transitional//EN" */, PubIDInfo::eQuirks, PubIDInfo::eAlmostStandards},
   1.603 +  {"-//w3c//dtd html experimental 19960712//en" /* "-//W3C//DTD HTML Experimental 19960712//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.604 +  {"-//w3c//dtd html experimental 970421//en" /* "-//W3C//DTD HTML Experimental 970421//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.605 +  {"-//w3c//dtd w3 html//en" /* "-//W3C//DTD W3 HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.606 +  {"-//w3c//dtd xhtml 1.0 frameset//en" /* "-//W3C//DTD XHTML 1.0 Frameset//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
   1.607 +  {"-//w3c//dtd xhtml 1.0 transitional//en" /* "-//W3C//DTD XHTML 1.0 Transitional//EN" */, PubIDInfo::eAlmostStandards, PubIDInfo::eAlmostStandards},
   1.608 +  {"-//w3o//dtd w3 html 3.0//en" /* "-//W3O//DTD W3 HTML 3.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.609 +  {"-//w3o//dtd w3 html 3.0//en//" /* "-//W3O//DTD W3 HTML 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.610 +  {"-//w3o//dtd w3 html strict 3.0//en//" /* "-//W3O//DTD W3 HTML Strict 3.0//EN//" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.611 +  {"-//webtechs//dtd mozilla html 2.0//en" /* "-//WebTechs//DTD Mozilla HTML 2.0//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.612 +  {"-//webtechs//dtd mozilla html//en" /* "-//WebTechs//DTD Mozilla HTML//EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.613 +  {"-/w3c/dtd html 4.0 transitional/en" /* "-/W3C/DTD HTML 4.0 Transitional/EN" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.614 +  {"html" /* "HTML" */, PubIDInfo::eQuirks, PubIDInfo::eQuirks},
   1.615 +};
   1.616 +
   1.617 +#ifdef DEBUG
   1.618 +// Debug-only, run-once check that kPublicIDs is strictly sorted and lowercase.
   1.619 +static void
   1.620 +VerifyPublicIDs()
   1.621 +{
   1.622 +  static bool gVerified = false;
   1.623 +  if (gVerified) return;
   1.624 +  gVerified = true;
   1.625 +  for (uint32_t next = 1; next < ELEMENTS_OF(kPublicIDs); ++next) {
   1.626 +    const char* prevName = kPublicIDs[next - 1].name;
   1.627 +    const char* nextName = kPublicIDs[next].name;
   1.628 +    if (nsCRT::strcmp(prevName, nextName) >= 0) {
   1.629 +      NS_NOTREACHED("doctypes out of order");
   1.630 +      printf("Doctypes %s and %s out of order.\n", prevName, nextName);
   1.631 +    }
   1.632 +  }
   1.633 +  for (uint32_t cur = 0; cur < ELEMENTS_OF(kPublicIDs); ++cur) {
   1.634 +    nsAutoCString lowered(kPublicIDs[cur].name);
   1.635 +    ToLowerCase(lowered);
   1.636 +    if (nsCRT::strcmp(kPublicIDs[cur].name, lowered.get()) != 0) {
   1.637 +      NS_NOTREACHED("doctype not lower case");
   1.638 +      printf("Doctype %s not lower case.\n", kPublicIDs[cur].name);
   1.639 +    }
   1.640 +  }
   1.641 +}
   1.642 +#endif
   1.643 +
   1.644 +// Derives the DTD mode and document type of an HTML document from the
   1.645 +// DOCTYPE declaration (if any) found in aBuffer. Both out-params are
   1.646 +// written on every path except the unexpected-enum default case below.
   1.647 +static void
   1.648 +DetermineHTMLParseMode(const nsString& aBuffer,
   1.649 +                       nsDTDMode& aParseMode,
   1.650 +                       eParserDocType& aDocType)
   1.651 +{
   1.652 +#ifdef DEBUG
   1.653 +  VerifyPublicIDs();
   1.654 +#endif
   1.655 +  int32_t resultFlags;
   1.656 +  nsAutoString publicIDUCS2, sysIDUCS2;
   1.657 +  if (!ParseDocTypeDecl(aBuffer, &resultFlags, publicIDUCS2, sysIDUCS2) ||
   1.658 +      !(resultFlags & PARSE_DTD_HAVE_DOCTYPE)) {
   1.659 +    // A badly formed DOCTYPE and a missing DOCTYPE both yield quirks mode.
   1.660 +    aParseMode = eDTDMode_quirks;
   1.661 +    aDocType = eHTML_Quirks;
   1.662 +    return;
   1.663 +  }
   1.664 +
   1.665 +  const bool hasInternalSubset =
   1.666 +    (resultFlags & PARSE_DTD_HAVE_INTERNAL_SUBSET) != 0;
   1.667 +  if (hasInternalSubset || !(resultFlags & PARSE_DTD_HAVE_PUBLIC_ID)) {
   1.668 +    // A doctype with an internal subset is always full_standards.
   1.669 +    // A doctype without a public ID is always full_standards.
   1.670 +    // The one exception is a special hack for IBM's custom DOCTYPE, which
   1.671 +    // is treated as quirks as long as there is no internal subset.
   1.672 +    const bool isIBMDoctype =
   1.673 +      !hasInternalSubset &&
   1.674 +      sysIDUCS2 == NS_LITERAL_STRING(
   1.675 +           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd");
   1.676 +    aParseMode = isIBMDoctype ? eDTDMode_quirks : eDTDMode_full_standards;
   1.677 +    aDocType = isIBMDoctype ? eHTML_Quirks : eHTML_Strict;
   1.678 +    return;
   1.679 +  }
   1.680 +
   1.681 +  // We have to check our list of public IDs to see what to do.
   1.682 +  // Yes, we want UCS2 to ASCII lossy conversion.
   1.683 +  nsAutoCString publicID;
   1.684 +  publicID.AssignWithConversion(publicIDUCS2);
   1.685 +
   1.686 +  // See comment above definition of kPublicIDs about case
   1.687 +  // sensitivity.
   1.688 +  ToLowerCase(publicID);
   1.689 +
   1.690 +  // Binary search of the sorted kPublicIDs table. The bounds must be
   1.691 +  // signed because the upper bound can drop below zero.
   1.692 +  int32_t found = -1;
   1.693 +  int32_t low = 0;
   1.694 +  int32_t high = ELEMENTS_OF(kPublicIDs) - 1;
   1.695 +  while (found < 0 && low <= high) {
   1.696 +    const int32_t middle = (low + high) / 2;
   1.697 +    const int32_t comparison =
   1.698 +        nsCRT::strcmp(publicID.get(), kPublicIDs[middle].name);
   1.699 +    if (comparison == 0) {
   1.700 +      found = middle;
   1.701 +    } else if (comparison < 0) {
   1.702 +      high = middle - 1;
   1.703 +    } else {
   1.704 +      low = middle + 1;
   1.705 +    }
   1.706 +  }
   1.707 +
   1.708 +  if (found < 0) {
   1.709 +    // The DOCTYPE is not in our list, so it must be full_standards.
   1.710 +    aParseMode = eDTDMode_full_standards;
   1.711 +    aDocType = eHTML_Strict;
   1.712 +    return;
   1.713 +  }
   1.714 +
   1.715 +  const bool hasSystemID = (resultFlags & PARSE_DTD_HAVE_SYSTEM_ID) != 0;
   1.716 +  switch (hasSystemID ? kPublicIDs[found].mode_if_sysid
   1.717 +                      : kPublicIDs[found].mode_if_no_sysid)
   1.718 +  {
   1.719 +    case PubIDInfo::eQuirks:
   1.720 +      aParseMode = eDTDMode_quirks;
   1.721 +      aDocType = eHTML_Quirks;
   1.722 +      break;
   1.723 +    case PubIDInfo::eAlmostStandards:
   1.724 +      aParseMode = eDTDMode_almost_standards;
   1.725 +      aDocType = eHTML_Strict;
   1.726 +      break;
   1.727 +    case PubIDInfo::eFullStandards:
   1.728 +      aParseMode = eDTDMode_full_standards;
   1.729 +      aDocType = eHTML_Strict;
   1.730 +      break;
   1.731 +    default:
   1.732 +      NS_NOTREACHED("no other cases!");
   1.733 +  }
   1.734 +}
   1.735 +
   1.736 +// Picks document type and DTD mode from the MIME type; HTML also sniffs aBuffer.
   1.737 +static void
   1.738 +DetermineParseMode(const nsString& aBuffer, nsDTDMode& aParseMode,
   1.739 +                   eParserDocType& aDocType, const nsACString& aMimeType)
   1.740 +{
   1.741 +  if (aMimeType.EqualsLiteral(TEXT_HTML)) {
   1.742 +    DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
   1.743 +    return;
   1.744 +  }
   1.745 +  const bool isPlainText = nsContentUtils::IsPlainTextType(aMimeType);
   1.746 +  // Anything that is neither HTML nor plain text is some form of XML.
   1.747 +  aDocType = isPlainText ? ePlainText : eXML;
   1.748 +  aParseMode = isPlainText ? eDTDMode_quirks : eDTDMode_full_standards;
   1.749 +}
   1.750 +
   1.751 +// Creates the DTD for a parser context: nsExpatDriver for XML, CNavDTD for
   1.752 +// everything else. Also sets the context's mAutoDetectStatus to ePrimaryDetect.
   1.753 +static nsIDTD*
   1.754 +FindSuitableDTD(CParserContext& aParserContext)
   1.755 +{
   1.756 +  // We always find a DTD.
   1.757 +  aParserContext.mAutoDetectStatus = ePrimaryDetect;
   1.758 +  // Quick check for view source.
   1.759 +  NS_ABORT_IF_FALSE(aParserContext.mParserCommand != eViewSource,
   1.760 +    "The old parser is not supposed to be used for View Source anymore.");
   1.761 +  nsIDTD* dtd = nullptr;
   1.762 +  if (aParserContext.mDocType != eXML) {
   1.763 +    // HTML, as far as we are concerned, simply means "not XML".
   1.764 +    dtd = new CNavDTD();
   1.765 +  } else {
   1.766 +    NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
   1.767 +    dtd = new nsExpatDriver();
   1.768 +  }
   1.769 +  return dtd;
   1.770 +}
   1.771 +
   1.772 +NS_IMETHODIMP
   1.773 +nsParser::CancelParsingEvents()
   1.774 +{
   1.775 +  if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
   1.776 +    return NS_OK;  // No continue event is pending; nothing to revoke.
   1.777 +  }
   1.778 +  NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
   1.779 +  mContinueEvent = nullptr;  // Revoke the pending continue parsing event.
   1.780 +  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
   1.781 +  return NS_OK;
   1.782 +}
   1.783 +
   1.784 +////////////////////////////////////////////////////////////////////////
   1.785 +
   1.786 +/**
   1.787 + * Evaluates EXPR1 and EXPR2 exactly once each, in that order.  Stores the value
   1.788 + * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
   1.789 + * (which could be success or failure).
   1.790 + *
   1.791 + * To understand the motivation for this construct, consider these example
   1.792 + * methods:
   1.793 + *
   1.794 + *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
   1.795 + *     nsresult rv = NS_OK;
   1.796 + *     ...
   1.797 + *     rv = obj->DoThatThing();
   1.798 + *     NS_ENSURE_SUCCESS(rv, rv);
   1.799 + *     ...
   1.800 + *     return rv;
   1.801 + *   }
   1.802 + *
   1.803 + *   nsresult nsCaller::MakeThingsHappen() {
   1.804 + *     return mSomething->DoThatThing(mWhatever);
   1.805 + *   }
   1.806 + *
   1.807 + * Suppose, for whatever reason*, we want to shift responsibility for calling
   1.808 + * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
   1.809 + * nsCaller::MakeThingsHappen.  We might rewrite the two methods as follows:
   1.810 + *
   1.811 + *   nsresult nsSomething::DoThatThing() {
   1.812 + *     nsresult rv = NS_OK;
   1.813 + *     ...
   1.814 + *     ...
   1.815 + *     return rv;
   1.816 + *   }
   1.817 + *
   1.818 + *   nsresult nsCaller::MakeThingsHappen() {
   1.819 + *     nsresult rv;
   1.820 + *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
   1.821 + *                              mWhatever->DoThatThing(),
   1.822 + *                              rv);
   1.823 + *     return rv;
   1.824 + *   }
   1.825 + *
   1.826 + * *Possible reasons include: nsCaller doesn't want to give mSomething access
   1.827 + * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
   1.828 + * be called regardless of how nsSomething::DoThatThing behaves, &c.
   1.829 + */
   1.830 +#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
   1.831 +  nsresult RV##__temp = EXPR1;                                                \
   1.832 +  RV = EXPR2;                                                                 \
   1.833 +  if (NS_SUCCEEDED(RV)) {                                                     \
   1.834 +    RV = RV##__temp;                                                          \
   1.835 +  }                                                                           \
   1.836 +}
   1.837 +
   1.838 +/**
   1.839 + * Called just before the content model is built. DTD selection is deferred
   1.840 + * to this point: the parse mode is sniffed from the input when it is not
   1.841 + * already fixed, a DTD and tokenizer are obtained, and then both the DTD
   1.842 + * and the sink are told that model building is about to begin.
   1.843 + */
   1.844 +nsresult
   1.845 +nsParser::WillBuildModel(nsString& aFilename)
   1.846 +{
   1.847 +  if (!mParserContext) {
   1.848 +    return kInvalidParserContext;
   1.849 +  }
   1.850 +  if (mParserContext->mAutoDetectStatus != eUnknownDetect) {
   1.851 +    return NS_OK;  // The auto-detect status is already known.
   1.852 +  }
   1.853 +
   1.854 +  const bool mustSniffMode =
   1.855 +    mParserContext->mDTDMode == eDTDMode_unknown ||
   1.856 +    mParserContext->mDTDMode == eDTDMode_autodetect;
   1.857 +  if (mustSniffMode) {
   1.858 +    // Grab 1024 characters, starting at the first non-whitespace
   1.859 +    // character, to look for the doctype in.
   1.860 +    char16_t storage[1025];
   1.861 +    nsFixedString sniffed(storage, 1024, 0);
   1.862 +    mParserContext->mScanner->Peek(sniffed, 1024, mParserContext->mScanner->FirstNonWhitespacePosition());
   1.863 +    DetermineParseMode(sniffed, mParserContext->mDTDMode,
   1.864 +                       mParserContext->mDocType, mParserContext->mMimeType);
   1.865 +  }
   1.866 +
   1.867 +  NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
   1.868 +               "Clobbering DTD for non-root parser context!");
   1.869 +  mDTD = FindSuitableDTD(*mParserContext);
   1.870 +  NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
   1.871 +
   1.872 +  nsITokenizer* tokenizer;
   1.873 +  nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
   1.874 +  NS_ENSURE_SUCCESS(rv, rv);
   1.875 +
   1.876 +  // Both notifications always run. A sink failure takes precedence over the
   1.877 +  // DTD result, as it did when the DTD itself used to make the sink call.
   1.878 +  const nsresult dtdResult =
   1.879 +    mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
   1.880 +  const nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
   1.881 +  if (NS_FAILED(sinkResult)) {
   1.882 +    return sinkResult;
   1.883 +  }
   1.884 +  return dtdResult;
   1.885 +}
   1.886 +
   1.887 +/**
   1.888 + * Called when the parser is done with its input. The parser may have been
   1.889 + * called recursively, so the DTD and sink are only closed out for a
   1.890 + * context that has no previous context, and only once IsComplete().
   1.891 + */
   1.892 +nsresult
   1.893 +nsParser::DidBuildModel(nsresult anErrorCode)
   1.894 +{
   1.895 +  if (!IsComplete() || !mParserContext || mParserContext->mPrevContext) {
   1.896 +    // Not complete yet, no context, or a nested context: pass the code on.
   1.897 +    return anErrorCode;
   1.898 +  }
   1.899 +
   1.900 +  nsresult result = anErrorCode;
   1.901 +  if (mDTD && mSink) {
   1.902 +    // Let sink know if we're about to end load because we've been terminated.
   1.903 +    // In that case we don't want it to run deferred scripts.
   1.904 +    const bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
   1.905 +    const nsresult dtdResult = mDTD->DidBuildModel(anErrorCode);
   1.906 +    const nsresult sinkResult = mSink->DidBuildModel(terminated);
   1.907 +
   1.908 +    // nsIDTD::DidBuildModel used to be responsible for calling
   1.909 +    // nsIContentSink::DidBuildModel; that job now belongs to the parser.
   1.910 +    // The DTD used to NS_ENSURE_SUCCESS the sink call, so a sink failure
   1.911 +    // still takes precedence over dtdResult to preserve that behavior.
   1.912 +    result = dtdResult;
   1.913 +    if (NS_FAILED(sinkResult)) {
   1.914 +      result = sinkResult;
   1.915 +    }
   1.916 +  }
   1.917 +
   1.918 +  //Ref. to bug 61462.
   1.919 +  mParserContext->mRequest = 0;
   1.920 +  return result;
   1.921 +}
   1.922 +
   1.923 +/**
   1.924 + * Makes aContext the current parser context. The previously current context
   1.925 + * stays reachable through aContext.mPrevContext, which the caller must have
   1.926 + * set already (this is only asserted, not enforced).
   1.927 + * @param aContext the context to push; passed by reference, not by pointer
   1.928 + */
   1.929 +void
   1.930 +nsParser::PushContext(CParserContext& aContext)
   1.931 +{
   1.932 +  NS_ASSERTION(aContext.mPrevContext == mParserContext,
   1.933 +               "Trying to push a context whose previous context differs from "
   1.934 +               "the current parser context.");
   1.935 +  mParserContext = &aContext;
   1.936 +}
   1.937 +
   1.938 +/**
   1.939 + * Pops the topmost context off the stack and returns it to the caller.
   1.940 + * The next context (if any) becomes the current context and may inherit
   1.941 + * the stream listener state of the popped one.
   1.942 + *
   1.943 + * @return  the context that was current on entry (null if there was none)
   1.944 + */
   1.945 +CParserContext*
   1.946 +nsParser::PopContext()
   1.947 +{
   1.948 +  CParserContext* const popped = mParserContext;
   1.949 +  if (!popped) {
   1.950 +    return nullptr;
   1.951 +  }
   1.952 +  mParserContext = popped->mPrevContext;
   1.953 +  // Hand the stream listener state down to the new current context, but
   1.954 +  // never override onStop there, to guarantee the call to DidBuildModel().
   1.955 +  const bool inheritsState =
   1.956 +    mParserContext && mParserContext->mStreamListenerState != eOnStop;
   1.957 +  if (inheritsState) {
   1.958 +    mParserContext->mStreamListenerState = popped->mStreamListenerState;
   1.959 +  }
   1.960 +  return popped;
   1.961 +}
   1.962 +
   1.963 +/**
   1.964 + *  Assigns aBuffer to mUnusedInput. Within this file, mUnusedInput seeds
   1.965 + *  the scanner of the next string-based parser context and is truncated
   1.966 + *  afterwards (see nsParser::Parse(const nsAString&, void*, bool)).
   1.967 + *
   1.968 + *  @param   aBuffer the input to remember for a later parse.
   1.969 + *  @return  nothing
   1.970 + */
   1.971 +void
   1.972 +nsParser::SetUnusedInput(nsString& aBuffer)
   1.973 +{
   1.974 +  mUnusedInput = aBuffer;
   1.975 +}
   1.976 +
   1.977 +/**
   1.978 + *  Forcibly stops parsing for good; resuming requires restarting altogether.
   1.979 + *  Repeated calls are no-ops. Returns NS_OK, or the failure code from the
   1.980 + *  sink's DidBuildModel when no DTD exists yet and that call fails.
   1.981 + */
   1.982 +NS_IMETHODIMP
   1.983 +nsParser::Terminate(void)
   1.984 +{
   1.985 +  // We should only call DidBuildModel once, so don't do anything if this is
   1.986 +  // the second time that Terminate has been called.
   1.987 +  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
   1.988 +    return NS_OK;
   1.989 +  }
   1.990 +
   1.991 +  // XXX - [ until we figure out a way to break parser-sink circularity ]
   1.992 +  // Hack - Hold a reference until we are completely done...
   1.993 +  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
   1.994 +  nsresult result = mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
   1.995 +
   1.996 +  // CancelParsingEvents must be called to avoid leaking the nsParser object
   1.997 +  // (@see bug 108049). If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set it
   1.998 +  // resets the flag, so that the IsComplete() check inside DidBuildModel
   1.999 +  // passes and DidBuildModel gets called on the DTD.
  1.1000 +  CancelParsingEvents();
  1.1001 +
  1.1002 +  // If we got interrupted in the middle of a document.write, then we might
  1.1003 +  // have more than one parser context on our parsercontext stack. This has
  1.1004 +  // the effect of making DidBuildModel a no-op, meaning that we never call
  1.1005 +  // our sink's DidBuildModel and break the reference cycle, causing a leak.
  1.1006 +  // Since we're getting terminated, we manually clean up our context stack.
  1.1007 +  while (mParserContext && mParserContext->mPrevContext) {
  1.1008 +    CParserContext *prev = mParserContext->mPrevContext;
  1.1009 +    // Unlink first so the delete can never reach the contexts we keep.
  1.1010 +    mParserContext->mPrevContext = nullptr;
  1.1011 +    delete mParserContext;
  1.1012 +    mParserContext = prev;
  1.1013 +  }
  1.1014 +
  1.1015 +  if (mDTD) {
  1.1016 +    mDTD->Terminate();
  1.1017 +    DidBuildModel(result);
  1.1018 +  } else if (mSink) {
  1.1019 +    // We have no parser context or no DTD yet (so we got terminated before we
  1.1020 +    // got any data).  Manually break the reference cycle with the sink.
  1.1021 +    result = mSink->DidBuildModel(true);
  1.1022 +    NS_ENSURE_SUCCESS(result, result);
  1.1023 +  }
  1.1024 +
  1.1025 +  return NS_OK;
  1.1026 +}
  1.1027 +
  1.1028 +// Resumes parsing after an interruption. Returns NS_OK right away when
  1.1029 +// network data may not be processed at the moment; otherwise returns NS_OK
  1.1030 +// if ResumeParse() does, and mInternalState for any other ResumeParse()
  1.1031 +// result.
  1.1032 +NS_IMETHODIMP
  1.1033 +nsParser::ContinueInterruptedParsing()
  1.1034 +{
  1.1035 +  // If there are scripts executing, then the content sink is jumping the gun
  1.1036 +  // (probably due to a synchronous XMLHttpRequest) and will re-enable us
  1.1037 +  // later, see bug 460706.
  1.1038 +  if (!IsOkToProcessNetworkData()) {
  1.1039 +    return NS_OK;
  1.1040 +  }
  1.1041 +
  1.1042 +  // If the stream has already finished, there's a good chance
  1.1043 +  // that we might start closing things down when the parser
  1.1044 +  // is reenabled. To make sure that we're not deleted across
  1.1045 +  // the reenabling process, hold a reference to ourselves.
  1.1046 +  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
  1.1047 +  nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
  1.1048 +
  1.1049 +#ifdef DEBUG
  1.1050 +  if (!(mFlags & NS_PARSER_FLAG_PARSER_ENABLED)) {
  1.1051 +    NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
  1.1052 +  }
  1.1053 +#endif
  1.1054 +
  1.1055 +  const bool isFinalChunk =
  1.1056 +    mParserContext && mParserContext->mStreamListenerState == eOnStop;
  1.1057 +
  1.1058 +  mProcessingNetworkData = true;
  1.1059 +  if (sinkDeathGrip) {
  1.1060 +    sinkDeathGrip->WillParse();
  1.1061 +  }
  1.1062 +  const nsresult resumeResult = ResumeParse(true, isFinalChunk); // Ref. bug 57999
  1.1063 +  mProcessingNetworkData = false;
  1.1064 +
  1.1065 +  // Any result other than NS_OK is reported as the parser's internal state.
  1.1066 +  return (resumeResult == NS_OK) ? resumeResult : mInternalState;
  1.1067 +}
  1.1068 +
  1.1069 +/**
  1.1070 + *  Stops parsing temporarily by clearing NS_PARSER_FLAG_PARSER_ENABLED, i.e.
  1.1071 + *  it keeps the parser from building up the content model until unblocked.
  1.1072 + */
  1.1073 +NS_IMETHODIMP_(void)
  1.1074 +nsParser::BlockParser()
  1.1075 +{
  1.1076 +  mFlags &= ~NS_PARSER_FLAG_PARSER_ENABLED;
  1.1077 +}
  1.1078 +
  1.1079 +/**
  1.1080 + *  Re-enables the parser for tokenization, building up the content model,
  1.1081 + *  etc. by setting NS_PARSER_FLAG_PARSER_ENABLED. Parsing is not resumed
  1.1082 + *  automatically; restarting the parsing engine is the caller's job. A
  1.1083 + *  warning is issued if the parser was not blocked.
  1.1084 + */
  1.1085 +NS_IMETHODIMP_(void)
  1.1086 +nsParser::UnblockParser()
  1.1087 +{
  1.1088 +  if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
  1.1089 +    NS_WARNING("Trying to unblock an unblocked parser.");
  1.1090 +    return;
  1.1091 +  }
  1.1092 +  mFlags |= NS_PARSER_FLAG_PARSER_ENABLED;
  1.1093 +}
  1.1094 +
  1.1095 +NS_IMETHODIMP_(void)
  1.1096 +nsParser::ContinueInterruptedParsingAsync()
  1.1097 +{
  1.1098 +  if (mSink) mSink->ContinueInterruptedParsingAsync(); // no sink: nothing to do
  1.1099 +}
  1.1100 +
  1.1101 +/**
  1.1102 + * Returns true while NS_PARSER_FLAG_PARSER_ENABLED is set in mFlags.
  1.1103 + */
  1.1104 +NS_IMETHODIMP_(bool)
  1.1105 +nsParser::IsParserEnabled()
  1.1106 +{
  1.1107 +  return (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) != 0;
  1.1108 +}
  1.1109 +
  1.1110 +/**
  1.1111 + * Reports whether the parser thinks it is done: true if no continue event is pending.
  1.1112 + */
  1.1113 +NS_IMETHODIMP_(bool)
  1.1114 +nsParser::IsComplete()
  1.1115 +{
  1.1116 +  return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
  1.1117 +}
  1.1118 +
  1.1119 +
  1.1120 +void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
  1.1121 +{
  1.1122 +  // Ignore any revoked continue events (ev is no longer mContinueEvent).
  1.1123 +  if (mContinueEvent != ev)
  1.1124 +    return;
  1.1125 +
  1.1126 +  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; // no longer pending
  1.1127 +  mContinueEvent = nullptr;
  1.1128 +
  1.1129 +  NS_ASSERTION(IsOkToProcessNetworkData(),
  1.1130 +               "Interrupted in the middle of a script?");
  1.1131 +  ContinueInterruptedParsing(); // return value is not inspected here
  1.1132 +}
  1.1133 +
  1.1134 +bool
  1.1135 +nsParser::IsInsertionPointDefined()
  1.1136 +{
  1.1137 +  return false; // This implementation unconditionally answers false.
  1.1138 +}
  1.1139 +
  1.1140 +void
  1.1141 +nsParser::BeginEvaluatingParserInsertedScript()
  1.1142 +{ // No-op: this implementation does nothing.
  1.1143 +}
  1.1144 +
  1.1145 +void
  1.1146 +nsParser::EndEvaluatingParserInsertedScript()
  1.1147 +{ // No-op: this implementation does nothing.
  1.1148 +}
  1.1149 +
  1.1150 +void
  1.1151 +nsParser::MarkAsNotScriptCreated(const char* aCommand)
  1.1152 +{ // No-op: aCommand is ignored by this implementation.
  1.1153 +}
  1.1154 +
  1.1155 +bool
  1.1156 +nsParser::IsScriptCreated()
  1.1157 +{
  1.1158 +  return false; // This implementation unconditionally answers false.
  1.1159 +}
  1.1160 +
  1.1161 +/**
  1.1162 + *  Entry point for parsing the document at aURL. A scanner named after the
  1.1163 + *  URL spec and a new parser context (multipart, URL type, DTD mode aMode)
  1.1164 + *  are created and the context is pushed; no data is consumed here.
  1.1165 + *  Returns kBadURL for a null aURL, the GetSpec() result if that is not
  1.1166 + *  NS_OK, and NS_ERROR_HTMLPARSER_BADCONTEXT if an allocation is null.
  1.1167 + */
  1.1168 +NS_IMETHODIMP
  1.1169 +nsParser::Parse(nsIURI* aURL,
  1.1170 +                nsIRequestObserver* aListener,
  1.1171 +                void* aKey,
  1.1172 +                nsDTDMode aMode)
  1.1173 +{
  1.1174 +  NS_PRECONDITION(aURL, "Error: Null URL given");
  1.1175 +
  1.1176 +  // The observer is recorded even when aURL turns out to be null.
  1.1177 +  mObserver = aListener;
  1.1178 +  if (!aURL) {
  1.1179 +    return kBadURL;
  1.1180 +  }
  1.1181 +
  1.1182 +  nsAutoCString spec;
  1.1183 +  const nsresult specResult = aURL->GetSpec(spec);
  1.1184 +  if (specResult != NS_OK) {
  1.1185 +    return specResult;
  1.1186 +  }
  1.1187 +  NS_ConvertUTF8toUTF16 scannerName(spec);
  1.1188 +
  1.1189 +  nsScanner* theScanner = new nsScanner(scannerName, false);
  1.1190 +  CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
  1.1191 +                                          mCommand, aListener);
  1.1192 +  if (!pc || !theScanner) {
  1.1193 +    mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
  1.1194 +    return mInternalState;
  1.1195 +  }
  1.1196 +
  1.1197 +  // Configure the new context for URL loading and make it current.
  1.1198 +  pc->mMultipart = true;
  1.1199 +  pc->mContextType = CParserContext::eCTURL;
  1.1200 +  pc->mDTDMode = aMode;
  1.1201 +  PushContext(*pc);
  1.1202 +  return NS_OK;
  1.1203 +}
  1.1204 +
  1.1205 +/**
  1.1206 + * Used by XML fragment parsing below. Feeds aSourceBuffer to the parser
  1.1207 + * context whose key is aKey, creating and pushing a new application/xml
  1.1208 + * context when none matches. aLastCall marks the final chunk of input.
  1.1209 + */
  1.1210 +nsresult
  1.1211 +nsParser::Parse(const nsAString& aSourceBuffer,
  1.1212 +                void* aKey,
  1.1213 +                bool aLastCall)
  1.1214 +{
  1.1215 +  nsresult result = NS_OK;
  1.1216 +
  1.1217 +  // Don't bother if we're never going to parse this.
  1.1218 +  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
  1.1219 +    return result;
  1.1220 +  }
  1.1221 +
  1.1222 +  if (!aLastCall && aSourceBuffer.IsEmpty()) {
  1.1223 +    // Nothing is being passed to the parser so return
  1.1224 +    // immediately. mUnusedInput will get processed when
  1.1225 +    // some data is actually passed in.
  1.1226 +    // But if this is the last call, make sure to finish up
  1.1227 +    // stuff correctly.
  1.1228 +    return result;
  1.1229 +  }
  1.1230 +
  1.1231 +  // Maintain a reference to ourselves so we don't go away
  1.1232 +  // till we're completely done.
  1.1233 +  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
  1.1234 +
  1.1235 +  if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
  1.1236 +    // Note: The following code will always find the parser context associated
  1.1237 +    // with the given key, even if that context has been suspended (e.g., for
  1.1238 +    // another document.write call). This doesn't appear to be exactly what IE
  1.1239 +    // does in the case where this happens, but this makes more sense.
  1.1240 +    CParserContext* pc = mParserContext;
  1.1241 +    while (pc && pc->mKey != aKey) {
  1.1242 +      pc = pc->mPrevContext;
  1.1243 +    }
  1.1244 +
  1.1245 +    if (!pc) {
  1.1246 +      // No context on the stack matches aKey (the stack is empty or every
  1.1247 +      // context has a different key), so create and push a fresh one.
  1.1248 +      nsScanner* theScanner = new nsScanner(mUnusedInput);
  1.1249 +      NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
  1.1250 +
  1.1251 +      eAutoDetectResult theStatus = eUnknownDetect;
  1.1252 +
  1.1253 +      if (mParserContext &&
  1.1254 +          mParserContext->mMimeType.EqualsLiteral("application/xml")) {
  1.1255 +        // Ref. Bug 90379
  1.1256 +        NS_ASSERTION(mDTD, "How come the DTD is null?");
  1.1257 +
  1.1258 +        if (mParserContext) {
  1.1259 +          theStatus = mParserContext->mAutoDetectStatus;
  1.1260 +          // Added this to fix bug 32022.
  1.1261 +        }
  1.1262 +      }
  1.1263 +
  1.1264 +      pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
  1.1265 +                              0, theStatus, aLastCall);
  1.1266 +      NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
  1.1267 +
  1.1268 +      PushContext(*pc);
  1.1269 +
  1.1270 +      pc->mMultipart = !aLastCall; // By default
  1.1271 +      if (pc->mPrevContext) {
  1.1272 +        pc->mMultipart |= pc->mPrevContext->mMultipart;
  1.1273 +      }
  1.1274 +
  1.1275 +      // Start fix bug 40143
  1.1276 +      if (pc->mMultipart) {
  1.1277 +        pc->mStreamListenerState = eOnDataAvail;
  1.1278 +        if (pc->mScanner) {
  1.1279 +          pc->mScanner->SetIncremental(true);
  1.1280 +        }
  1.1281 +      } else {
  1.1282 +        pc->mStreamListenerState = eOnStop;
  1.1283 +        if (pc->mScanner) {
  1.1284 +          pc->mScanner->SetIncremental(false);
  1.1285 +        }
  1.1286 +      }
  1.1287 +      // end fix for 40143
  1.1288 +
  1.1289 +      pc->mContextType=CParserContext::eCTString;
  1.1290 +      pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
  1.1291 +      pc->mDTDMode = eDTDMode_full_standards;
  1.1292 +
  1.1293 +      mUnusedInput.Truncate();
  1.1294 +
  1.1295 +      pc->mScanner->Append(aSourceBuffer);
  1.1296 +      // Do not interrupt document.write() - bug 95487
  1.1297 +      result = ResumeParse(false, false, false);
  1.1298 +    } else {
  1.1299 +      pc->mScanner->Append(aSourceBuffer);
  1.1300 +      if (!pc->mPrevContext) {
  1.1301 +        // Set stream listener state to eOnStop, on the final context - Fix 68160,
  1.1302 +        // to guarantee DidBuildModel() call - Fix 36148
  1.1303 +        if (aLastCall) {
  1.1304 +          pc->mStreamListenerState = eOnStop;
  1.1305 +          pc->mScanner->SetIncremental(false);
  1.1306 +        }
  1.1307 +
  1.1308 +        if (pc == mParserContext) {
  1.1309 +          // If pc is not mParserContext, then this call to ResumeParse would
  1.1310 +          // do the wrong thing and try to continue parsing using
  1.1311 +          // mParserContext. We need to wait to actually resume parsing on pc.
  1.1312 +          ResumeParse(false, false, false); // NOTE(review): result is dropped here
  1.1313 +        }
  1.1314 +      }
  1.1315 +    }
  1.1316 +  }
  1.1317 +
  1.1318 +  return result;
  1.1319 +}
  1.1320 +
  1.1321 +NS_IMETHODIMP
  1.1322 +nsParser::ParseFragment(const nsAString& aSourceBuffer,
  1.1323 +                        nsTArray<nsString>& aTagStack)
  1.1324 +{
  1.1325 +  // Parses aSourceBuffer as a fragment nested inside the elements named by
  1.1326 +  // aTagStack. Start tags are synthesized from aTagStack in reverse index
  1.1327 +  // order and fed to Parse() first; then the fragment itself; then, when
  1.1328 +  // aTagStack is non-empty, the matching end tags. Observers are disabled
  1.1329 +  // for the duration and re-enabled on every exit path.
  1.1330 +  const uint32_t theCount = aTagStack.Length();
  1.1331 +  mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
  1.1332 +
  1.1333 +  nsAutoString theContext;
  1.1334 +  for (uint32_t theIndex = 0; theIndex < theCount; theIndex++) {
  1.1335 +    theContext.AppendLiteral("<");
  1.1336 +    theContext.Append(aTagStack[theCount - theIndex - 1]);
  1.1337 +    theContext.AppendLiteral(">");
  1.1338 +  }
  1.1339 +  if (theCount == 0) {
  1.1340 +    // Ensure that the buffer is not empty. Because none of the DTDs care
  1.1341 +    // about leading whitespace, this doesn't change the result.
  1.1342 +    theContext.AssignLiteral(" ");
  1.1343 +  }
  1.1344 +
  1.1345 +  // First, parse the context to build up the DTD's tag stack. Note that we
  1.1346 +  // pass false for the aLastCall parameter.
  1.1347 +  nsresult result = Parse(theContext, &theContext, false);
  1.1348 +  if (NS_FAILED(result)) {
  1.1349 +    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
  1.1350 +    return result;
  1.1351 +  }
  1.1352 +
  1.1353 +  if (!mSink) {
  1.1354 +    // Parse must have failed in the XML case and so the sink was killed.
  1.1355 +    // Re-enable observers here too, so this exit matches the others.
  1.1356 +    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
  1.1357 +    return NS_ERROR_HTMLPARSER_STOPPARSING;
  1.1358 +  }
  1.1359 +
  1.1360 +  nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
  1.1361 +  NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
  1.1362 +  if (!fragSink) {
  1.1363 +    // Don't dereference a null sink if the QueryInterface failed; report
  1.1364 +    // the missing interface to the caller instead.
  1.1365 +    mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
  1.1366 +    return NS_ERROR_NO_INTERFACE;
  1.1367 +  }
  1.1368 +
  1.1369 +  fragSink->WillBuildContent();
  1.1370 +  // Now, parse the actual content. Note that this is the last call
  1.1371 +  // for HTML content, but for XML, we will want to build and parse
  1.1372 +  // the end tags.  However, if tagStack is empty, it's the last call
  1.1373 +  // for XML as well.
  1.1374 +  if (theCount == 0) {
  1.1375 +    result = Parse(aSourceBuffer, &theContext, true);
  1.1376 +    fragSink->DidBuildContent();
  1.1377 +  } else {
  1.1378 +    // Add an end tag chunk, so expat will read the whole source buffer,
  1.1379 +    // and not worry about ']]' etc.
  1.1380 +    result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
  1.1381 +                   &theContext, false);
  1.1382 +    fragSink->DidBuildContent();
  1.1383 +
  1.1384 +    if (NS_SUCCEEDED(result)) {
  1.1385 +      nsAutoString endContext;
  1.1386 +      for (uint32_t theIndex = 0; theIndex < theCount; theIndex++) {
  1.1387 +        // we already added an end tag chunk above
  1.1388 +        if (theIndex > 0) {
  1.1389 +          endContext.AppendLiteral("</");
  1.1390 +        }
  1.1391 +
  1.1392 +        nsString& thisTag = aTagStack[theIndex];
  1.1393 +        // was there an xmlns=?
  1.1394 +        int32_t endOfTag = thisTag.FindChar(char16_t(' '));
  1.1395 +        if (endOfTag == -1) {
  1.1396 +          endContext.Append(thisTag);
  1.1397 +        } else {
  1.1398 +          endContext.Append(Substring(thisTag, 0, endOfTag));
  1.1399 +        }
  1.1400 +
  1.1401 +        endContext.AppendLiteral(">");
  1.1402 +      }
  1.1403 +
  1.1404 +      result = Parse(endContext, &theContext, true);
  1.1405 +    }
  1.1406 +  }
  1.1407 +
  1.1408 +  mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
  1.1409 +  return result;
  1.1410 +}
  1.1411 +
  1.1412 +/**
  1.1413 + *  This routine is called to cause the parser to continue parsing its
  1.1414 + *  underlying stream.  This call allows the parse process to happen in
  1.1415 + *  chunks, such as when the content is push based, and we need to parse in
  1.1416 + *  pieces.
  1.1417 + *
  1.1418 + *  An interesting change in how the parser gets used has led us to add extra
  1.1419 + *  processing to this method.  The case occurs when the parser is blocked in
  1.1420 + *  one context, and gets a parse(string) call in another context.  In this
  1.1421 + *  case, the parserContexts are linked. No problem.
  1.1422 + *
  1.1423 + *  The problem is that Parse(string) assumes that it can proceed unabated,
  1.1424 + *  but if the parser is already blocked that assumption is false. So we
  1.1425 + *  needed to add a mechanism here to allow the parser to continue to process
  1.1426 + *  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
  1.1427 + *  out of contexts.
  1.1428 + *
  1.1429 + *  @param   allowIteration : set to true if non-script resumption is requested
  1.1430 + *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
  1.1431 + *  @param   aCanInterrupt : not referenced in this function's body.
  1.1432 + *  @return  error code -- 0 if ok, non-zero if error (INTERRUPTED maps to 0).
  1.1433 + */
  1.1434 +nsresult
  1.1435 +nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
  1.1436 +                      bool aCanInterrupt)
  1.1437 +{
  1.1438 +  nsresult result = NS_OK;
  1.1439 +  // Everything below is skipped while the parser is disabled or stopped.
  1.1440 +  if ((mFlags & NS_PARSER_FLAG_PARSER_ENABLED) &&
  1.1441 +      mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
  1.1442 +
  1.1443 +    result = WillBuildModel(mParserContext->mScanner->GetFilename());
  1.1444 +    if (NS_FAILED(result)) {
  1.1445 +      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
  1.1446 +      return result;
  1.1447 +    }
  1.1448 +
  1.1449 +    if (mDTD) {
  1.1450 +      mSink->WillResume();
  1.1451 +      bool theIterationIsOk = true;
  1.1452 +
  1.1453 +      while (result == NS_OK && theIterationIsOk) {
  1.1454 +        if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
  1.1455 +          // -- Ref: Bug# 22485 --
  1.1456 +          // Insert the unused input into the source buffer
  1.1457 +          // as if it was read from the input stream.
  1.1458 +          // Adding UngetReadable() per vidur!!
  1.1459 +          mParserContext->mScanner->UngetReadable(mUnusedInput);
  1.1460 +          mUnusedInput.Truncate(0);
  1.1461 +        }
  1.1462 +
  1.1463 +        // Only allow parsing to be interrupted in the subsequent call to
  1.1464 +        // build model.
  1.1465 +        nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
  1.1466 +                                      ? Tokenize(aIsFinalChunk)
  1.1467 +                                      : NS_OK;
  1.1468 +        result = BuildModel();
  1.1469 +
  1.1470 +        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
  1.1471 +          PostContinueEvent();
  1.1472 +        }
  1.1473 +        // Loop again only if input remains and we were not interrupted.
  1.1474 +        theIterationIsOk = theTokenizerResult != kEOF &&
  1.1475 +                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;
  1.1476 +
  1.1477 +        // Make sure not to stop parsing too early. Therefore, before shutting
  1.1478 +        // down the parser, it's important to check whether the input buffer
  1.1479 +        // has been scanned to completion (theTokenizerResult should be kEOF).
  1.1480 +        // kEOF -> End of buffer.
  1.1481 +
  1.1482 +        // If we're told to block the parser, we disable all further parsing
  1.1483 +        // (and cache any data coming in) until the parser is re-enabled.
  1.1484 +        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
  1.1485 +          mSink->WillInterrupt();
  1.1486 +          if (mFlags & NS_PARSER_FLAG_PARSER_ENABLED) {
  1.1487 +            // If we were blocked by a recursive invocation, don't re-block.
  1.1488 +            BlockParser();
  1.1489 +          }
  1.1490 +          return NS_OK;
  1.1491 +        }
  1.1492 +        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
  1.1493 +          // Note: Parser Terminate() calls DidBuildModel.
  1.1494 +          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
  1.1495 +            DidBuildModel(mStreamStatus);
  1.1496 +            mInternalState = result;
  1.1497 +          }
  1.1498 +
  1.1499 +          return NS_OK;
  1.1500 +        }
  1.1501 +        if ((NS_OK == result && theTokenizerResult == kEOF) ||
  1.1502 +             result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
  1.1503 +          bool theContextIsStringBased =
  1.1504 +            CParserContext::eCTString == mParserContext->mContextType;
  1.1505 +
  1.1506 +          if (mParserContext->mStreamListenerState == eOnStop ||
  1.1507 +              !mParserContext->mMultipart || theContextIsStringBased) {
  1.1508 +            if (!mParserContext->mPrevContext) {
  1.1509 +              if (mParserContext->mStreamListenerState == eOnStop) {
  1.1510 +                DidBuildModel(mStreamStatus);
  1.1511 +                return NS_OK;
  1.1512 +              }
  1.1513 +            } else {
  1.1514 +              CParserContext* theContext = PopContext();
  1.1515 +              if (theContext) {
  1.1516 +                theIterationIsOk = allowIteration && theContextIsStringBased;
  1.1517 +                if (theContext->mCopyUnused) {
  1.1518 +                  theContext->mScanner->CopyUnusedData(mUnusedInput);
  1.1519 +                }
  1.1520 +
  1.1521 +                delete theContext;
  1.1522 +              }
  1.1523 +
  1.1524 +              result = mInternalState;
  1.1525 +              aIsFinalChunk = mParserContext &&
  1.1526 +                              mParserContext->mStreamListenerState == eOnStop;
  1.1527 +              // ...then intentionally fall through to mSink->WillInterrupt()...
  1.1528 +            }
  1.1529 +          }
  1.1530 +        }
  1.1531 +
  1.1532 +        if (theTokenizerResult == kEOF ||
  1.1533 +            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
  1.1534 +          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
  1.1535 +          mSink->WillInterrupt();
  1.1536 +        }
  1.1537 +      }
  1.1538 +    } else {
  1.1539 +      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
  1.1540 +    }
  1.1541 +  }
  1.1542 +
  1.1543 +  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
  1.1544 +}
  1.1545 +
  1.1546 +/**
  1.1547 + *  Fetches the tokenizer from the current parser context (if any) and
  1.1548 + *  hands it, together with the sink, to the DTD's BuildModel().
  1.1549 + */
  1.1550 +nsresult
  1.1551 +nsParser::BuildModel()
  1.1552 +{
  1.1553 +  nsITokenizer* tokenizer = nullptr;
  1.1554 +  nsresult rv = mParserContext
  1.1555 +                ? mParserContext->GetTokenizer(mDTD, mSink, tokenizer)
  1.1556 +                : NS_OK;
  1.1557 +
  1.1558 +  if (NS_FAILED(rv)) {
  1.1559 +    // No tokenizer could be obtained: record that in mInternalState too.
  1.1560 +    mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
  1.1561 +    return NS_ERROR_HTMLPARSER_BADTOKENIZER;
  1.1562 +  }
  1.1563 +
  1.1564 +  if (!mDTD) {
  1.1565 +    return rv;
  1.1566 +  }
  1.1567 +  return mDTD->BuildModel(tokenizer, mSink);
  1.1568 +}
  1.1569 +
  1.1570 +/*******************************************************************
  1.1571 +  These methods are used to talk to the netlib system...
  1.1572 + *******************************************************************/
  1.1573 +
  1.1574 +nsresult
  1.1575 +nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
  1.1576 +{
  1.1577 +  // Stream-listener entry point: forwards the notification to mObserver,
  1.1578 +  // resets the current context's listener state, drops any DTD, and records
  1.1579 +  // the channel's content type when one can be obtained.
  1.1580 +  NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
  1.1581 +                  "Parser's nsIStreamListener API was not setup "
  1.1582 +                  "correctly in constructor.");
  1.1583 +  if (mObserver) {
  1.1584 +    mObserver->OnStartRequest(request, aContext);
  1.1585 +  }
  1.1586 +
  1.1587 +  mParserContext->mStreamListenerState = eOnStart;
  1.1588 +  mParserContext->mAutoDetectStatus = eUnknownDetect;
  1.1589 +  mParserContext->mRequest = request;
  1.1590 +
  1.1591 +  NS_ASSERTION(!mParserContext->mPrevContext,
  1.1592 +               "Clobbering DTD for non-root parser context!");
  1.1593 +  mDTD = nullptr;
  1.1594 +
  1.1595 +  // The content-type lookup result is not propagated to the caller; this
  1.1596 +  // method returns NS_OK either way.
  1.1597 +  nsAutoCString mimeType;
  1.1598 +  nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
  1.1599 +  if (channel && NS_SUCCEEDED(channel->GetContentType(mimeType))) {
  1.1600 +    mParserContext->SetMimeType(mimeType);
  1.1601 +  }
  1.1602 +
  1.1603 +  return NS_OK;
  1.1604 +}
  1.1605 +// Sniffs the encoding label from a leading "<?xml ... ?>" declaration.
  1.1606 +static bool
  1.1607 +ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
  1.1608 +                                 nsCString& oCharset)
  1.1609 +{
  1.1610 +  // This code is rather pointless to have. Might as well reuse expat as
  1.1611 +  // seen in nsHtml5StreamParser. -- hsivonen
  1.1612 +  oCharset.Truncate();  // start from an empty result
  1.1613 +  if ((aLen >= 5) &&
  1.1614 +      ('<' == aBytes[0]) &&
  1.1615 +      ('?' == aBytes[1]) &&
  1.1616 +      ('x' == aBytes[2]) &&
  1.1617 +      ('m' == aBytes[3]) &&
  1.1618 +      ('l' == aBytes[4])) {
  1.1619 +    int32_t i;
  1.1620 +    bool versionFound = false, encodingFound = false;
  1.1621 +    for (i = 6; i < aLen && !encodingFound; ++i) {  // 6: past "<?xml" + 1 char
  1.1622 +      // end of XML declaration?
  1.1623 +      if ((((char*) aBytes)[i] == '?') &&
  1.1624 +          ((i + 1) < aLen) &&
  1.1625 +          (((char*) aBytes)[i + 1] == '>')) {
  1.1626 +        break;
  1.1627 +      }
  1.1628 +      // Version is required.
  1.1629 +      if (!versionFound) {
  1.1630 +        // Want to avoid string comparisons, hence looking for 'n'
  1.1631 +        // and only if found check the string leading to it. Not
  1.1632 +        // foolproof, but fast.
  1.1633 +        // The shortest string allowed before this is  (strlen==13):
  1.1634 +        // <?xml version
  1.1635 +        if ((((char*) aBytes)[i] == 'n') &&
  1.1636 +            (i >= 12) &&
  1.1637 +            (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) {
  1.1638 +          // Fast forward through version
  1.1639 +          char q = 0;  // opening quote character; 0 until one is seen
  1.1640 +          for (++i; i < aLen; ++i) {
  1.1641 +            char qi = ((char*) aBytes)[i];
  1.1642 +            if (qi == '\'' || qi == '"') {
  1.1643 +              if (q && q == qi) {
  1.1644 +                //  ending quote
  1.1645 +                versionFound = true;
  1.1646 +                break;
  1.1647 +              } else {
  1.1648 +                // Starting quote
  1.1649 +                q = qi;
  1.1650 +              }
  1.1651 +            }
  1.1652 +          }
  1.1653 +        }
  1.1654 +      } else {
  1.1655 +        // encoding must follow version
  1.1656 +        // Want to avoid string comparisons, hence looking for 'g'
  1.1657 +        // and only if found check the string leading to it. Not
  1.1658 +        // foolproof, but fast.
  1.1659 +        // The shortest allowed string before this (strlen==26):
  1.1660 +        // <?xml version="1" encoding
  1.1661 +        if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp(
  1.1662 +            "encodin", (char*) (aBytes + i - 7), 7))) {
  1.1663 +          int32_t encStart = 0;  // index of the first char of the quoted value
  1.1664 +          char q = 0;
  1.1665 +          for (++i; i < aLen; ++i) {
  1.1666 +            char qi = ((char*) aBytes)[i];
  1.1667 +            if (qi == '\'' || qi == '"') {
  1.1668 +              if (q && q == qi) {
  1.1669 +                int32_t count = i - encStart;
  1.1670 +                // Reject UTF-16. NOTE(review): prefixes such as "UTF" match too?
  1.1671 +                if (count > 0 && PL_strncasecmp("UTF-16",
  1.1672 +                    (char*) (aBytes + encStart), count)) {
  1.1673 +                  oCharset.Assign((char*) (aBytes + encStart), count);
  1.1674 +                }
  1.1675 +                encodingFound = true;
  1.1676 +                break;
  1.1677 +              } else {
  1.1678 +                encStart = i + 1;
  1.1679 +                q = qi;
  1.1680 +              }
  1.1681 +            }
  1.1682 +          }
  1.1683 +        }
  1.1684 +      } // if (!versionFound)
  1.1685 +    } // for
  1.1686 +  }
  1.1687 +  return !oCharset.IsEmpty();  // true only if a label was stored
  1.1688 +}
  1.1689 +// Advances aStart by one and returns the char there, or '\0' at aEnd.
  1.1690 +inline const char
  1.1691 +GetNextChar(nsACString::const_iterator& aStart,
  1.1692 +            nsACString::const_iterator& aEnd)
  1.1693 +{
  1.1694 +  NS_ASSERTION(aStart != aEnd, "end of buffer");
  1.1695 +  return (++aStart == aEnd) ? '\0' : *aStart;
  1.1696 +}
  1.1697 +// ReadSegments() callback that reports every byte as consumed, storing none.
  1.1698 +static NS_METHOD
  1.1699 +NoOpParserWriteFunc(nsIInputStream* in,
  1.1700 +                void* closure,
  1.1701 +                const char* fromRawSegment,
  1.1702 +                uint32_t toOffset,
  1.1703 +                uint32_t count,
  1.1704 +                uint32_t *writeCount)
  1.1705 +{
  1.1706 +  *writeCount = count;  // claim the whole segment without looking at it
  1.1707 +  return NS_OK;
  1.1708 +}
  1.1709 +// Closure handed to ParserWriteFunc through ReadSegments().
  1.1710 +struct ParserWriteStruct {
  1.1711 +  bool mNeedCharsetCheck;  // cleared by ParserWriteFunc on its first call
  1.1712 +  nsParser* mParser;       // parser whose charset is queried and updated
  1.1713 +  nsScanner* mScanner;     // scanner that receives the raw bytes
  1.1714 +  nsIRequest* mRequest;    // request passed along with each Append()
  1.1715 +};
  1.1716 +
  1.1717 +/*
  1.1718 + * Segment callback for a stream's ReadSegments() method: it is called once
  1.1719 + * for each contiguous buffer of data in the underlying stream or pipe, so
  1.1720 + * the bytes can go to the scanner without first being copied out of the
  1.1721 + * stream. While mNeedCharsetCheck is set it also settles the charset.
  1.1722 + */
  1.1723 +static NS_METHOD
  1.1724 +ParserWriteFunc(nsIInputStream* in,
  1.1725 +                void* closure,
  1.1726 +                const char* fromRawSegment,
  1.1727 +                uint32_t toOffset,
  1.1728 +                uint32_t count,
  1.1729 +                uint32_t *writeCount)
  1.1730 +{
  1.1731 +  ParserWriteStruct* state = static_cast<ParserWriteStruct*>(closure);
  1.1732 +  if (!state) {
  1.1733 +    return NS_ERROR_FAILURE;
  1.1734 +  }
  1.1735 +
  1.1736 +  if (state->mNeedCharsetCheck) {
  1.1737 +    state->mNeedCharsetCheck = false;
  1.1738 +
  1.1739 +    const unsigned char* bytes =
  1.1740 +      reinterpret_cast<const unsigned char*>(fromRawSegment);
  1.1741 +    int32_t source;
  1.1742 +    nsAutoCString preferred;
  1.1743 +    nsAutoCString candidate;
  1.1744 +    state->mParser->GetDocumentCharset(preferred, source);
  1.1745 +
  1.1746 +    // This code was bogus when I found it. It expects the BOM or the XML
  1.1747 +    // declaration to be entirely in the first network buffer. -- hsivonen
  1.1748 +    if (nsContentUtils::CheckForBOM(bytes, count, candidate)) {
  1.1749 +      // The decoder will swallow the BOM. The UTF-16 will re-sniff for
  1.1750 +      // endianness. The value of preferred is now either "UTF-8" or "UTF-16".
  1.1751 +      preferred.Assign(candidate);
  1.1752 +      source = kCharsetFromByteOrderMark;
  1.1753 +    } else if (source < kCharsetFromChannel) {
  1.1754 +      // No BOM, and source ranks below the channel: try the XML declaration.
  1.1755 +      nsAutoCString declCharset;
  1.1756 +      if (ExtractCharsetFromXmlDeclaration(bytes, count, declCharset) &&
  1.1757 +          EncodingUtils::FindEncodingForLabel(declCharset, candidate)) {
  1.1758 +        preferred.Assign(candidate);
  1.1759 +        source = kCharsetFromMetaTag;
  1.1760 +      }
  1.1761 +    }
  1.1762 +
  1.1763 +    state->mParser->SetDocumentCharset(preferred, source);
  1.1764 +    state->mParser->SetSinkCharset(preferred);
  1.1765 +  }
  1.1766 +
  1.1767 +  // Hand the raw bytes to the scanner; they are reported as consumed only
  1.1768 +  // when the append succeeded.
  1.1769 +  const nsresult rv =
  1.1770 +    state->mScanner->Append(fromRawSegment, count, state->mRequest);
  1.1771 +  if (NS_FAILED(rv)) {
  1.1772 +    return rv;
  1.1773 +  }
  1.1774 +
  1.1775 +  *writeCount = count;
  1.1776 +  return rv;
  1.1777 +}
  1.1778 +// Stream listener: feeds newly arrived data to the scanner and resumes parsing.
  1.1779 +nsresult
  1.1780 +nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
  1.1781 +                          nsIInputStream *pIStream, uint64_t sourceOffset,
  1.1782 +                          uint32_t aLength)
  1.1783 +{
  1.1784 +  NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
  1.1785 +                   eOnDataAvail == mParserContext->mStreamListenerState),
  1.1786 +            "Error: OnStartRequest() must be called before OnDataAvailable()");
  1.1787 +  NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
  1.1788 +                  "Must have a buffered input stream");
  1.1789 +
  1.1790 +  nsresult rv = NS_OK;
  1.1791 +
  1.1792 +  if (mIsAboutBlank) {
  1.1793 +    MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
  1.1794 +    // ... but if an extension tries to feed us data for about:blank in a
  1.1795 +    // release build, silently ignore the data.
  1.1796 +    uint32_t totalRead;
  1.1797 +    rv = pIStream->ReadSegments(NoOpParserWriteFunc,
  1.1798 +                                nullptr,
  1.1799 +                                aLength,
  1.1800 +                                &totalRead);
  1.1801 +    return rv;
  1.1802 +  }
  1.1803 +
  1.1804 +  CParserContext *theContext = mParserContext;
  1.1805 +  // Find the context this request belongs to; none means NS_ERROR_UNEXPECTED.
  1.1806 +  while (theContext && theContext->mRequest != request) {
  1.1807 +    theContext = theContext->mPrevContext;
  1.1808 +  }
  1.1809 +
  1.1810 +  if (theContext) {
  1.1811 +    theContext->mStreamListenerState = eOnDataAvail;
  1.1812 +    // presumably skips what the scanner already holds -- TODO confirm
  1.1813 +    if (eInvalidDetect == theContext->mAutoDetectStatus) {
  1.1814 +      if (theContext->mScanner) {
  1.1815 +        nsScannerIterator iter;
  1.1816 +        theContext->mScanner->EndReading(iter);
  1.1817 +        theContext->mScanner->SetPosition(iter, true);
  1.1818 +      }
  1.1819 +    }
  1.1820 +    // NOTE(review): mScanner is null-checked above but used unchecked below.
  1.1821 +    uint32_t totalRead;
  1.1822 +    ParserWriteStruct pws;
  1.1823 +    pws.mNeedCharsetCheck = true;
  1.1824 +    pws.mParser = this;
  1.1825 +    pws.mScanner = theContext->mScanner;
  1.1826 +    pws.mRequest = request;
  1.1827 +
  1.1828 +    rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
  1.1829 +    if (NS_FAILED(rv)) {
  1.1830 +      return rv;
  1.1831 +    }
  1.1832 +
  1.1833 +    // Don't bother to start parsing until we've seen some
  1.1834 +    // non-whitespace data
  1.1835 +    if (IsOkToProcessNetworkData() &&
  1.1836 +        theContext->mScanner->FirstNonWhitespacePosition() >= 0) {
  1.1837 +      nsCOMPtr<nsIParser> kungFuDeathGrip(this);
  1.1838 +      nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
  1.1839 +      mProcessingNetworkData = true;
  1.1840 +      if (mSink) {
  1.1841 +        mSink->WillParse();
  1.1842 +      }
  1.1843 +      rv = ResumeParse();
  1.1844 +      mProcessingNetworkData = false;
  1.1845 +    }
  1.1846 +  } else {
  1.1847 +    rv = NS_ERROR_UNEXPECTED;
  1.1848 +  }
  1.1849 +
  1.1850 +  return rv;
  1.1851 +}
  1.1852 +
  1.1853 +/**
  1.1854 + *  Stream-listener notification that |request| has finished. Marks the
  1.1855 + *  matching parser context as stopped, records |status|, resumes parsing
  1.1856 + *  with the final-chunk flag set (when network data may be processed), and
  1.1857 + *  finally forwards the notification to mObserver.
  1.1858 + */
  1.1859 +nsresult
  1.1860 +nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
  1.1861 +                        nsresult status)
  1.1862 +{
  1.1863 +  // Walk the context chain, starting at the current one, until the context
  1.1864 +  // owning this request is found.
  1.1865 +  for (CParserContext* ctx = mParserContext; ctx; ctx = ctx->mPrevContext) {
  1.1866 +    if (ctx->mRequest == request) {
  1.1867 +      ctx->mStreamListenerState = eOnStop;
  1.1868 +      ctx->mScanner->SetIncremental(false);
  1.1869 +      break;
  1.1870 +    }
  1.1871 +  }
  1.1872 +
  1.1873 +  mStreamStatus = status;
  1.1874 +
  1.1875 +  // rv stays NS_OK unless parsing is resumed and reports otherwise.
  1.1876 +  nsresult rv = NS_OK;
  1.1877 +  if (IsOkToProcessNetworkData()) {
  1.1878 +    mProcessingNetworkData = true;
  1.1879 +    if (mSink) {
  1.1880 +      mSink->WillParse();
  1.1881 +    }
  1.1882 +    rv = ResumeParse(true, true);
  1.1883 +    mProcessingNetworkData = false;
  1.1884 +  }
  1.1885 +
  1.1886 +  // If the parser isn't enabled, we don't finish parsing till
  1.1887 +  // it is reenabled.
  1.1888 +
  1.1889 +  // XXX Should we wait to notify our observers as well if the
  1.1890 +  // parser isn't yet enabled?
  1.1891 +  if (mObserver) {
  1.1892 +    mObserver->OnStopRequest(request, aContext, status);
  1.1893 +  }
  1.1894 +
  1.1895 +  return rv;
  1.1896 +}
  1.1897 +
  1.1898 +
  1.1899 +/*******************************************************************
  1.1900 +  Here come the tokenization methods...
  1.1901 + *******************************************************************/
  1.1902 +
  1.1903 +
  1.1904 +/**
  1.1905 + *  Called right before tokenization begins so the current context's
  1.1906 + *  tokenizer can initialize. Returns true when there is no parser context
  1.1907 + *  or the tokenizer's WillTokenize() succeeds, and false otherwise.
  1.1908 + */
  1.1909 +bool
  1.1910 +nsParser::WillTokenize(bool aIsFinalChunk)
  1.1911 +{
  1.1912 +  if (mParserContext) {
  1.1913 +    nsITokenizer* theTokenizer;
  1.1914 +    nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
  1.1915 +    NS_ENSURE_SUCCESS(result, false);
  1.1916 +    return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
  1.1917 +  }
  1.1918 +
  1.1919 +  return true;  // no context, nothing to prepare
  1.1920 +}
  1.1921 +
  1.1922 +
  1.1923 +/**
  1.1924 + * This is the primary control routine to consume tokens. It loops until an
  1.1925 + * error occurs, the data runs out (kEOF), or a flush is requested. Returns
  1.1926 + * NS_ERROR_HTMLPARSER_BADTOKENIZER when no tokenizer can be obtained.
  1.1927 + */
  1.1928 +nsresult nsParser::Tokenize(bool aIsFinalChunk)
  1.1929 +{
  1.1930 +  nsITokenizer* theTokenizer;
  1.1931 +
  1.1932 +  nsresult result = NS_ERROR_NOT_AVAILABLE;
  1.1933 +  if (mParserContext) {
  1.1934 +    result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
  1.1935 +  }
  1.1936 +
  1.1937 +  if (NS_SUCCEEDED(result)) {
  1.1938 +    bool flushTokens = false;
  1.1939 +    // Set when the tokenizer asks to stop; the sink is dropped after the loop.
  1.1940 +    bool killSink = false;
  1.1941 +    // The return value of WillTokenize() is not checked here.
  1.1942 +    WillTokenize(aIsFinalChunk);
  1.1943 +    while (NS_SUCCEEDED(result)) {
  1.1944 +      mParserContext->mScanner->Mark();
  1.1945 +      result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
  1.1946 +                                          flushTokens);
  1.1947 +      if (NS_FAILED(result)) {
  1.1948 +        mParserContext->mScanner->RewindToMark();
  1.1949 +        if (kEOF == result){
  1.1950 +          break;
  1.1951 +        }
  1.1952 +        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
  1.1953 +          killSink = true;
  1.1954 +          result = Terminate();
  1.1955 +          break;
  1.1956 +        }
  1.1957 +      } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
  1.1958 +        // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
  1.1959 +        // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
  1.1960 +        // Also remember to update the marked position.
  1.1961 +        mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
  1.1962 +        mParserContext->mScanner->Mark();
  1.1963 +        break;
  1.1964 +      }
  1.1965 +    }
  1.1966 +
  1.1967 +    if (killSink) {
  1.1968 +      mSink = nullptr;
  1.1969 +    }
  1.1970 +  } else {
  1.1971 +    result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
  1.1972 +  }
  1.1973 +
  1.1974 +  return result;
  1.1975 +}
  1.1976 +
  1.1977 +/**
  1.1978 + * Query the current context's request for its nsIChannel interface.
  1.1979 + *
  1.1980 + * @param aChannel out param; passed on only when a request is present
  1.1981 + * @return the QueryInterface result, or NS_ERROR_NOT_AVAILABLE if there
  1.1982 + *         is no parser context or no request
  1.1983 + */
  1.1984 +NS_IMETHODIMP
  1.1985 +nsParser::GetChannel(nsIChannel** aChannel)
  1.1986 +{
  1.1987 +  if (!mParserContext || !mParserContext->mRequest) {
  1.1988 +    return NS_ERROR_NOT_AVAILABLE;
  1.1989 +  }
  1.1990 +  return CallQueryInterface(mParserContext->mRequest, aChannel);
  1.1991 +}
  1.1992 +
  1.1993 +/**
  1.1994 + * Get the DTD associated with this parser (addrefed), or nullptr if none.
  1.1995 + */
  1.1996 +NS_IMETHODIMP
  1.1997 +nsParser::GetDTD(nsIDTD** aDTD)
  1.1998 +{
  1.1999 +  *aDTD = nullptr;  // NS_OK is returned on every path, so always set it
  1.2000 +  if (mParserContext) {
  1.2001 +    NS_IF_ADDREF(*aDTD = mDTD);
  1.2002 +  }
  1.2003 +  return NS_OK;
  1.2004 +}
  1.2005 +
  1.2006 +/**
  1.2007 + * Get this parser as an nsIStreamListener; the pointer is not addrefed here.
  1.2008 + */
  1.2009 +nsIStreamListener*
  1.2010 +nsParser::GetStreamListener()
  1.2011 +{
  1.2012 +  return this;
  1.2013 +}
The Tor Browser / file diff

diff: parser/htmlparser/src/nsParser.cpp

parser/htmlparser/src/nsParser.cpp