1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/htmlparser/src/nsScanner.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1199 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=2 sw=2 et tw=78: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +//#define __INCREMENTAL 1 1.11 + 1.12 +#include "mozilla/DebugOnly.h" 1.13 + 1.14 +#include "nsScanner.h" 1.15 +#include "nsDebug.h" 1.16 +#include "nsReadableUtils.h" 1.17 +#include "nsIInputStream.h" 1.18 +#include "nsIFile.h" 1.19 +#include "nsNetUtil.h" 1.20 +#include "nsUTF8Utils.h" // for LossyConvertEncoding 1.21 +#include "nsCRT.h" 1.22 +#include "nsParser.h" 1.23 +#include "nsCharsetSource.h" 1.24 + 1.25 +#include "mozilla/dom/EncodingUtils.h" 1.26 + 1.27 +using mozilla::dom::EncodingUtils; 1.28 + 1.29 +// We replace NUL characters with this character. 1.30 +static char16_t sInvalid = UCS2_REPLACEMENT_CHAR; 1.31 + 1.32 +nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) : 1.33 + mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set 1.34 +{ 1.35 + // Build filter that will be used to filter out characters with 1.36 + // bits that none of the terminal chars have. This works very well 1.37 + // because terminal chars often have only the last 4-6 bits set and 1.38 + // normal ascii letters have bit 7 set. Other letters have even higher 1.39 + // bits set. 1.40 + 1.41 + // Calculate filter 1.42 + const char16_t *current = aTerminateChars; 1.43 + char16_t terminalChar = *current; 1.44 + while (terminalChar) { 1.45 + mFilter &= ~terminalChar; 1.46 + ++current; 1.47 + terminalChar = *current; 1.48 + } 1.49 +} 1.50 + 1.51 +/** 1.52 + * Use this constructor if you want i/o to be based on 1.53 + * a single string you hand in during construction. 1.54 + * This short cut was added for Javascript. 1.55 + * 1.56 + * @update gess 5/12/98 1.57 + * @param aMode represents the parser mode (nav, other) 1.58 + * @return 1.59 + */ 1.60 +nsScanner::nsScanner(const nsAString& anHTMLString) 1.61 +{ 1.62 + MOZ_COUNT_CTOR(nsScanner); 1.63 + 1.64 + mSlidingBuffer = nullptr; 1.65 + mCountRemaining = 0; 1.66 + mFirstNonWhitespacePosition = -1; 1.67 + if (AppendToBuffer(anHTMLString)) { 1.68 + mSlidingBuffer->BeginReading(mCurrentPosition); 1.69 + } else { 1.70 + /* XXX see hack below, re: bug 182067 */ 1.71 + memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); 1.72 + mEndPosition = mCurrentPosition; 1.73 + } 1.74 + mMarkPosition = mCurrentPosition; 1.75 + mIncremental = false; 1.76 + mUnicodeDecoder = 0; 1.77 + mCharsetSource = kCharsetUninitialized; 1.78 + mHasInvalidCharacter = false; 1.79 + mReplacementCharacter = char16_t(0x0); 1.80 +} 1.81 + 1.82 +/** 1.83 + * Use this constructor if you want i/o to be based on strings 1.84 + * the scanner receives. If you pass a null filename, you 1.85 + * can still provide data to the scanner via append. 1.86 + */ 1.87 +nsScanner::nsScanner(nsString& aFilename, bool aCreateStream) 1.88 + : mFilename(aFilename) 1.89 +{ 1.90 + MOZ_COUNT_CTOR(nsScanner); 1.91 + NS_ASSERTION(!aCreateStream, "This is always true."); 1.92 + 1.93 + mSlidingBuffer = nullptr; 1.94 + 1.95 + // XXX This is a big hack. We need to initialize the iterators to something. 1.96 + // What matters is that mCurrentPosition == mEndPosition, so that our methods 1.97 + // believe that we are at EOF (see bug 182067). We null out mCurrentPosition 1.98 + // so that we have some hope of catching null pointer dereferences associated 1.99 + // with this hack. --darin 1.100 + memset(&mCurrentPosition, 0, sizeof(mCurrentPosition)); 1.101 + mMarkPosition = mCurrentPosition; 1.102 + mEndPosition = mCurrentPosition; 1.103 + 1.104 + mIncremental = true; 1.105 + mFirstNonWhitespacePosition = -1; 1.106 + mCountRemaining = 0; 1.107 + 1.108 + mUnicodeDecoder = 0; 1.109 + mCharsetSource = kCharsetUninitialized; 1.110 + mHasInvalidCharacter = false; 1.111 + mReplacementCharacter = char16_t(0x0); 1.112 + // XML defaults to UTF-8 and about:blank is UTF-8, too. 1.113 + SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault); 1.114 +} 1.115 + 1.116 +nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource) 1.117 +{ 1.118 + if (aSource < mCharsetSource) // priority is lower than the current one 1.119 + return NS_OK; 1.120 + 1.121 + mCharsetSource = aSource; 1.122 + 1.123 + nsCString charsetName; 1.124 + mozilla::DebugOnly<bool> valid = 1.125 + EncodingUtils::FindEncodingForLabel(aCharset, charsetName); 1.126 + MOZ_ASSERT(valid, "Should never call with a bogus aCharset."); 1.127 + 1.128 + if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) { 1.129 + return NS_OK; // no difference, don't change it 1.130 + } 1.131 + 1.132 + // different, need to change it 1.133 + 1.134 + mCharset.Assign(charsetName); 1.135 + 1.136 + mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset); 1.137 + mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); 1.138 + 1.139 + return NS_OK; 1.140 +} 1.141 + 1.142 + 1.143 +/** 1.144 + * default destructor 1.145 + * 1.146 + * @update gess 3/25/98 1.147 + * @param 1.148 + * @return 1.149 + */ 1.150 +nsScanner::~nsScanner() { 1.151 + 1.152 + delete mSlidingBuffer; 1.153 + 1.154 + MOZ_COUNT_DTOR(nsScanner); 1.155 +} 1.156 + 1.157 +/** 1.158 + * Resets current offset position of input stream to marked position. 1.159 + * This allows us to back up to this point if the need should arise, 1.160 + * such as when tokenization gets interrupted. 1.161 + * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! 1.162 + * 1.163 + * @update gess 5/12/98 1.164 + * @param 1.165 + * @return 1.166 + */ 1.167 +void nsScanner::RewindToMark(void){ 1.168 + if (mSlidingBuffer) { 1.169 + mCountRemaining += (Distance(mMarkPosition, mCurrentPosition)); 1.170 + mCurrentPosition = mMarkPosition; 1.171 + } 1.172 +} 1.173 + 1.174 + 1.175 +/** 1.176 + * Records current offset position in input stream. This allows us 1.177 + * to back up to this point if the need should arise, such as when 1.178 + * tokenization gets interrupted. 1.179 + * 1.180 + * @update gess 7/29/98 1.181 + * @param 1.182 + * @return 1.183 + */ 1.184 +int32_t nsScanner::Mark() { 1.185 + int32_t distance = 0; 1.186 + if (mSlidingBuffer) { 1.187 + nsScannerIterator oldStart; 1.188 + mSlidingBuffer->BeginReading(oldStart); 1.189 + 1.190 + distance = Distance(oldStart, mCurrentPosition); 1.191 + 1.192 + mSlidingBuffer->DiscardPrefix(mCurrentPosition); 1.193 + mSlidingBuffer->BeginReading(mCurrentPosition); 1.194 + mMarkPosition = mCurrentPosition; 1.195 + } 1.196 + 1.197 + return distance; 1.198 +} 1.199 + 1.200 +/** 1.201 + * Insert data to our underlying input buffer as 1.202 + * if it were read from an input stream. 1.203 + * 1.204 + * @update harishd 01/12/99 1.205 + * @return error code 1.206 + */ 1.207 +bool nsScanner::UngetReadable(const nsAString& aBuffer) { 1.208 + if (!mSlidingBuffer) { 1.209 + return false; 1.210 + } 1.211 + 1.212 + mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition); 1.213 + mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators 1.214 + mSlidingBuffer->EndReading(mEndPosition); 1.215 + 1.216 + uint32_t length = aBuffer.Length(); 1.217 + mCountRemaining += length; // Ref. bug 117441 1.218 + return true; 1.219 +} 1.220 + 1.221 +/** 1.222 + * Append data to our underlying input buffer as 1.223 + * if it were read from an input stream. 1.224 + * 1.225 + * @update gess4/3/98 1.226 + * @return error code 1.227 + */ 1.228 +nsresult nsScanner::Append(const nsAString& aBuffer) { 1.229 + if (!AppendToBuffer(aBuffer)) 1.230 + return NS_ERROR_OUT_OF_MEMORY; 1.231 + return NS_OK; 1.232 +} 1.233 + 1.234 +/** 1.235 + * 1.236 + * 1.237 + * @update gess 5/21/98 1.238 + * @param 1.239 + * @return 1.240 + */ 1.241 +nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen, 1.242 + nsIRequest *aRequest) 1.243 +{ 1.244 + nsresult res = NS_OK; 1.245 + if (mUnicodeDecoder) { 1.246 + int32_t unicharBufLen = 0; 1.247 + mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen); 1.248 + nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1); 1.249 + NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY); 1.250 + char16_t *unichars = buffer->DataStart(); 1.251 + 1.252 + int32_t totalChars = 0; 1.253 + int32_t unicharLength = unicharBufLen; 1.254 + int32_t errorPos = -1; 1.255 + 1.256 + do { 1.257 + int32_t srcLength = aLen; 1.258 + res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength); 1.259 + 1.260 + totalChars += unicharLength; 1.261 + // Continuation of failure case 1.262 + if(NS_FAILED(res)) { 1.263 + // if we failed, we consume one byte, replace it with the replacement 1.264 + // character and try the conversion again. 1.265 + 1.266 + // This is only needed because some decoders don't follow the 1.267 + // nsIUnicodeDecoder contract: they return a failure when *aDestLength 1.268 + // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177 1.269 + if ((unichars + unicharLength) >= buffer->DataEnd()) { 1.270 + NS_ERROR("Unexpected end of destination buffer"); 1.271 + break; 1.272 + } 1.273 + 1.274 + if (mReplacementCharacter == 0x0 && errorPos == -1) { 1.275 + errorPos = totalChars; 1.276 + } 1.277 + unichars[unicharLength++] = mReplacementCharacter == 0x0 ? 1.278 + mUnicodeDecoder->GetCharacterForUnMapped() : 1.279 + mReplacementCharacter; 1.280 + 1.281 + unichars = unichars + unicharLength; 1.282 + unicharLength = unicharBufLen - (++totalChars); 1.283 + 1.284 + mUnicodeDecoder->Reset(); 1.285 + 1.286 + if(((uint32_t) (srcLength + 1)) > aLen) { 1.287 + srcLength = aLen; 1.288 + } 1.289 + else { 1.290 + ++srcLength; 1.291 + } 1.292 + 1.293 + aBuffer += srcLength; 1.294 + aLen -= srcLength; 1.295 + } 1.296 + } while (NS_FAILED(res) && (aLen > 0)); 1.297 + 1.298 + buffer->SetDataLength(totalChars); 1.299 + // Don't propagate return code of unicode decoder 1.300 + // since it doesn't reflect on our success or failure 1.301 + // - Ref. bug 87110 1.302 + res = NS_OK; 1.303 + if (!AppendToBuffer(buffer, aRequest, errorPos)) 1.304 + res = NS_ERROR_OUT_OF_MEMORY; 1.305 + } 1.306 + else { 1.307 + NS_WARNING("No decoder found."); 1.308 + res = NS_ERROR_FAILURE; 1.309 + } 1.310 + 1.311 + return res; 1.312 +} 1.313 + 1.314 +/** 1.315 + * retrieve next char from scanners internal input stream 1.316 + * 1.317 + * @update gess 3/25/98 1.318 + * @param 1.319 + * @return error code reflecting read status 1.320 + */ 1.321 +nsresult nsScanner::GetChar(char16_t& aChar) { 1.322 + if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { 1.323 + aChar = 0; 1.324 + return kEOF; 1.325 + } 1.326 + 1.327 + aChar = *mCurrentPosition++; 1.328 + --mCountRemaining; 1.329 + 1.330 + return NS_OK; 1.331 +} 1.332 + 1.333 + 1.334 +/** 1.335 + * peek ahead to consume next char from scanner's internal 1.336 + * input buffer 1.337 + * 1.338 + * @update gess 3/25/98 1.339 + * @param 1.340 + * @return 1.341 + */ 1.342 +nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) { 1.343 + aChar = 0; 1.344 + 1.345 + if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { 1.346 + return kEOF; 1.347 + } 1.348 + 1.349 + if (aOffset > 0) { 1.350 + if (mCountRemaining <= aOffset) 1.351 + return kEOF; 1.352 + 1.353 + nsScannerIterator pos = mCurrentPosition; 1.354 + pos.advance(aOffset); 1.355 + aChar=*pos; 1.356 + } 1.357 + else { 1.358 + aChar=*mCurrentPosition; 1.359 + } 1.360 + 1.361 + return NS_OK; 1.362 +} 1.363 + 1.364 +nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset) 1.365 +{ 1.366 + if (!mSlidingBuffer || mCurrentPosition == mEndPosition) { 1.367 + return kEOF; 1.368 + } 1.369 + 1.370 + nsScannerIterator start, end; 1.371 + 1.372 + start = mCurrentPosition; 1.373 + 1.374 + if ((int32_t)mCountRemaining <= aOffset) { 1.375 + return kEOF; 1.376 + } 1.377 + 1.378 + if (aOffset > 0) { 1.379 + start.advance(aOffset); 1.380 + } 1.381 + 1.382 + if (mCountRemaining < uint32_t(aNumChars + aOffset)) { 1.383 + end = mEndPosition; 1.384 + } 1.385 + else { 1.386 + end = start; 1.387 + end.advance(aNumChars); 1.388 + } 1.389 + 1.390 + CopyUnicodeTo(start, end, aStr); 1.391 + 1.392 + return NS_OK; 1.393 +} 1.394 + 1.395 + 1.396 +/** 1.397 + * Skip whitespace on scanner input stream 1.398 + * 1.399 + * @update gess 3/25/98 1.400 + * @param 1.401 + * @return error status 1.402 + */ 1.403 +nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped) { 1.404 + 1.405 + if (!mSlidingBuffer) { 1.406 + return kEOF; 1.407 + } 1.408 + 1.409 + char16_t theChar = 0; 1.410 + nsresult result = Peek(theChar); 1.411 + 1.412 + if (NS_FAILED(result)) { 1.413 + return result; 1.414 + } 1.415 + 1.416 + nsScannerIterator current = mCurrentPosition; 1.417 + bool done = false; 1.418 + bool skipped = false; 1.419 + 1.420 + while (!done && current != mEndPosition) { 1.421 + switch(theChar) { 1.422 + case '\n': 1.423 + case '\r': ++aNewlinesSkipped; 1.424 + case ' ' : 1.425 + case '\t': 1.426 + { 1.427 + skipped = true; 1.428 + char16_t thePrevChar = theChar; 1.429 + theChar = (++current != mEndPosition) ? *current : '\0'; 1.430 + if ((thePrevChar == '\r' && theChar == '\n') || 1.431 + (thePrevChar == '\n' && theChar == '\r')) { 1.432 + theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF 1.433 + } 1.434 + } 1.435 + break; 1.436 + default: 1.437 + done = true; 1.438 + break; 1.439 + } 1.440 + } 1.441 + 1.442 + if (skipped) { 1.443 + SetPosition(current); 1.444 + if (current == mEndPosition) { 1.445 + result = kEOF; 1.446 + } 1.447 + } 1.448 + 1.449 + return result; 1.450 +} 1.451 + 1.452 +/** 1.453 + * Skip over chars as long as they equal given char 1.454 + * 1.455 + * @update gess 3/25/98 1.456 + * @param 1.457 + * @return error code 1.458 + */ 1.459 +nsresult nsScanner::SkipOver(char16_t aSkipChar){ 1.460 + 1.461 + if (!mSlidingBuffer) { 1.462 + return kEOF; 1.463 + } 1.464 + 1.465 + char16_t ch=0; 1.466 + nsresult result=NS_OK; 1.467 + 1.468 + while(NS_OK==result) { 1.469 + result=Peek(ch); 1.470 + if(NS_OK == result) { 1.471 + if(ch!=aSkipChar) { 1.472 + break; 1.473 + } 1.474 + GetChar(ch); 1.475 + } 1.476 + else break; 1.477 + } //while 1.478 + return result; 1.479 + 1.480 +} 1.481 + 1.482 +#if 0 1.483 +void DoErrTest(nsString& aString) { 1.484 + int32_t pos=aString.FindChar(0); 1.485 + if(kNotFound<pos) { 1.486 + if(aString.Length()-1!=pos) { 1.487 + } 1.488 + } 1.489 +} 1.490 + 1.491 +void DoErrTest(nsCString& aString) { 1.492 + int32_t pos=aString.FindChar(0); 1.493 + if(kNotFound<pos) { 1.494 + if(aString.Length()-1!=pos) { 1.495 + } 1.496 + } 1.497 +} 1.498 +#endif 1.499 + 1.500 +/** 1.501 + * Consume characters until you run into space, a '<', a '>', or a '/'. 1.502 + * 1.503 + * @param aString - receives new data from stream 1.504 + * @return error code 1.505 + */ 1.506 +nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) { 1.507 + 1.508 + if (!mSlidingBuffer) { 1.509 + return kEOF; 1.510 + } 1.511 + 1.512 + char16_t theChar=0; 1.513 + nsresult result=Peek(theChar); 1.514 + nsScannerIterator current, end; 1.515 + bool found=false; 1.516 + 1.517 + current = mCurrentPosition; 1.518 + end = mEndPosition; 1.519 + 1.520 + // Loop until we find an illegal character. Everything is then appended 1.521 + // later. 1.522 + while(current != end && !found) { 1.523 + theChar=*current; 1.524 + 1.525 + switch(theChar) { 1.526 + case '\n': 1.527 + case '\r': 1.528 + case ' ' : 1.529 + case '\t': 1.530 + case '\v': 1.531 + case '\f': 1.532 + case '<': 1.533 + case '>': 1.534 + case '/': 1.535 + found = true; 1.536 + break; 1.537 + 1.538 + case '\0': 1.539 + ReplaceCharacter(current, sInvalid); 1.540 + break; 1.541 + 1.542 + default: 1.543 + break; 1.544 + } 1.545 + 1.546 + if (!found) { 1.547 + ++current; 1.548 + } 1.549 + } 1.550 + 1.551 + // Don't bother appending nothing. 1.552 + if (current != mCurrentPosition) { 1.553 + AppendUnicodeTo(mCurrentPosition, current, aString); 1.554 + } 1.555 + 1.556 + SetPosition(current); 1.557 + if (current == end) { 1.558 + result = kEOF; 1.559 + } 1.560 + 1.561 + //DoErrTest(aString); 1.562 + 1.563 + return result; 1.564 +} 1.565 + 1.566 +/** 1.567 + * Consume characters until you run into a char that's not valid in an 1.568 + * entity name 1.569 + * 1.570 + * @param aString - receives new data from stream 1.571 + * @return error code 1.572 + */ 1.573 +nsresult nsScanner::ReadEntityIdentifier(nsString& aString) { 1.574 + 1.575 + if (!mSlidingBuffer) { 1.576 + return kEOF; 1.577 + } 1.578 + 1.579 + char16_t theChar=0; 1.580 + nsresult result=Peek(theChar); 1.581 + nsScannerIterator origin, current, end; 1.582 + bool found=false; 1.583 + 1.584 + origin = mCurrentPosition; 1.585 + current = mCurrentPosition; 1.586 + end = mEndPosition; 1.587 + 1.588 + while(current != end) { 1.589 + 1.590 + theChar=*current; 1.591 + if(theChar) { 1.592 + found=false; 1.593 + switch(theChar) { 1.594 + case '_': 1.595 + case '-': 1.596 + case '.': 1.597 + // Don't allow ':' in entity names. See bug 23791 1.598 + found = true; 1.599 + break; 1.600 + default: 1.601 + found = ('a'<=theChar && theChar<='z') || 1.602 + ('A'<=theChar && theChar<='Z') || 1.603 + ('0'<=theChar && theChar<='9'); 1.604 + break; 1.605 + } 1.606 + 1.607 + if(!found) { 1.608 + AppendUnicodeTo(mCurrentPosition, current, aString); 1.609 + break; 1.610 + } 1.611 + } 1.612 + ++current; 1.613 + } 1.614 + 1.615 + SetPosition(current); 1.616 + if (current == end) { 1.617 + AppendUnicodeTo(origin, current, aString); 1.618 + return kEOF; 1.619 + } 1.620 + 1.621 + //DoErrTest(aString); 1.622 + 1.623 + return result; 1.624 +} 1.625 + 1.626 +/** 1.627 + * Consume digits 1.628 + * 1.629 + * @param aString - should contain digits 1.630 + * @return error code 1.631 + */ 1.632 +nsresult nsScanner::ReadNumber(nsString& aString,int32_t aBase) { 1.633 + 1.634 + if (!mSlidingBuffer) { 1.635 + return kEOF; 1.636 + } 1.637 + 1.638 + NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); 1.639 + 1.640 + char16_t theChar=0; 1.641 + nsresult result=Peek(theChar); 1.642 + nsScannerIterator origin, current, end; 1.643 + 1.644 + origin = mCurrentPosition; 1.645 + current = origin; 1.646 + end = mEndPosition; 1.647 + 1.648 + bool done = false; 1.649 + while(current != end) { 1.650 + theChar=*current; 1.651 + if(theChar) { 1.652 + done = (theChar < '0' || theChar > '9') && 1.653 + ((aBase == 16)? (theChar < 'A' || theChar > 'F') && 1.654 + (theChar < 'a' || theChar > 'f') 1.655 + :true); 1.656 + if(done) { 1.657 + AppendUnicodeTo(origin, current, aString); 1.658 + break; 1.659 + } 1.660 + } 1.661 + ++current; 1.662 + } 1.663 + 1.664 + SetPosition(current); 1.665 + if (current == end) { 1.666 + AppendUnicodeTo(origin, current, aString); 1.667 + return kEOF; 1.668 + } 1.669 + 1.670 + //DoErrTest(aString); 1.671 + 1.672 + return result; 1.673 +} 1.674 + 1.675 +/** 1.676 + * Consume characters until you find the terminal char 1.677 + * 1.678 + * @update gess 3/25/98 1.679 + * @param aString receives new data from stream 1.680 + * @param addTerminal tells us whether to append terminal to aString 1.681 + * @return error code 1.682 + */ 1.683 +nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString, 1.684 + int32_t& aNewlinesSkipped, 1.685 + bool& aHaveCR) { 1.686 + 1.687 + aHaveCR = false; 1.688 + 1.689 + if (!mSlidingBuffer) { 1.690 + return kEOF; 1.691 + } 1.692 + 1.693 + char16_t theChar = 0; 1.694 + nsresult result = Peek(theChar); 1.695 + 1.696 + if (NS_FAILED(result)) { 1.697 + return result; 1.698 + } 1.699 + 1.700 + nsScannerIterator origin, current, end; 1.701 + bool done = false; 1.702 + 1.703 + origin = mCurrentPosition; 1.704 + current = origin; 1.705 + end = mEndPosition; 1.706 + 1.707 + bool haveCR = false; 1.708 + 1.709 + while(!done && current != end) { 1.710 + switch(theChar) { 1.711 + case '\n': 1.712 + case '\r': 1.713 + { 1.714 + ++aNewlinesSkipped; 1.715 + char16_t thePrevChar = theChar; 1.716 + theChar = (++current != end) ? *current : '\0'; 1.717 + if ((thePrevChar == '\r' && theChar == '\n') || 1.718 + (thePrevChar == '\n' && theChar == '\r')) { 1.719 + theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF 1.720 + haveCR = true; 1.721 + } else if (thePrevChar == '\r') { 1.722 + // Lone CR becomes CRLF; callers should know to remove extra CRs 1.723 + AppendUnicodeTo(origin, current, aString); 1.724 + aString.writable().Append(char16_t('\n')); 1.725 + origin = current; 1.726 + haveCR = true; 1.727 + } 1.728 + } 1.729 + break; 1.730 + case ' ' : 1.731 + case '\t': 1.732 + theChar = (++current != end) ? *current : '\0'; 1.733 + break; 1.734 + default: 1.735 + done = true; 1.736 + AppendUnicodeTo(origin, current, aString); 1.737 + break; 1.738 + } 1.739 + } 1.740 + 1.741 + SetPosition(current); 1.742 + if (current == end) { 1.743 + AppendUnicodeTo(origin, current, aString); 1.744 + result = kEOF; 1.745 + } 1.746 + 1.747 + aHaveCR = haveCR; 1.748 + return result; 1.749 +} 1.750 + 1.751 +//XXXbz callers of this have to manage their lone '\r' themselves if they want 1.752 +//it to work. Good thing they're all in view-source and it deals. 1.753 +nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart, 1.754 + nsScannerIterator& aEnd, 1.755 + int32_t& aNewlinesSkipped) { 1.756 + 1.757 + if (!mSlidingBuffer) { 1.758 + return kEOF; 1.759 + } 1.760 + 1.761 + char16_t theChar = 0; 1.762 + nsresult result = Peek(theChar); 1.763 + 1.764 + if (NS_FAILED(result)) { 1.765 + return result; 1.766 + } 1.767 + 1.768 + nsScannerIterator origin, current, end; 1.769 + bool done = false; 1.770 + 1.771 + origin = mCurrentPosition; 1.772 + current = origin; 1.773 + end = mEndPosition; 1.774 + 1.775 + while(!done && current != end) { 1.776 + switch(theChar) { 1.777 + case '\n': 1.778 + case '\r': ++aNewlinesSkipped; 1.779 + case ' ' : 1.780 + case '\t': 1.781 + { 1.782 + char16_t thePrevChar = theChar; 1.783 + theChar = (++current != end) ? *current : '\0'; 1.784 + if ((thePrevChar == '\r' && theChar == '\n') || 1.785 + (thePrevChar == '\n' && theChar == '\r')) { 1.786 + theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF 1.787 + } 1.788 + } 1.789 + break; 1.790 + default: 1.791 + done = true; 1.792 + aStart = origin; 1.793 + aEnd = current; 1.794 + break; 1.795 + } 1.796 + } 1.797 + 1.798 + SetPosition(current); 1.799 + if (current == end) { 1.800 + aStart = origin; 1.801 + aEnd = current; 1.802 + result = kEOF; 1.803 + } 1.804 + 1.805 + return result; 1.806 +} 1.807 + 1.808 +/** 1.809 + * Consume characters until you encounter one contained in given 1.810 + * input set. 1.811 + * 1.812 + * @update gess 3/25/98 1.813 + * @param aString will contain the result of this method 1.814 + * @param aTerminalSet is an ordered string that contains 1.815 + * the set of INVALID characters 1.816 + * @return error code 1.817 + */ 1.818 +nsresult nsScanner::ReadUntil(nsAString& aString, 1.819 + const nsReadEndCondition& aEndCondition, 1.820 + bool addTerminal) 1.821 +{ 1.822 + if (!mSlidingBuffer) { 1.823 + return kEOF; 1.824 + } 1.825 + 1.826 + nsScannerIterator origin, current; 1.827 + const char16_t* setstart = aEndCondition.mChars; 1.828 + const char16_t* setcurrent; 1.829 + 1.830 + origin = mCurrentPosition; 1.831 + current = origin; 1.832 + 1.833 + char16_t theChar=0; 1.834 + nsresult result=Peek(theChar); 1.835 + 1.836 + if (NS_FAILED(result)) { 1.837 + return result; 1.838 + } 1.839 + 1.840 + while (current != mEndPosition) { 1.841 + theChar = *current; 1.842 + if (theChar == '\0') { 1.843 + ReplaceCharacter(current, sInvalid); 1.844 + theChar = sInvalid; 1.845 + } 1.846 + 1.847 + // Filter out completely wrong characters 1.848 + // Check if all bits are in the required area 1.849 + if(!(theChar & aEndCondition.mFilter)) { 1.850 + // They were. Do a thorough check. 1.851 + 1.852 + setcurrent = setstart; 1.853 + while (*setcurrent) { 1.854 + if (*setcurrent == theChar) { 1.855 + if(addTerminal) 1.856 + ++current; 1.857 + AppendUnicodeTo(origin, current, aString); 1.858 + SetPosition(current); 1.859 + 1.860 + //DoErrTest(aString); 1.861 + 1.862 + return NS_OK; 1.863 + } 1.864 + ++setcurrent; 1.865 + } 1.866 + } 1.867 + 1.868 + ++current; 1.869 + } 1.870 + 1.871 + // If we are here, we didn't find any terminator in the string and 1.872 + // current = mEndPosition 1.873 + SetPosition(current); 1.874 + AppendUnicodeTo(origin, current, aString); 1.875 + return kEOF; 1.876 +} 1.877 + 1.878 +nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString, 1.879 + const nsReadEndCondition& aEndCondition, 1.880 + bool addTerminal) 1.881 +{ 1.882 + if (!mSlidingBuffer) { 1.883 + return kEOF; 1.884 + } 1.885 + 1.886 + nsScannerIterator origin, current; 1.887 + const char16_t* setstart = aEndCondition.mChars; 1.888 + const char16_t* setcurrent; 1.889 + 1.890 + origin = mCurrentPosition; 1.891 + current = origin; 1.892 + 1.893 + char16_t theChar=0; 1.894 + nsresult result=Peek(theChar); 1.895 + 1.896 + if (NS_FAILED(result)) { 1.897 + return result; 1.898 + } 1.899 + 1.900 + while (current != mEndPosition) { 1.901 + theChar = *current; 1.902 + if (theChar == '\0') { 1.903 + ReplaceCharacter(current, sInvalid); 1.904 + theChar = sInvalid; 1.905 + } 1.906 + 1.907 + // Filter out completely wrong characters 1.908 + // Check if all bits are in the required area 1.909 + if(!(theChar & aEndCondition.mFilter)) { 1.910 + // They were. Do a thorough check. 1.911 + 1.912 + setcurrent = setstart; 1.913 + while (*setcurrent) { 1.914 + if (*setcurrent == theChar) { 1.915 + if(addTerminal) 1.916 + ++current; 1.917 + AppendUnicodeTo(origin, current, aString); 1.918 + SetPosition(current); 1.919 + 1.920 + //DoErrTest(aString); 1.921 + 1.922 + return NS_OK; 1.923 + } 1.924 + ++setcurrent; 1.925 + } 1.926 + } 1.927 + 1.928 + ++current; 1.929 + } 1.930 + 1.931 + // If we are here, we didn't find any terminator in the string and 1.932 + // current = mEndPosition 1.933 + SetPosition(current); 1.934 + AppendUnicodeTo(origin, current, aString); 1.935 + return kEOF; 1.936 +} 1.937 + 1.938 +nsresult nsScanner::ReadUntil(nsScannerIterator& aStart, 1.939 + nsScannerIterator& aEnd, 1.940 + const nsReadEndCondition &aEndCondition, 1.941 + bool addTerminal) 1.942 +{ 1.943 + if (!mSlidingBuffer) { 1.944 + return kEOF; 1.945 + } 1.946 + 1.947 + nsScannerIterator origin, current; 1.948 + const char16_t* setstart = aEndCondition.mChars; 1.949 + const char16_t* setcurrent; 1.950 + 1.951 + origin = mCurrentPosition; 1.952 + current = origin; 1.953 + 1.954 + char16_t theChar=0; 1.955 + nsresult result=Peek(theChar); 1.956 + 1.957 + if (NS_FAILED(result)) { 1.958 + aStart = aEnd = current; 1.959 + return result; 1.960 + } 1.961 + 1.962 + while (current != mEndPosition) { 1.963 + theChar = *current; 1.964 + if (theChar == '\0') { 1.965 + ReplaceCharacter(current, sInvalid); 1.966 + theChar = sInvalid; 1.967 + } 1.968 + 1.969 + // Filter out completely wrong characters 1.970 + // Check if all bits are in the required area 1.971 + if(!(theChar & aEndCondition.mFilter)) { 1.972 + // They were. Do a thorough check. 1.973 + setcurrent = setstart; 1.974 + while (*setcurrent) { 1.975 + if (*setcurrent == theChar) { 1.976 + if(addTerminal) 1.977 + ++current; 1.978 + aStart = origin; 1.979 + aEnd = current; 1.980 + SetPosition(current); 1.981 + 1.982 + return NS_OK; 1.983 + } 1.984 + ++setcurrent; 1.985 + } 1.986 + } 1.987 + 1.988 + ++current; 1.989 + } 1.990 + 1.991 + // If we are here, we didn't find any terminator in the string and 1.992 + // current = mEndPosition 1.993 + SetPosition(current); 1.994 + aStart = origin; 1.995 + aEnd = current; 1.996 + return kEOF; 1.997 +} 1.998 + 1.999 +/** 1.1000 + * Consumes chars until you see the given terminalChar 1.1001 + * 1.1002 + * @update gess 3/25/98 1.1003 + * @param 1.1004 + * @return error code 1.1005 + */ 1.1006 +nsresult nsScanner::ReadUntil(nsAString& aString, 1.1007 + char16_t aTerminalChar, 1.1008 + bool addTerminal) 1.1009 +{ 1.1010 + if (!mSlidingBuffer) { 1.1011 + return kEOF; 1.1012 + } 1.1013 + 1.1014 + nsScannerIterator origin, current; 1.1015 + 1.1016 + origin = mCurrentPosition; 1.1017 + current = origin; 1.1018 + 1.1019 + char16_t theChar; 1.1020 + nsresult result = Peek(theChar); 1.1021 + 1.1022 + if (NS_FAILED(result)) { 1.1023 + return result; 1.1024 + } 1.1025 + 1.1026 + while (current != mEndPosition) { 1.1027 + theChar = *current; 1.1028 + if (theChar == '\0') { 1.1029 + ReplaceCharacter(current, sInvalid); 1.1030 + theChar = sInvalid; 1.1031 + } 1.1032 + 1.1033 + if (aTerminalChar == theChar) { 1.1034 + if(addTerminal) 1.1035 + ++current; 1.1036 + AppendUnicodeTo(origin, current, aString); 1.1037 + SetPosition(current); 1.1038 + return NS_OK; 1.1039 + } 1.1040 + ++current; 1.1041 + } 1.1042 + 1.1043 + // If we are here, we didn't find any terminator in the string and 1.1044 + // current = mEndPosition 1.1045 + AppendUnicodeTo(origin, current, aString); 1.1046 + SetPosition(current); 1.1047 + return kEOF; 1.1048 + 1.1049 +} 1.1050 + 1.1051 +void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd) 1.1052 +{ 1.1053 + aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd); 1.1054 +} 1.1055 + 1.1056 +void nsScanner::CurrentPosition(nsScannerIterator& aPosition) 1.1057 +{ 1.1058 + aPosition = mCurrentPosition; 1.1059 +} 1.1060 + 1.1061 +void nsScanner::EndReading(nsScannerIterator& aPosition) 1.1062 +{ 1.1063 + aPosition = mEndPosition; 1.1064 +} 1.1065 + 1.1066 +void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse) 1.1067 +{ 1.1068 + if (mSlidingBuffer) { 1.1069 +#ifdef DEBUG 1.1070 + uint32_t origRemaining = mCountRemaining; 1.1071 +#endif 1.1072 + 1.1073 + if (aReverse) { 1.1074 + mCountRemaining += (Distance(aPosition, mCurrentPosition)); 1.1075 + } 1.1076 + else { 1.1077 + mCountRemaining -= (Distance(mCurrentPosition, aPosition)); 1.1078 + } 1.1079 + 1.1080 + NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) || 1.1081 + (mCountRemaining <= origRemaining && !aReverse), 1.1082 + "Improper use of nsScanner::SetPosition. Make sure to set the" 1.1083 + " aReverse parameter correctly"); 1.1084 + 1.1085 + mCurrentPosition = aPosition; 1.1086 + if (aTerminate && (mCurrentPosition == mEndPosition)) { 1.1087 + mMarkPosition = mCurrentPosition; 1.1088 + mSlidingBuffer->DiscardPrefix(mCurrentPosition); 1.1089 + } 1.1090 + } 1.1091 +} 1.1092 + 1.1093 +void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition, 1.1094 + char16_t aChar) 1.1095 +{ 1.1096 + if (mSlidingBuffer) { 1.1097 + mSlidingBuffer->ReplaceCharacter(aPosition, aChar); 1.1098 + } 1.1099 +} 1.1100 + 1.1101 +bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf, 1.1102 + nsIRequest *aRequest, 1.1103 + int32_t aErrorPos) 1.1104 +{ 1.1105 + uint32_t countRemaining = mCountRemaining; 1.1106 + if (!mSlidingBuffer) { 1.1107 + mSlidingBuffer = new nsScannerString(aBuf); 1.1108 + if (!mSlidingBuffer) 1.1109 + return false; 1.1110 + mSlidingBuffer->BeginReading(mCurrentPosition); 1.1111 + mMarkPosition = mCurrentPosition; 1.1112 + mSlidingBuffer->EndReading(mEndPosition); 1.1113 + mCountRemaining = aBuf->DataLength(); 1.1114 + } 1.1115 + else { 1.1116 + mSlidingBuffer->AppendBuffer(aBuf); 1.1117 + if (mCurrentPosition == mEndPosition) { 1.1118 + mSlidingBuffer->BeginReading(mCurrentPosition); 1.1119 + } 1.1120 + mSlidingBuffer->EndReading(mEndPosition); 1.1121 + mCountRemaining += aBuf->DataLength(); 1.1122 + } 1.1123 + 1.1124 + if (aErrorPos != -1 && !mHasInvalidCharacter) { 1.1125 + mHasInvalidCharacter = true; 1.1126 + mFirstInvalidPosition = mCurrentPosition; 1.1127 + mFirstInvalidPosition.advance(countRemaining + aErrorPos); 1.1128 + } 1.1129 + 1.1130 + if (mFirstNonWhitespacePosition == -1) { 1.1131 + nsScannerIterator iter(mCurrentPosition); 1.1132 + nsScannerIterator end(mEndPosition); 1.1133 + 1.1134 + while (iter != end) { 1.1135 + if (!nsCRT::IsAsciiSpace(*iter)) { 1.1136 + mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter); 1.1137 + 1.1138 + break; 1.1139 + } 1.1140 + 1.1141 + ++iter; 1.1142 + } 1.1143 + } 1.1144 + return true; 1.1145 +} 1.1146 + 1.1147 +/** 1.1148 + * call this to copy bytes out of the scanner that have not yet been consumed 1.1149 + * by the tokenization process. 1.1150 + * 1.1151 + * @update gess 5/12/98 1.1152 + * @param aCopyBuffer is where the scanner buffer will be copied to 1.1153 + * @return nada 1.1154 + */ 1.1155 +void nsScanner::CopyUnusedData(nsString& aCopyBuffer) { 1.1156 + if (!mSlidingBuffer) { 1.1157 + aCopyBuffer.Truncate(); 1.1158 + return; 1.1159 + } 1.1160 + 1.1161 + nsScannerIterator start, end; 1.1162 + start = mCurrentPosition; 1.1163 + end = mEndPosition; 1.1164 + 1.1165 + CopyUnicodeTo(start, end, aCopyBuffer); 1.1166 +} 1.1167 + 1.1168 +/** 1.1169 + * Retrieve the name of the file that the scanner is reading from. 1.1170 + * In some cases, it's just a given name, because the scanner isn't 1.1171 + * really reading from a file. 1.1172 + * 1.1173 + * @update gess 5/12/98 1.1174 + * @return 1.1175 + */ 1.1176 +nsString& nsScanner::GetFilename(void) { 1.1177 + return mFilename; 1.1178 +} 1.1179 + 1.1180 +/** 1.1181 + * Conduct self test. Actually, selftesting for this class 1.1182 + * occurs in the parser selftest. 1.1183 + * 1.1184 + * @update gess 3/25/98 1.1185 + * @param 1.1186 + * @return 1.1187 + */ 1.1188 + 1.1189 +void nsScanner::SelfTest(void) { 1.1190 +#ifdef _DEBUG 1.1191 +#endif 1.1192 +} 1.1193 + 1.1194 +void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter) 1.1195 +{ 1.1196 + mReplacementCharacter = aReplacementCharacter; 1.1197 + 1.1198 + if (mHasInvalidCharacter) { 1.1199 + ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter); 1.1200 + } 1.1201 +} 1.1202 +