parser/htmlparser/src/nsScanner.cpp

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:c8c3de5c21f1
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=2 sw=2 et tw=78: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 //#define __INCREMENTAL 1
8
9 #include "mozilla/DebugOnly.h"
10
11 #include "nsScanner.h"
12 #include "nsDebug.h"
13 #include "nsReadableUtils.h"
14 #include "nsIInputStream.h"
15 #include "nsIFile.h"
16 #include "nsNetUtil.h"
17 #include "nsUTF8Utils.h" // for LossyConvertEncoding
18 #include "nsCRT.h"
19 #include "nsParser.h"
20 #include "nsCharsetSource.h"
21
22 #include "mozilla/dom/EncodingUtils.h"
23
24 using mozilla::dom::EncodingUtils;
25
26 // We replace NUL characters with this character.
27 static char16_t sInvalid = UCS2_REPLACEMENT_CHAR;
28
29 nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) :
30 mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set
31 {
32 // Build filter that will be used to filter out characters with
33 // bits that none of the terminal chars have. This works very well
34 // because terminal chars often have only the last 4-6 bits set and
35 // normal ascii letters have bit 7 set. Other letters have even higher
36 // bits set.
37
38 // Calculate filter
39 const char16_t *current = aTerminateChars;
40 char16_t terminalChar = *current;
41 while (terminalChar) {
42 mFilter &= ~terminalChar;
43 ++current;
44 terminalChar = *current;
45 }
46 }
47
48 /**
49 * Use this constructor if you want i/o to be based on
50 * a single string you hand in during construction.
51 * This short cut was added for Javascript.
52 *
53 * @update gess 5/12/98
54 * @param aMode represents the parser mode (nav, other)
55 * @return
56 */
57 nsScanner::nsScanner(const nsAString& anHTMLString)
58 {
59 MOZ_COUNT_CTOR(nsScanner);
60
61 mSlidingBuffer = nullptr;
62 mCountRemaining = 0;
63 mFirstNonWhitespacePosition = -1;
64 if (AppendToBuffer(anHTMLString)) {
65 mSlidingBuffer->BeginReading(mCurrentPosition);
66 } else {
67 /* XXX see hack below, re: bug 182067 */
68 memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
69 mEndPosition = mCurrentPosition;
70 }
71 mMarkPosition = mCurrentPosition;
72 mIncremental = false;
73 mUnicodeDecoder = 0;
74 mCharsetSource = kCharsetUninitialized;
75 mHasInvalidCharacter = false;
76 mReplacementCharacter = char16_t(0x0);
77 }
78
79 /**
80 * Use this constructor if you want i/o to be based on strings
81 * the scanner receives. If you pass a null filename, you
82 * can still provide data to the scanner via append.
83 */
84 nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
85 : mFilename(aFilename)
86 {
87 MOZ_COUNT_CTOR(nsScanner);
88 NS_ASSERTION(!aCreateStream, "This is always true.");
89
90 mSlidingBuffer = nullptr;
91
92 // XXX This is a big hack. We need to initialize the iterators to something.
93 // What matters is that mCurrentPosition == mEndPosition, so that our methods
94 // believe that we are at EOF (see bug 182067). We null out mCurrentPosition
95 // so that we have some hope of catching null pointer dereferences associated
96 // with this hack. --darin
97 memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
98 mMarkPosition = mCurrentPosition;
99 mEndPosition = mCurrentPosition;
100
101 mIncremental = true;
102 mFirstNonWhitespacePosition = -1;
103 mCountRemaining = 0;
104
105 mUnicodeDecoder = 0;
106 mCharsetSource = kCharsetUninitialized;
107 mHasInvalidCharacter = false;
108 mReplacementCharacter = char16_t(0x0);
109 // XML defaults to UTF-8 and about:blank is UTF-8, too.
110 SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
111 }
112
113 nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset , int32_t aSource)
114 {
115 if (aSource < mCharsetSource) // priority is lower than the current one
116 return NS_OK;
117
118 mCharsetSource = aSource;
119
120 nsCString charsetName;
121 mozilla::DebugOnly<bool> valid =
122 EncodingUtils::FindEncodingForLabel(aCharset, charsetName);
123 MOZ_ASSERT(valid, "Should never call with a bogus aCharset.");
124
125 if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
126 return NS_OK; // no difference, don't change it
127 }
128
129 // different, need to change it
130
131 mCharset.Assign(charsetName);
132
133 mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
134 mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
135
136 return NS_OK;
137 }
138
139
140 /**
141 * default destructor
142 *
143 * @update gess 3/25/98
144 * @param
145 * @return
146 */
147 nsScanner::~nsScanner() {
148
149 delete mSlidingBuffer;
150
151 MOZ_COUNT_DTOR(nsScanner);
152 }
153
154 /**
155 * Resets current offset position of input stream to marked position.
156 * This allows us to back up to this point if the need should arise,
157 * such as when tokenization gets interrupted.
158 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
159 *
160 * @update gess 5/12/98
161 * @param
162 * @return
163 */
164 void nsScanner::RewindToMark(void){
165 if (mSlidingBuffer) {
166 mCountRemaining += (Distance(mMarkPosition, mCurrentPosition));
167 mCurrentPosition = mMarkPosition;
168 }
169 }
170
171
172 /**
173 * Records current offset position in input stream. This allows us
174 * to back up to this point if the need should arise, such as when
175 * tokenization gets interrupted.
176 *
177 * @update gess 7/29/98
178 * @param
179 * @return
180 */
181 int32_t nsScanner::Mark() {
182 int32_t distance = 0;
183 if (mSlidingBuffer) {
184 nsScannerIterator oldStart;
185 mSlidingBuffer->BeginReading(oldStart);
186
187 distance = Distance(oldStart, mCurrentPosition);
188
189 mSlidingBuffer->DiscardPrefix(mCurrentPosition);
190 mSlidingBuffer->BeginReading(mCurrentPosition);
191 mMarkPosition = mCurrentPosition;
192 }
193
194 return distance;
195 }
196
197 /**
198 * Insert data to our underlying input buffer as
199 * if it were read from an input stream.
200 *
201 * @update harishd 01/12/99
202 * @return error code
203 */
204 bool nsScanner::UngetReadable(const nsAString& aBuffer) {
205 if (!mSlidingBuffer) {
206 return false;
207 }
208
209 mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
210 mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
211 mSlidingBuffer->EndReading(mEndPosition);
212
213 uint32_t length = aBuffer.Length();
214 mCountRemaining += length; // Ref. bug 117441
215 return true;
216 }
217
218 /**
219 * Append data to our underlying input buffer as
220 * if it were read from an input stream.
221 *
222 * @update gess4/3/98
223 * @return error code
224 */
225 nsresult nsScanner::Append(const nsAString& aBuffer) {
226 if (!AppendToBuffer(aBuffer))
227 return NS_ERROR_OUT_OF_MEMORY;
228 return NS_OK;
229 }
230
231 /**
232 *
233 *
234 * @update gess 5/21/98
235 * @param
236 * @return
237 */
238 nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen,
239 nsIRequest *aRequest)
240 {
241 nsresult res = NS_OK;
242 if (mUnicodeDecoder) {
243 int32_t unicharBufLen = 0;
244 mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
245 nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
246 NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
247 char16_t *unichars = buffer->DataStart();
248
249 int32_t totalChars = 0;
250 int32_t unicharLength = unicharBufLen;
251 int32_t errorPos = -1;
252
253 do {
254 int32_t srcLength = aLen;
255 res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
256
257 totalChars += unicharLength;
258 // Continuation of failure case
259 if(NS_FAILED(res)) {
260 // if we failed, we consume one byte, replace it with the replacement
261 // character and try the conversion again.
262
263 // This is only needed because some decoders don't follow the
264 // nsIUnicodeDecoder contract: they return a failure when *aDestLength
265 // is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177
266 if ((unichars + unicharLength) >= buffer->DataEnd()) {
267 NS_ERROR("Unexpected end of destination buffer");
268 break;
269 }
270
271 if (mReplacementCharacter == 0x0 && errorPos == -1) {
272 errorPos = totalChars;
273 }
274 unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
275 mUnicodeDecoder->GetCharacterForUnMapped() :
276 mReplacementCharacter;
277
278 unichars = unichars + unicharLength;
279 unicharLength = unicharBufLen - (++totalChars);
280
281 mUnicodeDecoder->Reset();
282
283 if(((uint32_t) (srcLength + 1)) > aLen) {
284 srcLength = aLen;
285 }
286 else {
287 ++srcLength;
288 }
289
290 aBuffer += srcLength;
291 aLen -= srcLength;
292 }
293 } while (NS_FAILED(res) && (aLen > 0));
294
295 buffer->SetDataLength(totalChars);
296 // Don't propagate return code of unicode decoder
297 // since it doesn't reflect on our success or failure
298 // - Ref. bug 87110
299 res = NS_OK;
300 if (!AppendToBuffer(buffer, aRequest, errorPos))
301 res = NS_ERROR_OUT_OF_MEMORY;
302 }
303 else {
304 NS_WARNING("No decoder found.");
305 res = NS_ERROR_FAILURE;
306 }
307
308 return res;
309 }
310
311 /**
312 * retrieve next char from scanners internal input stream
313 *
314 * @update gess 3/25/98
315 * @param
316 * @return error code reflecting read status
317 */
318 nsresult nsScanner::GetChar(char16_t& aChar) {
319 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
320 aChar = 0;
321 return kEOF;
322 }
323
324 aChar = *mCurrentPosition++;
325 --mCountRemaining;
326
327 return NS_OK;
328 }
329
330
331 /**
332 * peek ahead to consume next char from scanner's internal
333 * input buffer
334 *
335 * @update gess 3/25/98
336 * @param
337 * @return
338 */
339 nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset) {
340 aChar = 0;
341
342 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
343 return kEOF;
344 }
345
346 if (aOffset > 0) {
347 if (mCountRemaining <= aOffset)
348 return kEOF;
349
350 nsScannerIterator pos = mCurrentPosition;
351 pos.advance(aOffset);
352 aChar=*pos;
353 }
354 else {
355 aChar=*mCurrentPosition;
356 }
357
358 return NS_OK;
359 }
360
361 nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
362 {
363 if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
364 return kEOF;
365 }
366
367 nsScannerIterator start, end;
368
369 start = mCurrentPosition;
370
371 if ((int32_t)mCountRemaining <= aOffset) {
372 return kEOF;
373 }
374
375 if (aOffset > 0) {
376 start.advance(aOffset);
377 }
378
379 if (mCountRemaining < uint32_t(aNumChars + aOffset)) {
380 end = mEndPosition;
381 }
382 else {
383 end = start;
384 end.advance(aNumChars);
385 }
386
387 CopyUnicodeTo(start, end, aStr);
388
389 return NS_OK;
390 }
391
392
393 /**
394 * Skip whitespace on scanner input stream
395 *
396 * @update gess 3/25/98
397 * @param
398 * @return error status
399 */
400 nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped) {
401
402 if (!mSlidingBuffer) {
403 return kEOF;
404 }
405
406 char16_t theChar = 0;
407 nsresult result = Peek(theChar);
408
409 if (NS_FAILED(result)) {
410 return result;
411 }
412
413 nsScannerIterator current = mCurrentPosition;
414 bool done = false;
415 bool skipped = false;
416
417 while (!done && current != mEndPosition) {
418 switch(theChar) {
419 case '\n':
420 case '\r': ++aNewlinesSkipped;
421 case ' ' :
422 case '\t':
423 {
424 skipped = true;
425 char16_t thePrevChar = theChar;
426 theChar = (++current != mEndPosition) ? *current : '\0';
427 if ((thePrevChar == '\r' && theChar == '\n') ||
428 (thePrevChar == '\n' && theChar == '\r')) {
429 theChar = (++current != mEndPosition) ? *current : '\0'; // CRLF == LFCR => LF
430 }
431 }
432 break;
433 default:
434 done = true;
435 break;
436 }
437 }
438
439 if (skipped) {
440 SetPosition(current);
441 if (current == mEndPosition) {
442 result = kEOF;
443 }
444 }
445
446 return result;
447 }
448
449 /**
450 * Skip over chars as long as they equal given char
451 *
452 * @update gess 3/25/98
453 * @param
454 * @return error code
455 */
456 nsresult nsScanner::SkipOver(char16_t aSkipChar){
457
458 if (!mSlidingBuffer) {
459 return kEOF;
460 }
461
462 char16_t ch=0;
463 nsresult result=NS_OK;
464
465 while(NS_OK==result) {
466 result=Peek(ch);
467 if(NS_OK == result) {
468 if(ch!=aSkipChar) {
469 break;
470 }
471 GetChar(ch);
472 }
473 else break;
474 } //while
475 return result;
476
477 }
478
#if 0
// Disabled debugging aids. Each overload looks for a NUL character in the
// string that is not its last character; the bodies are empty, so they only
// serve as a place to put a breakpoint.
void DoErrTest(nsString& aString)
{
  const int32_t pos = aString.FindChar(0);
  if (kNotFound < pos) {
    if (aString.Length() - 1 != pos) {
    }
  }
}

void DoErrTest(nsCString& aString)
{
  const int32_t pos = aString.FindChar(0);
  if (kNotFound < pos) {
    if (aString.Length() - 1 != pos) {
    }
  }
}
#endif
496
497 /**
498 * Consume characters until you run into space, a '<', a '>', or a '/'.
499 *
500 * @param aString - receives new data from stream
501 * @return error code
502 */
503 nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString) {
504
505 if (!mSlidingBuffer) {
506 return kEOF;
507 }
508
509 char16_t theChar=0;
510 nsresult result=Peek(theChar);
511 nsScannerIterator current, end;
512 bool found=false;
513
514 current = mCurrentPosition;
515 end = mEndPosition;
516
517 // Loop until we find an illegal character. Everything is then appended
518 // later.
519 while(current != end && !found) {
520 theChar=*current;
521
522 switch(theChar) {
523 case '\n':
524 case '\r':
525 case ' ' :
526 case '\t':
527 case '\v':
528 case '\f':
529 case '<':
530 case '>':
531 case '/':
532 found = true;
533 break;
534
535 case '\0':
536 ReplaceCharacter(current, sInvalid);
537 break;
538
539 default:
540 break;
541 }
542
543 if (!found) {
544 ++current;
545 }
546 }
547
548 // Don't bother appending nothing.
549 if (current != mCurrentPosition) {
550 AppendUnicodeTo(mCurrentPosition, current, aString);
551 }
552
553 SetPosition(current);
554 if (current == end) {
555 result = kEOF;
556 }
557
558 //DoErrTest(aString);
559
560 return result;
561 }
562
563 /**
564 * Consume characters until you run into a char that's not valid in an
565 * entity name
566 *
567 * @param aString - receives new data from stream
568 * @return error code
569 */
570 nsresult nsScanner::ReadEntityIdentifier(nsString& aString) {
571
572 if (!mSlidingBuffer) {
573 return kEOF;
574 }
575
576 char16_t theChar=0;
577 nsresult result=Peek(theChar);
578 nsScannerIterator origin, current, end;
579 bool found=false;
580
581 origin = mCurrentPosition;
582 current = mCurrentPosition;
583 end = mEndPosition;
584
585 while(current != end) {
586
587 theChar=*current;
588 if(theChar) {
589 found=false;
590 switch(theChar) {
591 case '_':
592 case '-':
593 case '.':
594 // Don't allow ':' in entity names. See bug 23791
595 found = true;
596 break;
597 default:
598 found = ('a'<=theChar && theChar<='z') ||
599 ('A'<=theChar && theChar<='Z') ||
600 ('0'<=theChar && theChar<='9');
601 break;
602 }
603
604 if(!found) {
605 AppendUnicodeTo(mCurrentPosition, current, aString);
606 break;
607 }
608 }
609 ++current;
610 }
611
612 SetPosition(current);
613 if (current == end) {
614 AppendUnicodeTo(origin, current, aString);
615 return kEOF;
616 }
617
618 //DoErrTest(aString);
619
620 return result;
621 }
622
623 /**
624 * Consume digits
625 *
626 * @param aString - should contain digits
627 * @return error code
628 */
629 nsresult nsScanner::ReadNumber(nsString& aString,int32_t aBase) {
630
631 if (!mSlidingBuffer) {
632 return kEOF;
633 }
634
635 NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
636
637 char16_t theChar=0;
638 nsresult result=Peek(theChar);
639 nsScannerIterator origin, current, end;
640
641 origin = mCurrentPosition;
642 current = origin;
643 end = mEndPosition;
644
645 bool done = false;
646 while(current != end) {
647 theChar=*current;
648 if(theChar) {
649 done = (theChar < '0' || theChar > '9') &&
650 ((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
651 (theChar < 'a' || theChar > 'f')
652 :true);
653 if(done) {
654 AppendUnicodeTo(origin, current, aString);
655 break;
656 }
657 }
658 ++current;
659 }
660
661 SetPosition(current);
662 if (current == end) {
663 AppendUnicodeTo(origin, current, aString);
664 return kEOF;
665 }
666
667 //DoErrTest(aString);
668
669 return result;
670 }
671
672 /**
673 * Consume characters until you find the terminal char
674 *
675 * @update gess 3/25/98
676 * @param aString receives new data from stream
677 * @param addTerminal tells us whether to append terminal to aString
678 * @return error code
679 */
680 nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,
681 int32_t& aNewlinesSkipped,
682 bool& aHaveCR) {
683
684 aHaveCR = false;
685
686 if (!mSlidingBuffer) {
687 return kEOF;
688 }
689
690 char16_t theChar = 0;
691 nsresult result = Peek(theChar);
692
693 if (NS_FAILED(result)) {
694 return result;
695 }
696
697 nsScannerIterator origin, current, end;
698 bool done = false;
699
700 origin = mCurrentPosition;
701 current = origin;
702 end = mEndPosition;
703
704 bool haveCR = false;
705
706 while(!done && current != end) {
707 switch(theChar) {
708 case '\n':
709 case '\r':
710 {
711 ++aNewlinesSkipped;
712 char16_t thePrevChar = theChar;
713 theChar = (++current != end) ? *current : '\0';
714 if ((thePrevChar == '\r' && theChar == '\n') ||
715 (thePrevChar == '\n' && theChar == '\r')) {
716 theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
717 haveCR = true;
718 } else if (thePrevChar == '\r') {
719 // Lone CR becomes CRLF; callers should know to remove extra CRs
720 AppendUnicodeTo(origin, current, aString);
721 aString.writable().Append(char16_t('\n'));
722 origin = current;
723 haveCR = true;
724 }
725 }
726 break;
727 case ' ' :
728 case '\t':
729 theChar = (++current != end) ? *current : '\0';
730 break;
731 default:
732 done = true;
733 AppendUnicodeTo(origin, current, aString);
734 break;
735 }
736 }
737
738 SetPosition(current);
739 if (current == end) {
740 AppendUnicodeTo(origin, current, aString);
741 result = kEOF;
742 }
743
744 aHaveCR = haveCR;
745 return result;
746 }
747
748 //XXXbz callers of this have to manage their lone '\r' themselves if they want
749 //it to work. Good thing they're all in view-source and it deals.
750 nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart,
751 nsScannerIterator& aEnd,
752 int32_t& aNewlinesSkipped) {
753
754 if (!mSlidingBuffer) {
755 return kEOF;
756 }
757
758 char16_t theChar = 0;
759 nsresult result = Peek(theChar);
760
761 if (NS_FAILED(result)) {
762 return result;
763 }
764
765 nsScannerIterator origin, current, end;
766 bool done = false;
767
768 origin = mCurrentPosition;
769 current = origin;
770 end = mEndPosition;
771
772 while(!done && current != end) {
773 switch(theChar) {
774 case '\n':
775 case '\r': ++aNewlinesSkipped;
776 case ' ' :
777 case '\t':
778 {
779 char16_t thePrevChar = theChar;
780 theChar = (++current != end) ? *current : '\0';
781 if ((thePrevChar == '\r' && theChar == '\n') ||
782 (thePrevChar == '\n' && theChar == '\r')) {
783 theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
784 }
785 }
786 break;
787 default:
788 done = true;
789 aStart = origin;
790 aEnd = current;
791 break;
792 }
793 }
794
795 SetPosition(current);
796 if (current == end) {
797 aStart = origin;
798 aEnd = current;
799 result = kEOF;
800 }
801
802 return result;
803 }
804
805 /**
806 * Consume characters until you encounter one contained in given
807 * input set.
808 *
809 * @update gess 3/25/98
810 * @param aString will contain the result of this method
811 * @param aTerminalSet is an ordered string that contains
812 * the set of INVALID characters
813 * @return error code
814 */
815 nsresult nsScanner::ReadUntil(nsAString& aString,
816 const nsReadEndCondition& aEndCondition,
817 bool addTerminal)
818 {
819 if (!mSlidingBuffer) {
820 return kEOF;
821 }
822
823 nsScannerIterator origin, current;
824 const char16_t* setstart = aEndCondition.mChars;
825 const char16_t* setcurrent;
826
827 origin = mCurrentPosition;
828 current = origin;
829
830 char16_t theChar=0;
831 nsresult result=Peek(theChar);
832
833 if (NS_FAILED(result)) {
834 return result;
835 }
836
837 while (current != mEndPosition) {
838 theChar = *current;
839 if (theChar == '\0') {
840 ReplaceCharacter(current, sInvalid);
841 theChar = sInvalid;
842 }
843
844 // Filter out completely wrong characters
845 // Check if all bits are in the required area
846 if(!(theChar & aEndCondition.mFilter)) {
847 // They were. Do a thorough check.
848
849 setcurrent = setstart;
850 while (*setcurrent) {
851 if (*setcurrent == theChar) {
852 if(addTerminal)
853 ++current;
854 AppendUnicodeTo(origin, current, aString);
855 SetPosition(current);
856
857 //DoErrTest(aString);
858
859 return NS_OK;
860 }
861 ++setcurrent;
862 }
863 }
864
865 ++current;
866 }
867
868 // If we are here, we didn't find any terminator in the string and
869 // current = mEndPosition
870 SetPosition(current);
871 AppendUnicodeTo(origin, current, aString);
872 return kEOF;
873 }
874
875 nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,
876 const nsReadEndCondition& aEndCondition,
877 bool addTerminal)
878 {
879 if (!mSlidingBuffer) {
880 return kEOF;
881 }
882
883 nsScannerIterator origin, current;
884 const char16_t* setstart = aEndCondition.mChars;
885 const char16_t* setcurrent;
886
887 origin = mCurrentPosition;
888 current = origin;
889
890 char16_t theChar=0;
891 nsresult result=Peek(theChar);
892
893 if (NS_FAILED(result)) {
894 return result;
895 }
896
897 while (current != mEndPosition) {
898 theChar = *current;
899 if (theChar == '\0') {
900 ReplaceCharacter(current, sInvalid);
901 theChar = sInvalid;
902 }
903
904 // Filter out completely wrong characters
905 // Check if all bits are in the required area
906 if(!(theChar & aEndCondition.mFilter)) {
907 // They were. Do a thorough check.
908
909 setcurrent = setstart;
910 while (*setcurrent) {
911 if (*setcurrent == theChar) {
912 if(addTerminal)
913 ++current;
914 AppendUnicodeTo(origin, current, aString);
915 SetPosition(current);
916
917 //DoErrTest(aString);
918
919 return NS_OK;
920 }
921 ++setcurrent;
922 }
923 }
924
925 ++current;
926 }
927
928 // If we are here, we didn't find any terminator in the string and
929 // current = mEndPosition
930 SetPosition(current);
931 AppendUnicodeTo(origin, current, aString);
932 return kEOF;
933 }
934
935 nsresult nsScanner::ReadUntil(nsScannerIterator& aStart,
936 nsScannerIterator& aEnd,
937 const nsReadEndCondition &aEndCondition,
938 bool addTerminal)
939 {
940 if (!mSlidingBuffer) {
941 return kEOF;
942 }
943
944 nsScannerIterator origin, current;
945 const char16_t* setstart = aEndCondition.mChars;
946 const char16_t* setcurrent;
947
948 origin = mCurrentPosition;
949 current = origin;
950
951 char16_t theChar=0;
952 nsresult result=Peek(theChar);
953
954 if (NS_FAILED(result)) {
955 aStart = aEnd = current;
956 return result;
957 }
958
959 while (current != mEndPosition) {
960 theChar = *current;
961 if (theChar == '\0') {
962 ReplaceCharacter(current, sInvalid);
963 theChar = sInvalid;
964 }
965
966 // Filter out completely wrong characters
967 // Check if all bits are in the required area
968 if(!(theChar & aEndCondition.mFilter)) {
969 // They were. Do a thorough check.
970 setcurrent = setstart;
971 while (*setcurrent) {
972 if (*setcurrent == theChar) {
973 if(addTerminal)
974 ++current;
975 aStart = origin;
976 aEnd = current;
977 SetPosition(current);
978
979 return NS_OK;
980 }
981 ++setcurrent;
982 }
983 }
984
985 ++current;
986 }
987
988 // If we are here, we didn't find any terminator in the string and
989 // current = mEndPosition
990 SetPosition(current);
991 aStart = origin;
992 aEnd = current;
993 return kEOF;
994 }
995
996 /**
997 * Consumes chars until you see the given terminalChar
998 *
999 * @update gess 3/25/98
1000 * @param
1001 * @return error code
1002 */
1003 nsresult nsScanner::ReadUntil(nsAString& aString,
1004 char16_t aTerminalChar,
1005 bool addTerminal)
1006 {
1007 if (!mSlidingBuffer) {
1008 return kEOF;
1009 }
1010
1011 nsScannerIterator origin, current;
1012
1013 origin = mCurrentPosition;
1014 current = origin;
1015
1016 char16_t theChar;
1017 nsresult result = Peek(theChar);
1018
1019 if (NS_FAILED(result)) {
1020 return result;
1021 }
1022
1023 while (current != mEndPosition) {
1024 theChar = *current;
1025 if (theChar == '\0') {
1026 ReplaceCharacter(current, sInvalid);
1027 theChar = sInvalid;
1028 }
1029
1030 if (aTerminalChar == theChar) {
1031 if(addTerminal)
1032 ++current;
1033 AppendUnicodeTo(origin, current, aString);
1034 SetPosition(current);
1035 return NS_OK;
1036 }
1037 ++current;
1038 }
1039
1040 // If we are here, we didn't find any terminator in the string and
1041 // current = mEndPosition
1042 AppendUnicodeTo(origin, current, aString);
1043 SetPosition(current);
1044 return kEOF;
1045
1046 }
1047
1048 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
1049 {
1050 aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
1051 }
1052
1053 void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
1054 {
1055 aPosition = mCurrentPosition;
1056 }
1057
1058 void nsScanner::EndReading(nsScannerIterator& aPosition)
1059 {
1060 aPosition = mEndPosition;
1061 }
1062
1063 void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse)
1064 {
1065 if (mSlidingBuffer) {
1066 #ifdef DEBUG
1067 uint32_t origRemaining = mCountRemaining;
1068 #endif
1069
1070 if (aReverse) {
1071 mCountRemaining += (Distance(aPosition, mCurrentPosition));
1072 }
1073 else {
1074 mCountRemaining -= (Distance(mCurrentPosition, aPosition));
1075 }
1076
1077 NS_ASSERTION((mCountRemaining >= origRemaining && aReverse) ||
1078 (mCountRemaining <= origRemaining && !aReverse),
1079 "Improper use of nsScanner::SetPosition. Make sure to set the"
1080 " aReverse parameter correctly");
1081
1082 mCurrentPosition = aPosition;
1083 if (aTerminate && (mCurrentPosition == mEndPosition)) {
1084 mMarkPosition = mCurrentPosition;
1085 mSlidingBuffer->DiscardPrefix(mCurrentPosition);
1086 }
1087 }
1088 }
1089
1090 void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
1091 char16_t aChar)
1092 {
1093 if (mSlidingBuffer) {
1094 mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
1095 }
1096 }
1097
1098 bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
1099 nsIRequest *aRequest,
1100 int32_t aErrorPos)
1101 {
1102 uint32_t countRemaining = mCountRemaining;
1103 if (!mSlidingBuffer) {
1104 mSlidingBuffer = new nsScannerString(aBuf);
1105 if (!mSlidingBuffer)
1106 return false;
1107 mSlidingBuffer->BeginReading(mCurrentPosition);
1108 mMarkPosition = mCurrentPosition;
1109 mSlidingBuffer->EndReading(mEndPosition);
1110 mCountRemaining = aBuf->DataLength();
1111 }
1112 else {
1113 mSlidingBuffer->AppendBuffer(aBuf);
1114 if (mCurrentPosition == mEndPosition) {
1115 mSlidingBuffer->BeginReading(mCurrentPosition);
1116 }
1117 mSlidingBuffer->EndReading(mEndPosition);
1118 mCountRemaining += aBuf->DataLength();
1119 }
1120
1121 if (aErrorPos != -1 && !mHasInvalidCharacter) {
1122 mHasInvalidCharacter = true;
1123 mFirstInvalidPosition = mCurrentPosition;
1124 mFirstInvalidPosition.advance(countRemaining + aErrorPos);
1125 }
1126
1127 if (mFirstNonWhitespacePosition == -1) {
1128 nsScannerIterator iter(mCurrentPosition);
1129 nsScannerIterator end(mEndPosition);
1130
1131 while (iter != end) {
1132 if (!nsCRT::IsAsciiSpace(*iter)) {
1133 mFirstNonWhitespacePosition = Distance(mCurrentPosition, iter);
1134
1135 break;
1136 }
1137
1138 ++iter;
1139 }
1140 }
1141 return true;
1142 }
1143
1144 /**
1145 * call this to copy bytes out of the scanner that have not yet been consumed
1146 * by the tokenization process.
1147 *
1148 * @update gess 5/12/98
1149 * @param aCopyBuffer is where the scanner buffer will be copied to
1150 * @return nada
1151 */
1152 void nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
1153 if (!mSlidingBuffer) {
1154 aCopyBuffer.Truncate();
1155 return;
1156 }
1157
1158 nsScannerIterator start, end;
1159 start = mCurrentPosition;
1160 end = mEndPosition;
1161
1162 CopyUnicodeTo(start, end, aCopyBuffer);
1163 }
1164
1165 /**
1166 * Retrieve the name of the file that the scanner is reading from.
1167 * In some cases, it's just a given name, because the scanner isn't
1168 * really reading from a file.
1169 *
1170 * @update gess 5/12/98
1171 * @return
1172 */
1173 nsString& nsScanner::GetFilename(void) {
1174 return mFilename;
1175 }
1176
1177 /**
1178 * Conduct self test. Actually, selftesting for this class
1179 * occurs in the parser selftest.
1180 *
1181 * @update gess 3/25/98
1182 * @param
1183 * @return
1184 */
1185
1186 void nsScanner::SelfTest(void) {
1187 #ifdef _DEBUG
1188 #endif
1189 }
1190
1191 void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter)
1192 {
1193 mReplacementCharacter = aReplacementCharacter;
1194
1195 if (mHasInvalidCharacter) {
1196 ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);
1197 }
1198 }
1199

mercurial