parser/htmlparser/src/nsScanner.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 /**
     8  * MODULE NOTES:
     9  * @update  gess 4/1/98
    10  * 
    11  * The scanner is a low-level service class that knows
    12  * how to consume characters out of an (internal) stream.
    13  * This class also offers a series of utility methods
    14  * that most tokenizers want, such as ReadUntil()
    15  * and SkipWhitespace().
    16  */
    19 #ifndef SCANNER
    20 #define SCANNER
    22 #include "nsCOMPtr.h"
    23 #include "nsString.h"
    24 #include "nsIParser.h"
    25 #include "nsIUnicodeDecoder.h"
    26 #include "nsScannerString.h"
    28 class nsParser;
    30 class nsReadEndCondition {
       // End condition handed to the nsScanner::ReadUntil() overloads in this
       // header that take a `const nsReadEndCondition&`.
    31 public:
         // NOTE(review): presumably the terminating chars given to the
         // constructor -- confirm in nsScanner.cpp
    32   const char16_t *mChars;
         // NOTE(review): purpose is not visible in this header; presumably
         // derived from mChars by the constructor -- confirm in nsScanner.cpp
    33   char16_t mFilter;
         // explicit: a bare char16_t pointer is never converted implicitly
         // into an end condition.
    34   explicit nsReadEndCondition(const char16_t* aTerminateChars);
    35 private:
         // Copy operations are declared private so code outside the class
         // cannot copy or assign an end condition.
    36   nsReadEndCondition(const nsReadEndCondition& aOther); // No copying
    37   void operator=(const nsReadEndCondition& aOther); // No assigning
    38 };
    40 class nsScanner {
       // Low-level service class that consumes characters out of an internal
       // buffer and declares the read/peek/skip helpers tokenizers call.
    41   public:
    43       /**
    44        *  Use this constructor for the XML fragment parsing case
    45        */
    46       nsScanner(const nsAString& anHTMLString);
    48       /**
    49        *  Use this constructor if you want i/o to be based on
    50        *  a file (therefore a stream) or just data you provide via Append().
    51        */
    52       nsScanner(nsString& aFilename, bool aCreateStream);
    54       ~nsScanner();
    56       /**
    57        *  retrieve next char from internal input stream
    58        *
    59        *  @update  gess 3/25/98
    60        *  @param   ch is the char to accept new value
    61        *  @return  error code reflecting read status
    62        */
    63       nsresult GetChar(char16_t& ch);
    65       /**
    66        *  peek ahead at a char in the scanner's internal
    67        *  input buffer
    68        *
    69        *  @update  gess 3/25/98
    70        *  @param   ch is the char to accept new value
              *  @param   aOffset selects which char to peek at (defaults to 0);
              *           NOTE(review): presumably counted from the current
              *           read position -- confirm in nsScanner.cpp
    71        *  @return  error code reflecting read status
    72        */
    73       nsresult Peek(char16_t& ch, uint32_t aOffset=0);
             /**
              *  Overload of Peek() taking a string (aStr), a char count
              *  (aNumChars) and an offset (aOffset, default 0).
              *  NOTE(review): presumably copies aNumChars chars starting at
              *  aOffset into aStr -- confirm in nsScanner.cpp
              *
              *  @return  error code
              */
    75       nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
    77       /**
    78        *  Skip over chars as long as they equal given char
    79        *
    80        *  @update  gess 3/25/98
    81        *  @param   aSkipChar char to be skipped
    82        *  @return  error code
    83        */
    84       nsresult SkipOver(char16_t aSkipChar);
    86       /**
    87        *  Skip whitespace on scanner input stream
    88        *
    89        *  @update  gess 3/25/98
              *  @param   aNewlinesSkipped NOTE(review): presumably receives the
              *           number of newlines skipped -- confirm in nsScanner.cpp
    90        *  @return  error status
    91        */
    92       nsresult SkipWhitespace(int32_t& aNewlinesSkipped);
    94       /**
    95        *  Consume characters until you run into space, a '<', a '>', or a '/'.
    96        *
    97        *  @param   aString - receives new data from stream
    98        *  @return  error code
    99        */
   100       nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
   102       /**
   103        *  Consume characters until you run into a char that's not valid in an
   104        *  entity name
   105        *
   106        *  @param   aString - receives new data from stream
   107        *  @return  error code
   108        */
   109       nsresult ReadEntityIdentifier(nsString& aString);
             // NOTE(review): undocumented in this header; presumably reads the
             // digits of a number in base aBase into aString -- confirm in
             // nsScanner.cpp
   110       nsresult ReadNumber(nsString& aString,int32_t aBase);
             // NOTE(review): the two ReadWhitespace() overloads are undocumented
             // in this header; presumably they read a run of whitespace into
             // aString / the [aStart, aEnd) iterators -- confirm in nsScanner.cpp
   111       nsresult ReadWhitespace(nsScannerSharedSubstring& aString,
   112                               int32_t& aNewlinesSkipped,
   113                               bool& aHaveCR);
   114       nsresult ReadWhitespace(nsScannerIterator& aStart,
   115                               nsScannerIterator& aEnd,
   116                               int32_t& aNewlinesSkipped);
   118       /**
   119        *  Consume characters until you find the terminal char
   120        *
   121        *  @update  gess 3/25/98
   122        *  @param   aString receives new data from stream
   123        *  @param   aTerminal contains terminating char
   124        *  @param   addTerminal tells us whether to append terminal to aString
   125        *  @return  error code
   126        */
   127       nsresult ReadUntil(nsAString& aString,
   128                          char16_t aTerminal,
   129                          bool addTerminal);
   131       /**
   132        *  Consume characters until you find one contained in given
   133        *  terminal set.
   134        *
   135        *  @update  gess 3/25/98
   136        *  @param   aString receives new data from stream
   137        *  @param   aEndCondition contains set of terminating chars
   138        *  @param   addTerminal tells us whether to append terminal to aString
   139        *  @return  error code
   140        */
   141       nsresult ReadUntil(nsAString& aString,
   142                          const nsReadEndCondition& aEndCondition,
   143                          bool addTerminal);
             // NOTE(review): the next two overloads take the same aEndCondition
             // and addTerminal arguments; presumably only the way the result is
             // delivered differs (shared substring / iterator pair) -- confirm
             // in nsScanner.cpp
   145       nsresult ReadUntil(nsScannerSharedSubstring& aString,
   146                          const nsReadEndCondition& aEndCondition,
   147                          bool addTerminal);
   149       nsresult ReadUntil(nsScannerIterator& aStart,
   150                          nsScannerIterator& aEnd,
   151                          const nsReadEndCondition& aEndCondition,
   152                          bool addTerminal);
   154       /**
   155        *  Records current offset position in input stream. This allows us
   156        *  to back up to this point if the need should arise, such as when
   157        *  tokenization gets interrupted.
   158        *
   159        *  @update  gess 5/12/98
   160        *  @param   none
   161        *  @return  int32_t; NOTE(review): meaning not stated here -- confirm in nsScanner.cpp
   162        */
   163       int32_t Mark(void);
   165       /**
   166        *  Resets current offset position of input stream to marked position.
   167        *  This allows us to back up to this point if the need should arise,
   168        *  such as when tokenization gets interrupted.
   169        *  NOTE: IT IS REALLY BAD FORM TO CALL THIS WITHOUT CALLING Mark() FIRST!
   170        *
   171        *  @update  gess 5/12/98
   172        *  @param   none
   173        *  @return  nothing
   174        */
   175       void RewindToMark(void);
   178       /**
   179        *  NOTE(review): undocumented in this header; presumably pushes
   180        *  aBuffer back onto the scanner's input -- confirm in nsScanner.cpp
   181        *  @update  harishd 01/12/99
   182        *  @param   aBuffer the string handed to the scanner
   183        *  @return  bool; NOTE(review): presumably true on success -- confirm
   184        */
   185       bool UngetReadable(const nsAString& aBuffer);
   187       /**
   188        *  Append string data to the scanner (the "data you provide via
   189        *  Append()" case mentioned on the two-argument constructor).
   190        *  @update  gess 5/13/98
   191        *  @param   aBuffer the data to append
   192        *  @return  error code
   193        */
   194       nsresult Append(const nsAString& aBuffer);
   196       /**
   197        *  Append raw char data to the scanner.
   198        *
   199        *  @update  gess 5/21/98
   200        *  @param   aBuffer points to the data
              *  @param   aLen is the length of aBuffer
              *  @param   aRequest NOTE(review): presumably the request the data
              *           arrived on -- confirm in nsScanner.cpp
   201        *  @return  error code
   202        */
   203       nsresult Append(const char* aBuffer, uint32_t aLen,
   204                       nsIRequest *aRequest);
   206       /**
   207        *  Call this to copy bytes out of the scanner that have not yet been consumed
   208        *  by the tokenization process.
   209        *
   210        *  @update  gess 5/12/98
   211        *  @param   aCopyBuffer is where the scanner buffer will be copied to
   212        *  @return  nada
   213        */
   214       void CopyUnusedData(nsString& aCopyBuffer);
   216       /**
   217        *  Retrieve the name of the file that the scanner is reading from.
   218        *  In some cases, it's just a given name, because the scanner isn't
   219        *  really reading from a file.
   220        *
   221        *  @update  gess 5/12/98
   222        *  @return  reference to the filename string
   223        */
   224       nsString& GetFilename(void);
   226       static void SelfTest();
   228       /**
   229        *  Use this setter to change the scanner's unicode decoder
   230        *
   231        *  @update  ftang 3/02/99
   232        *  @param   aCharset a normalized (alias resolved) charset name
   233        *  @param   aSource where the charset info came from
   234        *  @return  error code
   235        */
   236       nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
   238       void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
   239       void CurrentPosition(nsScannerIterator& aPosition);
   240       void EndReading(nsScannerIterator& aPosition);
   241       void SetPosition(nsScannerIterator& aPosition,
   242                        bool aTruncate = false,
   243                        bool aReverse = false);
   244       void ReplaceCharacter(nsScannerIterator& aPosition,
   245                             char16_t aChar);
   247       /**
   248        * Accessors for the mIncremental flag:
   249        * IsIncremental() returns it, SetIncremental() assigns it.
   250        *
   251        * @update  gess4/3/98
   252        */
   253       bool      IsIncremental(void) {return mIncremental;}
   254       void      SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
   256       /**
   257        * Return the position of the first non-whitespace
   258        * character. This is only reliable before consumers start
   259        * reading from this scanner.
   260        */
   261       int32_t FirstNonWhitespacePosition()
   262       {
   263         return mFirstNonWhitespacePosition;
   264       }
   266       /**
   267        * Override replacement character used by nsIUnicodeDecoder.
   268        * Default behavior is that it uses nsIUnicodeDecoder's mapping.
   269        *
   270        * @param aReplacementCharacter the replacement character
   271        *        XML (expat) parser uses 0xffff
   272        */
   273       void OverrideReplacementCharacter(char16_t aReplacementCharacter);
   275   protected:
             /**
              * Append an already-allocated buffer to the scanner.
              * NOTE(review): defined out of line; ownership of the buffer and
              * the meaning of aErrorPos (default -1) are not visible in this
              * header -- confirm in nsScanner.cpp
              *
              * @return bool status
              */
   277       bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1);
             /**
              * Convenience overload: allocates a buffer from aStr with
              * nsScannerString::AllocBufferFromString() and appends it with a
              * null request.
              *
              * @param aStr the string to append
              * @return false if the buffer could not be allocated; otherwise
              *         the status returned by the buffer-taking overload
              */
   278       bool AppendToBuffer(const nsAString& aStr)
   279       {
   280         nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
   281         if (!buf)
   282           return false;
               // Propagate the callee's status instead of assuming success.
   283         return AppendToBuffer(buf, nullptr);
   285       }
   287       nsScannerString*             mSlidingBuffer;
   288       nsScannerIterator            mCurrentPosition; // The position we will next read from in the scanner buffer
   289       nsScannerIterator            mMarkPosition;    // The position last marked (we may rewind to here)
   290       nsScannerIterator            mEndPosition;     // The current end of the scanner buffer
   291       nsScannerIterator            mFirstInvalidPosition; // The position of the first invalid character that was detected
   292       nsString        mFilename;
   293       uint32_t        mCountRemaining; // The number of bytes still to be read
   294                                        // from the scanner buffer
   295       bool            mIncremental;
   296       bool            mHasInvalidCharacter;
   297       char16_t       mReplacementCharacter;
   298       int32_t         mFirstNonWhitespacePosition;
   299       int32_t         mCharsetSource;
   300       nsCString       mCharset;
   301       nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
   303   private:
   304       nsScanner &operator =(const nsScanner &); // Not implemented.
   305 };
   307 #endif

mercurial