parser/htmlparser/src/nsScanner.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/htmlparser/src/nsScanner.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,309 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +
    1.10 +/**
    1.11 + * MODULE NOTES:
    1.12 + * @update  gess 4/1/98
    1.13 + * 
    1.14 + * The scanner is a low-level service class that knows
    1.15 + * how to consume characters out of an (internal) stream.
    1.16 + * This class also offers a series of utility methods
    1.17 + * that most tokenizers want, such as ReadUntil()
    1.18 + * and SkipWhitespace().
    1.19 + */
    1.20 +
    1.21 +
    1.22 +#ifndef SCANNER
    1.23 +#define SCANNER
    1.24 +
    1.25 +#include "nsCOMPtr.h"
    1.26 +#include "nsString.h"
    1.27 +#include "nsIParser.h"
    1.28 +#include "nsIUnicodeDecoder.h"
    1.29 +#include "nsScannerString.h"
    1.30 +
    1.31 +class nsParser;
    1.32 +
    1.33 +class nsReadEndCondition { // Terminator description passed by const reference to nsScanner::ReadUntil()
    1.34 +public:
    1.35 +  const char16_t *mChars;   // presumably the terminator characters given to the constructor -- TODO confirm
    1.36 +  char16_t mFilter;         // NOTE(review): looks like a precomputed quick-reject value derived from mChars -- confirm against the .cpp
    1.37 +  explicit nsReadEndCondition(const char16_t* aTerminateChars);
    1.38 +private:
    1.39 +  nsReadEndCondition(const nsReadEndCondition& aOther); // Declared private: no copying by outside code
    1.40 +  void operator=(const nsReadEndCondition& aOther); // Declared private: no assigning by outside code
    1.41 +};
    1.42 +
    1.43 +class nsScanner {
    1.44 +  public:
    1.45 +
    1.46 +      /**
    1.47 +       *  Use this constructor for the XML fragment parsing case
    1.48 +       */
    1.49 +      nsScanner(const nsAString& anHTMLString);
    1.50 +
    1.51 +      /**
    1.52 +       *  Use this constructor if you want i/o to be based on 
    1.53 +       *  a file (therefore a stream) or just data you provide via Append().
    1.54 +       */
    1.55 +      nsScanner(nsString& aFilename, bool aCreateStream);
    1.56 +
    1.57 +      ~nsScanner();
    1.58 +
    1.59 +      /**
    1.60 +       *  retrieve next char from internal input stream
    1.61 +       *  
    1.62 +       *  @update  gess 3/25/98
    1.63 +       *  @param   ch receives the char that was read
    1.64 +       *  @return  error code reflecting read status
    1.65 +       */
    1.66 +      nsresult GetChar(char16_t& ch);
    1.67 +
    1.68 +      /**
    1.69 +       *  peek ahead to consume next char from scanner's internal
    1.70 +       *  input buffer (aOffset presumably selects how far ahead -- TODO confirm)
    1.71 +       *  
    1.72 +       *  @update  gess 3/25/98
    1.73 +       *  @param   ch receives the char that was peeked
    1.74 +       *  @return  error code reflecting read status
    1.75 +       */
    1.76 +      nsresult Peek(char16_t& ch, uint32_t aOffset=0);
    1.77 +
    1.78 +      nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
    1.79 +
    1.80 +      /**
    1.81 +       *  Skip over chars as long as they equal given char
    1.82 +       *  
    1.83 +       *  @update  gess 3/25/98
    1.84 +       *  @param   aSkipChar char to be skipped
    1.85 +       *  @return  error code
    1.86 +       */
    1.87 +      nsresult SkipOver(char16_t aSkipChar);
    1.88 +
    1.89 +      /**
    1.90 +       *  Skip whitespace on scanner input stream
    1.91 +       *  @param   aNewlinesSkipped presumably receives the number of newlines skipped -- TODO confirm
    1.92 +       *  @update  gess 3/25/98
    1.93 +       *  @return  error status
    1.94 +       */
    1.95 +      nsresult SkipWhitespace(int32_t& aNewlinesSkipped);
    1.96 +
    1.97 +      /**
    1.98 +       *  Consume characters until you run into space, a '<', a '>', or a '/'.
    1.99 +       *  
   1.100 +       *  @param   aString - receives new data from stream
   1.101 +       *  @return  error code
   1.102 +       */
   1.103 +      nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
   1.104 +
   1.105 +      /**
   1.106 +       *  Consume characters until you run into a char that's not valid in an
   1.107 +       *  entity name
   1.108 +       *  
   1.109 +       *  @param   aString - receives new data from stream
   1.110 +       *  @return  error code
   1.111 +       */
   1.112 +      nsresult ReadEntityIdentifier(nsString& aString);
   1.113 +      nsresult ReadNumber(nsString& aString,int32_t aBase);
   1.114 +      nsresult ReadWhitespace(nsScannerSharedSubstring& aString, 
   1.115 +                              int32_t& aNewlinesSkipped,
   1.116 +                              bool& aHaveCR);
   1.117 +      nsresult ReadWhitespace(nsScannerIterator& aStart, 
   1.118 +                              nsScannerIterator& aEnd,
   1.119 +                              int32_t& aNewlinesSkipped);
   1.120 +
   1.121 +      /**
   1.122 +       *  Consume characters until you find the terminal char
   1.123 +       *  
   1.124 +       *  @update  gess 3/25/98
   1.125 +       *  @param   aString receives new data from stream
   1.126 +       *  @param   aTerminal contains terminating char
   1.127 +       *  @param   addTerminal tells us whether to append terminal to aString
   1.128 +       *  @return  error code
   1.129 +       */
   1.130 +      nsresult ReadUntil(nsAString& aString,
   1.131 +                         char16_t aTerminal,
   1.132 +                         bool addTerminal);
   1.133 +
   1.134 +      /**
   1.135 +       *  Consume characters until you find one contained in given
   1.136 +       *  terminal set.
   1.137 +       *  
   1.138 +       *  @update  gess 3/25/98
   1.139 +       *  @param   aString receives new data from stream
   1.140 +       *  @param   aEndCondition contains set of terminating chars
   1.141 +       *  @param   addTerminal tells us whether to append terminal to aString
   1.142 +       *  @return  error code
   1.143 +       */
   1.144 +      nsresult ReadUntil(nsAString& aString,
   1.145 +                         const nsReadEndCondition& aEndCondition, 
   1.146 +                         bool addTerminal);
   1.147 +
   1.148 +      nsresult ReadUntil(nsScannerSharedSubstring& aString,
   1.149 +                         const nsReadEndCondition& aEndCondition,
   1.150 +                         bool addTerminal);
   1.151 +
   1.152 +      nsresult ReadUntil(nsScannerIterator& aStart,
   1.153 +                         nsScannerIterator& aEnd,
   1.154 +                         const nsReadEndCondition& aEndCondition, 
   1.155 +                         bool addTerminal);
   1.156 +
   1.157 +      /**
   1.158 +       *  Records current offset position in input stream. This allows us
   1.159 +       *  to back up to this point if the need should arise, such as when
   1.160 +       *  tokenization gets interrupted.
   1.161 +       *  
   1.162 +       *  @update  gess 5/12/98
   1.163 +       *  @param   (none)
   1.164 +       *  @return  an int32_t whose meaning is not visible in this header -- TODO confirm against the .cpp
   1.165 +       */
   1.166 +      int32_t Mark(void);
   1.167 +
   1.168 +      /**
   1.169 +       *  Resets current offset position of input stream to marked position. 
   1.170 +       *  This allows us to back up to this point if the need should arise, 
   1.171 +       *  such as when tokenization gets interrupted.
   1.172 +       *  NOTE: IT IS REALLY BAD FORM TO CALL RewindToMark() WITHOUT CALLING Mark() FIRST!
   1.173 +       *  
   1.174 +       *  @update  gess 5/12/98
   1.175 +       *  @param   (none)
   1.176 +       *  @return  (nothing)
   1.177 +       */
   1.178 +      void RewindToMark(void);
   1.179 +
   1.180 +
   1.181 +      /**
   1.182 +       *  Presumably pushes aBuffer back onto the scanner's input so that it
   1.183 +       *  is read again -- TODO confirm against the .cpp
   1.184 +       *  @update  harishd 01/12/99
   1.185 +       *  @param   aBuffer the string handed back to the scanner
   1.186 +       *  @return  bool; presumably true on success -- TODO confirm
   1.187 +       */
   1.188 +      bool UngetReadable(const nsAString& aBuffer);
   1.189 +
   1.190 +      /**
   1.191 +       *  Supplies additional string data to the scanner
   1.192 +       *  
   1.193 +       *  @update  gess 5/13/98
   1.194 +       *  @param   aBuffer the string data to append
   1.195 +       *  @return  error code
   1.196 +       */
   1.197 +      nsresult Append(const nsAString& aBuffer);
   1.198 +
   1.199 +      /**
   1.200 +       *  Supplies additional data to the scanner from a char buffer
   1.201 +       *  NOTE(review): presumably converted via mUnicodeDecoder -- confirm
   1.202 +       *  @update  gess 5/21/98
   1.203 +       *  @param   aBuffer, aLen the data and its length; aRequest the associated request
   1.204 +       *  @return  error code
   1.205 +       */
   1.206 +      nsresult Append(const char* aBuffer, uint32_t aLen,
   1.207 +                      nsIRequest *aRequest);
   1.208 +
   1.209 +      /**
   1.210 +       *  Call this to copy bytes out of the scanner that have not yet been consumed
   1.211 +       *  by the tokenization process.
   1.212 +       *  
   1.213 +       *  @update  gess 5/12/98
   1.214 +       *  @param   aCopyBuffer is where the scanner buffer will be copied to
   1.215 +       *  @return  nada
   1.216 +       */
   1.217 +      void CopyUnusedData(nsString& aCopyBuffer);
   1.218 +
   1.219 +      /**
   1.220 +       *  Retrieve the name of the file that the scanner is reading from.
   1.221 +       *  In some cases, it's just a given name, because the scanner isn't
   1.222 +       *  really reading from a file.
   1.223 +       *  
   1.224 +       *  @update  gess 5/12/98
   1.225 +       *  @return  the filename, as a non-const nsString reference
   1.226 +       */
   1.227 +      nsString& GetFilename(void);
   1.228 +
   1.229 +      static void SelfTest();
   1.230 +
   1.231 +      /**
   1.232 +       *  Use this setter to change the scanner's unicode decoder
   1.233 +       *
   1.234 +       *  @update  ftang 3/02/99
   1.235 +       *  @param   aCharset a normalized (alias resolved) charset name
   1.236 +       *  @param   aSource where the charset info came from
   1.237 +       *  @return  error code
   1.238 +       */
   1.239 +      nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
   1.240 +
   1.241 +      void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
   1.242 +      void CurrentPosition(nsScannerIterator& aPosition);
   1.243 +      void EndReading(nsScannerIterator& aPosition);
   1.244 +      void SetPosition(nsScannerIterator& aPosition,
   1.245 +                       bool aTruncate = false,
   1.246 +                       bool aReverse = false);
   1.247 +      void ReplaceCharacter(nsScannerIterator& aPosition,
   1.248 +                            char16_t aChar);
   1.249 +
   1.250 +      /**
   1.251 +       * Accessors for the mIncremental flag: IsIncremental() returns
   1.252 +       * its current value and SetIncremental() overwrites it.
   1.253 +       *
   1.254 +       * @update  gess4/3/98
   1.255 +       */
   1.256 +      bool      IsIncremental(void) {return mIncremental;}
   1.257 +      void      SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
   1.258 +
   1.259 +      /**
   1.260 +       * Return the position of the first non-whitespace
   1.261 +       * character. This is only reliable before consumers start
   1.262 +       * reading from this scanner.
   1.263 +       */
   1.264 +      int32_t FirstNonWhitespacePosition()
   1.265 +      {
   1.266 +        return mFirstNonWhitespacePosition;
   1.267 +      }
   1.268 +
   1.269 +      /**
   1.270 +       * Override replacement character used by nsIUnicodeDecoder.
   1.271 +       * Default behavior is that it uses nsIUnicodeDecoder's mapping.
   1.272 +       *
   1.273 +       * @param aReplacementCharacter the replacement character
   1.274 +       *        XML (expat) parser uses 0xffff
   1.275 +       */
   1.276 +      void OverrideReplacementCharacter(char16_t aReplacementCharacter);
   1.277 +
   1.278 +  protected:
   1.279 +
   1.280 +      bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1);
   1.281 +      bool AppendToBuffer(const nsAString& aStr) // Convenience overload: wraps aStr in a Buffer and forwards it
   1.282 +      {
   1.283 +        nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
   1.284 +        if (!buf)
   1.285 +          return false; // allocation failed, nothing was appended
   1.286 +        AppendToBuffer(buf, nullptr); // NOTE(review): the bool result of the Buffer overload is discarded here -- confirm that is intended
   1.287 +        return true;
   1.288 +      }
   1.289 +
   1.290 +      nsScannerString*             mSlidingBuffer;
   1.291 +      nsScannerIterator            mCurrentPosition; // The position we will next read from in the scanner buffer
   1.292 +      nsScannerIterator            mMarkPosition;    // The position last marked (we may rewind to here)
   1.293 +      nsScannerIterator            mEndPosition;     // The current end of the scanner buffer
   1.294 +      nsScannerIterator            mFirstInvalidPosition; // The position of the first invalid character that was detected
   1.295 +      nsString        mFilename;       // Returned by GetFilename()
   1.296 +      uint32_t        mCountRemaining; // The number of bytes still to be read
   1.297 +                                       // from the scanner buffer
   1.298 +      bool            mIncremental;    // Read and written through IsIncremental()/SetIncremental()
   1.299 +      bool            mHasInvalidCharacter;
   1.300 +      char16_t       mReplacementCharacter;
   1.301 +      int32_t         mFirstNonWhitespacePosition; // Returned by FirstNonWhitespacePosition()
   1.302 +      int32_t         mCharsetSource;
   1.303 +      nsCString       mCharset;
   1.304 +      nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
   1.305 +
   1.306 +  private:
   1.307 +      nsScanner &operator =(const nsScanner &); // Declared private and not implemented: assignment is disallowed.
   1.308 +};
   1.309 +
   1.310 +#endif
   1.311 +
   1.312 +

mercurial