parser/htmlparser/src/nsScanner.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6
michael@0 7 /**
michael@0 8 * MODULE NOTES:
michael@0 9 * @update gess 4/1/98
michael@0 10 *
michael@0 11 * The scanner is a low-level service class that knows
michael@0 12 * how to consume characters out of an (internal) stream.
michael@0 13 * This class also offers a series of utility methods
michael@0 14 * that most tokenizers want, such as ReadUntil()
michael@0 15 * and SkipWhitespace().
michael@0 16 */
michael@0 17
michael@0 18
michael@0 19 #ifndef SCANNER
michael@0 20 #define SCANNER
michael@0 21
michael@0 22 #include "nsCOMPtr.h"
michael@0 23 #include "nsString.h"
michael@0 24 #include "nsIParser.h"
michael@0 25 #include "nsIUnicodeDecoder.h"
michael@0 26 #include "nsScannerString.h"
michael@0 27
michael@0 28 class nsParser;
michael@0 29
michael@0 30 class nsReadEndCondition { // End-of-read description passed to the nsScanner::ReadUntil() overloads as aEndCondition.
michael@0 31 public:
michael@0 32 const char16_t *mChars; // presumably the terminating chars given to the constructor -- TODO confirm in nsScanner.cpp
michael@0 33 char16_t mFilter; // NOTE(review): role not visible in this header -- confirm in nsScanner.cpp
michael@0 34 explicit nsReadEndCondition(const char16_t* aTerminateChars); // explicit: no implicit conversion from a char16_t pointer
michael@0 35 private:
michael@0 36 nsReadEndCondition(const nsReadEndCondition& aOther); // No copying: private and not defined in this header
michael@0 37 void operator=(const nsReadEndCondition& aOther); // No assigning: private and not defined in this header
michael@0 38 };
michael@0 39
michael@0 40 class nsScanner {
michael@0 41 public:
michael@0 42
michael@0 43 /**
michael@0 44 * Use this constructor for the XML fragment parsing case.
michael@0 45 */
michael@0 46 nsScanner(const nsAString& anHTMLString);
michael@0 47
michael@0 48 /**
michael@0 49 * Use this constructor if you want i/o to be based on
michael@0 50 * a file (therefore a stream) or just data you provide via Append().
michael@0 51 */
michael@0 52 nsScanner(nsString& aFilename, bool aCreateStream);
michael@0 53
michael@0 54 ~nsScanner();
michael@0 55
michael@0 56 /**
michael@0 57 * Retrieve the next char from the internal input stream.
michael@0 58 *
michael@0 59 * @update gess 3/25/98
michael@0 60 * @param ch receives the char that was read
michael@0 61 * @return error code reflecting read status
michael@0 62 */
michael@0 63 nsresult GetChar(char16_t& ch);
michael@0 64
michael@0 65 /**
michael@0 66 * Peek ahead at a char in the scanner's internal input buffer
michael@0 67 * (by its name nothing is consumed -- TODO confirm in nsScanner.cpp).
michael@0 68 *
michael@0 69 * @update gess 3/25/98
michael@0 70 * @param ch receives the peeked char; aOffset (default 0) is presumably the look-ahead distance
michael@0 71 * @return error code reflecting read status
michael@0 72 */
michael@0 73 nsresult Peek(char16_t& ch, uint32_t aOffset=0);
michael@0 74 // String overload; aNumChars and aOffset (default 0) presumably bound the peeked range -- TODO confirm.
michael@0 75 nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
michael@0 76
michael@0 77 /**
michael@0 78 * Skip over chars as long as they equal given char
michael@0 79 *
michael@0 80 * @update gess 3/25/98
michael@0 81 * @param aSkipChar the char to be skipped
michael@0 82 * @return error code
michael@0 83 */
michael@0 84 nsresult SkipOver(char16_t aSkipChar);
michael@0 85
michael@0 86 /**
michael@0 87 * Skip whitespace on scanner input stream
michael@0 88 * (aNewlinesSkipped is an out-param; by its name it counts skipped newlines -- TODO confirm).
michael@0 89 * @update gess 3/25/98
michael@0 90 * @return error status
michael@0 91 */
michael@0 92 nsresult SkipWhitespace(int32_t& aNewlinesSkipped);
michael@0 93
michael@0 94 /**
michael@0 95 * Consume characters until you run into space, a '<', a '>', or a '/'.
michael@0 96 *
michael@0 97 * @param aString - receives new data from stream
michael@0 98 * @return error code
michael@0 99 */
michael@0 100 nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
michael@0 101
michael@0 102 /**
michael@0 103 * Consume characters until you run into a char that's not valid in an
michael@0 104 * entity name
michael@0 105 *
michael@0 106 * @param aString - receives new data from stream
michael@0 107 * @return error code
michael@0 108 */
michael@0 109 nsresult ReadEntityIdentifier(nsString& aString);
michael@0 110 nsresult ReadNumber(nsString& aString,int32_t aBase); // aBase: presumably the numeric radix -- TODO confirm
michael@0 111 nsresult ReadWhitespace(nsScannerSharedSubstring& aString,
michael@0 112 int32_t& aNewlinesSkipped,
michael@0 113 bool& aHaveCR);
michael@0 114 nsresult ReadWhitespace(nsScannerIterator& aStart,
michael@0 115 nsScannerIterator& aEnd,
michael@0 116 int32_t& aNewlinesSkipped);
michael@0 117
michael@0 118 /**
michael@0 119 * Consume characters until you find the terminal char
michael@0 120 *
michael@0 121 * @update gess 3/25/98
michael@0 122 * @param aString receives new data from stream
michael@0 123 * @param aTerminal contains terminating char
michael@0 124 * @param addTerminal tells us whether to append terminal to aString
michael@0 125 * @return error code
michael@0 126 */
michael@0 127 nsresult ReadUntil(nsAString& aString,
michael@0 128 char16_t aTerminal,
michael@0 129 bool addTerminal);
michael@0 130
michael@0 131 /**
michael@0 132 * Consume characters until you find one contained in given
michael@0 133 * terminal set. The overloads below differ only in where the result goes.
michael@0 134 *
michael@0 135 * @update gess 3/25/98
michael@0 136 * @param aString receives new data from stream
michael@0 137 * @param aEndCondition contains set of terminating chars
michael@0 138 * @param addTerminal tells us whether to append terminal to aString
michael@0 139 * @return error code
michael@0 140 */
michael@0 141 nsresult ReadUntil(nsAString& aString,
michael@0 142 const nsReadEndCondition& aEndCondition,
michael@0 143 bool addTerminal);
michael@0 144
michael@0 145 nsresult ReadUntil(nsScannerSharedSubstring& aString,
michael@0 146 const nsReadEndCondition& aEndCondition,
michael@0 147 bool addTerminal);
michael@0 148
michael@0 149 nsresult ReadUntil(nsScannerIterator& aStart,
michael@0 150 nsScannerIterator& aEnd,
michael@0 151 const nsReadEndCondition& aEndCondition,
michael@0 152 bool addTerminal);
michael@0 153
michael@0 154 /**
michael@0 155 * Records current offset position in input stream. This allows us
michael@0 156 * to back up to this point if the need should arise, such as when
michael@0 157 * tokenization gets interrupted.
michael@0 158 *
michael@0 159 * @update gess 5/12/98
michael@0 160 * @param none
michael@0 161 * @return int32_t whose meaning is not documented in this header
michael@0 162 */
michael@0 163 int32_t Mark(void);
michael@0 164
michael@0 165 /**
michael@0 166 * Resets current offset position of input stream to marked position.
michael@0 167 * This allows us to back up to this point if the need should arise,
michael@0 168 * such as when tokenization gets interrupted.
michael@0 169 * NOTE: IT IS REALLY BAD FORM TO CALL THIS WITHOUT CALLING MARK FIRST!
michael@0 170 * (The original note said RELEASE, apparently an older name for this method -- confirm.)
michael@0 171 * @update gess 5/12/98
michael@0 172 * @param none
michael@0 173 * @return nothing
michael@0 174 */
michael@0 175 void RewindToMark(void);
michael@0 176
michael@0 177
michael@0 178 /**
michael@0 179 * By its name, pushes aBuffer back onto the scanner input -- TODO confirm in nsScanner.cpp.
michael@0 180 *
michael@0 181 * @update harishd 01/12/99
michael@0 182 * @param aBuffer the text handed back to the scanner
michael@0 183 * @return bool; meaning not documented in this header (presumably success)
michael@0 184 */
michael@0 185 bool UngetReadable(const nsAString& aBuffer);
michael@0 186
michael@0 187 /**
michael@0 188 * Provide more data to the scanner as an already-decoded string.
michael@0 189 *
michael@0 190 * @update gess 5/13/98
michael@0 191 * @param aBuffer the data to append
michael@0 192 * @return error code
michael@0 193 */
michael@0 194 nsresult Append(const nsAString& aBuffer);
michael@0 195
michael@0 196 /**
michael@0 197 * Provide more data to the scanner as raw chars of length aLen
michael@0 198 * (presumably converted through mUnicodeDecoder -- TODO confirm in nsScanner.cpp).
michael@0 199 * @update gess 5/21/98
michael@0 200 * @param aBuffer, aLen the data and its length; aRequest the associated request
michael@0 201 * @return error code
michael@0 202 */
michael@0 203 nsresult Append(const char* aBuffer, uint32_t aLen,
michael@0 204 nsIRequest *aRequest);
michael@0 205
michael@0 206 /**
michael@0 207 * Call this to copy bytes out of the scanner that have not yet been consumed
michael@0 208 * by the tokenization process.
michael@0 209 *
michael@0 210 * @update gess 5/12/98
michael@0 211 * @param aCopyBuffer is where the scanner buffer will be copied to
michael@0 212 * @return nada
michael@0 213 */
michael@0 214 void CopyUnusedData(nsString& aCopyBuffer);
michael@0 215
michael@0 216 /**
michael@0 217 * Retrieve the name of the file that the scanner is reading from.
michael@0 218 * In some cases, it's just a given name, because the scanner isn't
michael@0 219 * really reading from a file.
michael@0 220 *
michael@0 221 * @update gess 5/12/98
michael@0 222 * @return mutable reference to the name (presumably mFilename -- confirm)
michael@0 223 */
michael@0 224 nsString& GetFilename(void);
michael@0 225
michael@0 226 static void SelfTest();
michael@0 227
michael@0 228 /**
michael@0 229 * Use this setter to change the scanner's unicode decoder
michael@0 230 *
michael@0 231 * @update ftang 3/02/99
michael@0 232 * @param aCharset a normalized (alias resolved) charset name
michael@0 233 * @param aSource where the charset info came from
michael@0 234 * @return error code
michael@0 235 */
michael@0 236 nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
michael@0 237 // Iterator and position helpers; all are declared here and defined out of line.
michael@0 238 void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
michael@0 239 void CurrentPosition(nsScannerIterator& aPosition);
michael@0 240 void EndReading(nsScannerIterator& aPosition);
michael@0 241 void SetPosition(nsScannerIterator& aPosition,
michael@0 242 bool aTruncate = false,
michael@0 243 bool aReverse = false);
michael@0 244 void ReplaceCharacter(nsScannerIterator& aPosition,
michael@0 245 char16_t aChar);
michael@0 246
michael@0 247 /**
michael@0 248 * Accessors for the mIncremental flag: IsIncremental() returns it,
michael@0 249 * SetIncremental() overwrites it with anIncrValue.
michael@0 250 *
michael@0 251 * @update gess4/3/98
michael@0 252 */
michael@0 253 bool IsIncremental(void) {return mIncremental;}
michael@0 254 void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
michael@0 255
michael@0 256 /**
michael@0 257 * Return the position of the first non-whitespace
michael@0 258 * character. This is only reliable before consumers start
michael@0 259 * reading from this scanner.
michael@0 260 */
michael@0 261 int32_t FirstNonWhitespacePosition()
michael@0 262 {
michael@0 263 return mFirstNonWhitespacePosition;
michael@0 264 }
michael@0 265
michael@0 266 /**
michael@0 267 * Override replacement character used by nsIUnicodeDecoder.
michael@0 268 * Default behavior is that it uses nsIUnicodeDecoder's mapping.
michael@0 269 *
michael@0 270 * @param aReplacementCharacter the replacement character
michael@0 271 * XML (expat) parser uses 0xffff
michael@0 272 */
michael@0 273 void OverrideReplacementCharacter(char16_t aReplacementCharacter);
michael@0 274
michael@0 275 protected:
michael@0 276 // Appends an already-allocated buffer; aErrorPos defaults to -1 (presumably "no error position" -- confirm).
michael@0 277 bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1);
michael@0 278 bool AppendToBuffer(const nsAString& aStr)
michael@0 279 {
michael@0 280 nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
michael@0 281 if (!buf)
michael@0 282 return false; // no buffer could be allocated from aStr
michael@0 283 // Report the real outcome of the append instead of discarding it and always returning true.
michael@0 284 return AppendToBuffer(buf, nullptr);
michael@0 285 }
michael@0 286
michael@0 287 nsScannerString* mSlidingBuffer;
michael@0 288 nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
michael@0 289 nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
michael@0 290 nsScannerIterator mEndPosition; // The current end of the scanner buffer
michael@0 291 nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected
michael@0 292 nsString mFilename;
michael@0 293 uint32_t mCountRemaining; // The number of bytes still to be read
michael@0 294 // from the scanner buffer
michael@0 295 bool mIncremental;
michael@0 296 bool mHasInvalidCharacter;
michael@0 297 char16_t mReplacementCharacter;
michael@0 298 int32_t mFirstNonWhitespacePosition;
michael@0 299 int32_t mCharsetSource;
michael@0 300 nsCString mCharset;
michael@0 301 nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
michael@0 302 // NOTE(review): assignment is disabled below but copy construction is not, and mSlidingBuffer is a raw pointer -- confirm no caller copies.
michael@0 303 private:
michael@0 304 nsScanner &operator =(const nsScanner &); // Not implemented.
michael@0 305 };
michael@0 306
michael@0 307 #endif
michael@0 308
michael@0 309

mercurial