Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | |
michael@0 | 7 | /** |
michael@0 | 8 | * MODULE NOTES: |
michael@0 | 9 | * @update gess 4/1/98 |
michael@0 | 10 | * |
michael@0 | 11 | * The scanner is a low-level service class that knows |
michael@0 | 12 | * how to consume characters out of an (internal) stream. |
michael@0 | 13 | * This class also offers a series of utility methods |
michael@0 | 14 | * that most tokenizers want, such as ReadUntil() |
michael@0 | 15 | * and SkipWhitespace(). |
michael@0 | 16 | */ |
michael@0 | 17 | |
michael@0 | 18 | |
michael@0 | 19 | #ifndef SCANNER |
michael@0 | 20 | #define SCANNER |
michael@0 | 21 | |
michael@0 | 22 | #include "nsCOMPtr.h" |
michael@0 | 23 | #include "nsString.h" |
michael@0 | 24 | #include "nsIParser.h" |
michael@0 | 25 | #include "nsIUnicodeDecoder.h" |
michael@0 | 26 | #include "nsScannerString.h" |
michael@0 | 27 | |
michael@0 | 28 | class nsParser; |
michael@0 | 29 | |
michael@0 | 30 | class nsReadEndCondition { /* Terminator description accepted by the nsScanner::ReadUntil() overloads. */ |
michael@0 | 31 | public: |
michael@0 | 32 | const char16_t *mChars; /* NOTE(review): presumably the aTerminateChars given to the constructor - confirm */ |
michael@0 | 33 | char16_t mFilter; /* NOTE(review): purpose not visible in this header - see the constructor */ |
michael@0 | 34 | explicit nsReadEndCondition(const char16_t* aTerminateChars); |
michael@0 | 35 | private: |
michael@0 | 36 | nsReadEndCondition(const nsReadEndCondition& aOther); // Declared private to forbid copying |
michael@0 | 37 | void operator=(const nsReadEndCondition& aOther); // Declared private to forbid assignment |
michael@0 | 38 | }; |
michael@0 | 39 | |
michael@0 | 40 | class nsScanner { |
michael@0 | 41 | public: |
michael@0 | 42 | |
michael@0 | 43 | /** |
michael@0 | 44 | * Use this constructor for the XML fragment parsing case |
michael@0 | 45 | */ |
michael@0 | 46 | nsScanner(const nsAString& anHTMLString); |
michael@0 | 47 | |
michael@0 | 48 | /** |
michael@0 | 49 | * Use this constructor if you want i/o to be based on |
michael@0 | 50 | * a file (therefore a stream) or just data you provide via Append(). |
michael@0 | 51 | */ |
michael@0 | 52 | nsScanner(nsString& aFilename, bool aCreateStream); |
michael@0 | 53 | |
michael@0 | 54 | ~nsScanner(); |
michael@0 | 55 | |
michael@0 | 56 | /** |
michael@0 | 57 | * retrieve next char from internal input stream |
michael@0 | 58 | * |
michael@0 | 59 | * @update gess 3/25/98 |
michael@0 | 60 | * @param ch is the char to accept new value |
michael@0 | 61 | * @return error code reflecting read status |
michael@0 | 62 | */ |
michael@0 | 63 | nsresult GetChar(char16_t& ch); |
michael@0 | 64 | |
michael@0 | 65 | /** |
michael@0 | 66 | * peek ahead at the next char in the scanner's internal |
michael@0 | 67 | * input buffer (NOTE(review): presumably without consuming it - confirm) |
michael@0 | 68 | * |
michael@0 | 69 | * @update gess 3/25/98 |
michael@0 | 70 | * @param ch is the char to accept new value |
michael@0 | 71 | * @return error code reflecting read status |
michael@0 | 72 | */ |
michael@0 | 73 | nsresult Peek(char16_t& ch, uint32_t aOffset=0); |
michael@0 | 74 | |
michael@0 | 75 | nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0); |
michael@0 | 76 | |
michael@0 | 77 | /** |
michael@0 | 78 | * Skip over chars as long as they equal given char |
michael@0 | 79 | * |
michael@0 | 80 | * @update gess 3/25/98 |
michael@0 | 81 | * @param aSkipChar char to be skipped |
michael@0 | 82 | * @return error code |
michael@0 | 83 | */ |
michael@0 | 84 | nsresult SkipOver(char16_t aSkipChar); |
michael@0 | 85 | |
michael@0 | 86 | /** |
michael@0 | 87 | * Skip whitespace on scanner input stream |
michael@0 | 88 | * |
michael@0 | 89 | * @update gess 3/25/98 |
michael@0 | 90 | * @return error status |
michael@0 | 91 | */ |
michael@0 | 92 | nsresult SkipWhitespace(int32_t& aNewlinesSkipped); |
michael@0 | 93 | |
michael@0 | 94 | /** |
michael@0 | 95 | * Consume characters until you run into space, a '<', a '>', or a '/'. |
michael@0 | 96 | * |
michael@0 | 97 | * @param aString - receives new data from stream |
michael@0 | 98 | * @return error code |
michael@0 | 99 | */ |
michael@0 | 100 | nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString); |
michael@0 | 101 | |
michael@0 | 102 | /** |
michael@0 | 103 | * Consume characters until you run into a char that's not valid in an |
michael@0 | 104 | * entity name |
michael@0 | 105 | * |
michael@0 | 106 | * @param aString - receives new data from stream |
michael@0 | 107 | * @return error code |
michael@0 | 108 | */ |
michael@0 | 109 | nsresult ReadEntityIdentifier(nsString& aString); |
michael@0 | 110 | nsresult ReadNumber(nsString& aString,int32_t aBase); |
michael@0 | 111 | nsresult ReadWhitespace(nsScannerSharedSubstring& aString, |
michael@0 | 112 | int32_t& aNewlinesSkipped, |
michael@0 | 113 | bool& aHaveCR); |
michael@0 | 114 | nsresult ReadWhitespace(nsScannerIterator& aStart, |
michael@0 | 115 | nsScannerIterator& aEnd, |
michael@0 | 116 | int32_t& aNewlinesSkipped); |
michael@0 | 117 | |
michael@0 | 118 | /** |
michael@0 | 119 | * Consume characters until you find the terminal char |
michael@0 | 120 | * |
michael@0 | 121 | * @update gess 3/25/98 |
michael@0 | 122 | * @param aString receives new data from stream |
michael@0 | 123 | * @param aTerminal contains terminating char |
michael@0 | 124 | * @param addTerminal tells us whether to append terminal to aString |
michael@0 | 125 | * @return error code |
michael@0 | 126 | */ |
michael@0 | 127 | nsresult ReadUntil(nsAString& aString, |
michael@0 | 128 | char16_t aTerminal, |
michael@0 | 129 | bool addTerminal); |
michael@0 | 130 | |
michael@0 | 131 | /** |
michael@0 | 132 | * Consume characters until you find one contained in given |
michael@0 | 133 | * terminal set. |
michael@0 | 134 | * |
michael@0 | 135 | * @update gess 3/25/98 |
michael@0 | 136 | * @param aString receives new data from stream |
michael@0 | 137 | * @param aEndCondition contains set of terminating chars |
michael@0 | 138 | * @param addTerminal tells us whether to append terminal to aString |
michael@0 | 139 | * @return error code |
michael@0 | 140 | */ |
michael@0 | 141 | nsresult ReadUntil(nsAString& aString, |
michael@0 | 142 | const nsReadEndCondition& aEndCondition, |
michael@0 | 143 | bool addTerminal); |
michael@0 | 144 | |
michael@0 | 145 | nsresult ReadUntil(nsScannerSharedSubstring& aString, |
michael@0 | 146 | const nsReadEndCondition& aEndCondition, |
michael@0 | 147 | bool addTerminal); |
michael@0 | 148 | |
michael@0 | 149 | nsresult ReadUntil(nsScannerIterator& aStart, |
michael@0 | 150 | nsScannerIterator& aEnd, |
michael@0 | 151 | const nsReadEndCondition& aEndCondition, |
michael@0 | 152 | bool addTerminal); |
michael@0 | 153 | |
michael@0 | 154 | /** |
michael@0 | 155 | * Records current offset position in input stream. This allows us |
michael@0 | 156 | * to back up to this point if the need should arise, such as when |
michael@0 | 157 | * tokenization gets interrupted. |
michael@0 | 158 | * |
michael@0 | 159 | * @update gess 5/12/98 |
michael@0 | 160 | * @param |
michael@0 | 161 | * @return |
michael@0 | 162 | */ |
michael@0 | 163 | int32_t Mark(void); |
michael@0 | 164 | |
michael@0 | 165 | /** |
michael@0 | 166 | * Resets current offset position of input stream to marked position. |
michael@0 | 167 | * This allows us to back up to this point if the need should arise, |
michael@0 | 168 | * such as when tokenization gets interrupted. |
michael@0 | 169 | * NOTE: IT IS REALLY BAD FORM TO CALL RewindToMark() WITHOUT CALLING Mark() FIRST! |
michael@0 | 170 | * |
michael@0 | 171 | * @update gess 5/12/98 |
michael@0 | 172 | * @param |
michael@0 | 173 | * @return |
michael@0 | 174 | */ |
michael@0 | 175 | void RewindToMark(void); |
michael@0 | 176 | |
michael@0 | 177 | |
michael@0 | 178 | /** |
michael@0 | 179 | * NOTE(review): undocumented upstream; the name suggests aBuffer is pushed |
michael@0 | 180 | * back onto the input so it can be read again - confirm in the implementation. |
michael@0 | 181 | * @update harishd 01/12/99 |
michael@0 | 182 | * @param |
michael@0 | 183 | * @return |
michael@0 | 184 | */ |
michael@0 | 185 | bool UngetReadable(const nsAString& aBuffer); |
michael@0 | 186 | |
michael@0 | 187 | /** |
michael@0 | 188 | * Append aBuffer to the data available to the scanner. |
michael@0 | 189 | * NOTE(review): inferred from the name and signature - confirm. |
michael@0 | 190 | * @update gess 5/13/98 |
michael@0 | 191 | * @param |
michael@0 | 192 | * @return |
michael@0 | 193 | */ |
michael@0 | 194 | nsresult Append(const nsAString& aBuffer); |
michael@0 | 195 | |
michael@0 | 196 | /** |
michael@0 | 197 | * Append aLen bytes from aBuffer to the data available to the scanner. |
michael@0 | 198 | * NOTE(review): presumably decoded via mUnicodeDecoder first - confirm. |
michael@0 | 199 | * @update gess 5/21/98 |
michael@0 | 200 | * @param |
michael@0 | 201 | * @return |
michael@0 | 202 | */ |
michael@0 | 203 | nsresult Append(const char* aBuffer, uint32_t aLen, |
michael@0 | 204 | nsIRequest *aRequest); |
michael@0 | 205 | |
michael@0 | 206 | /** |
michael@0 | 207 | * Call this to copy bytes out of the scanner that have not yet been consumed |
michael@0 | 208 | * by the tokenization process. |
michael@0 | 209 | * |
michael@0 | 210 | * @update gess 5/12/98 |
michael@0 | 211 | * @param aCopyBuffer is where the scanner buffer will be copied to |
michael@0 | 212 | * @return nada |
michael@0 | 213 | */ |
michael@0 | 214 | void CopyUnusedData(nsString& aCopyBuffer); |
michael@0 | 215 | |
michael@0 | 216 | /** |
michael@0 | 217 | * Retrieve the name of the file that the scanner is reading from. |
michael@0 | 218 | * In some cases, it's just a given name, because the scanner isn't |
michael@0 | 219 | * really reading from a file. |
michael@0 | 220 | * |
michael@0 | 221 | * @update gess 5/12/98 |
michael@0 | 222 | * @return |
michael@0 | 223 | */ |
michael@0 | 224 | nsString& GetFilename(void); |
michael@0 | 225 | |
michael@0 | 226 | static void SelfTest(); |
michael@0 | 227 | |
michael@0 | 228 | /** |
michael@0 | 229 | * Use this setter to change the scanner's unicode decoder |
michael@0 | 230 | * |
michael@0 | 231 | * @update ftang 3/02/99 |
michael@0 | 232 | * @param aCharset a normalized (alias resolved) charset name |
michael@0 | 233 | * @param aSource - where the charset info came from |
michael@0 | 234 | * @return |
michael@0 | 235 | */ |
michael@0 | 236 | nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource); |
michael@0 | 237 | |
michael@0 | 238 | void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd); |
michael@0 | 239 | void CurrentPosition(nsScannerIterator& aPosition); |
michael@0 | 240 | void EndReading(nsScannerIterator& aPosition); |
michael@0 | 241 | void SetPosition(nsScannerIterator& aPosition, |
michael@0 | 242 | bool aTruncate = false, |
michael@0 | 243 | bool aReverse = false); |
michael@0 | 244 | void ReplaceCharacter(nsScannerIterator& aPosition, |
michael@0 | 245 | char16_t aChar); |
michael@0 | 246 | |
michael@0 | 247 | /** |
michael@0 | 248 | * Getter and setter for the scanner's incremental-mode |
michael@0 | 249 | * flag (mIncremental). |
michael@0 | 250 | * |
michael@0 | 251 | * @update gess4/3/98 |
michael@0 | 252 | */ |
michael@0 | 253 | bool IsIncremental(void) const {return mIncremental;} |
michael@0 | 254 | void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;} |
michael@0 | 255 | |
michael@0 | 256 | /** |
michael@0 | 257 | * Return the position of the first non-whitespace |
michael@0 | 258 | * character. This is only reliable before consumers start |
michael@0 | 259 | * reading from this scanner. |
michael@0 | 260 | */ |
michael@0 | 261 | int32_t FirstNonWhitespacePosition() const |
michael@0 | 262 | { |
michael@0 | 263 | return mFirstNonWhitespacePosition; |
michael@0 | 264 | } |
michael@0 | 265 | |
michael@0 | 266 | /** |
michael@0 | 267 | * Override replacement character used by nsIUnicodeDecoder. |
michael@0 | 268 | * Default behavior is that it uses nsIUnicodeDecoder's mapping. |
michael@0 | 269 | * |
michael@0 | 270 | * @param aReplacementCharacter the replacement character |
michael@0 | 271 | * XML (expat) parser uses 0xffff |
michael@0 | 272 | */ |
michael@0 | 273 | void OverrideReplacementCharacter(char16_t aReplacementCharacter); |
michael@0 | 274 | |
michael@0 | 275 | protected: |
michael@0 | 276 | |
michael@0 | 277 | bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1); |
michael@0 | 278 | bool AppendToBuffer(const nsAString& aStr) |
michael@0 | 279 | { |
michael@0 | 280 | nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr); |
michael@0 | 281 | if (!buf) |
michael@0 | 282 | return false; |
michael@0 | 283 | // Report the Buffer* overload's result instead of unconditionally claiming success. |
michael@0 | 284 | return AppendToBuffer(buf, nullptr); |
michael@0 | 285 | } |
michael@0 | 286 | |
michael@0 | 287 | nsScannerString* mSlidingBuffer; |
michael@0 | 288 | nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer |
michael@0 | 289 | nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here) |
michael@0 | 290 | nsScannerIterator mEndPosition; // The current end of the scanner buffer |
michael@0 | 291 | nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected |
michael@0 | 292 | nsString mFilename; |
michael@0 | 293 | uint32_t mCountRemaining; // The number of bytes still to be read |
michael@0 | 294 | // from the scanner buffer |
michael@0 | 295 | bool mIncremental; |
michael@0 | 296 | bool mHasInvalidCharacter; |
michael@0 | 297 | char16_t mReplacementCharacter; |
michael@0 | 298 | int32_t mFirstNonWhitespacePosition; |
michael@0 | 299 | int32_t mCharsetSource; |
michael@0 | 300 | nsCString mCharset; |
michael@0 | 301 | nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder; |
michael@0 | 302 | |
michael@0 | 303 | private: |
michael@0 | 304 | nsScanner &operator =(const nsScanner &); // Not implemented. |
michael@0 | 305 | }; |
michael@0 | 306 | |
michael@0 | 307 | #endif |
michael@0 | 308 | |
michael@0 | 309 |