Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 * The scanner is a low-level service class that knows
 * how to consume characters out of an (internal) stream.
 * This class also offers a series of utility methods
 * that most tokenizers want, such as ReadUntil()
 * and SkipWhitespace().
 */
19 #ifndef SCANNER
20 #define SCANNER
22 #include "nsCOMPtr.h"
23 #include "nsString.h"
24 #include "nsIParser.h"
25 #include "nsIUnicodeDecoder.h"
26 #include "nsScannerString.h"
28 class nsParser;
/**
 * Describes the set of characters that terminates a ReadUntil() scan.
 *
 * Instances are built from a pointer to the terminating characters and
 * cannot be copied or assigned.
 */
class nsReadEndCondition {
public:
  // The terminating characters.
  // NOTE(review): presumably the aTerminateChars pointer handed to the
  // constructor, stored without copying -- confirm in nsScanner.cpp.
  const char16_t *mChars;

  // NOTE(review): presumably a value precomputed from mChars that lets a
  // scan reject non-terminating characters quickly -- confirm in
  // nsScanner.cpp.
  char16_t mFilter;

  /**
   * @param aTerminateChars the characters that end a read
   */
  explicit nsReadEndCondition(const char16_t* aTerminateChars);

private:
  // Deleted rather than merely left undefined, so that any attempt to copy
  // (even from a member or friend) is rejected at compile time.
  nsReadEndCondition(const nsReadEndCondition& aOther) = delete; // No copying
  void operator=(const nsReadEndCondition& aOther) = delete;     // No assigning
};
40 class nsScanner {
41 public:
43 /**
44 * Use this constructor for the XML fragment parsing case
45 */
46 nsScanner(const nsAString& anHTMLString);
48 /**
49 * Use this constructor if you want i/o to be based on
50 * a file (therefore a stream) or just data you provide via Append().
51 */
52 nsScanner(nsString& aFilename, bool aCreateStream);
54 ~nsScanner();
56 /**
57 * retrieve next char from internal input stream
58 *
59 * @update gess 3/25/98
60 * @param ch is the char to accept new value
61 * @return error code reflecting read status
62 */
63 nsresult GetChar(char16_t& ch);
65 /**
66 * peek ahead to consume next char from scanner's internal
67 * input buffer
68 *
69 * @update gess 3/25/98
70 * @param ch is the char to accept new value
71 * @return error code reflecting read status
72 */
73 nsresult Peek(char16_t& ch, uint32_t aOffset=0);
75 nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0);
77 /**
78 * Skip over chars as long as they equal given char
79 *
80 * @update gess 3/25/98
81 * @param char to be skipped
82 * @return error code
83 */
84 nsresult SkipOver(char16_t aSkipChar);
86 /**
87 * Skip whitespace on scanner input stream
88 *
89 * @update gess 3/25/98
90 * @return error status
91 */
92 nsresult SkipWhitespace(int32_t& aNewlinesSkipped);
94 /**
95 * Consume characters until you run into space, a '<', a '>', or a '/'.
96 *
97 * @param aString - receives new data from stream
98 * @return error code
99 */
100 nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString);
102 /**
103 * Consume characters until you run into a char that's not valid in an
104 * entity name
105 *
106 * @param aString - receives new data from stream
107 * @return error code
108 */
109 nsresult ReadEntityIdentifier(nsString& aString);
110 nsresult ReadNumber(nsString& aString,int32_t aBase);
111 nsresult ReadWhitespace(nsScannerSharedSubstring& aString,
112 int32_t& aNewlinesSkipped,
113 bool& aHaveCR);
114 nsresult ReadWhitespace(nsScannerIterator& aStart,
115 nsScannerIterator& aEnd,
116 int32_t& aNewlinesSkipped);
118 /**
119 * Consume characters until you find the terminal char
120 *
121 * @update gess 3/25/98
122 * @param aString receives new data from stream
123 * @param aTerminal contains terminating char
124 * @param addTerminal tells us whether to append terminal to aString
125 * @return error code
126 */
127 nsresult ReadUntil(nsAString& aString,
128 char16_t aTerminal,
129 bool addTerminal);
131 /**
132 * Consume characters until you find one contained in given
133 * terminal set.
134 *
135 * @update gess 3/25/98
136 * @param aString receives new data from stream
137 * @param aTermSet contains set of terminating chars
138 * @param addTerminal tells us whether to append terminal to aString
139 * @return error code
140 */
141 nsresult ReadUntil(nsAString& aString,
142 const nsReadEndCondition& aEndCondition,
143 bool addTerminal);
145 nsresult ReadUntil(nsScannerSharedSubstring& aString,
146 const nsReadEndCondition& aEndCondition,
147 bool addTerminal);
149 nsresult ReadUntil(nsScannerIterator& aStart,
150 nsScannerIterator& aEnd,
151 const nsReadEndCondition& aEndCondition,
152 bool addTerminal);
154 /**
155 * Records current offset position in input stream. This allows us
156 * to back up to this point if the need should arise, such as when
157 * tokenization gets interrupted.
158 *
159 * @update gess 5/12/98
160 * @param
161 * @return
162 */
163 int32_t Mark(void);
165 /**
166 * Resets current offset position of input stream to marked position.
167 * This allows us to back up to this point if the need should arise,
168 * such as when tokenization gets interrupted.
169 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
170 *
171 * @update gess 5/12/98
172 * @param
173 * @return
174 */
175 void RewindToMark(void);
178 /**
179 *
180 *
181 * @update harishd 01/12/99
182 * @param
183 * @return
184 */
185 bool UngetReadable(const nsAString& aBuffer);
187 /**
188 *
189 *
190 * @update gess 5/13/98
191 * @param
192 * @return
193 */
194 nsresult Append(const nsAString& aBuffer);
196 /**
197 *
198 *
199 * @update gess 5/21/98
200 * @param
201 * @return
202 */
203 nsresult Append(const char* aBuffer, uint32_t aLen,
204 nsIRequest *aRequest);
206 /**
207 * Call this to copy bytes out of the scanner that have not yet been consumed
208 * by the tokenization process.
209 *
210 * @update gess 5/12/98
211 * @param aCopyBuffer is where the scanner buffer will be copied to
212 * @return nada
213 */
214 void CopyUnusedData(nsString& aCopyBuffer);
216 /**
217 * Retrieve the name of the file that the scanner is reading from.
218 * In some cases, it's just a given name, because the scanner isn't
219 * really reading from a file.
220 *
221 * @update gess 5/12/98
222 * @return
223 */
224 nsString& GetFilename(void);
226 static void SelfTest();
228 /**
229 * Use this setter to change the scanner's unicode decoder
230 *
231 * @update ftang 3/02/99
232 * @param aCharset a normalized (alias resolved) charset name
233 * @param aCharsetSource- where the charset info came from
234 * @return
235 */
236 nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource);
238 void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
239 void CurrentPosition(nsScannerIterator& aPosition);
240 void EndReading(nsScannerIterator& aPosition);
241 void SetPosition(nsScannerIterator& aPosition,
242 bool aTruncate = false,
243 bool aReverse = false);
244 void ReplaceCharacter(nsScannerIterator& aPosition,
245 char16_t aChar);
247 /**
248 * Internal method used to cause the internal buffer to
249 * be filled with data.
250 *
251 * @update gess4/3/98
252 */
253 bool IsIncremental(void) {return mIncremental;}
254 void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
256 /**
257 * Return the position of the first non-whitespace
258 * character. This is only reliable before consumers start
259 * reading from this scanner.
260 */
261 int32_t FirstNonWhitespacePosition()
262 {
263 return mFirstNonWhitespacePosition;
264 }
266 /**
267 * Override replacement character used by nsIUnicodeDecoder.
268 * Default behavior is that it uses nsIUnicodeDecoder's mapping.
269 *
270 * @param aReplacementCharacter the replacement character
271 * XML (expat) parser uses 0xffff
272 */
273 void OverrideReplacementCharacter(char16_t aReplacementCharacter);
275 protected:
277 bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1);
278 bool AppendToBuffer(const nsAString& aStr)
279 {
280 nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
281 if (!buf)
282 return false;
283 AppendToBuffer(buf, nullptr);
284 return true;
285 }
287 nsScannerString* mSlidingBuffer;
288 nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
289 nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
290 nsScannerIterator mEndPosition; // The current end of the scanner buffer
291 nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected
292 nsString mFilename;
293 uint32_t mCountRemaining; // The number of bytes still to be read
294 // from the scanner buffer
295 bool mIncremental;
296 bool mHasInvalidCharacter;
297 char16_t mReplacementCharacter;
298 int32_t mFirstNonWhitespacePosition;
299 int32_t mCharsetSource;
300 nsCString mCharset;
301 nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder;
303 private:
304 nsScanner &operator =(const nsScanner &); // Not implemented.
305 };
307 #endif