|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 |
|
/**
 * MODULE NOTES:
 * @update  gess 4/1/98
 *
 * The scanner is a low-level service class that knows
 * how to consume characters out of an (internal) stream.
 * This class also offers a series of utility methods
 * that most tokenizers want, such as ReadUntil()
 * and SkipWhitespace().
 */
|
17 |
|
18 |
|
19 #ifndef SCANNER |
|
20 #define SCANNER |
|
21 |
|
22 #include "nsCOMPtr.h" |
|
23 #include "nsString.h" |
|
24 #include "nsIParser.h" |
|
25 #include "nsIUnicodeDecoder.h" |
|
26 #include "nsScannerString.h" |
|
27 |
|
28 class nsParser; |
|
29 |
|
/**
 * Describes the set of characters that terminates a ReadUntil() call.
 *
 * Instances are neither copyable nor assignable.
 */
class nsReadEndCondition {
public:
  // The terminating characters handed to the constructor.
  // NOTE(review): ownership and lifetime of this array are decided in the
  // constructor, which is not defined in this header -- confirm before
  // relying on either.
  const char16_t *mChars;

  // NOTE(review): presumably a mask precomputed from mChars so that
  // characters which cannot be terminators are rejected cheaply -- confirm
  // against the constructor's implementation.
  char16_t mFilter;

  /**
   * @param aTerminateChars  the characters that should end a read
   */
  explicit nsReadEndCondition(const char16_t* aTerminateChars);

  // No copying or assigning. Deleted (rather than private and unimplemented)
  // so that misuse is rejected at compile time instead of at link time.
  nsReadEndCondition(const nsReadEndCondition& aOther) = delete;
  nsReadEndCondition& operator=(const nsReadEndCondition& aOther) = delete;
};
|
39 |
|
40 class nsScanner { |
|
41 public: |
|
42 |
|
43 /** |
|
44 * Use this constructor for the XML fragment parsing case |
|
45 */ |
|
46 nsScanner(const nsAString& anHTMLString); |
|
47 |
|
48 /** |
|
49 * Use this constructor if you want i/o to be based on |
|
50 * a file (therefore a stream) or just data you provide via Append(). |
|
51 */ |
|
52 nsScanner(nsString& aFilename, bool aCreateStream); |
|
53 |
|
54 ~nsScanner(); |
|
55 |
|
56 /** |
|
57 * retrieve next char from internal input stream |
|
58 * |
|
59 * @update gess 3/25/98 |
|
60 * @param ch is the char to accept new value |
|
61 * @return error code reflecting read status |
|
62 */ |
|
63 nsresult GetChar(char16_t& ch); |
|
64 |
|
65 /** |
|
66 * peek ahead to consume next char from scanner's internal |
|
67 * input buffer |
|
68 * |
|
69 * @update gess 3/25/98 |
|
70 * @param ch is the char to accept new value |
|
71 * @return error code reflecting read status |
|
72 */ |
|
73 nsresult Peek(char16_t& ch, uint32_t aOffset=0); |
|
74 |
|
75 nsresult Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset = 0); |
|
76 |
|
77 /** |
|
78 * Skip over chars as long as they equal given char |
|
79 * |
|
80 * @update gess 3/25/98 |
|
81 * @param char to be skipped |
|
82 * @return error code |
|
83 */ |
|
84 nsresult SkipOver(char16_t aSkipChar); |
|
85 |
|
86 /** |
|
87 * Skip whitespace on scanner input stream |
|
88 * |
|
89 * @update gess 3/25/98 |
|
90 * @return error status |
|
91 */ |
|
92 nsresult SkipWhitespace(int32_t& aNewlinesSkipped); |
|
93 |
|
94 /** |
|
95 * Consume characters until you run into space, a '<', a '>', or a '/'. |
|
96 * |
|
97 * @param aString - receives new data from stream |
|
98 * @return error code |
|
99 */ |
|
100 nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString); |
|
101 |
|
102 /** |
|
103 * Consume characters until you run into a char that's not valid in an |
|
104 * entity name |
|
105 * |
|
106 * @param aString - receives new data from stream |
|
107 * @return error code |
|
108 */ |
|
109 nsresult ReadEntityIdentifier(nsString& aString); |
|
110 nsresult ReadNumber(nsString& aString,int32_t aBase); |
|
111 nsresult ReadWhitespace(nsScannerSharedSubstring& aString, |
|
112 int32_t& aNewlinesSkipped, |
|
113 bool& aHaveCR); |
|
114 nsresult ReadWhitespace(nsScannerIterator& aStart, |
|
115 nsScannerIterator& aEnd, |
|
116 int32_t& aNewlinesSkipped); |
|
117 |
|
118 /** |
|
119 * Consume characters until you find the terminal char |
|
120 * |
|
121 * @update gess 3/25/98 |
|
122 * @param aString receives new data from stream |
|
123 * @param aTerminal contains terminating char |
|
124 * @param addTerminal tells us whether to append terminal to aString |
|
125 * @return error code |
|
126 */ |
|
127 nsresult ReadUntil(nsAString& aString, |
|
128 char16_t aTerminal, |
|
129 bool addTerminal); |
|
130 |
|
131 /** |
|
132 * Consume characters until you find one contained in given |
|
133 * terminal set. |
|
134 * |
|
135 * @update gess 3/25/98 |
|
136 * @param aString receives new data from stream |
|
137 * @param aTermSet contains set of terminating chars |
|
138 * @param addTerminal tells us whether to append terminal to aString |
|
139 * @return error code |
|
140 */ |
|
141 nsresult ReadUntil(nsAString& aString, |
|
142 const nsReadEndCondition& aEndCondition, |
|
143 bool addTerminal); |
|
144 |
|
145 nsresult ReadUntil(nsScannerSharedSubstring& aString, |
|
146 const nsReadEndCondition& aEndCondition, |
|
147 bool addTerminal); |
|
148 |
|
149 nsresult ReadUntil(nsScannerIterator& aStart, |
|
150 nsScannerIterator& aEnd, |
|
151 const nsReadEndCondition& aEndCondition, |
|
152 bool addTerminal); |
|
153 |
|
154 /** |
|
155 * Records current offset position in input stream. This allows us |
|
156 * to back up to this point if the need should arise, such as when |
|
157 * tokenization gets interrupted. |
|
158 * |
|
159 * @update gess 5/12/98 |
|
160 * @param |
|
161 * @return |
|
162 */ |
|
163 int32_t Mark(void); |
|
164 |
|
165 /** |
|
166 * Resets current offset position of input stream to marked position. |
|
167 * This allows us to back up to this point if the need should arise, |
|
168 * such as when tokenization gets interrupted. |
|
169 * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! |
|
170 * |
|
171 * @update gess 5/12/98 |
|
172 * @param |
|
173 * @return |
|
174 */ |
|
175 void RewindToMark(void); |
|
176 |
|
177 |
|
178 /** |
|
179 * |
|
180 * |
|
181 * @update harishd 01/12/99 |
|
182 * @param |
|
183 * @return |
|
184 */ |
|
185 bool UngetReadable(const nsAString& aBuffer); |
|
186 |
|
187 /** |
|
188 * |
|
189 * |
|
190 * @update gess 5/13/98 |
|
191 * @param |
|
192 * @return |
|
193 */ |
|
194 nsresult Append(const nsAString& aBuffer); |
|
195 |
|
196 /** |
|
197 * |
|
198 * |
|
199 * @update gess 5/21/98 |
|
200 * @param |
|
201 * @return |
|
202 */ |
|
203 nsresult Append(const char* aBuffer, uint32_t aLen, |
|
204 nsIRequest *aRequest); |
|
205 |
|
206 /** |
|
207 * Call this to copy bytes out of the scanner that have not yet been consumed |
|
208 * by the tokenization process. |
|
209 * |
|
210 * @update gess 5/12/98 |
|
211 * @param aCopyBuffer is where the scanner buffer will be copied to |
|
212 * @return nada |
|
213 */ |
|
214 void CopyUnusedData(nsString& aCopyBuffer); |
|
215 |
|
216 /** |
|
217 * Retrieve the name of the file that the scanner is reading from. |
|
218 * In some cases, it's just a given name, because the scanner isn't |
|
219 * really reading from a file. |
|
220 * |
|
221 * @update gess 5/12/98 |
|
222 * @return |
|
223 */ |
|
224 nsString& GetFilename(void); |
|
225 |
|
226 static void SelfTest(); |
|
227 |
|
228 /** |
|
229 * Use this setter to change the scanner's unicode decoder |
|
230 * |
|
231 * @update ftang 3/02/99 |
|
232 * @param aCharset a normalized (alias resolved) charset name |
|
233 * @param aCharsetSource- where the charset info came from |
|
234 * @return |
|
235 */ |
|
236 nsresult SetDocumentCharset(const nsACString& aCharset, int32_t aSource); |
|
237 |
|
238 void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd); |
|
239 void CurrentPosition(nsScannerIterator& aPosition); |
|
240 void EndReading(nsScannerIterator& aPosition); |
|
241 void SetPosition(nsScannerIterator& aPosition, |
|
242 bool aTruncate = false, |
|
243 bool aReverse = false); |
|
244 void ReplaceCharacter(nsScannerIterator& aPosition, |
|
245 char16_t aChar); |
|
246 |
|
247 /** |
|
248 * Internal method used to cause the internal buffer to |
|
249 * be filled with data. |
|
250 * |
|
251 * @update gess4/3/98 |
|
252 */ |
|
253 bool IsIncremental(void) {return mIncremental;} |
|
254 void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;} |
|
255 |
|
256 /** |
|
257 * Return the position of the first non-whitespace |
|
258 * character. This is only reliable before consumers start |
|
259 * reading from this scanner. |
|
260 */ |
|
261 int32_t FirstNonWhitespacePosition() |
|
262 { |
|
263 return mFirstNonWhitespacePosition; |
|
264 } |
|
265 |
|
266 /** |
|
267 * Override replacement character used by nsIUnicodeDecoder. |
|
268 * Default behavior is that it uses nsIUnicodeDecoder's mapping. |
|
269 * |
|
270 * @param aReplacementCharacter the replacement character |
|
271 * XML (expat) parser uses 0xffff |
|
272 */ |
|
273 void OverrideReplacementCharacter(char16_t aReplacementCharacter); |
|
274 |
|
275 protected: |
|
276 |
|
277 bool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, int32_t aErrorPos = -1); |
|
278 bool AppendToBuffer(const nsAString& aStr) |
|
279 { |
|
280 nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr); |
|
281 if (!buf) |
|
282 return false; |
|
283 AppendToBuffer(buf, nullptr); |
|
284 return true; |
|
285 } |
|
286 |
|
287 nsScannerString* mSlidingBuffer; |
|
288 nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer |
|
289 nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here) |
|
290 nsScannerIterator mEndPosition; // The current end of the scanner buffer |
|
291 nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected |
|
292 nsString mFilename; |
|
293 uint32_t mCountRemaining; // The number of bytes still to be read |
|
294 // from the scanner buffer |
|
295 bool mIncremental; |
|
296 bool mHasInvalidCharacter; |
|
297 char16_t mReplacementCharacter; |
|
298 int32_t mFirstNonWhitespacePosition; |
|
299 int32_t mCharsetSource; |
|
300 nsCString mCharset; |
|
301 nsCOMPtr<nsIUnicodeDecoder> mUnicodeDecoder; |
|
302 |
|
303 private: |
|
304 nsScanner &operator =(const nsScanner &); // Not implemented. |
|
305 }; |
|
306 |
|
307 #endif |
|
308 |
|
309 |