|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /** |
|
7 * MODULE NOTES: |
|
8 * |
|
9 * This class does two primary jobs: |
|
10 * 1) It iterates the tokens provided during the |
|
11 * tokenization process, identifying where elements |
|
12 * begin and end (doing validation and normalization). |
|
13 * 2) It controls and coordinates with an instance of |
|
14 * the IContentSink interface, to coordinate the |
|
15 * production of the content model. |
|
16 * |
|
17 * The basic operation of this class assumes that an HTML |
|
18 * document is non-normalized. Therefore, we don't process |
|
19 * the document in a normalized way. Don't bother to look |
|
20 * for methods like: doHead() or doBody(). |
|
21 * |
|
22 * Instead, in order to be backward compatible, we must |
|
23 * scan the set of tokens and perform this basic set of |
|
24 * operations: |
|
25 * 1) Determine the token type (easy, since the tokens know) |
|
26 * 2) Determine the appropriate section of the HTML document |
|
27 * each token belongs in (HTML,HEAD,BODY,FRAMESET). |
|
28 * 3) Insert content into our document (via the sink) into |
|
29 * the correct section. |
|
30 * 4) In the case of tags that belong in the BODY, we must |
|
31 * ensure that our underlying document state reflects |
|
32 * the appropriate context for our tag. |
|
33 * |
|
34 * For example,if we see a <TR>, we must ensure our |
|
35 * document contains a table into which the row can |
|
36 * be placed. This may result in "implicit containers" |
|
37 * created to ensure a well-formed document. |
|
38 * |
|
39 */ |
|
40 |
|
41 #ifndef NS_PARSER__ |
|
42 #define NS_PARSER__ |
|
43 |
|
44 #include "nsIParser.h" |
|
45 #include "nsDeque.h" |
|
46 #include "nsIURL.h" |
|
47 #include "CParserContext.h" |
|
48 #include "nsParserCIID.h" |
|
49 #include "nsITokenizer.h" |
|
50 #include "nsHTMLTags.h" |
|
51 #include "nsIContentSink.h" |
|
52 #include "nsCOMArray.h" |
|
53 #include "nsCycleCollectionParticipant.h" |
|
54 #include "nsWeakReference.h" |
|
55 |
|
56 class nsIDTD; |
|
57 class nsScanner; |
|
58 class nsIRunnable; |
|
59 |
|
60 #ifdef _MSC_VER |
|
61 #pragma warning( disable : 4275 ) |
|
62 #endif |
|
63 |
|
64 |
|
65 class nsParser : public nsIParser, |
|
66 public nsIStreamListener, |
|
67 public nsSupportsWeakReference |
|
68 { |
|
69 public: |
|
70 /** |
|
71 * Called on module init |
|
72 */ |
|
73 static nsresult Init(); |
|
74 |
|
75 /** |
|
76 * Called on module shutdown |
|
77 */ |
|
78 static void Shutdown(); |
|
79 |
|
80 NS_DECL_CYCLE_COLLECTING_ISUPPORTS |
|
81 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser) |
|
82 |
|
83 /** |
|
84 * default constructor |
|
85 * @update gess5/11/98 |
|
86 */ |
|
87 nsParser(); |
|
88 |
|
89 /** |
|
90 * Destructor |
|
91 * @update gess5/11/98 |
|
92 */ |
|
93 virtual ~nsParser(); |
|
94 |
|
95 /** |
|
96 * Select given content sink into parser for parser output |
|
97 * @update gess5/11/98 |
|
98 * @param aSink is the new sink to be used by parser |
|
99 * @return old sink, or nullptr |
|
100 */ |
|
101 NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink); |
|
102 |
|
103 /** |
|
104 * retrive the sink set into the parser |
|
105 * @update gess5/11/98 |
|
106 * @param aSink is the new sink to be used by parser |
|
107 * @return old sink, or nullptr |
|
108 */ |
|
109 NS_IMETHOD_(nsIContentSink*) GetContentSink(void); |
|
110 |
|
111 /** |
|
112 * Call this method once you've created a parser, and want to instruct it |
|
113 * about the command which caused the parser to be constructed. For example, |
|
114 * this allows us to select a DTD which can do, say, view-source. |
|
115 * |
|
116 * @update gess 3/25/98 |
|
117 * @param aCommand -- ptrs to string that contains command |
|
118 * @return nada |
|
119 */ |
|
120 NS_IMETHOD_(void) GetCommand(nsCString& aCommand); |
|
121 NS_IMETHOD_(void) SetCommand(const char* aCommand); |
|
122 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand); |
|
123 |
|
124 /** |
|
125 * Call this method once you've created a parser, and want to instruct it |
|
126 * about what charset to load |
|
127 * |
|
128 * @update ftang 4/23/99 |
|
129 * @param aCharset- the charset of a document |
|
130 * @param aCharsetSource- the source of the charset |
|
131 * @return nada |
|
132 */ |
|
133 NS_IMETHOD_(void) SetDocumentCharset(const nsACString& aCharset, int32_t aSource); |
|
134 |
|
135 NS_IMETHOD_(void) GetDocumentCharset(nsACString& aCharset, int32_t& aSource) |
|
136 { |
|
137 aCharset = mCharset; |
|
138 aSource = mCharsetSource; |
|
139 } |
|
140 |
|
141 /** |
|
142 * Cause parser to parse input from given URL |
|
143 * @update gess5/11/98 |
|
144 * @param aURL is a descriptor for source document |
|
145 * @param aListener is a listener to forward notifications to |
|
146 * @return TRUE if all went well -- FALSE otherwise |
|
147 */ |
|
148 NS_IMETHOD Parse(nsIURI* aURL, |
|
149 nsIRequestObserver* aListener = nullptr, |
|
150 void* aKey = 0, |
|
151 nsDTDMode aMode = eDTDMode_autodetect); |
|
152 |
|
153 /** |
|
154 * This method needs documentation |
|
155 */ |
|
156 NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer, |
|
157 nsTArray<nsString>& aTagStack); |
|
158 |
|
159 /** |
|
160 * This method gets called when the tokens have been consumed, and it's time |
|
161 * to build the model via the content sink. |
|
162 * @update gess5/11/98 |
|
163 * @return YES if model building went well -- NO otherwise. |
|
164 */ |
|
165 NS_IMETHOD BuildModel(void); |
|
166 |
|
167 NS_IMETHOD ContinueInterruptedParsing(); |
|
168 NS_IMETHOD_(void) BlockParser(); |
|
169 NS_IMETHOD_(void) UnblockParser(); |
|
170 NS_IMETHOD_(void) ContinueInterruptedParsingAsync(); |
|
171 NS_IMETHOD Terminate(void); |
|
172 |
|
173 /** |
|
174 * Call this to query whether the parser is enabled or not. |
|
175 * |
|
176 * @update vidur 4/12/99 |
|
177 * @return current state |
|
178 */ |
|
179 NS_IMETHOD_(bool) IsParserEnabled(); |
|
180 |
|
181 /** |
|
182 * Call this to query whether the parser thinks it's done with parsing. |
|
183 * |
|
184 * @update rickg 5/12/01 |
|
185 * @return complete state |
|
186 */ |
|
187 NS_IMETHOD_(bool) IsComplete(); |
|
188 |
|
189 /** |
|
190 * This rather arcane method (hack) is used as a signal between the |
|
191 * DTD and the parser. It allows the DTD to tell the parser that content |
|
192 * that comes through (parser::parser(string)) but not consumed should |
|
193 * propagate into the next string based parse call. |
|
194 * |
|
195 * @update gess 9/1/98 |
|
196 * @param aState determines whether we propagate unused string content. |
|
197 * @return current state |
|
198 */ |
|
199 void SetUnusedInput(nsString& aBuffer); |
|
200 |
|
201 /** |
|
202 * This method gets called (automatically) during incremental parsing |
|
203 * @update gess5/11/98 |
|
204 * @return TRUE if all went well, otherwise FALSE |
|
205 */ |
|
206 virtual nsresult ResumeParse(bool allowIteration = true, |
|
207 bool aIsFinalChunk = false, |
|
208 bool aCanInterrupt = true); |
|
209 |
|
210 //********************************************* |
|
211 // These methods are callback methods used by |
|
212 // net lib to let us know about our inputstream. |
|
213 //********************************************* |
|
214 // nsIRequestObserver methods: |
|
215 NS_DECL_NSIREQUESTOBSERVER |
|
216 |
|
217 // nsIStreamListener methods: |
|
218 NS_DECL_NSISTREAMLISTENER |
|
219 |
|
220 void PushContext(CParserContext& aContext); |
|
221 CParserContext* PopContext(); |
|
222 CParserContext* PeekContext() {return mParserContext;} |
|
223 |
|
224 /** |
|
225 * Get the channel associated with this parser |
|
226 * @update harishd,gagan 07/17/01 |
|
227 * @param aChannel out param that will contain the result |
|
228 * @return NS_OK if successful |
|
229 */ |
|
230 NS_IMETHOD GetChannel(nsIChannel** aChannel); |
|
231 |
|
232 /** |
|
233 * Get the DTD associated with this parser |
|
234 * @update vidur 9/29/99 |
|
235 * @param aDTD out param that will contain the result |
|
236 * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error |
|
237 */ |
|
238 NS_IMETHOD GetDTD(nsIDTD** aDTD); |
|
239 |
|
240 /** |
|
241 * Get the nsIStreamListener for this parser |
|
242 */ |
|
243 virtual nsIStreamListener* GetStreamListener(); |
|
244 |
|
245 void SetSinkCharset(nsACString& aCharset); |
|
246 |
|
247 /** |
|
248 * Removes continue parsing events |
|
249 * @update kmcclusk 5/18/98 |
|
250 */ |
|
251 |
|
252 NS_IMETHODIMP CancelParsingEvents(); |
|
253 |
|
254 /** |
|
255 * Return true. |
|
256 */ |
|
257 virtual bool IsInsertionPointDefined(); |
|
258 |
|
259 /** |
|
260 * No-op. |
|
261 */ |
|
262 virtual void BeginEvaluatingParserInsertedScript(); |
|
263 |
|
264 /** |
|
265 * No-op. |
|
266 */ |
|
267 virtual void EndEvaluatingParserInsertedScript(); |
|
268 |
|
269 /** |
|
270 * No-op. |
|
271 */ |
|
272 virtual void MarkAsNotScriptCreated(const char* aCommand); |
|
273 |
|
274 /** |
|
275 * Always false. |
|
276 */ |
|
277 virtual bool IsScriptCreated(); |
|
278 |
|
279 /** |
|
280 * Set to parser state to indicate whether parsing tokens can be interrupted |
|
281 * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted. |
|
282 * @update kmcclusk 5/18/98 |
|
283 */ |
|
284 void SetCanInterrupt(bool aCanInterrupt); |
|
285 |
|
286 /** |
|
287 * This is called when the final chunk has been |
|
288 * passed to the parser and the content sink has |
|
289 * interrupted token processing. It schedules |
|
290 * a ParserContinue PL_Event which will ask the parser |
|
291 * to HandleParserContinueEvent when it is handled. |
|
292 * @update kmcclusk6/1/2001 |
|
293 */ |
|
294 nsresult PostContinueEvent(); |
|
295 |
|
296 /** |
|
297 * Fired when the continue parse event is triggered. |
|
298 * @update kmcclusk 5/18/98 |
|
299 */ |
|
300 void HandleParserContinueEvent(class nsParserContinueEvent *); |
|
301 |
|
302 virtual void Reset() { |
|
303 Cleanup(); |
|
304 Initialize(); |
|
305 } |
|
306 |
|
307 bool IsScriptExecuting() { |
|
308 return mSink && mSink->IsScriptExecuting(); |
|
309 } |
|
310 |
|
311 bool IsOkToProcessNetworkData() { |
|
312 return !IsScriptExecuting() && !mProcessingNetworkData; |
|
313 } |
|
314 |
|
315 protected: |
|
316 |
|
317 void Initialize(bool aConstructor = false); |
|
318 void Cleanup(); |
|
319 |
|
320 /** |
|
321 * |
|
322 * @update gess5/18/98 |
|
323 * @param |
|
324 * @return |
|
325 */ |
|
326 nsresult WillBuildModel(nsString& aFilename); |
|
327 |
|
328 /** |
|
329 * |
|
330 * @update gess5/18/98 |
|
331 * @param |
|
332 * @return |
|
333 */ |
|
334 nsresult DidBuildModel(nsresult anErrorCode); |
|
335 |
|
336 private: |
|
337 |
|
338 /******************************************* |
|
339 These are the tokenization methods... |
|
340 *******************************************/ |
|
341 |
|
342 /** |
|
343 * Part of the code sandwich, this gets called right before |
|
344 * the tokenization process begins. The main reason for |
|
345 * this call is to allow the delegate to do initialization. |
|
346 * |
|
347 * @update gess 3/25/98 |
|
348 * @param |
|
349 * @return TRUE if it's ok to proceed |
|
350 */ |
|
351 bool WillTokenize(bool aIsFinalChunk = false); |
|
352 |
|
353 |
|
354 /** |
|
355 * This is the primary control routine. It iteratively |
|
356 * consumes tokens until an error occurs or you run out |
|
357 * of data. |
|
358 * |
|
359 * @update gess 3/25/98 |
|
360 * @return error code |
|
361 */ |
|
362 nsresult Tokenize(bool aIsFinalChunk = false); |
|
363 |
|
364 /** |
|
365 * Pushes XML fragment parsing data to expat without an input stream. |
|
366 */ |
|
367 nsresult Parse(const nsAString& aSourceBuffer, |
|
368 void* aKey, |
|
369 bool aLastCall); |
|
370 |
|
371 protected: |
|
372 //********************************************* |
|
373 // And now, some data members... |
|
374 //********************************************* |
|
375 |
|
376 |
|
377 CParserContext* mParserContext; |
|
378 nsCOMPtr<nsIDTD> mDTD; |
|
379 nsCOMPtr<nsIRequestObserver> mObserver; |
|
380 nsCOMPtr<nsIContentSink> mSink; |
|
381 nsIRunnable* mContinueEvent; // weak ref |
|
382 |
|
383 eParserCommands mCommand; |
|
384 nsresult mInternalState; |
|
385 nsresult mStreamStatus; |
|
386 int32_t mCharsetSource; |
|
387 |
|
388 uint16_t mFlags; |
|
389 |
|
390 nsString mUnusedInput; |
|
391 nsCString mCharset; |
|
392 nsCString mCommandStr; |
|
393 |
|
394 bool mProcessingNetworkData; |
|
395 bool mIsAboutBlank; |
|
396 }; |
|
397 |
|
398 #endif |
|
399 |