|
/*
 * Copyright (c) 2005-2007 Henri Sivonen
 * Copyright (c) 2007-2013 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
25 |
|
/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Please edit Tokenizer.java instead and regenerate.
 */
|
30 |
|
31 #ifndef nsHtml5Tokenizer_h |
|
32 #define nsHtml5Tokenizer_h |
|
33 |
|
34 #include "nsIAtom.h" |
|
35 #include "nsHtml5AtomTable.h" |
|
36 #include "nsString.h" |
|
37 #include "nsIContent.h" |
|
38 #include "nsTraceRefcnt.h" |
|
39 #include "jArray.h" |
|
40 #include "nsHtml5DocumentMode.h" |
|
41 #include "nsHtml5ArrayCopy.h" |
|
42 #include "nsHtml5NamedCharacters.h" |
|
43 #include "nsHtml5NamedCharactersAccel.h" |
|
44 #include "nsHtml5Atoms.h" |
|
45 #include "nsAHtml5TreeBuilderState.h" |
|
46 #include "nsHtml5Macros.h" |
|
47 #include "nsHtml5Highlighter.h" |
|
48 #include "nsHtml5TokenizerLoopPolicies.h" |
|
49 |
|
// Forward declarations. The tokenizer class declared in this header refers to
// several of these types only through pointers (members and parameters), so
// their full definitions are not required here. nsHtml5MetaScanner,
// nsHtml5StateSnapshot and nsHtml5Portability are not referenced by the
// declarations visible in this header.
class nsHtml5StreamParser;

class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5HtmlAttributes;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
|
60 |
|
61 |
|
62 class nsHtml5Tokenizer |
|
63 { |
|
64 private: |
|
65 static char16_t LT_GT[]; |
|
66 static char16_t LT_SOLIDUS[]; |
|
67 static char16_t RSQB_RSQB[]; |
|
68 static char16_t REPLACEMENT_CHARACTER[]; |
|
69 static char16_t LF[]; |
|
70 static char16_t CDATA_LSQB[]; |
|
71 static char16_t OCTYPE[]; |
|
72 static char16_t UBLIC[]; |
|
73 static char16_t YSTEM[]; |
|
74 static staticJArray<char16_t,int32_t> TITLE_ARR; |
|
75 static staticJArray<char16_t,int32_t> SCRIPT_ARR; |
|
76 static staticJArray<char16_t,int32_t> STYLE_ARR; |
|
77 static staticJArray<char16_t,int32_t> PLAINTEXT_ARR; |
|
78 static staticJArray<char16_t,int32_t> XMP_ARR; |
|
79 static staticJArray<char16_t,int32_t> TEXTAREA_ARR; |
|
80 static staticJArray<char16_t,int32_t> IFRAME_ARR; |
|
81 static staticJArray<char16_t,int32_t> NOEMBED_ARR; |
|
82 static staticJArray<char16_t,int32_t> NOSCRIPT_ARR; |
|
83 static staticJArray<char16_t,int32_t> NOFRAMES_ARR; |
|
84 protected: |
|
85 nsHtml5TreeBuilder* tokenHandler; |
|
86 nsHtml5StreamParser* encodingDeclarationHandler; |
|
87 bool lastCR; |
|
88 int32_t stateSave; |
|
89 private: |
|
90 int32_t returnStateSave; |
|
91 protected: |
|
92 int32_t index; |
|
93 private: |
|
94 bool forceQuirks; |
|
95 char16_t additional; |
|
96 int32_t entCol; |
|
97 int32_t firstCharKey; |
|
98 int32_t lo; |
|
99 int32_t hi; |
|
100 int32_t candidate; |
|
101 int32_t strBufMark; |
|
102 int32_t prevValue; |
|
103 protected: |
|
104 int32_t value; |
|
105 private: |
|
106 bool seenDigits; |
|
107 protected: |
|
108 int32_t cstart; |
|
109 private: |
|
110 nsString* publicId; |
|
111 nsString* systemId; |
|
112 autoJArray<char16_t,int32_t> strBuf; |
|
113 int32_t strBufLen; |
|
114 autoJArray<char16_t,int32_t> longStrBuf; |
|
115 int32_t longStrBufLen; |
|
116 autoJArray<char16_t,int32_t> bmpChar; |
|
117 autoJArray<char16_t,int32_t> astralChar; |
|
118 protected: |
|
119 nsHtml5ElementName* endTagExpectation; |
|
120 private: |
|
121 jArray<char16_t,int32_t> endTagExpectationAsArray; |
|
122 protected: |
|
123 bool endTag; |
|
124 private: |
|
125 nsHtml5ElementName* tagName; |
|
126 protected: |
|
127 nsHtml5AttributeName* attributeName; |
|
128 private: |
|
129 nsIAtom* doctypeName; |
|
130 nsString* publicIdentifier; |
|
131 nsString* systemIdentifier; |
|
132 nsHtml5HtmlAttributes* attributes; |
|
133 bool newAttributesEachTime; |
|
134 bool shouldSuspend; |
|
135 protected: |
|
136 bool confident; |
|
137 private: |
|
138 int32_t line; |
|
139 nsHtml5AtomTable* interner; |
|
140 bool viewingXmlSource; |
|
141 public: |
|
142 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource); |
|
143 void setInterner(nsHtml5AtomTable* interner); |
|
144 void initLocation(nsString* newPublicId, nsString* newSystemId); |
|
145 bool isViewingXmlSource(); |
|
146 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation); |
|
147 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation); |
|
148 private: |
|
149 void endTagExpectationToArray(); |
|
150 public: |
|
151 void setLineNumber(int32_t line); |
|
152 inline int32_t getLineNumber() |
|
153 { |
|
154 return line; |
|
155 } |
|
156 |
|
157 nsHtml5HtmlAttributes* emptyAttributes(); |
|
158 private: |
|
159 inline void clearStrBufAndAppend(char16_t c) |
|
160 { |
|
161 strBuf[0] = c; |
|
162 strBufLen = 1; |
|
163 } |
|
164 |
|
165 inline void clearStrBuf() |
|
166 { |
|
167 strBufLen = 0; |
|
168 } |
|
169 |
|
170 void appendStrBuf(char16_t c); |
|
171 protected: |
|
172 nsString* strBufToString(); |
|
173 private: |
|
174 void strBufToDoctypeName(); |
|
175 void emitStrBuf(); |
|
176 inline void clearLongStrBuf() |
|
177 { |
|
178 longStrBufLen = 0; |
|
179 } |
|
180 |
|
181 inline void clearLongStrBufAndAppend(char16_t c) |
|
182 { |
|
183 longStrBuf[0] = c; |
|
184 longStrBufLen = 1; |
|
185 } |
|
186 |
|
187 void appendLongStrBuf(char16_t c); |
|
188 inline void appendSecondHyphenToBogusComment() |
|
189 { |
|
190 appendLongStrBuf('-'); |
|
191 } |
|
192 |
|
193 inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c) |
|
194 { |
|
195 errConsecutiveHyphens(); |
|
196 appendLongStrBuf(c); |
|
197 } |
|
198 |
|
199 void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length); |
|
200 inline void appendStrBufToLongStrBuf() |
|
201 { |
|
202 appendLongStrBuf(strBuf, 0, strBufLen); |
|
203 } |
|
204 |
|
205 nsString* longStrBufToString(); |
|
206 void emitComment(int32_t provisionalHyphens, int32_t pos); |
|
207 protected: |
|
208 void flushChars(char16_t* buf, int32_t pos); |
|
209 private: |
|
210 void strBufToElementNameString(); |
|
211 int32_t emitCurrentTagToken(bool selfClosing, int32_t pos); |
|
212 void attributeNameComplete(); |
|
213 void addAttributeWithoutValue(); |
|
214 void addAttributeWithValue(); |
|
215 public: |
|
216 void start(); |
|
217 bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer); |
|
218 private: |
|
219 template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos); |
|
220 void initDoctypeFields(); |
|
221 inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() |
|
222 { |
|
223 silentCarriageReturn(); |
|
224 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); |
|
225 } |
|
226 |
|
227 inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() |
|
228 { |
|
229 silentLineFeed(); |
|
230 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); |
|
231 } |
|
232 |
|
233 inline void appendLongStrBufLineFeed() |
|
234 { |
|
235 silentLineFeed(); |
|
236 appendLongStrBuf('\n'); |
|
237 } |
|
238 |
|
239 inline void appendLongStrBufCarriageReturn() |
|
240 { |
|
241 silentCarriageReturn(); |
|
242 appendLongStrBuf('\n'); |
|
243 } |
|
244 |
|
245 protected: |
|
246 inline void silentCarriageReturn() |
|
247 { |
|
248 ++line; |
|
249 lastCR = true; |
|
250 } |
|
251 |
|
252 inline void silentLineFeed() |
|
253 { |
|
254 ++line; |
|
255 } |
|
256 |
|
257 private: |
|
258 void emitCarriageReturn(char16_t* buf, int32_t pos); |
|
259 void emitReplacementCharacter(char16_t* buf, int32_t pos); |
|
260 void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); |
|
261 void setAdditionalAndRememberAmpersandLocation(char16_t add); |
|
262 void bogusDoctype(); |
|
263 void bogusDoctypeWithoutQuirks(); |
|
264 void emitOrAppendStrBuf(int32_t returnState); |
|
265 void handleNcrValue(int32_t returnState); |
|
266 public: |
|
267 void eof(); |
|
268 private: |
|
269 void emitDoctypeToken(int32_t pos); |
|
270 protected: |
|
271 inline char16_t checkChar(char16_t* buf, int32_t pos) |
|
272 { |
|
273 return buf[pos]; |
|
274 } |
|
275 |
|
276 public: |
|
277 bool internalEncodingDeclaration(nsString* internalCharset); |
|
278 private: |
|
279 void emitOrAppendTwo(const char16_t* val, int32_t returnState); |
|
280 void emitOrAppendOne(const char16_t* val, int32_t returnState); |
|
281 public: |
|
282 void end(); |
|
283 void requestSuspension(); |
|
284 bool isInDataState(); |
|
285 void resetToDataState(); |
|
286 void loadState(nsHtml5Tokenizer* other); |
|
287 void initializeWithoutStarting(); |
|
288 void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler); |
|
289 ~nsHtml5Tokenizer(); |
|
290 static void initializeStatics(); |
|
291 static void releaseStatics(); |
|
292 |
|
293 #include "nsHtml5TokenizerHSupplement.h" |
|
294 }; |
|
295 |
|
/*
 * Integer constants for the tokenizer, all prefixed NS_HTML5TOKENIZER_.
 *
 * The values 0-74 are presumably the state numbers passed around as the
 * `state` / `returnState` / `specialTokenizerState` arguments of
 * nsHtml5Tokenizer -- TODO confirm against the .cpp. The numbering is not in
 * declaration order of any visible enum; do not renumber.
 *
 * NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK clears bit 0, so
 * (x & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK) == 0 holds exactly for
 * NS_HTML5TOKENIZER_DATA (0) and NS_HTML5TOKENIZER_RCDATA (1). Its replacement
 * list is parenthesized so it expands as a single operand in any expression.
 */
#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK (~1)
#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
#define NS_HTML5TOKENIZER_RAWTEXT 3
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
#define NS_HTML5TOKENIZER_PLAINTEXT 8
#define NS_HTML5TOKENIZER_TAG_OPEN 9
#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
#define NS_HTML5TOKENIZER_TAG_NAME 11
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
#define NS_HTML5TOKENIZER_DOCTYPE 19
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
#define NS_HTML5TOKENIZER_COMMENT_START 32
#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
#define NS_HTML5TOKENIZER_COMMENT 34
#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
#define NS_HTML5TOKENIZER_COMMENT_END 36
#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
#define NS_HTML5TOKENIZER_CONSUME_NCR 47
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
/* "NRC" is the existing spelling of this name; kept so users still compile. */
#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
#define NS_HTML5TOKENIZER_CDATA_START 55
#define NS_HTML5TOKENIZER_CDATA_SECTION 56
#define NS_HTML5TOKENIZER_CDATA_RSQB 57
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
/*
 * Evaluates to 0xD7C0. The expression has the same form as the LEAD_OFFSET
 * constant of the usual UTF-16 surrogate-pair computation
 * (lead = LEAD_OFFSET + (codepoint >> 10)); presumably used that way in the
 * .cpp -- TODO confirm.
 */
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
/* Presumably the increment used when growing a buffer -- TODO confirm. */
#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
|
374 |
|
375 |
|
376 #endif |
|
377 |