Fri, 16 Jan 2015 18:13:44 +0100
Integrate suggestion from review to improve consistency with existing code.
1 /*
2 * Copyright (c) 2005-2007 Henri Sivonen
3 * Copyright (c) 2007-2013 Mozilla Foundation
4 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5 * Foundation, and Opera Software ASA.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
26 /*
27 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28 * Please edit Tokenizer.java instead and regenerate.
29 */
31 #ifndef nsHtml5Tokenizer_h
32 #define nsHtml5Tokenizer_h
34 #include "nsIAtom.h"
35 #include "nsHtml5AtomTable.h"
36 #include "nsString.h"
37 #include "nsIContent.h"
38 #include "nsTraceRefcnt.h"
39 #include "jArray.h"
40 #include "nsHtml5DocumentMode.h"
41 #include "nsHtml5ArrayCopy.h"
42 #include "nsHtml5NamedCharacters.h"
43 #include "nsHtml5NamedCharactersAccel.h"
44 #include "nsHtml5Atoms.h"
45 #include "nsAHtml5TreeBuilderState.h"
46 #include "nsHtml5Macros.h"
47 #include "nsHtml5Highlighter.h"
48 #include "nsHtml5TokenizerLoopPolicies.h"
// Forward declarations of related parser classes. The ones referenced by the
// visible declarations of nsHtml5Tokenizer (nsHtml5StreamParser,
// nsHtml5TreeBuilder, nsHtml5AttributeName, nsHtml5ElementName,
// nsHtml5HtmlAttributes, nsHtml5UTF16Buffer) appear there only as pointer
// types. nsHtml5MetaScanner, nsHtml5StateSnapshot and nsHtml5Portability are
// not referenced in the visible part of this header.
50 class nsHtml5StreamParser;
52 class nsHtml5TreeBuilder;
53 class nsHtml5MetaScanner;
54 class nsHtml5AttributeName;
55 class nsHtml5ElementName;
56 class nsHtml5HtmlAttributes;
57 class nsHtml5UTF16Buffer;
58 class nsHtml5StateSnapshot;
59 class nsHtml5Portability;
/**
 * Declaration of the HTML5 tokenizer class. As the banner at the top of this
 * file states, the header is generated from Tokenizer.java; lasting changes
 * belong there, followed by regeneration.
 *
 * Only the small inline helpers are defined in this header. Every other
 * member function is merely declared, so its behavior cannot be read off
 * this file. Access specifiers are repeated between individual members and
 * the data members keep their declaration order.
 */
62 class nsHtml5Tokenizer
63 {
64 private:
// Static UTF-16 character tables. Only declared here: neither their lengths
// nor their contents are visible in this header.
65 static char16_t LT_GT[];
66 static char16_t LT_SOLIDUS[];
67 static char16_t RSQB_RSQB[];
68 static char16_t REPLACEMENT_CHARACTER[];
69 static char16_t LF[];
70 static char16_t CDATA_LSQB[];
71 static char16_t OCTYPE[];
72 static char16_t UBLIC[];
73 static char16_t YSTEM[];
// Static arrays named after element names.
// NOTE(review): presumably the names of elements whose content is tokenized
// in a special state — confirm in the .cpp.
74 static staticJArray<char16_t,int32_t> TITLE_ARR;
75 static staticJArray<char16_t,int32_t> SCRIPT_ARR;
76 static staticJArray<char16_t,int32_t> STYLE_ARR;
77 static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
78 static staticJArray<char16_t,int32_t> XMP_ARR;
79 static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
80 static staticJArray<char16_t,int32_t> IFRAME_ARR;
81 static staticJArray<char16_t,int32_t> NOEMBED_ARR;
82 static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
83 static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
84 protected:
// Collaborators held as raw pointers; who owns them is not visible from this
// header.
85 nsHtml5TreeBuilder* tokenHandler;
86 nsHtml5StreamParser* encodingDeclarationHandler;
// Set to true by silentCarriageReturn() below; any other reads or writes
// happen outside this header.
87 bool lastCR;
88 int32_t stateSave;
89 private:
90 int32_t returnStateSave;
91 protected:
92 int32_t index;
93 private:
94 bool forceQuirks;
95 char16_t additional;
96 int32_t entCol;
97 int32_t firstCharKey;
98 int32_t lo;
99 int32_t hi;
100 int32_t candidate;
101 int32_t strBufMark;
102 int32_t prevValue;
103 protected:
104 int32_t value;
105 private:
106 bool seenDigits;
107 protected:
108 int32_t cstart;
109 private:
110 nsString* publicId;
111 nsString* systemId;
// Two character buffers, each tracked by its own length field
// (strBuf/strBufLen and longStrBuf/longStrBufLen). The inline helpers below
// only ever touch slot 0 and the length fields.
112 autoJArray<char16_t,int32_t> strBuf;
113 int32_t strBufLen;
114 autoJArray<char16_t,int32_t> longStrBuf;
115 int32_t longStrBufLen;
116 autoJArray<char16_t,int32_t> bmpChar;
117 autoJArray<char16_t,int32_t> astralChar;
118 protected:
119 nsHtml5ElementName* endTagExpectation;
120 private:
121 jArray<char16_t,int32_t> endTagExpectationAsArray;
122 protected:
123 bool endTag;
124 private:
125 nsHtml5ElementName* tagName;
126 protected:
127 nsHtml5AttributeName* attributeName;
128 private:
129 nsIAtom* doctypeName;
130 nsString* publicIdentifier;
131 nsString* systemIdentifier;
132 nsHtml5HtmlAttributes* attributes;
133 bool newAttributesEachTime;
134 bool shouldSuspend;
135 protected:
136 bool confident;
137 private:
// Line counter: incremented by silentCarriageReturn()/silentLineFeed() and
// returned by getLineNumber().
138 int32_t line;
139 nsHtml5AtomTable* interner;
140 bool viewingXmlSource;
141 public:
142 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
143 void setInterner(nsHtml5AtomTable* interner);
144 void initLocation(nsString* newPublicId, nsString* newSystemId);
145 bool isViewingXmlSource();
// Two overloads: the end tag expectation can be supplied either as an atom or
// as an element name.
146 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
147 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
148 private:
149 void endTagExpectationToArray();
150 public:
151 void setLineNumber(int32_t line);
// Returns the current value of the line member.
152 inline int32_t getLineNumber()
153 {
154 return line;
155 }
157 nsHtml5HtmlAttributes* emptyAttributes();
158 private:
// Resets strBuf so that it holds exactly one code unit, c. Slot 0 is written
// without any capacity check, so strBuf must already have room for at least
// one element when this is called.
159 inline void clearStrBufAndAppend(char16_t c)
160 {
161 strBuf[0] = c;
162 strBufLen = 1;
163 }
// Marks strBuf as empty by zeroing its length; the storage is left untouched.
165 inline void clearStrBuf()
166 {
167 strBufLen = 0;
168 }
170 void appendStrBuf(char16_t c);
171 protected:
172 nsString* strBufToString();
173 private:
174 void strBufToDoctypeName();
175 void emitStrBuf();
// Marks longStrBuf as empty by zeroing its length; the storage is left
// untouched.
176 inline void clearLongStrBuf()
177 {
178 longStrBufLen = 0;
179 }
// Resets longStrBuf so that it holds exactly one code unit, c. As with
// clearStrBufAndAppend, slot 0 is written without a capacity check.
181 inline void clearLongStrBufAndAppend(char16_t c)
182 {
183 longStrBuf[0] = c;
184 longStrBufLen = 1;
185 }
187 void appendLongStrBuf(char16_t c);
// Appends a single '-' to longStrBuf.
188 inline void appendSecondHyphenToBogusComment()
189 {
190 appendLongStrBuf('-');
191 }
// Calls errConsecutiveHyphens() and then appends c to longStrBuf.
// errConsecutiveHyphens is not declared in the visible part of this class.
// NOTE(review): presumably it is supplied by nsHtml5TokenizerHSupplement.h,
// which is included at the end of the class body — confirm.
193 inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c)
194 {
195 errConsecutiveHyphens();
196 appendLongStrBuf(c);
197 }
199 void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length);
// Hands the current contents of strBuf (offset 0, length strBufLen) to the
// three-argument appendLongStrBuf overload.
200 inline void appendStrBufToLongStrBuf()
201 {
202 appendLongStrBuf(strBuf, 0, strBufLen);
203 }
205 nsString* longStrBufToString();
206 void emitComment(int32_t provisionalHyphens, int32_t pos);
207 protected:
208 void flushChars(char16_t* buf, int32_t pos);
209 private:
210 void strBufToElementNameString();
211 int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
212 void attributeNameComplete();
213 void addAttributeWithoutValue();
214 void addAttributeWithValue();
215 public:
216 void start();
217 bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
218 private:
// Member template parameterized on a type P; only declared here.
// NOTE(review): P is presumably one of the policies from
// nsHtml5TokenizerLoopPolicies.h (included above) — confirm.
219 template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
220 void initDoctypeFields();
// The four helpers below come in carriage-return / line-feed pairs. Both
// members of a pair append '\n'; they differ only in which line-counting
// helper (silentCarriageReturn or silentLineFeed) runs first.
221 inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
222 {
223 silentCarriageReturn();
224 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
225 }
227 inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
228 {
229 silentLineFeed();
230 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
231 }
233 inline void appendLongStrBufLineFeed()
234 {
235 silentLineFeed();
236 appendLongStrBuf('\n');
237 }
239 inline void appendLongStrBufCarriageReturn()
240 {
241 silentCarriageReturn();
242 appendLongStrBuf('\n');
243 }
245 protected:
// Advances the line counter and sets lastCR.
// NOTE(review): lastCR presumably lets the caller avoid counting the LF of a
// CR LF pair a second time — confirm in the .cpp.
246 inline void silentCarriageReturn()
247 {
248 ++line;
249 lastCR = true;
250 }
// Advances the line counter; lastCR is not touched here.
252 inline void silentLineFeed()
253 {
254 ++line;
255 }
257 private:
258 void emitCarriageReturn(char16_t* buf, int32_t pos);
259 void emitReplacementCharacter(char16_t* buf, int32_t pos);
260 void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
261 void setAdditionalAndRememberAmpersandLocation(char16_t add);
262 void bogusDoctype();
263 void bogusDoctypeWithoutQuirks();
264 void emitOrAppendStrBuf(int32_t returnState);
265 void handleNcrValue(int32_t returnState);
266 public:
267 void eof();
268 private:
269 void emitDoctypeToken(int32_t pos);
270 protected:
// Returns buf[pos]. No bounds check is performed, so the caller must pass a
// valid index.
271 inline char16_t checkChar(char16_t* buf, int32_t pos)
272 {
273 return buf[pos];
274 }
276 public:
277 bool internalEncodingDeclaration(nsString* internalCharset);
278 private:
279 void emitOrAppendTwo(const char16_t* val, int32_t returnState);
280 void emitOrAppendOne(const char16_t* val, int32_t returnState);
281 public:
282 void end();
283 void requestSuspension();
284 bool isInDataState();
285 void resetToDataState();
286 void loadState(nsHtml5Tokenizer* other);
287 void initializeWithoutStarting();
288 void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
289 ~nsHtml5Tokenizer();
290 static void initializeStatics();
291 static void releaseStatics();
// Textually included inside the class body: whatever this header contains
// becomes part of the nsHtml5Tokenizer declaration.
293 #include "nsHtml5TokenizerHSupplement.h"
294 };
// ~1 clears only bit 0, so (x & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK) is
// zero exactly when x is NS_HTML5TOKENIZER_DATA (0) or
// NS_HTML5TOKENIZER_RCDATA (1).
// NOTE(review): the replacement list is not parenthesized; use it only as a
// plain operand (e.g. `state & MASK`).
296 #define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
// Consecutive integer identifiers 0 through 74.
// NOTE(review): presumably the tokenizer state numbers passed as `state` /
// `returnState` to nsHtml5Tokenizer::stateLoop and stored in stateSave /
// returnStateSave — confirm in the .cpp.
297 #define NS_HTML5TOKENIZER_DATA 0
298 #define NS_HTML5TOKENIZER_RCDATA 1
299 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2
300 #define NS_HTML5TOKENIZER_RAWTEXT 3
301 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
302 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
303 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
304 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
305 #define NS_HTML5TOKENIZER_PLAINTEXT 8
306 #define NS_HTML5TOKENIZER_TAG_OPEN 9
307 #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
308 #define NS_HTML5TOKENIZER_TAG_NAME 11
309 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
310 #define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
311 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
312 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
313 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
314 #define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
315 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
316 #define NS_HTML5TOKENIZER_DOCTYPE 19
317 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
318 #define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
319 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
320 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
321 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
322 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
323 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
324 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
325 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
326 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
327 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
328 #define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
329 #define NS_HTML5TOKENIZER_COMMENT_START 32
330 #define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
331 #define NS_HTML5TOKENIZER_COMMENT 34
332 #define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
333 #define NS_HTML5TOKENIZER_COMMENT_END 36
334 #define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
335 #define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
336 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
337 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
338 #define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
339 #define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
340 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
341 #define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
342 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
343 #define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
344 #define NS_HTML5TOKENIZER_CONSUME_NCR 47
345 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
346 #define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
// Spelled "NRC" here while the neighbouring names use "NCR". The macro name is
// part of the interface, so the spelling must stay as it is.
347 #define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
348 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
349 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
350 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
351 #define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
352 #define NS_HTML5TOKENIZER_CDATA_START 55
353 #define NS_HTML5TOKENIZER_CDATA_SECTION 56
354 #define NS_HTML5TOKENIZER_CDATA_RSQB 57
355 #define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
356 #define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
357 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
358 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
359 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
360 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
361 #define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
362 #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
363 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
364 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
365 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
366 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
367 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
368 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
369 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
370 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
371 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
// Evaluates to 0xD800 - 0x40 = 0xD7C0.
// NOTE(review): this matches the usual constant for deriving a UTF-16 lead
// surrogate as LEAD_OFFSET + (codePoint >> 10) — confirm at the use site.
372 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
// NOTE(review): presumably the number of elements by which the tokenizer's
// character buffers are enlarged when full — confirm in the .cpp.
373 #define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
376 #endif