1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/html/nsHtml5Tokenizer.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,377 @@ 1.4 +/* 1.5 + * Copyright (c) 2005-2007 Henri Sivonen 1.6 + * Copyright (c) 2007-2013 Mozilla Foundation 1.7 + * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 1.8 + * Foundation, and Opera Software ASA. 1.9 + * 1.10 + * Permission is hereby granted, free of charge, to any person obtaining a 1.11 + * copy of this software and associated documentation files (the "Software"), 1.12 + * to deal in the Software without restriction, including without limitation 1.13 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 1.14 + * and/or sell copies of the Software, and to permit persons to whom the 1.15 + * Software is furnished to do so, subject to the following conditions: 1.16 + * 1.17 + * The above copyright notice and this permission notice shall be included in 1.18 + * all copies or substantial portions of the Software. 1.19 + * 1.20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1.21 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1.22 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1.23 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1.24 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 1.25 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 1.26 + * DEALINGS IN THE SOFTWARE. 1.27 + */ 1.28 + 1.29 +/* 1.30 + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 1.31 + * Please edit Tokenizer.java instead and regenerate. 1.32 + */ 1.33 + 1.34 +#ifndef nsHtml5Tokenizer_h 1.35 +#define nsHtml5Tokenizer_h 1.36 + 1.37 +#include "nsIAtom.h" 1.38 +#include "nsHtml5AtomTable.h" 1.39 +#include "nsString.h" 1.40 +#include "nsIContent.h" 1.41 +#include "nsTraceRefcnt.h" 1.42 +#include "jArray.h" 1.43 +#include "nsHtml5DocumentMode.h" 1.44 +#include "nsHtml5ArrayCopy.h" 1.45 +#include "nsHtml5NamedCharacters.h" 1.46 +#include "nsHtml5NamedCharactersAccel.h" 1.47 +#include "nsHtml5Atoms.h" 1.48 +#include "nsAHtml5TreeBuilderState.h" 1.49 +#include "nsHtml5Macros.h" 1.50 +#include "nsHtml5Highlighter.h" 1.51 +#include "nsHtml5TokenizerLoopPolicies.h" 1.52 + 1.53 +class nsHtml5StreamParser; 1.54 + 1.55 +class nsHtml5TreeBuilder; 1.56 +class nsHtml5MetaScanner; 1.57 +class nsHtml5AttributeName; 1.58 +class nsHtml5ElementName; 1.59 +class nsHtml5HtmlAttributes; 1.60 +class nsHtml5UTF16Buffer; 1.61 +class nsHtml5StateSnapshot; 1.62 +class nsHtml5Portability; 1.63 + 1.64 + 1.65 +class nsHtml5Tokenizer 1.66 +{ 1.67 + private: 1.68 + static char16_t LT_GT[]; 1.69 + static char16_t LT_SOLIDUS[]; 1.70 + static char16_t RSQB_RSQB[]; 1.71 + static char16_t REPLACEMENT_CHARACTER[]; 1.72 + static char16_t LF[]; 1.73 + static char16_t CDATA_LSQB[]; 1.74 + static char16_t OCTYPE[]; 1.75 + static char16_t UBLIC[]; 1.76 + static char16_t YSTEM[]; 1.77 + static staticJArray<char16_t,int32_t> TITLE_ARR; 1.78 + static staticJArray<char16_t,int32_t> SCRIPT_ARR; 1.79 + static staticJArray<char16_t,int32_t> STYLE_ARR; 1.80 + static staticJArray<char16_t,int32_t> PLAINTEXT_ARR; 1.81 + static staticJArray<char16_t,int32_t> XMP_ARR; 1.82 + static staticJArray<char16_t,int32_t> TEXTAREA_ARR; 1.83 + static staticJArray<char16_t,int32_t> IFRAME_ARR; 1.84 + static staticJArray<char16_t,int32_t> NOEMBED_ARR; 1.85 + static staticJArray<char16_t,int32_t> NOSCRIPT_ARR; 1.86 + static staticJArray<char16_t,int32_t> NOFRAMES_ARR; 1.87 + protected: 1.88 + nsHtml5TreeBuilder* tokenHandler; 1.89 + nsHtml5StreamParser* encodingDeclarationHandler; 1.90 + bool lastCR; 1.91 + int32_t stateSave; 1.92 + private: 1.93 + int32_t returnStateSave; 1.94 + protected: 1.95 + int32_t index; 1.96 + private: 1.97 + bool forceQuirks; 1.98 + char16_t additional; 1.99 + int32_t entCol; 1.100 + int32_t firstCharKey; 1.101 + int32_t lo; 1.102 + int32_t hi; 1.103 + int32_t candidate; 1.104 + int32_t strBufMark; 1.105 + int32_t prevValue; 1.106 + protected: 1.107 + int32_t value; 1.108 + private: 1.109 + bool seenDigits; 1.110 + protected: 1.111 + int32_t cstart; 1.112 + private: 1.113 + nsString* publicId; 1.114 + nsString* systemId; 1.115 + autoJArray<char16_t,int32_t> strBuf; 1.116 + int32_t strBufLen; 1.117 + autoJArray<char16_t,int32_t> longStrBuf; 1.118 + int32_t longStrBufLen; 1.119 + autoJArray<char16_t,int32_t> bmpChar; 1.120 + autoJArray<char16_t,int32_t> astralChar; 1.121 + protected: 1.122 + nsHtml5ElementName* endTagExpectation; 1.123 + private: 1.124 + jArray<char16_t,int32_t> endTagExpectationAsArray; 1.125 + protected: 1.126 + bool endTag; 1.127 + private: 1.128 + nsHtml5ElementName* tagName; 1.129 + protected: 1.130 + nsHtml5AttributeName* attributeName; 1.131 + private: 1.132 + nsIAtom* doctypeName; 1.133 + nsString* publicIdentifier; 1.134 + nsString* systemIdentifier; 1.135 + nsHtml5HtmlAttributes* attributes; 1.136 + bool newAttributesEachTime; 1.137 + bool shouldSuspend; 1.138 + protected: 1.139 + bool confident; 1.140 + private: 1.141 + int32_t line; 1.142 + nsHtml5AtomTable* interner; 1.143 + bool viewingXmlSource; 1.144 + public: 1.145 + nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource); 1.146 + void setInterner(nsHtml5AtomTable* interner); 1.147 + void initLocation(nsString* newPublicId, nsString* newSystemId); 1.148 + bool isViewingXmlSource(); 1.149 + void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation); 1.150 + void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation); 1.151 + private: 1.152 + void endTagExpectationToArray(); 1.153 + public: 1.154 + void setLineNumber(int32_t line); 1.155 + inline int32_t getLineNumber() 1.156 + { 1.157 + return line; 1.158 + } 1.159 + 1.160 + nsHtml5HtmlAttributes* emptyAttributes(); 1.161 + private: 1.162 + inline void clearStrBufAndAppend(char16_t c) 1.163 + { 1.164 + strBuf[0] = c; 1.165 + strBufLen = 1; 1.166 + } 1.167 + 1.168 + inline void clearStrBuf() 1.169 + { 1.170 + strBufLen = 0; 1.171 + } 1.172 + 1.173 + void appendStrBuf(char16_t c); 1.174 + protected: 1.175 + nsString* strBufToString(); 1.176 + private: 1.177 + void strBufToDoctypeName(); 1.178 + void emitStrBuf(); 1.179 + inline void clearLongStrBuf() 1.180 + { 1.181 + longStrBufLen = 0; 1.182 + } 1.183 + 1.184 + inline void clearLongStrBufAndAppend(char16_t c) 1.185 + { 1.186 + longStrBuf[0] = c; 1.187 + longStrBufLen = 1; 1.188 + } 1.189 + 1.190 + void appendLongStrBuf(char16_t c); 1.191 + inline void appendSecondHyphenToBogusComment() 1.192 + { 1.193 + appendLongStrBuf('-'); 1.194 + } 1.195 + 1.196 + inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c) 1.197 + { 1.198 + errConsecutiveHyphens(); 1.199 + appendLongStrBuf(c); 1.200 + } 1.201 + 1.202 + void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length); 1.203 + inline void appendStrBufToLongStrBuf() 1.204 + { 1.205 + appendLongStrBuf(strBuf, 0, strBufLen); 1.206 + } 1.207 + 1.208 + nsString* longStrBufToString(); 1.209 + void emitComment(int32_t provisionalHyphens, int32_t pos); 1.210 + protected: 1.211 + void flushChars(char16_t* buf, int32_t pos); 1.212 + private: 1.213 + void strBufToElementNameString(); 1.214 + int32_t emitCurrentTagToken(bool selfClosing, int32_t pos); 1.215 + void attributeNameComplete(); 1.216 + void addAttributeWithoutValue(); 1.217 + void addAttributeWithValue(); 1.218 + public: 1.219 + void start(); 1.220 + bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer); 1.221 + private: 1.222 + template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos); 1.223 + void initDoctypeFields(); 1.224 + inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() 1.225 + { 1.226 + silentCarriageReturn(); 1.227 + adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); 1.228 + } 1.229 + 1.230 + inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() 1.231 + { 1.232 + silentLineFeed(); 1.233 + adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); 1.234 + } 1.235 + 1.236 + inline void appendLongStrBufLineFeed() 1.237 + { 1.238 + silentLineFeed(); 1.239 + appendLongStrBuf('\n'); 1.240 + } 1.241 + 1.242 + inline void appendLongStrBufCarriageReturn() 1.243 + { 1.244 + silentCarriageReturn(); 1.245 + appendLongStrBuf('\n'); 1.246 + } 1.247 + 1.248 + protected: 1.249 + inline void silentCarriageReturn() 1.250 + { 1.251 + ++line; 1.252 + lastCR = true; 1.253 + } 1.254 + 1.255 + inline void silentLineFeed() 1.256 + { 1.257 + ++line; 1.258 + } 1.259 + 1.260 + private: 1.261 + void emitCarriageReturn(char16_t* buf, int32_t pos); 1.262 + void emitReplacementCharacter(char16_t* buf, int32_t pos); 1.263 + void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); 1.264 + void setAdditionalAndRememberAmpersandLocation(char16_t add); 1.265 + void bogusDoctype(); 1.266 + void bogusDoctypeWithoutQuirks(); 1.267 + void emitOrAppendStrBuf(int32_t returnState); 1.268 + void handleNcrValue(int32_t returnState); 1.269 + public: 1.270 + void eof(); 1.271 + private: 1.272 + void emitDoctypeToken(int32_t pos); 1.273 + protected: 1.274 + inline char16_t checkChar(char16_t* buf, int32_t pos) 1.275 + { 1.276 + return buf[pos]; 1.277 + } 1.278 + 1.279 + public: 1.280 + bool internalEncodingDeclaration(nsString* internalCharset); 1.281 + private: 1.282 + void emitOrAppendTwo(const char16_t* val, int32_t returnState); 1.283 + void emitOrAppendOne(const char16_t* val, int32_t returnState); 1.284 + public: 1.285 + void end(); 1.286 + void requestSuspension(); 1.287 + bool isInDataState(); 1.288 + void resetToDataState(); 1.289 + void loadState(nsHtml5Tokenizer* other); 1.290 + void initializeWithoutStarting(); 1.291 + void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler); 1.292 + ~nsHtml5Tokenizer(); 1.293 + static void initializeStatics(); 1.294 + static void releaseStatics(); 1.295 + 1.296 +#include "nsHtml5TokenizerHSupplement.h" 1.297 +}; 1.298 + 1.299 +#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1 1.300 +#define NS_HTML5TOKENIZER_DATA 0 1.301 +#define NS_HTML5TOKENIZER_RCDATA 1 1.302 +#define NS_HTML5TOKENIZER_SCRIPT_DATA 2 1.303 +#define NS_HTML5TOKENIZER_RAWTEXT 3 1.304 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4 1.305 +#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5 1.306 +#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6 1.307 +#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7 1.308 +#define NS_HTML5TOKENIZER_PLAINTEXT 8 1.309 +#define NS_HTML5TOKENIZER_TAG_OPEN 9 1.310 +#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10 1.311 +#define NS_HTML5TOKENIZER_TAG_NAME 11 1.312 +#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12 1.313 +#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13 1.314 +#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14 1.315 +#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15 1.316 +#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16 1.317 +#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17 1.318 +#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18 1.319 +#define NS_HTML5TOKENIZER_DOCTYPE 19 1.320 +#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20 1.321 +#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21 1.322 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22 1.323 +#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23 1.324 +#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24 1.325 +#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25 1.326 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26 1.327 +#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27 1.328 +#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28 1.329 +#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29 1.330 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30 1.331 +#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31 1.332 +#define NS_HTML5TOKENIZER_COMMENT_START 32 1.333 +#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33 1.334 +#define NS_HTML5TOKENIZER_COMMENT 34 1.335 +#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35 1.336 +#define NS_HTML5TOKENIZER_COMMENT_END 36 1.337 +#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37 1.338 +#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38 1.339 +#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39 1.340 +#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40 1.341 +#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41 1.342 +#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42 1.343 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43 1.344 +#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44 1.345 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45 1.346 +#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46 1.347 +#define NS_HTML5TOKENIZER_CONSUME_NCR 47 1.348 +#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48 1.349 +#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49 1.350 +#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50 1.351 +#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51 1.352 +#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52 1.353 +#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53 1.354 +#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54 1.355 +#define NS_HTML5TOKENIZER_CDATA_START 55 1.356 +#define NS_HTML5TOKENIZER_CDATA_SECTION 56 1.357 +#define NS_HTML5TOKENIZER_CDATA_RSQB 57 1.358 +#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58 1.359 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59 1.360 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60 1.361 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61 1.362 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62 1.363 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63 1.364 +#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64 1.365 +#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65 1.366 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66 1.367 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67 1.368 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68 1.369 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69 1.370 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70 1.371 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71 1.372 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72 1.373 +#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73 1.374 +#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74 1.375 +#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10)) 1.376 +#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024 1.377 + 1.378 + 1.379 +#endif 1.380 +