parser/html/nsHtml5Tokenizer.h

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

michael@0 1 /*
michael@0 2 * Copyright (c) 2005-2007 Henri Sivonen
michael@0 3 * Copyright (c) 2007-2013 Mozilla Foundation
michael@0 4 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
michael@0 5 * Foundation, and Opera Software ASA.
michael@0 6 *
michael@0 7 * Permission is hereby granted, free of charge, to any person obtaining a
michael@0 8 * copy of this software and associated documentation files (the "Software"),
michael@0 9 * to deal in the Software without restriction, including without limitation
michael@0 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
michael@0 11 * and/or sell copies of the Software, and to permit persons to whom the
michael@0 12 * Software is furnished to do so, subject to the following conditions:
michael@0 13 *
michael@0 14 * The above copyright notice and this permission notice shall be included in
michael@0 15 * all copies or substantial portions of the Software.
michael@0 16 *
michael@0 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
michael@0 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
michael@0 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
michael@0 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
michael@0 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
michael@0 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
michael@0 23 * DEALINGS IN THE SOFTWARE.
michael@0 24 */
michael@0 25
michael@0 26 /*
michael@0 27 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
michael@0 28 * Please edit Tokenizer.java instead and regenerate.
michael@0 29 */
michael@0 30
michael@0 31 #ifndef nsHtml5Tokenizer_h
michael@0 32 #define nsHtml5Tokenizer_h
michael@0 33
michael@0 34 #include "nsIAtom.h"
michael@0 35 #include "nsHtml5AtomTable.h"
michael@0 36 #include "nsString.h"
michael@0 37 #include "nsIContent.h"
michael@0 38 #include "nsTraceRefcnt.h"
michael@0 39 #include "jArray.h"
michael@0 40 #include "nsHtml5DocumentMode.h"
michael@0 41 #include "nsHtml5ArrayCopy.h"
michael@0 42 #include "nsHtml5NamedCharacters.h"
michael@0 43 #include "nsHtml5NamedCharactersAccel.h"
michael@0 44 #include "nsHtml5Atoms.h"
michael@0 45 #include "nsAHtml5TreeBuilderState.h"
michael@0 46 #include "nsHtml5Macros.h"
michael@0 47 #include "nsHtml5Highlighter.h"
michael@0 48 #include "nsHtml5TokenizerLoopPolicies.h"
michael@0 49
/*
 * Forward declarations. Within this header these types are only used through
 * pointers (member fields and function parameters), so their full
 * definitions are not required here.
 */
michael@0 50 class nsHtml5StreamParser;
michael@0 51
michael@0 52 class nsHtml5TreeBuilder;
michael@0 53 class nsHtml5MetaScanner;
michael@0 54 class nsHtml5AttributeName;
michael@0 55 class nsHtml5ElementName;
michael@0 56 class nsHtml5HtmlAttributes;
michael@0 57 class nsHtml5UTF16Buffer;
michael@0 58 class nsHtml5StateSnapshot;
michael@0 59 class nsHtml5Portability;
michael@0 60
michael@0 61
/*
 * nsHtml5Tokenizer -- declaration of the HTML5 tokenizer class.
 *
 * This header only declares the class: its state fields, the prototypes of
 * its member functions, and a handful of small inline helpers. Functions
 * without a body here are defined elsewhere. The constructor takes the
 * nsHtml5TreeBuilder that is stored as tokenHandler.
 *
 * The access specifiers alternate frequently between private, protected and
 * public; this header is generated (see the note at the top of the file),
 * so the ordering is left exactly as emitted.
 */
michael@0 62 class nsHtml5Tokenizer
michael@0 63 {
michael@0 64 private:
/*
 * Static character tables; their contents are defined outside this header.
 * NOTE(review): OCTYPE / UBLIC / YSTEM look like the tails of the keywords
 * DOCTYPE / PUBLIC / SYSTEM after their first letter -- confirm against the
 * definitions.
 */
michael@0 65 static char16_t LT_GT[];
michael@0 66 static char16_t LT_SOLIDUS[];
michael@0 67 static char16_t RSQB_RSQB[];
michael@0 68 static char16_t REPLACEMENT_CHARACTER[];
michael@0 69 static char16_t LF[];
michael@0 70 static char16_t CDATA_LSQB[];
michael@0 71 static char16_t OCTYPE[];
michael@0 72 static char16_t UBLIC[];
michael@0 73 static char16_t YSTEM[];
/* Static element-name arrays, one per element named in the identifier. */
michael@0 74 static staticJArray<char16_t,int32_t> TITLE_ARR;
michael@0 75 static staticJArray<char16_t,int32_t> SCRIPT_ARR;
michael@0 76 static staticJArray<char16_t,int32_t> STYLE_ARR;
michael@0 77 static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
michael@0 78 static staticJArray<char16_t,int32_t> XMP_ARR;
michael@0 79 static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
michael@0 80 static staticJArray<char16_t,int32_t> IFRAME_ARR;
michael@0 81 static staticJArray<char16_t,int32_t> NOEMBED_ARR;
michael@0 82 static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
michael@0 83 static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
michael@0 84 protected:
/* Token sink passed to the constructor. */
michael@0 85 nsHtml5TreeBuilder* tokenHandler;
/* Set through setEncodingDeclarationHandler(). */
michael@0 86 nsHtml5StreamParser* encodingDeclarationHandler;
/* Set to true by silentCarriageReturn(). */
michael@0 87 bool lastCR;
/*
 * NOTE(review): stateSave / returnStateSave presumably hold
 * NS_HTML5TOKENIZER_* state values between calls -- confirm in the
 * implementation file.
 */
michael@0 88 int32_t stateSave;
michael@0 89 private:
michael@0 90 int32_t returnStateSave;
michael@0 91 protected:
michael@0 92 int32_t index;
michael@0 93 private:
michael@0 94 bool forceQuirks;
michael@0 95 char16_t additional;
michael@0 96 int32_t entCol;
michael@0 97 int32_t firstCharKey;
michael@0 98 int32_t lo;
michael@0 99 int32_t hi;
michael@0 100 int32_t candidate;
michael@0 101 int32_t strBufMark;
michael@0 102 int32_t prevValue;
michael@0 103 protected:
michael@0 104 int32_t value;
michael@0 105 private:
michael@0 106 bool seenDigits;
michael@0 107 protected:
michael@0 108 int32_t cstart;
michael@0 109 private:
michael@0 110 nsString* publicId;
michael@0 111 nsString* systemId;
/*
 * Two scratch buffers, each paired with a separate length counter. The
 * inline helpers below manipulate the counters directly; the arrays
 * themselves are never shrunk or cleared by those helpers.
 */
michael@0 112 autoJArray<char16_t,int32_t> strBuf;
michael@0 113 int32_t strBufLen;
michael@0 114 autoJArray<char16_t,int32_t> longStrBuf;
michael@0 115 int32_t longStrBufLen;
michael@0 116 autoJArray<char16_t,int32_t> bmpChar;
michael@0 117 autoJArray<char16_t,int32_t> astralChar;
michael@0 118 protected:
michael@0 119 nsHtml5ElementName* endTagExpectation;
michael@0 120 private:
michael@0 121 jArray<char16_t,int32_t> endTagExpectationAsArray;
michael@0 122 protected:
michael@0 123 bool endTag;
michael@0 124 private:
michael@0 125 nsHtml5ElementName* tagName;
michael@0 126 protected:
michael@0 127 nsHtml5AttributeName* attributeName;
michael@0 128 private:
michael@0 129 nsIAtom* doctypeName;
michael@0 130 nsString* publicIdentifier;
michael@0 131 nsString* systemIdentifier;
michael@0 132 nsHtml5HtmlAttributes* attributes;
michael@0 133 bool newAttributesEachTime;
michael@0 134 bool shouldSuspend;
michael@0 135 protected:
michael@0 136 bool confident;
michael@0 137 private:
/* Current line counter; read by getLineNumber(), bumped by silent*(). */
michael@0 138 int32_t line;
michael@0 139 nsHtml5AtomTable* interner;
michael@0 140 bool viewingXmlSource;
michael@0 141 public:
michael@0 142 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
michael@0 143 void setInterner(nsHtml5AtomTable* interner);
michael@0 144 void initLocation(nsString* newPublicId, nsString* newSystemId);
michael@0 145 bool isViewingXmlSource();
/*
 * Two overloads: the end-tag expectation can be supplied either as an atom
 * or as an nsHtml5ElementName. specialTokenizerState is presumably one of
 * the NS_HTML5TOKENIZER_* constants defined after this class -- confirm.
 */
michael@0 146 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
michael@0 147 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
michael@0 148 private:
michael@0 149 void endTagExpectationToArray();
michael@0 150 public:
michael@0 151 void setLineNumber(int32_t line);
/* Returns the current value of the line counter. */
michael@0 152 inline int32_t getLineNumber()
michael@0 153 {
michael@0 154 return line;
michael@0 155 }
michael@0 156
michael@0 157 nsHtml5HtmlAttributes* emptyAttributes();
michael@0 158 private:
/*
 * Resets strBuf to hold exactly the single character c: writes slot 0 and
 * sets the length to 1. No capacity check is performed here, so strBuf must
 * already have room for at least one element.
 */
michael@0 159 inline void clearStrBufAndAppend(char16_t c)
michael@0 160 {
michael@0 161 strBuf[0] = c;
michael@0 162 strBufLen = 1;
michael@0 163 }
michael@0 164
/* Logically empties strBuf by zeroing its length; contents are untouched. */
michael@0 165 inline void clearStrBuf()
michael@0 166 {
michael@0 167 strBufLen = 0;
michael@0 168 }
michael@0 169
michael@0 170 void appendStrBuf(char16_t c);
michael@0 171 protected:
michael@0 172 nsString* strBufToString();
michael@0 173 private:
michael@0 174 void strBufToDoctypeName();
michael@0 175 void emitStrBuf();
/* Logically empties longStrBuf by zeroing its length; contents untouched. */
michael@0 176 inline void clearLongStrBuf()
michael@0 177 {
michael@0 178 longStrBufLen = 0;
michael@0 179 }
michael@0 180
/*
 * Resets longStrBuf to hold exactly the single character c. As with
 * clearStrBufAndAppend(), no capacity check is performed here.
 */
michael@0 181 inline void clearLongStrBufAndAppend(char16_t c)
michael@0 182 {
michael@0 183 longStrBuf[0] = c;
michael@0 184 longStrBufLen = 1;
michael@0 185 }
michael@0 186
michael@0 187 void appendLongStrBuf(char16_t c);
/* Appends a single '-' to longStrBuf. */
michael@0 188 inline void appendSecondHyphenToBogusComment()
michael@0 189 {
michael@0 190 appendLongStrBuf('-');
michael@0 191 }
michael@0 192
/*
 * Reports consecutive hyphens via errConsecutiveHyphens() and then appends c
 * to longStrBuf. errConsecutiveHyphens() is not declared in the visible part
 * of this class; presumably it comes from the supplement header included at
 * the end of the class body -- verify.
 */
michael@0 193 inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c)
michael@0 194 {
michael@0 195 errConsecutiveHyphens();
michael@0 196 appendLongStrBuf(c);
michael@0 197 }
michael@0 198
michael@0 199 void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length);
/* Appends the first strBufLen characters of strBuf to longStrBuf. */
michael@0 200 inline void appendStrBufToLongStrBuf()
michael@0 201 {
michael@0 202 appendLongStrBuf(strBuf, 0, strBufLen);
michael@0 203 }
michael@0 204
michael@0 205 nsString* longStrBufToString();
michael@0 206 void emitComment(int32_t provisionalHyphens, int32_t pos);
michael@0 207 protected:
michael@0 208 void flushChars(char16_t* buf, int32_t pos);
michael@0 209 private:
michael@0 210 void strBufToElementNameString();
michael@0 211 int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
michael@0 212 void attributeNameComplete();
michael@0 213 void addAttributeWithoutValue();
michael@0 214 void addAttributeWithValue();
michael@0 215 public:
michael@0 216 void start();
michael@0 217 bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
michael@0 218 private:
/*
 * Templated on a policy class P. NOTE(review): P is presumably one of the
 * policies from nsHtml5TokenizerLoopPolicies.h (included above) -- confirm.
 */
michael@0 219 template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
michael@0 220 void initDoctypeFields();
/*
 * The four helpers below combine line accounting with an append to
 * longStrBuf. In every case the character appended is '\n', including the
 * carriage-return variants.
 */
/* Counts a CR, reports consecutive hyphens, appends '\n'. */
michael@0 221 inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
michael@0 222 {
michael@0 223 silentCarriageReturn();
michael@0 224 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
michael@0 225 }
michael@0 226
/* Counts an LF, reports consecutive hyphens, appends '\n'. */
michael@0 227 inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
michael@0 228 {
michael@0 229 silentLineFeed();
michael@0 230 adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
michael@0 231 }
michael@0 232
/* Counts an LF and appends '\n'. */
michael@0 233 inline void appendLongStrBufLineFeed()
michael@0 234 {
michael@0 235 silentLineFeed();
michael@0 236 appendLongStrBuf('\n');
michael@0 237 }
michael@0 238
/* Counts a CR and appends '\n' (not '\r'). */
michael@0 239 inline void appendLongStrBufCarriageReturn()
michael@0 240 {
michael@0 241 silentCarriageReturn();
michael@0 242 appendLongStrBuf('\n');
michael@0 243 }
michael@0 244
michael@0 245 protected:
/*
 * Advances the line counter and records that the last character seen was a
 * carriage return. Nothing is emitted or appended.
 */
michael@0 246 inline void silentCarriageReturn()
michael@0 247 {
michael@0 248 ++line;
michael@0 249 lastCR = true;
michael@0 250 }
michael@0 251
/*
 * Advances the line counter. Unlike silentCarriageReturn(), lastCR is left
 * unchanged.
 */
michael@0 252 inline void silentLineFeed()
michael@0 253 {
michael@0 254 ++line;
michael@0 255 }
michael@0 256
michael@0 257 private:
michael@0 258 void emitCarriageReturn(char16_t* buf, int32_t pos);
michael@0 259 void emitReplacementCharacter(char16_t* buf, int32_t pos);
michael@0 260 void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
michael@0 261 void setAdditionalAndRememberAmpersandLocation(char16_t add);
michael@0 262 void bogusDoctype();
michael@0 263 void bogusDoctypeWithoutQuirks();
michael@0 264 void emitOrAppendStrBuf(int32_t returnState);
michael@0 265 void handleNcrValue(int32_t returnState);
michael@0 266 public:
michael@0 267 void eof();
michael@0 268 private:
michael@0 269 void emitDoctypeToken(int32_t pos);
michael@0 270 protected:
/*
 * Returns buf[pos]. No bounds checking is done here; the caller is
 * responsible for passing a valid index.
 */
michael@0 271 inline char16_t checkChar(char16_t* buf, int32_t pos)
michael@0 272 {
michael@0 273 return buf[pos];
michael@0 274 }
michael@0 275
michael@0 276 public:
michael@0 277 bool internalEncodingDeclaration(nsString* internalCharset);
michael@0 278 private:
michael@0 279 void emitOrAppendTwo(const char16_t* val, int32_t returnState);
michael@0 280 void emitOrAppendOne(const char16_t* val, int32_t returnState);
michael@0 281 public:
michael@0 282 void end();
michael@0 283 void requestSuspension();
michael@0 284 bool isInDataState();
michael@0 285 void resetToDataState();
michael@0 286 void loadState(nsHtml5Tokenizer* other);
michael@0 287 void initializeWithoutStarting();
michael@0 288 void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
michael@0 289 ~nsHtml5Tokenizer();
michael@0 290 static void initializeStatics();
michael@0 291 static void releaseStatics();
michael@0 292
/*
 * Textually included inside the class body, so whatever this header declares
 * becomes part of nsHtml5Tokenizer. Its contents are not visible here.
 */
michael@0 293 #include "nsHtml5TokenizerHSupplement.h"
michael@0 294 };
michael@0 295
/*
 * Tokenizer constants.
 *
 * NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK is ~1, i.e. every bit set except
 * the lowest. Because DATA is 0 and RCDATA is 1, (state & mask) is zero for
 * exactly those two states and non-zero for every other state value below.
 * The expansion is not parenthesized; that is harmless in a plain
 * `x & MASK` expression, but keep it in mind if the macro is ever used in
 * a more complex one.
 */
michael@0 296 #define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
/*
 * State identifiers: consecutive integers from 0 to 74. Presumably these are
 * the values carried by the int32_t state / returnState parameters and the
 * stateSave / returnStateSave fields of nsHtml5Tokenizer -- confirm in the
 * implementation file. The numeric values must not be reordered, since the
 * mask above depends on DATA == 0 and RCDATA == 1.
 */
michael@0 297 #define NS_HTML5TOKENIZER_DATA 0
michael@0 298 #define NS_HTML5TOKENIZER_RCDATA 1
michael@0 299 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2
michael@0 300 #define NS_HTML5TOKENIZER_RAWTEXT 3
michael@0 301 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
michael@0 302 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
michael@0 303 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
michael@0 304 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
michael@0 305 #define NS_HTML5TOKENIZER_PLAINTEXT 8
michael@0 306 #define NS_HTML5TOKENIZER_TAG_OPEN 9
michael@0 307 #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
michael@0 308 #define NS_HTML5TOKENIZER_TAG_NAME 11
michael@0 309 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
michael@0 310 #define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
michael@0 311 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
michael@0 312 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
michael@0 313 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
michael@0 314 #define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
michael@0 315 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
michael@0 316 #define NS_HTML5TOKENIZER_DOCTYPE 19
michael@0 317 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
michael@0 318 #define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
michael@0 319 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
michael@0 320 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
michael@0 321 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
michael@0 322 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
michael@0 323 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
michael@0 324 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
michael@0 325 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
michael@0 326 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
michael@0 327 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
michael@0 328 #define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
michael@0 329 #define NS_HTML5TOKENIZER_COMMENT_START 32
michael@0 330 #define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
michael@0 331 #define NS_HTML5TOKENIZER_COMMENT 34
michael@0 332 #define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
michael@0 333 #define NS_HTML5TOKENIZER_COMMENT_END 36
michael@0 334 #define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
michael@0 335 #define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
michael@0 336 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
michael@0 337 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
michael@0 338 #define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
michael@0 339 #define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
michael@0 340 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
michael@0 341 #define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
michael@0 342 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
michael@0 343 #define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
michael@0 344 #define NS_HTML5TOKENIZER_CONSUME_NCR 47
michael@0 345 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
michael@0 346 #define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
/*
 * NOTE: "NRC" (rather than "NCR", as in the neighbouring names) is the
 * spelling of this identifier; do not "correct" it here, since code outside
 * this header refers to the name as spelled.
 */
michael@0 347 #define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
michael@0 348 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
michael@0 349 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
michael@0 350 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
michael@0 351 #define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
michael@0 352 #define NS_HTML5TOKENIZER_CDATA_START 55
michael@0 353 #define NS_HTML5TOKENIZER_CDATA_SECTION 56
michael@0 354 #define NS_HTML5TOKENIZER_CDATA_RSQB 57
michael@0 355 #define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
michael@0 356 #define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
michael@0 357 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
michael@0 358 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
michael@0 359 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
michael@0 360 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
michael@0 361 #define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
michael@0 362 #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
michael@0 363 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
michael@0 364 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
michael@0 365 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
michael@0 366 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
michael@0 367 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
michael@0 368 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
michael@0 369 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
michael@0 370 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
michael@0 371 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
/*
 * Evaluates to 0xD800 - 0x40 = 0xD7C0. NOTE(review): presumably the offset
 * used when computing a UTF-16 lead surrogate from a code point
 * (lead = LEAD_OFFSET + (cp >> 10)) -- the use site is not in this header.
 */
michael@0 372 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
/*
 * NOTE(review): presumably the increment by which the tokenizer's character
 * buffers are enlarged -- confirm against appendStrBuf / appendLongStrBuf.
 */
michael@0 373 #define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
michael@0 374
michael@0 375
michael@0 376 #endif
michael@0 377

mercurial