parser/html/nsHtml5Tokenizer.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/html/nsHtml5Tokenizer.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,377 @@
     1.4 +/*
     1.5 + * Copyright (c) 2005-2007 Henri Sivonen
     1.6 + * Copyright (c) 2007-2013 Mozilla Foundation
     1.7 + * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
     1.8 + * Foundation, and Opera Software ASA.
     1.9 + *
    1.10 + * Permission is hereby granted, free of charge, to any person obtaining a 
    1.11 + * copy of this software and associated documentation files (the "Software"), 
    1.12 + * to deal in the Software without restriction, including without limitation 
    1.13 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
    1.14 + * and/or sell copies of the Software, and to permit persons to whom the 
    1.15 + * Software is furnished to do so, subject to the following conditions:
    1.16 + *
    1.17 + * The above copyright notice and this permission notice shall be included in 
    1.18 + * all copies or substantial portions of the Software.
    1.19 + *
    1.20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
    1.21 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
    1.22 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
    1.23 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
    1.24 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
    1.25 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
    1.26 + * DEALINGS IN THE SOFTWARE.
    1.27 + */
    1.28 +
    1.29 +/*
    1.30 + * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
    1.31 + * Please edit Tokenizer.java instead and regenerate.
    1.32 + */
    1.33 +
    1.34 +#ifndef nsHtml5Tokenizer_h
    1.35 +#define nsHtml5Tokenizer_h
    1.36 +
    1.37 +#include "nsIAtom.h"
    1.38 +#include "nsHtml5AtomTable.h"
    1.39 +#include "nsString.h"
    1.40 +#include "nsIContent.h"
    1.41 +#include "nsTraceRefcnt.h"
    1.42 +#include "jArray.h"
    1.43 +#include "nsHtml5DocumentMode.h"
    1.44 +#include "nsHtml5ArrayCopy.h"
    1.45 +#include "nsHtml5NamedCharacters.h"
    1.46 +#include "nsHtml5NamedCharactersAccel.h"
    1.47 +#include "nsHtml5Atoms.h"
    1.48 +#include "nsAHtml5TreeBuilderState.h"
    1.49 +#include "nsHtml5Macros.h"
    1.50 +#include "nsHtml5Highlighter.h"
    1.51 +#include "nsHtml5TokenizerLoopPolicies.h"
    1.52 +
    1.53 +class nsHtml5StreamParser; // forward declarations: only the names are needed by the declarations in this header
    1.54 +
    1.55 +class nsHtml5TreeBuilder;
    1.56 +class nsHtml5MetaScanner; // not referenced in the visible part of this header
    1.57 +class nsHtml5AttributeName;
    1.58 +class nsHtml5ElementName;
    1.59 +class nsHtml5HtmlAttributes;
    1.60 +class nsHtml5UTF16Buffer;
    1.61 +class nsHtml5StateSnapshot; // not referenced in the visible part of this header
    1.62 +class nsHtml5Portability; // not referenced in the visible part of this header
    1.63 +
    1.64 +
    1.65 +class nsHtml5Tokenizer // Tokenizer declaration: static char16_t tables, per-instance state fields, buffer helpers, and the start()/tokenizeBuffer()/eof()/end() entry points.
    1.66 +{
    1.67 +  private:
    1.68 +    static char16_t LT_GT[]; // static character tables; their contents are defined outside this header
    1.69 +    static char16_t LT_SOLIDUS[];
    1.70 +    static char16_t RSQB_RSQB[];
    1.71 +    static char16_t REPLACEMENT_CHARACTER[];
    1.72 +    static char16_t LF[];
    1.73 +    static char16_t CDATA_LSQB[];
    1.74 +    static char16_t OCTYPE[]; // presumably "DOCTYPE" minus its first letter (likewise UBLIC/YSTEM for PUBLIC/SYSTEM) - TODO confirm against the definition
    1.75 +    static char16_t UBLIC[];
    1.76 +    static char16_t YSTEM[];
    1.77 +    static staticJArray<char16_t,int32_t> TITLE_ARR; // *_ARR: presumably the UTF-16 spellings of the named elements - verify against the definitions
    1.78 +    static staticJArray<char16_t,int32_t> SCRIPT_ARR;
    1.79 +    static staticJArray<char16_t,int32_t> STYLE_ARR;
    1.80 +    static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
    1.81 +    static staticJArray<char16_t,int32_t> XMP_ARR;
    1.82 +    static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
    1.83 +    static staticJArray<char16_t,int32_t> IFRAME_ARR;
    1.84 +    static staticJArray<char16_t,int32_t> NOEMBED_ARR;
    1.85 +    static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
    1.86 +    static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
    1.87 +  protected:
    1.88 +    nsHtml5TreeBuilder* tokenHandler;
    1.89 +    nsHtml5StreamParser* encodingDeclarationHandler;
    1.90 +    bool lastCR; // set to true by silentCarriageReturn()
    1.91 +    int32_t stateSave; // presumably one of the NS_HTML5TOKENIZER_* state constants - confirm in the .cpp
    1.92 +  private:
    1.93 +    int32_t returnStateSave;
    1.94 +  protected:
    1.95 +    int32_t index;
    1.96 +  private:
    1.97 +    bool forceQuirks;
    1.98 +    char16_t additional;
    1.99 +    int32_t entCol;
   1.100 +    int32_t firstCharKey;
   1.101 +    int32_t lo;
   1.102 +    int32_t hi;
   1.103 +    int32_t candidate;
   1.104 +    int32_t strBufMark;
   1.105 +    int32_t prevValue;
   1.106 +  protected:
   1.107 +    int32_t value;
   1.108 +  private:
   1.109 +    bool seenDigits;
   1.110 +  protected:
   1.111 +    int32_t cstart;
   1.112 +  private:
   1.113 +    nsString* publicId; // NOTE(review): separate from publicIdentifier/systemIdentifier below; presumably document location vs. doctype identifiers - confirm
   1.114 +    nsString* systemId;
   1.115 +    autoJArray<char16_t,int32_t> strBuf;
   1.116 +    int32_t strBufLen; // count of char16_t in use at the front of strBuf (0 after clearStrBuf, 1 after clearStrBufAndAppend)
   1.117 +    autoJArray<char16_t,int32_t> longStrBuf;
   1.118 +    int32_t longStrBufLen; // count of char16_t in use at the front of longStrBuf (0 after clearLongStrBuf, 1 after clearLongStrBufAndAppend)
   1.119 +    autoJArray<char16_t,int32_t> bmpChar;
   1.120 +    autoJArray<char16_t,int32_t> astralChar;
   1.121 +  protected:
   1.122 +    nsHtml5ElementName* endTagExpectation;
   1.123 +  private:
   1.124 +    jArray<char16_t,int32_t> endTagExpectationAsArray;
   1.125 +  protected:
   1.126 +    bool endTag;
   1.127 +  private:
   1.128 +    nsHtml5ElementName* tagName;
   1.129 +  protected:
   1.130 +    nsHtml5AttributeName* attributeName;
   1.131 +  private:
   1.132 +    nsIAtom* doctypeName;
   1.133 +    nsString* publicIdentifier;
   1.134 +    nsString* systemIdentifier;
   1.135 +    nsHtml5HtmlAttributes* attributes;
   1.136 +    bool newAttributesEachTime;
   1.137 +    bool shouldSuspend;
   1.138 +  protected:
   1.139 +    bool confident;
   1.140 +  private:
   1.141 +    int32_t line; // line counter: returned by getLineNumber(), incremented by silentCarriageReturn() and silentLineFeed()
   1.142 +    nsHtml5AtomTable* interner;
   1.143 +    bool viewingXmlSource;
   1.144 +  public:
   1.145 +    nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
   1.146 +    void setInterner(nsHtml5AtomTable* interner);
   1.147 +    void initLocation(nsString* newPublicId, nsString* newSystemId);
   1.148 +    bool isViewingXmlSource();
   1.149 +    void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation); // overload taking the expected end tag as an atom
   1.150 +    void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation); // overload taking the expected end tag as an element name
   1.151 +  private:
   1.152 +    void endTagExpectationToArray();
   1.153 +  public:
   1.154 +    void setLineNumber(int32_t line);
   1.155 +    inline int32_t getLineNumber() // returns the current value of the line counter
   1.156 +    {
   1.157 +      return line;
   1.158 +    }
   1.159 +
   1.160 +    nsHtml5HtmlAttributes* emptyAttributes();
   1.161 +  private:
   1.162 +    inline void clearStrBufAndAppend(char16_t c) // makes strBuf hold exactly the one character c
   1.163 +    {
   1.164 +      strBuf[0] = c; // no capacity check here; relies on strBuf already having room for at least one element
   1.165 +      strBufLen = 1;
   1.166 +    }
   1.167 +
   1.168 +    inline void clearStrBuf() // logically empties strBuf; only the length is reset, the storage is left untouched
   1.169 +    {
   1.170 +      strBufLen = 0;
   1.171 +    }
   1.172 +
   1.173 +    void appendStrBuf(char16_t c);
   1.174 +  protected:
   1.175 +    nsString* strBufToString();
   1.176 +  private:
   1.177 +    void strBufToDoctypeName();
   1.178 +    void emitStrBuf();
   1.179 +    inline void clearLongStrBuf() // logically empties longStrBuf; only the length is reset, the storage is left untouched
   1.180 +    {
   1.181 +      longStrBufLen = 0;
   1.182 +    }
   1.183 +
   1.184 +    inline void clearLongStrBufAndAppend(char16_t c) // makes longStrBuf hold exactly the one character c
   1.185 +    {
   1.186 +      longStrBuf[0] = c; // no capacity check here; relies on longStrBuf already having room for at least one element
   1.187 +      longStrBufLen = 1;
   1.188 +    }
   1.189 +
   1.190 +    void appendLongStrBuf(char16_t c);
   1.191 +    inline void appendSecondHyphenToBogusComment() // appends a single '-' to longStrBuf
   1.192 +    {
   1.193 +      appendLongStrBuf('-');
   1.194 +    }
   1.195 +
   1.196 +    inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c) // reports consecutive hyphens, then appends c unchanged
   1.197 +    {
   1.198 +      errConsecutiveHyphens(); // not declared in the visible part of this header; presumably supplied by nsHtml5TokenizerHSupplement.h - confirm
   1.199 +      appendLongStrBuf(c);
   1.200 +    }
   1.201 +
   1.202 +    void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length);
   1.203 +    inline void appendStrBufToLongStrBuf() // hands the in-use part of strBuf (offset 0, length strBufLen) to appendLongStrBuf
   1.204 +    {
   1.205 +      appendLongStrBuf(strBuf, 0, strBufLen);
   1.206 +    }
   1.207 +
   1.208 +    nsString* longStrBufToString();
   1.209 +    void emitComment(int32_t provisionalHyphens, int32_t pos);
   1.210 +  protected:
   1.211 +    void flushChars(char16_t* buf, int32_t pos);
   1.212 +  private:
   1.213 +    void strBufToElementNameString();
   1.214 +    int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
   1.215 +    void attributeNameComplete();
   1.216 +    void addAttributeWithoutValue();
   1.217 +    void addAttributeWithValue();
   1.218 +  public:
   1.219 +    void start();
   1.220 +    bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
   1.221 +  private:
   1.222 +    template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos); // P is presumably a policy type from nsHtml5TokenizerLoopPolicies.h - confirm
   1.223 +    void initDoctypeFields();
   1.224 +    inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() // CR case: counts the line, records the CR, and appends '\n' (not '\r')
   1.225 +    {
   1.226 +      silentCarriageReturn();
   1.227 +      adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
   1.228 +    }
   1.229 +
   1.230 +    inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() // LF case: counts the line and appends '\n' via the consecutive-hyphen error path
   1.231 +    {
   1.232 +      silentLineFeed();
   1.233 +      adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
   1.234 +    }
   1.235 +
   1.236 +    inline void appendLongStrBufLineFeed() // LF case: counts the line and appends '\n' to longStrBuf
   1.237 +    {
   1.238 +      silentLineFeed();
   1.239 +      appendLongStrBuf('\n');
   1.240 +    }
   1.241 +
   1.242 +    inline void appendLongStrBufCarriageReturn() // CR case: counts the line, records the CR, and appends '\n' (not '\r') to longStrBuf
   1.243 +    {
   1.244 +      silentCarriageReturn();
   1.245 +      appendLongStrBuf('\n');
   1.246 +    }
   1.247 +
   1.248 +  protected:
   1.249 +    inline void silentCarriageReturn() // advances the line counter and remembers that the last character was a CR
   1.250 +    {
   1.251 +      ++line;
   1.252 +      lastCR = true;
   1.253 +    }
   1.254 +
   1.255 +    inline void silentLineFeed() // advances the line counter only; lastCR is not touched
   1.256 +    {
   1.257 +      ++line;
   1.258 +    }
   1.259 +
   1.260 +  private:
   1.261 +    void emitCarriageReturn(char16_t* buf, int32_t pos);
   1.262 +    void emitReplacementCharacter(char16_t* buf, int32_t pos);
   1.263 +    void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
   1.264 +    void setAdditionalAndRememberAmpersandLocation(char16_t add);
   1.265 +    void bogusDoctype();
   1.266 +    void bogusDoctypeWithoutQuirks();
   1.267 +    void emitOrAppendStrBuf(int32_t returnState);
   1.268 +    void handleNcrValue(int32_t returnState);
   1.269 +  public:
   1.270 +    void eof();
   1.271 +  private:
   1.272 +    void emitDoctypeToken(int32_t pos);
   1.273 +  protected:
   1.274 +    inline char16_t checkChar(char16_t* buf, int32_t pos) // returns buf[pos] as-is; performs no bounds check, so the caller must pass a valid pos
   1.275 +    {
   1.276 +      return buf[pos];
   1.277 +    }
   1.278 +
   1.279 +  public:
   1.280 +    bool internalEncodingDeclaration(nsString* internalCharset);
   1.281 +  private:
   1.282 +    void emitOrAppendTwo(const char16_t* val, int32_t returnState);
   1.283 +    void emitOrAppendOne(const char16_t* val, int32_t returnState);
   1.284 +  public:
   1.285 +    void end();
   1.286 +    void requestSuspension();
   1.287 +    bool isInDataState();
   1.288 +    void resetToDataState();
   1.289 +    void loadState(nsHtml5Tokenizer* other);
   1.290 +    void initializeWithoutStarting();
   1.291 +    void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
   1.292 +    ~nsHtml5Tokenizer();
   1.293 +    static void initializeStatics();
   1.294 +    static void releaseStatics();
   1.295 +
   1.296 +#include "nsHtml5TokenizerHSupplement.h" // included inside the class body, so its contents become additional members of nsHtml5Tokenizer (public access at this point); contents not visible here
   1.297 +};
   1.298 +
   1.299 +#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1 // (x & ~1) == 0 holds only for DATA (0) and RCDATA (1). NOTE(review): expansion is unparenthesized
   1.300 +#define NS_HTML5TOKENIZER_DATA 0 // integer constants 0..74 below; presumably the tokenizer state identifiers passed as the int32_t state arguments - confirm in the .cpp
   1.301 +#define NS_HTML5TOKENIZER_RCDATA 1
   1.302 +#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
   1.303 +#define NS_HTML5TOKENIZER_RAWTEXT 3
   1.304 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
   1.305 +#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
   1.306 +#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
   1.307 +#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
   1.308 +#define NS_HTML5TOKENIZER_PLAINTEXT 8
   1.309 +#define NS_HTML5TOKENIZER_TAG_OPEN 9
   1.310 +#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
   1.311 +#define NS_HTML5TOKENIZER_TAG_NAME 11
   1.312 +#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
   1.313 +#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
   1.314 +#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
   1.315 +#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
   1.316 +#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
   1.317 +#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
   1.318 +#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
   1.319 +#define NS_HTML5TOKENIZER_DOCTYPE 19
   1.320 +#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
   1.321 +#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
   1.322 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
   1.323 +#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
   1.324 +#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
   1.325 +#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
   1.326 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
   1.327 +#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
   1.328 +#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
   1.329 +#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
   1.330 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
   1.331 +#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
   1.332 +#define NS_HTML5TOKENIZER_COMMENT_START 32
   1.333 +#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
   1.334 +#define NS_HTML5TOKENIZER_COMMENT 34
   1.335 +#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
   1.336 +#define NS_HTML5TOKENIZER_COMMENT_END 36
   1.337 +#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
   1.338 +#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
   1.339 +#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
   1.340 +#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
   1.341 +#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
   1.342 +#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
   1.343 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
   1.344 +#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
   1.345 +#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
   1.346 +#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
   1.347 +#define NS_HTML5TOKENIZER_CONSUME_NCR 47
   1.348 +#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
   1.349 +#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
   1.350 +#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50 // NOTE(review): "NRC" looks like a transposition of "NCR" (cf. HEX_NCR_LOOP); name kept because code outside this header may reference it
   1.351 +#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
   1.352 +#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
   1.353 +#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
   1.354 +#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
   1.355 +#define NS_HTML5TOKENIZER_CDATA_START 55
   1.356 +#define NS_HTML5TOKENIZER_CDATA_SECTION 56
   1.357 +#define NS_HTML5TOKENIZER_CDATA_RSQB 57
   1.358 +#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
   1.359 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
   1.360 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
   1.361 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
   1.362 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
   1.363 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
   1.364 +#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
   1.365 +#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
   1.366 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
   1.367 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
   1.368 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
   1.369 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
   1.370 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
   1.371 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
   1.372 +#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
   1.373 +#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
   1.374 +#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
   1.375 +#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10)) // evaluates to 0xD7C0; presumably added to (code point >> 10) to form a UTF-16 lead surrogate - confirm at the use site
   1.376 +#define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024 // presumably the increment used when growing the tokenizer's buffers - confirm in the .cpp
   1.377 +
   1.378 +
   1.379 +#endif
   1.380 +

mercurial