parser/html/nsHtml5Tokenizer.h

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

     1 /*
     2  * Copyright (c) 2005-2007 Henri Sivonen
     3  * Copyright (c) 2007-2013 Mozilla Foundation
     4  * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
     5  * Foundation, and Opera Software ASA.
     6  *
     7  * Permission is hereby granted, free of charge, to any person obtaining a 
     8  * copy of this software and associated documentation files (the "Software"), 
     9  * to deal in the Software without restriction, including without limitation 
    10  * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
    11  * and/or sell copies of the Software, and to permit persons to whom the 
    12  * Software is furnished to do so, subject to the following conditions:
    13  *
    14  * The above copyright notice and this permission notice shall be included in 
    15  * all copies or substantial portions of the Software.
    16  *
    17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
    18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
    19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
    20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
    21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
    22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
    23  * DEALINGS IN THE SOFTWARE.
    24  */
    26 /*
    27  * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
    28  * Please edit Tokenizer.java instead and regenerate.
    29  */
    31 #ifndef nsHtml5Tokenizer_h
    32 #define nsHtml5Tokenizer_h
    34 #include "nsIAtom.h"
    35 #include "nsHtml5AtomTable.h"
    36 #include "nsString.h"
    37 #include "nsIContent.h"
    38 #include "nsTraceRefcnt.h"
    39 #include "jArray.h"
    40 #include "nsHtml5DocumentMode.h"
    41 #include "nsHtml5ArrayCopy.h"
    42 #include "nsHtml5NamedCharacters.h"
    43 #include "nsHtml5NamedCharactersAccel.h"
    44 #include "nsHtml5Atoms.h"
    45 #include "nsAHtml5TreeBuilderState.h"
    46 #include "nsHtml5Macros.h"
    47 #include "nsHtml5Highlighter.h"
    48 #include "nsHtml5TokenizerLoopPolicies.h"
    /*
     * Forward declarations. In the visible part of this header,
     * nsHtml5StreamParser, nsHtml5TreeBuilder, nsHtml5AttributeName,
     * nsHtml5ElementName, nsHtml5HtmlAttributes and nsHtml5UTF16Buffer are
     * used only through pointers, so an incomplete type is sufficient.
     * nsHtml5MetaScanner, nsHtml5StateSnapshot and nsHtml5Portability are not
     * referenced by the declarations visible in this header.
     */
    50 class nsHtml5StreamParser;
    52 class nsHtml5TreeBuilder;
    53 class nsHtml5MetaScanner;
    54 class nsHtml5AttributeName;
    55 class nsHtml5ElementName;
    56 class nsHtml5HtmlAttributes;
    57 class nsHtml5UTF16Buffer;
    58 class nsHtml5StateSnapshot;
    59 class nsHtml5Portability;
    /*
     * Declaration of the HTML5 tokenizer class.
     *
     * Per the header comment of this file, it is generated from
     * Tokenizer.java; lasting changes belong in that source.
     *
     * The class declares the tokenizer's state fields, the public entry
     * points (start, tokenizeBuffer, eof, end, ...) and a set of small inline
     * helpers for the two scratch buffers (strBuf / longStrBuf) and for line
     * counting. Everything that is not defined inline here is defined outside
     * this header.
     *
     * The access specifier changes frequently because members are kept in a
     * fixed order. NOTE(review): presumably this mirrors the field order of
     * Tokenizer.java -- confirm before reordering anything.
     */
    62 class nsHtml5Tokenizer
    63 {
    64   private:
           /* Static character arrays; only declared here, defined elsewhere. */
    65     static char16_t LT_GT[];
    66     static char16_t LT_SOLIDUS[];
    67     static char16_t RSQB_RSQB[];
    68     static char16_t REPLACEMENT_CHARACTER[];
    69     static char16_t LF[];
    70     static char16_t CDATA_LSQB[];
    71     static char16_t OCTYPE[];
    72     static char16_t UBLIC[];
    73     static char16_t YSTEM[];
           /*
            * Static arrays named after element names.
            * NOTE(review): presumably the characters of those names, set up
            * by initializeStatics() -- confirm in the implementation file.
            */
    74     static staticJArray<char16_t,int32_t> TITLE_ARR;
    75     static staticJArray<char16_t,int32_t> SCRIPT_ARR;
    76     static staticJArray<char16_t,int32_t> STYLE_ARR;
    77     static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
    78     static staticJArray<char16_t,int32_t> XMP_ARR;
    79     static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
    80     static staticJArray<char16_t,int32_t> IFRAME_ARR;
    81     static staticJArray<char16_t,int32_t> NOEMBED_ARR;
    82     static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
    83     static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
    84   protected:
    85     nsHtml5TreeBuilder* tokenHandler;
    86     nsHtml5StreamParser* encodingDeclarationHandler;
           /* Set to true by silentCarriageReturn(). */
    87     bool lastCR;
    88     int32_t stateSave;
    89   private:
    90     int32_t returnStateSave;
    91   protected:
    92     int32_t index;
    93   private:
    94     bool forceQuirks;
    95     char16_t additional;
    96     int32_t entCol;
    97     int32_t firstCharKey;
    98     int32_t lo;
    99     int32_t hi;
   100     int32_t candidate;
   101     int32_t strBufMark;
   102     int32_t prevValue;
   103   protected:
   104     int32_t value;
   105   private:
   106     bool seenDigits;
   107   protected:
   108     int32_t cstart;
   109   private:
   110     nsString* publicId;
   111     nsString* systemId;
           /*
            * Two scratch buffers with explicit lengths: strBufLen and
            * longStrBufLen hold the number of characters in use, as the
            * inline clear/append helpers below show.
            */
   112     autoJArray<char16_t,int32_t> strBuf;
   113     int32_t strBufLen;
   114     autoJArray<char16_t,int32_t> longStrBuf;
   115     int32_t longStrBufLen;
   116     autoJArray<char16_t,int32_t> bmpChar;
   117     autoJArray<char16_t,int32_t> astralChar;
   118   protected:
   119     nsHtml5ElementName* endTagExpectation;
   120   private:
   121     jArray<char16_t,int32_t> endTagExpectationAsArray;
   122   protected:
   123     bool endTag;
   124   private:
   125     nsHtml5ElementName* tagName;
   126   protected:
   127     nsHtml5AttributeName* attributeName;
   128   private:
   129     nsIAtom* doctypeName;
   130     nsString* publicIdentifier;
   131     nsString* systemIdentifier;
   132     nsHtml5HtmlAttributes* attributes;
   133     bool newAttributesEachTime;
   134     bool shouldSuspend;
   135   protected:
   136     bool confident;
   137   private:
           /*
            * Line counter: incremented by silentCarriageReturn() and
            * silentLineFeed(), returned by getLineNumber().
            */
   138     int32_t line;
   139     nsHtml5AtomTable* interner;
   140     bool viewingXmlSource;
   141   public:
   142     nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
   143     void setInterner(nsHtml5AtomTable* interner);
   144     void initLocation(nsString* newPublicId, nsString* newSystemId);
   145     bool isViewingXmlSource();
           /*
            * Two overloads: the expected end tag can be given either as an
            * atom or as an element name object.
            */
   146     void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
   147     void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
   148   private:
   149     void endTagExpectationToArray();
   150   public:
   151     void setLineNumber(int32_t line);
           /* Returns the current value of the line counter. */
   152     inline int32_t getLineNumber()
   153     {
   154       return line;
   155     }
   157     nsHtml5HtmlAttributes* emptyAttributes();
   158   private:
           /*
            * Resets strBuf to contain exactly the character c.
            * Writes strBuf[0] without a capacity check here; assumes strBuf
            * already has at least one element -- TODO confirm where it is
            * allocated.
            */
   159     inline void clearStrBufAndAppend(char16_t c)
   160     {
   161       strBuf[0] = c;
   162       strBufLen = 1;
   163     }
           /* Marks strBuf as empty; the stored characters are left untouched. */
   165     inline void clearStrBuf()
   166     {
   167       strBufLen = 0;
   168     }
   170     void appendStrBuf(char16_t c);
   171   protected:
   172     nsString* strBufToString();
   173   private:
   174     void strBufToDoctypeName();
   175     void emitStrBuf();
           /* Marks longStrBuf as empty; the stored characters are left untouched. */
   176     inline void clearLongStrBuf()
   177     {
   178       longStrBufLen = 0;
   179     }
           /*
            * Resets longStrBuf to contain exactly the character c.
            * Same caveat as clearStrBufAndAppend: no capacity check is done here.
            */
   181     inline void clearLongStrBufAndAppend(char16_t c)
   182     {
   183       longStrBuf[0] = c;
   184       longStrBufLen = 1;
   185     }
   187     void appendLongStrBuf(char16_t c);
           /* Appends a single '-' via appendLongStrBuf(). */
   188     inline void appendSecondHyphenToBogusComment()
   189     {
   190       appendLongStrBuf('-');
   191     }
           /*
            * Calls errConsecutiveHyphens() and then appends c via
            * appendLongStrBuf(). errConsecutiveHyphens() is not declared in
            * the visible part of this header; presumably it comes from the
            * supplement included at the end of the class -- confirm.
            */
   193     inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c)
   194     {
   195       errConsecutiveHyphens();
   196       appendLongStrBuf(c);
   197     }
   199     void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length);
           /* Appends the first strBufLen characters of strBuf via appendLongStrBuf(). */
   200     inline void appendStrBufToLongStrBuf()
   201     {
   202       appendLongStrBuf(strBuf, 0, strBufLen);
   203     }
   205     nsString* longStrBufToString();
   206     void emitComment(int32_t provisionalHyphens, int32_t pos);
   207   protected:
   208     void flushChars(char16_t* buf, int32_t pos);
   209   private:
   210     void strBufToElementNameString();
   211     int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
   212     void attributeNameComplete();
   213     void addAttributeWithoutValue();
   214     void addAttributeWithValue();
   215   public:
   216     void start();
   217     bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
   218   private:
           /*
            * Templated on a policy class P.
            * NOTE(review): presumably one of the policies declared in
            * nsHtml5TokenizerLoopPolicies.h (included above) -- confirm.
            */
   219     template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
   220     void initDoctypeFields();
           /*
            * The four helpers below pair a line-counting call with an append.
            * Note that the carriage-return variants also append '\n', not
            * '\r'.
            */
   221     inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
   222     {
   223       silentCarriageReturn();
   224       adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
   225     }
   227     inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
   228     {
   229       silentLineFeed();
   230       adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
   231     }
   233     inline void appendLongStrBufLineFeed()
   234     {
   235       silentLineFeed();
   236       appendLongStrBuf('\n');
   237     }
   239     inline void appendLongStrBufCarriageReturn()
   240     {
   241       silentCarriageReturn();
   242       appendLongStrBuf('\n');
   243     }
   245   protected:
           /* Counts a new line and records in lastCR that a carriage return was seen. */
   246     inline void silentCarriageReturn()
   247     {
   248       ++line;
   249       lastCR = true;
   250     }
           /* Counts a new line; unlike silentCarriageReturn(), lastCR is not modified. */
   252     inline void silentLineFeed()
   253     {
   254       ++line;
   255     }
   257   private:
   258     void emitCarriageReturn(char16_t* buf, int32_t pos);
   259     void emitReplacementCharacter(char16_t* buf, int32_t pos);
   260     void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
   261     void setAdditionalAndRememberAmpersandLocation(char16_t add);
   262     void bogusDoctype();
   263     void bogusDoctypeWithoutQuirks();
   264     void emitOrAppendStrBuf(int32_t returnState);
   265     void handleNcrValue(int32_t returnState);
   266   public:
   267     void eof();
   268   private:
   269     void emitDoctypeToken(int32_t pos);
   270   protected:
           /* Returns buf[pos]; performs no bounds check of its own. */
   271     inline char16_t checkChar(char16_t* buf, int32_t pos)
   272     {
   273       return buf[pos];
   274     }
   276   public:
   277     bool internalEncodingDeclaration(nsString* internalCharset);
   278   private:
   279     void emitOrAppendTwo(const char16_t* val, int32_t returnState);
   280     void emitOrAppendOne(const char16_t* val, int32_t returnState);
   281   public:
   282     void end();
   283     void requestSuspension();
   284     bool isInDataState();
   285     void resetToDataState();
   286     void loadState(nsHtml5Tokenizer* other);
   287     void initializeWithoutStarting();
   288     void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
   289     ~nsHtml5Tokenizer();
           /*
            * NOTE(review): presumably set up and tear down the static arrays
            * declared at the top of the class -- confirm in the
            * implementation file.
            */
   290     static void initializeStatics();
   291     static void releaseStatics();
           /*
            * Textually included inside the class body, so whatever the
            * supplement contains is parsed as members of nsHtml5Tokenizer.
            */
   293 #include "nsHtml5TokenizerHSupplement.h"
   294 };
   296 #define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
   297 #define NS_HTML5TOKENIZER_DATA 0
   298 #define NS_HTML5TOKENIZER_RCDATA 1
   299 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2
   300 #define NS_HTML5TOKENIZER_RAWTEXT 3
   301 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
   302 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
   303 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
   304 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
   305 #define NS_HTML5TOKENIZER_PLAINTEXT 8
   306 #define NS_HTML5TOKENIZER_TAG_OPEN 9
   307 #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
   308 #define NS_HTML5TOKENIZER_TAG_NAME 11
   309 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
   310 #define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
   311 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
   312 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
   313 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
   314 #define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
   315 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
   316 #define NS_HTML5TOKENIZER_DOCTYPE 19
   317 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
   318 #define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
   319 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
   320 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
   321 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
   322 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
   323 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
   324 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
   325 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
   326 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
   327 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
   328 #define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
   329 #define NS_HTML5TOKENIZER_COMMENT_START 32
   330 #define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
   331 #define NS_HTML5TOKENIZER_COMMENT 34
   332 #define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
   333 #define NS_HTML5TOKENIZER_COMMENT_END 36
   334 #define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
   335 #define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
   336 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
   337 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
   338 #define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
   339 #define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
   340 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
   341 #define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
   342 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
   343 #define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
   344 #define NS_HTML5TOKENIZER_CONSUME_NCR 47
   345 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
   346 #define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
   347 #define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
   348 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
   349 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
   350 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
   351 #define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
   352 #define NS_HTML5TOKENIZER_CDATA_START 55
   353 #define NS_HTML5TOKENIZER_CDATA_SECTION 56
   354 #define NS_HTML5TOKENIZER_CDATA_RSQB 57
   355 #define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
   356 #define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
   357 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
   358 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
   359 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
   360 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
   361 #define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
   362 #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
   363 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
   364 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
   365 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
   366 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
   367 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
   368 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
   369 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
   370 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
   371 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
   372 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
   373 #define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024
   376 #endif

mercurial