michael@0: /* michael@0: * Copyright (c) 2005-2007 Henri Sivonen michael@0: * Copyright (c) 2007-2013 Mozilla Foundation michael@0: * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla michael@0: * Foundation, and Opera Software ASA. michael@0: * michael@0: * Permission is hereby granted, free of charge, to any person obtaining a michael@0: * copy of this software and associated documentation files (the "Software"), michael@0: * to deal in the Software without restriction, including without limitation michael@0: * the rights to use, copy, modify, merge, publish, distribute, sublicense, michael@0: * and/or sell copies of the Software, and to permit persons to whom the michael@0: * Software is furnished to do so, subject to the following conditions: michael@0: * michael@0: * The above copyright notice and this permission notice shall be included in michael@0: * all copies or substantial portions of the Software. michael@0: * michael@0: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR michael@0: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, michael@0: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL michael@0: * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER michael@0: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING michael@0: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER michael@0: * DEALINGS IN THE SOFTWARE. michael@0: */ michael@0: michael@0: /* michael@0: * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. michael@0: * Please edit Tokenizer.java instead and regenerate. michael@0: */ michael@0: michael@0: #ifndef nsHtml5Tokenizer_h michael@0: #define nsHtml5Tokenizer_h michael@0: michael@0: #include "nsIAtom.h" michael@0: #include "nsHtml5AtomTable.h" michael@0: #include "nsString.h" michael@0: #include "nsIContent.h" michael@0: #include "nsTraceRefcnt.h" michael@0: #include "jArray.h" michael@0: #include "nsHtml5DocumentMode.h" michael@0: #include "nsHtml5ArrayCopy.h" michael@0: #include "nsHtml5NamedCharacters.h" michael@0: #include "nsHtml5NamedCharactersAccel.h" michael@0: #include "nsHtml5Atoms.h" michael@0: #include "nsAHtml5TreeBuilderState.h" michael@0: #include "nsHtml5Macros.h" michael@0: #include "nsHtml5Highlighter.h" michael@0: #include "nsHtml5TokenizerLoopPolicies.h" michael@0: michael@0: class nsHtml5StreamParser; michael@0: michael@0: class nsHtml5TreeBuilder; michael@0: class nsHtml5MetaScanner; michael@0: class nsHtml5AttributeName; michael@0: class nsHtml5ElementName; michael@0: class nsHtml5HtmlAttributes; michael@0: class nsHtml5UTF16Buffer; michael@0: class nsHtml5StateSnapshot; michael@0: class nsHtml5Portability; michael@0: michael@0: michael@0: class nsHtml5Tokenizer michael@0: { michael@0: private: michael@0: static char16_t LT_GT[]; michael@0: static char16_t LT_SOLIDUS[]; michael@0: static char16_t RSQB_RSQB[]; michael@0: static char16_t REPLACEMENT_CHARACTER[]; michael@0: static char16_t LF[]; michael@0: static char16_t CDATA_LSQB[]; michael@0: static char16_t OCTYPE[]; michael@0: static char16_t UBLIC[]; michael@0: static char16_t YSTEM[]; michael@0: static staticJArray TITLE_ARR; michael@0: static staticJArray SCRIPT_ARR; michael@0: static staticJArray STYLE_ARR; michael@0: static staticJArray PLAINTEXT_ARR; michael@0: static staticJArray XMP_ARR; michael@0: static staticJArray TEXTAREA_ARR; michael@0: static staticJArray IFRAME_ARR; michael@0: static staticJArray NOEMBED_ARR; michael@0: static staticJArray NOSCRIPT_ARR; michael@0: static staticJArray NOFRAMES_ARR; michael@0: protected: michael@0: nsHtml5TreeBuilder* tokenHandler; michael@0: nsHtml5StreamParser* encodingDeclarationHandler; michael@0: bool lastCR; michael@0: int32_t stateSave; michael@0: private: michael@0: int32_t returnStateSave; michael@0: protected: michael@0: int32_t index; michael@0: private: michael@0: bool forceQuirks; michael@0: char16_t additional; michael@0: int32_t entCol; michael@0: int32_t firstCharKey; michael@0: int32_t lo; michael@0: int32_t hi; michael@0: int32_t candidate; michael@0: int32_t strBufMark; michael@0: int32_t prevValue; michael@0: protected: michael@0: int32_t value; michael@0: private: michael@0: bool seenDigits; michael@0: protected: michael@0: int32_t cstart; michael@0: private: michael@0: nsString* publicId; michael@0: nsString* systemId; michael@0: autoJArray strBuf; michael@0: int32_t strBufLen; michael@0: autoJArray longStrBuf; michael@0: int32_t longStrBufLen; michael@0: autoJArray bmpChar; michael@0: autoJArray astralChar; michael@0: protected: michael@0: nsHtml5ElementName* endTagExpectation; michael@0: private: michael@0: jArray endTagExpectationAsArray; michael@0: protected: michael@0: bool endTag; michael@0: private: michael@0: nsHtml5ElementName* tagName; michael@0: protected: michael@0: nsHtml5AttributeName* attributeName; michael@0: private: michael@0: nsIAtom* doctypeName; michael@0: nsString* publicIdentifier; michael@0: nsString* systemIdentifier; michael@0: nsHtml5HtmlAttributes* attributes; michael@0: bool newAttributesEachTime; michael@0: bool shouldSuspend; michael@0: protected: michael@0: bool confident; michael@0: private: michael@0: int32_t line; michael@0: nsHtml5AtomTable* interner; michael@0: bool viewingXmlSource; michael@0: public: michael@0: nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource); michael@0: void setInterner(nsHtml5AtomTable* interner); michael@0: void initLocation(nsString* newPublicId, nsString* newSystemId); michael@0: bool isViewingXmlSource(); michael@0: void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation); michael@0: void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation); michael@0: private: michael@0: void endTagExpectationToArray(); michael@0: public: michael@0: void setLineNumber(int32_t line); michael@0: inline int32_t getLineNumber() michael@0: { michael@0: return line; michael@0: } michael@0: michael@0: nsHtml5HtmlAttributes* emptyAttributes(); michael@0: private: michael@0: inline void clearStrBufAndAppend(char16_t c) michael@0: { michael@0: strBuf[0] = c; michael@0: strBufLen = 1; michael@0: } michael@0: michael@0: inline void clearStrBuf() michael@0: { michael@0: strBufLen = 0; michael@0: } michael@0: michael@0: void appendStrBuf(char16_t c); michael@0: protected: michael@0: nsString* strBufToString(); michael@0: private: michael@0: void strBufToDoctypeName(); michael@0: void emitStrBuf(); michael@0: inline void clearLongStrBuf() michael@0: { michael@0: longStrBufLen = 0; michael@0: } michael@0: michael@0: inline void clearLongStrBufAndAppend(char16_t c) michael@0: { michael@0: longStrBuf[0] = c; michael@0: longStrBufLen = 1; michael@0: } michael@0: michael@0: void appendLongStrBuf(char16_t c); michael@0: inline void appendSecondHyphenToBogusComment() michael@0: { michael@0: appendLongStrBuf('-'); michael@0: } michael@0: michael@0: inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c) michael@0: { michael@0: errConsecutiveHyphens(); michael@0: appendLongStrBuf(c); michael@0: } michael@0: michael@0: void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length); michael@0: inline void appendStrBufToLongStrBuf() michael@0: { michael@0: appendLongStrBuf(strBuf, 0, strBufLen); michael@0: } michael@0: michael@0: nsString* longStrBufToString(); michael@0: void emitComment(int32_t provisionalHyphens, int32_t pos); michael@0: protected: michael@0: void flushChars(char16_t* buf, int32_t pos); michael@0: private: michael@0: void strBufToElementNameString(); michael@0: int32_t emitCurrentTagToken(bool selfClosing, int32_t pos); michael@0: void attributeNameComplete(); michael@0: void addAttributeWithoutValue(); michael@0: void addAttributeWithValue(); michael@0: public: michael@0: void start(); michael@0: bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer); michael@0: private: michael@0: template int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos); michael@0: void initDoctypeFields(); michael@0: inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() michael@0: { michael@0: silentCarriageReturn(); michael@0: adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); michael@0: } michael@0: michael@0: inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed() michael@0: { michael@0: silentLineFeed(); michael@0: adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n'); michael@0: } michael@0: michael@0: inline void appendLongStrBufLineFeed() michael@0: { michael@0: silentLineFeed(); michael@0: appendLongStrBuf('\n'); michael@0: } michael@0: michael@0: inline void appendLongStrBufCarriageReturn() michael@0: { michael@0: silentCarriageReturn(); michael@0: appendLongStrBuf('\n'); michael@0: } michael@0: michael@0: protected: michael@0: inline void silentCarriageReturn() michael@0: { michael@0: ++line; michael@0: lastCR = true; michael@0: } michael@0: michael@0: inline void silentLineFeed() michael@0: { michael@0: ++line; michael@0: } michael@0: michael@0: private: michael@0: void emitCarriageReturn(char16_t* buf, int32_t pos); michael@0: void emitReplacementCharacter(char16_t* buf, int32_t pos); michael@0: void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); michael@0: void setAdditionalAndRememberAmpersandLocation(char16_t add); michael@0: void bogusDoctype(); michael@0: void bogusDoctypeWithoutQuirks(); michael@0: void emitOrAppendStrBuf(int32_t returnState); michael@0: void handleNcrValue(int32_t returnState); michael@0: public: michael@0: void eof(); michael@0: private: michael@0: void emitDoctypeToken(int32_t pos); michael@0: protected: michael@0: inline char16_t checkChar(char16_t* buf, int32_t pos) michael@0: { michael@0: return buf[pos]; michael@0: } michael@0: michael@0: public: michael@0: bool internalEncodingDeclaration(nsString* internalCharset); michael@0: private: michael@0: void emitOrAppendTwo(const char16_t* val, int32_t returnState); michael@0: void emitOrAppendOne(const char16_t* val, int32_t returnState); michael@0: public: michael@0: void end(); michael@0: void requestSuspension(); michael@0: bool isInDataState(); michael@0: void resetToDataState(); michael@0: void loadState(nsHtml5Tokenizer* other); michael@0: void initializeWithoutStarting(); michael@0: void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler); michael@0: ~nsHtml5Tokenizer(); michael@0: static void initializeStatics(); michael@0: static void releaseStatics(); michael@0: michael@0: #include "nsHtml5TokenizerHSupplement.h" michael@0: }; michael@0: michael@0: #define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1 michael@0: #define NS_HTML5TOKENIZER_DATA 0 michael@0: #define NS_HTML5TOKENIZER_RCDATA 1 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA 2 michael@0: #define NS_HTML5TOKENIZER_RAWTEXT 3 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4 michael@0: #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5 michael@0: #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6 michael@0: #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7 michael@0: #define NS_HTML5TOKENIZER_PLAINTEXT 8 michael@0: #define NS_HTML5TOKENIZER_TAG_OPEN 9 michael@0: #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10 michael@0: #define NS_HTML5TOKENIZER_TAG_NAME 11 michael@0: #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12 michael@0: #define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13 michael@0: #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14 michael@0: #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15 michael@0: #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16 michael@0: #define NS_HTML5TOKENIZER_BOGUS_COMMENT 17 michael@0: #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE 19 michael@0: #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_NAME 21 michael@0: #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22 michael@0: #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25 michael@0: #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26 michael@0: #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29 michael@0: #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30 michael@0: #define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31 michael@0: #define NS_HTML5TOKENIZER_COMMENT_START 32 michael@0: #define NS_HTML5TOKENIZER_COMMENT_START_DASH 33 michael@0: #define NS_HTML5TOKENIZER_COMMENT 34 michael@0: #define NS_HTML5TOKENIZER_COMMENT_END_DASH 35 michael@0: #define NS_HTML5TOKENIZER_COMMENT_END 36 michael@0: #define NS_HTML5TOKENIZER_COMMENT_END_BANG 37 michael@0: #define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38 michael@0: #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39 michael@0: #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41 michael@0: #define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42 michael@0: #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43 michael@0: #define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44 michael@0: #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45 michael@0: #define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46 michael@0: #define NS_HTML5TOKENIZER_CONSUME_NCR 47 michael@0: #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48 michael@0: #define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49 michael@0: #define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50 michael@0: #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51 michael@0: #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52 michael@0: #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53 michael@0: #define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54 michael@0: #define NS_HTML5TOKENIZER_CDATA_START 55 michael@0: #define NS_HTML5TOKENIZER_CDATA_SECTION 56 michael@0: #define NS_HTML5TOKENIZER_CDATA_RSQB 57 michael@0: #define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63 michael@0: #define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64 michael@0: #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71 michael@0: #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72 michael@0: #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73 michael@0: #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74 michael@0: #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10)) michael@0: #define NS_HTML5TOKENIZER_BUFFER_GROW_BY 1024 michael@0: michael@0: michael@0: #endif michael@0: