michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #ifndef nsHtml5Highlighter_h michael@0: #define nsHtml5Highlighter_h michael@0: michael@0: #include "nsCOMPtr.h" michael@0: #include "nsHtml5TreeOperation.h" michael@0: #include "nsHtml5UTF16Buffer.h" michael@0: #include "nsHtml5TreeOperation.h" michael@0: #include "nsAHtml5TreeOpSink.h" michael@0: michael@0: #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512 michael@0: michael@0: /** michael@0: * A state machine for generating HTML for display in View Source based on michael@0: * the transitions the tokenizer makes on the source being viewed. michael@0: */ michael@0: class nsHtml5Highlighter michael@0: { michael@0: public: michael@0: /** michael@0: * The constructor. michael@0: * michael@0: * @param aOpSink the sink for the tree ops generated by this highlighter michael@0: */ michael@0: nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink); michael@0: michael@0: /** michael@0: * The destructor. michael@0: */ michael@0: ~nsHtml5Highlighter(); michael@0: michael@0: /** michael@0: * Starts the generated document. michael@0: */ michael@0: void Start(const nsAutoString& aTitle); michael@0: michael@0: /** michael@0: * Report a tokenizer state transition. michael@0: * michael@0: * @param aState the state being transitioned to michael@0: * @param aReconsume whether this is a reconsuming transition michael@0: * @param aPos the tokenizer's current position into the buffer michael@0: */ michael@0: int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos); michael@0: michael@0: /** michael@0: * Report end of file. michael@0: */ michael@0: void End(); michael@0: michael@0: /** michael@0: * Set the current buffer being tokenized michael@0: */ michael@0: void SetBuffer(nsHtml5UTF16Buffer* aBuffer); michael@0: michael@0: /** michael@0: * Let go of the buffer being tokenized but first, flush text from it. michael@0: * michael@0: * @param aPos the first UTF-16 code unit not to flush michael@0: */ michael@0: void DropBuffer(int32_t aPos); michael@0: michael@0: /** michael@0: * Flush the tree ops into the sink. michael@0: * michael@0: * @return true if there were ops to flush michael@0: */ michael@0: bool FlushOps(); michael@0: michael@0: /** michael@0: * Linkify the current attribute value if the attribute name is one of michael@0: * known URL attributes. (When executing tree ops, javascript: URLs will michael@0: * not be linkified, though.) michael@0: * michael@0: * @param aName the name of the attribute michael@0: * @param aValue the value of the attribute michael@0: */ michael@0: void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, michael@0: nsString* aValue); michael@0: michael@0: /** michael@0: * Inform the highlighter that the tokenizer successfully completed a michael@0: * named character reference. michael@0: */ michael@0: void CompletedNamedCharacterReference(); michael@0: michael@0: /** michael@0: * Adds an error annotation to the node that's currently on top of michael@0: * mStack. michael@0: * michael@0: * @param aMsgId the id of the message in the property file michael@0: */ michael@0: void AddErrorToCurrentNode(const char* aMsgId); michael@0: michael@0: /** michael@0: * Adds an error annotation to the node that corresponds to the most michael@0: * recently opened markup declaration/tag span, character reference or michael@0: * run of text. michael@0: * michael@0: * @param aMsgId the id of the message in the property file michael@0: */ michael@0: void AddErrorToCurrentRun(const char* aMsgId); michael@0: michael@0: /** michael@0: * Adds an error annotation to the node that corresponds to the most michael@0: * recently opened markup declaration/tag span, character reference or michael@0: * run of text with one atom to use when formatting the message. michael@0: * michael@0: * @param aMsgId the id of the message in the property file michael@0: * @param aName the atom michael@0: */ michael@0: void AddErrorToCurrentRun(const char* aMsgId, nsIAtom* aName); michael@0: michael@0: /** michael@0: * Adds an error annotation to the node that corresponds to the most michael@0: * recently opened markup declaration/tag span, character reference or michael@0: * run of text with two atoms to use when formatting the message. michael@0: * michael@0: * @param aMsgId the id of the message in the property file michael@0: * @param aName the first atom michael@0: * @param aOther the second atom michael@0: */ michael@0: void AddErrorToCurrentRun(const char* aMsgId, michael@0: nsIAtom* aName, michael@0: nsIAtom* aOther); michael@0: michael@0: /** michael@0: * Adds an error annotation to the node that corresponds to the most michael@0: * recent potentially character reference-starting ampersand. michael@0: * michael@0: * @param aMsgId the id of the message in the property file michael@0: */ michael@0: void AddErrorToCurrentAmpersand(const char* aMsgId); michael@0: michael@0: /** michael@0: * Adds an error annotation to the node that corresponds to the most michael@0: * recent potentially self-closing slash. michael@0: * michael@0: * @param aMsgId the id of the message in the property file michael@0: */ michael@0: void AddErrorToCurrentSlash(const char* aMsgId); michael@0: michael@0: private: michael@0: michael@0: /** michael@0: * Starts a span with no class. michael@0: */ michael@0: void StartSpan(); michael@0: michael@0: /** michael@0: * Starts a and sets the class attribute on it. michael@0: * michael@0: * @param aClass the class to set (MUST be a static string that does not michael@0: * need to be released!) michael@0: */ michael@0: void StartSpan(const char16_t* aClass); michael@0: michael@0: /** michael@0: * End the current or in the highlighter output. michael@0: */ michael@0: void EndSpanOrA(); michael@0: michael@0: /** michael@0: * Starts a wrapper around a run of characters. michael@0: */ michael@0: void StartCharacters(); michael@0: michael@0: /** michael@0: * Ends a wrapper around a run of characters. michael@0: */ michael@0: void EndCharactersAndStartMarkupRun(); michael@0: michael@0: /** michael@0: * Starts an . michael@0: */ michael@0: void StartA(); michael@0: michael@0: /** michael@0: * Flushes characters up to but not including the current one. michael@0: */ michael@0: void FlushChars(); michael@0: michael@0: /** michael@0: * Flushes characters up to and including the current one. michael@0: */ michael@0: void FlushCurrent(); michael@0: michael@0: /** michael@0: * Finishes highlighting a tag in the input data by closing the open michael@0: * and elements in the highlighter output and then starts michael@0: * another for potentially highlighting characters potentially michael@0: * appearing next. michael@0: */ michael@0: void FinishTag(); michael@0: michael@0: /** michael@0: * Adds a class attribute to the current node. michael@0: * michael@0: * @param aClass the class to set (MUST be a static string that does not michael@0: * need to be released!) michael@0: */ michael@0: void AddClass(const char16_t* aClass); michael@0: michael@0: /** michael@0: * Allocates a handle for an element. michael@0: * michael@0: * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle() michael@0: * in nsHtml5TreeBuilderHSupplement.h. michael@0: * michael@0: * @return the handle michael@0: */ michael@0: nsIContent** AllocateContentHandle(); michael@0: michael@0: /** michael@0: * Enqueues an element creation tree operation. michael@0: * michael@0: * @param aName the name of the element michael@0: * @param aAttributes the attribute holder (ownership will be taken) or michael@0: * nullptr for no attributes michael@0: * @return the handle for the element that will be created michael@0: */ michael@0: nsIContent** CreateElement(nsIAtom* aName, michael@0: nsHtml5HtmlAttributes* aAttributes); michael@0: michael@0: /** michael@0: * Gets the handle for the current node. May be called only after the michael@0: * root element has been set. michael@0: * michael@0: * @return the handle for the current node michael@0: */ michael@0: nsIContent** CurrentNode(); michael@0: michael@0: /** michael@0: * Create an element and push it (its handle) on the stack. michael@0: * michael@0: * @param aName the name of the element michael@0: * @param aAttributes the attribute holder (ownership will be taken) or michael@0: * nullptr for no attributes michael@0: */ michael@0: void Push(nsIAtom* aName, nsHtml5HtmlAttributes* aAttributes); michael@0: michael@0: /** michael@0: * Pops the current node off the stack. michael@0: */ michael@0: void Pop(); michael@0: michael@0: /** michael@0: * Appends text content to the current node. michael@0: * michael@0: * @param aBuffer the buffer to copy from michael@0: * @param aStart the index of the first code unit to copy michael@0: * @param aLength the number of code units to copy michael@0: */ michael@0: void AppendCharacters(const char16_t* aBuffer, michael@0: int32_t aStart, michael@0: int32_t aLength); michael@0: michael@0: /** michael@0: * Enqueues a tree op for adding an href attribute with the view-source: michael@0: * URL scheme to the current node. michael@0: * michael@0: * @param aValue the (potentially relative) URL to link to michael@0: */ michael@0: void AddViewSourceHref(const nsString& aValue); michael@0: michael@0: /** michael@0: * The state we are transitioning away from. michael@0: */ michael@0: int32_t mState; michael@0: michael@0: /** michael@0: * The index of the first UTF-16 code unit in mBuffer that hasn't been michael@0: * flushed yet. michael@0: */ michael@0: int32_t mCStart; michael@0: michael@0: /** michael@0: * The position of the code unit in mBuffer that caused the current michael@0: * transition. michael@0: */ michael@0: int32_t mPos; michael@0: michael@0: /** michael@0: * The current line number. michael@0: */ michael@0: int32_t mLineNumber; michael@0: michael@0: /** michael@0: * The number of inline elements open inside the
 excluding the
michael@0:      * span potentially wrapping a run of characters.
michael@0:      */
michael@0:     int32_t mInlinesOpen;
michael@0: 
michael@0:     /**
michael@0:      * Whether there's a span wrapping a run of characters (excluding CDATA
michael@0:      * section) open.
michael@0:      */
michael@0:     bool mInCharacters;
michael@0: 
michael@0:     /**
michael@0:      * The current buffer being tokenized.
michael@0:      */
michael@0:     nsHtml5UTF16Buffer* mBuffer;
michael@0: 
michael@0:     /**
michael@0:      * Whether to highlight syntax visibly initially.
michael@0:      */
michael@0:     bool mSyntaxHighlight;
michael@0: 
michael@0:     /**
michael@0:      * The outgoing tree op queue.
michael@0:      */
michael@0:     nsTArray mOpQueue;
michael@0: 
michael@0:     /**
michael@0:      * The tree op stage for the tree op executor.
michael@0:      */
michael@0:     nsAHtml5TreeOpSink* mOpSink;
michael@0: 
michael@0:     /**
michael@0:      * The most recently opened markup declaration/tag or run of characters.
michael@0:      */
michael@0:     nsIContent** mCurrentRun;
michael@0: 
michael@0:     /**
michael@0:      * The most recent ampersand in a place where character references were
michael@0:      * allowed.
michael@0:      */
michael@0:     nsIContent** mAmpersand;
michael@0: 
michael@0:     /**
michael@0:      * The most recent slash that might become a self-closing slash.
michael@0:      */
michael@0:     nsIContent** mSlash;
michael@0: 
michael@0:     /**
michael@0:      * Memory for element handles.
michael@0:      */
michael@0:     nsAutoArrayPtr mHandles;
michael@0: 
michael@0:     /**
michael@0:      * Number of handles used in mHandles
michael@0:      */
michael@0:     int32_t mHandlesUsed;
michael@0: 
michael@0:     /**
michael@0:      * A holder for old contents of mHandles
michael@0:      */
michael@0:     nsTArray > mOldHandles;
michael@0: 
michael@0:     /**
michael@0:      * The element stack.
michael@0:      */
michael@0:     nsTArray mStack;
michael@0: 
michael@0:     /**
michael@0:      * The string "comment"
michael@0:      */
michael@0:     static char16_t sComment[];
michael@0: 
michael@0:     /**
michael@0:      * The string "cdata"
michael@0:      */
michael@0:     static char16_t sCdata[];
michael@0: 
michael@0:     /**
michael@0:      * The string "start-tag"
michael@0:      */
michael@0:     static char16_t sStartTag[];
michael@0: 
michael@0:     /**
michael@0:      * The string "attribute-name"
michael@0:      */
michael@0:     static char16_t sAttributeName[];
michael@0: 
michael@0:     /**
michael@0:      * The string "attribute-value"
michael@0:      */
michael@0:     static char16_t sAttributeValue[];
michael@0: 
michael@0:     /**
michael@0:      * The string "end-tag"
michael@0:      */
michael@0:     static char16_t sEndTag[];
michael@0: 
michael@0:     /**
michael@0:      * The string "doctype"
michael@0:      */
michael@0:     static char16_t sDoctype[];
michael@0: 
michael@0:     /**
michael@0:      * The string "entity"
michael@0:      */
michael@0:     static char16_t sEntity[];
michael@0: 
michael@0:     /**
michael@0:      * The string "pi"
michael@0:      */
michael@0:     static char16_t sPi[];
michael@0: };
michael@0: 
michael@0: #endif // nsHtml5Highlighter_h