1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/html/nsHtml5Highlighter.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,404 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 +#ifndef nsHtml5Highlighter_h 1.8 +#define nsHtml5Highlighter_h 1.9 + 1.10 +#include "nsCOMPtr.h" 1.11 +#include "nsHtml5TreeOperation.h" 1.12 +#include "nsHtml5UTF16Buffer.h" 1.13 +#include "nsHtml5TreeOperation.h" 1.14 +#include "nsAHtml5TreeOpSink.h" 1.15 + 1.16 +#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512 1.17 + 1.18 +/** 1.19 + * A state machine for generating HTML for display in View Source based on 1.20 + * the transitions the tokenizer makes on the source being viewed. 1.21 + */ 1.22 +class nsHtml5Highlighter 1.23 +{ 1.24 + public: 1.25 + /** 1.26 + * The constructor. 1.27 + * 1.28 + * @param aOpSink the sink for the tree ops generated by this highlighter 1.29 + */ 1.30 + nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink); 1.31 + 1.32 + /** 1.33 + * The destructor. 1.34 + */ 1.35 + ~nsHtml5Highlighter(); 1.36 + 1.37 + /** 1.38 + * Starts the generated document. 1.39 + */ 1.40 + void Start(const nsAutoString& aTitle); 1.41 + 1.42 + /** 1.43 + * Report a tokenizer state transition. 1.44 + * 1.45 + * @param aState the state being transitioned to 1.46 + * @param aReconsume whether this is a reconsuming transition 1.47 + * @param aPos the tokenizer's current position into the buffer 1.48 + */ 1.49 + int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos); 1.50 + 1.51 + /** 1.52 + * Report end of file. 1.53 + */ 1.54 + void End(); 1.55 + 1.56 + /** 1.57 + * Set the current buffer being tokenized 1.58 + */ 1.59 + void SetBuffer(nsHtml5UTF16Buffer* aBuffer); 1.60 + 1.61 + /** 1.62 + * Let go of the buffer being tokenized but first, flush text from it. 1.63 + * 1.64 + * @param aPos the first UTF-16 code unit not to flush 1.65 + */ 1.66 + void DropBuffer(int32_t aPos); 1.67 + 1.68 + /** 1.69 + * Flush the tree ops into the sink. 1.70 + * 1.71 + * @return true if there were ops to flush 1.72 + */ 1.73 + bool FlushOps(); 1.74 + 1.75 + /** 1.76 + * Linkify the current attribute value if the attribute name is one of 1.77 + * known URL attributes. (When executing tree ops, javascript: URLs will 1.78 + * not be linkified, though.) 1.79 + * 1.80 + * @param aName the name of the attribute 1.81 + * @param aValue the value of the attribute 1.82 + */ 1.83 + void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, 1.84 + nsString* aValue); 1.85 + 1.86 + /** 1.87 + * Inform the highlighter that the tokenizer successfully completed a 1.88 + * named character reference. 1.89 + */ 1.90 + void CompletedNamedCharacterReference(); 1.91 + 1.92 + /** 1.93 + * Adds an error annotation to the node that's currently on top of 1.94 + * mStack. 1.95 + * 1.96 + * @param aMsgId the id of the message in the property file 1.97 + */ 1.98 + void AddErrorToCurrentNode(const char* aMsgId); 1.99 + 1.100 + /** 1.101 + * Adds an error annotation to the node that corresponds to the most 1.102 + * recently opened markup declaration/tag span, character reference or 1.103 + * run of text. 1.104 + * 1.105 + * @param aMsgId the id of the message in the property file 1.106 + */ 1.107 + void AddErrorToCurrentRun(const char* aMsgId); 1.108 + 1.109 + /** 1.110 + * Adds an error annotation to the node that corresponds to the most 1.111 + * recently opened markup declaration/tag span, character reference or 1.112 + * run of text with one atom to use when formatting the message. 1.113 + * 1.114 + * @param aMsgId the id of the message in the property file 1.115 + * @param aName the atom 1.116 + */ 1.117 + void AddErrorToCurrentRun(const char* aMsgId, nsIAtom* aName); 1.118 + 1.119 + /** 1.120 + * Adds an error annotation to the node that corresponds to the most 1.121 + * recently opened markup declaration/tag span, character reference or 1.122 + * run of text with two atoms to use when formatting the message. 1.123 + * 1.124 + * @param aMsgId the id of the message in the property file 1.125 + * @param aName the first atom 1.126 + * @param aOther the second atom 1.127 + */ 1.128 + void AddErrorToCurrentRun(const char* aMsgId, 1.129 + nsIAtom* aName, 1.130 + nsIAtom* aOther); 1.131 + 1.132 + /** 1.133 + * Adds an error annotation to the node that corresponds to the most 1.134 + * recent potentially character reference-starting ampersand. 1.135 + * 1.136 + * @param aMsgId the id of the message in the property file 1.137 + */ 1.138 + void AddErrorToCurrentAmpersand(const char* aMsgId); 1.139 + 1.140 + /** 1.141 + * Adds an error annotation to the node that corresponds to the most 1.142 + * recent potentially self-closing slash. 1.143 + * 1.144 + * @param aMsgId the id of the message in the property file 1.145 + */ 1.146 + void AddErrorToCurrentSlash(const char* aMsgId); 1.147 + 1.148 + private: 1.149 + 1.150 + /** 1.151 + * Starts a span with no class. 1.152 + */ 1.153 + void StartSpan(); 1.154 + 1.155 + /** 1.156 + * Starts a <span> and sets the class attribute on it. 1.157 + * 1.158 + * @param aClass the class to set (MUST be a static string that does not 1.159 + * need to be released!) 1.160 + */ 1.161 + void StartSpan(const char16_t* aClass); 1.162 + 1.163 + /** 1.164 + * End the current <span> or <a> in the highlighter output. 1.165 + */ 1.166 + void EndSpanOrA(); 1.167 + 1.168 + /** 1.169 + * Starts a wrapper around a run of characters. 1.170 + */ 1.171 + void StartCharacters(); 1.172 + 1.173 + /** 1.174 + * Ends a wrapper around a run of characters. 1.175 + */ 1.176 + void EndCharactersAndStartMarkupRun(); 1.177 + 1.178 + /** 1.179 + * Starts an <a>. 1.180 + */ 1.181 + void StartA(); 1.182 + 1.183 + /** 1.184 + * Flushes characters up to but not including the current one. 1.185 + */ 1.186 + void FlushChars(); 1.187 + 1.188 + /** 1.189 + * Flushes characters up to and including the current one. 1.190 + */ 1.191 + void FlushCurrent(); 1.192 + 1.193 + /** 1.194 + * Finishes highlighting a tag in the input data by closing the open 1.195 + * <span> and <a> elements in the highlighter output and then starts 1.196 + * another <span> for potentially highlighting characters potentially 1.197 + * appearing next. 1.198 + */ 1.199 + void FinishTag(); 1.200 + 1.201 + /** 1.202 + * Adds a class attribute to the current node. 1.203 + * 1.204 + * @param aClass the class to set (MUST be a static string that does not 1.205 + * need to be released!) 1.206 + */ 1.207 + void AddClass(const char16_t* aClass); 1.208 + 1.209 + /** 1.210 + * Allocates a handle for an element. 1.211 + * 1.212 + * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle() 1.213 + * in nsHtml5TreeBuilderHSupplement.h. 1.214 + * 1.215 + * @return the handle 1.216 + */ 1.217 + nsIContent** AllocateContentHandle(); 1.218 + 1.219 + /** 1.220 + * Enqueues an element creation tree operation. 1.221 + * 1.222 + * @param aName the name of the element 1.223 + * @param aAttributes the attribute holder (ownership will be taken) or 1.224 + * nullptr for no attributes 1.225 + * @return the handle for the element that will be created 1.226 + */ 1.227 + nsIContent** CreateElement(nsIAtom* aName, 1.228 + nsHtml5HtmlAttributes* aAttributes); 1.229 + 1.230 + /** 1.231 + * Gets the handle for the current node. May be called only after the 1.232 + * root element has been set. 1.233 + * 1.234 + * @return the handle for the current node 1.235 + */ 1.236 + nsIContent** CurrentNode(); 1.237 + 1.238 + /** 1.239 + * Create an element and push it (its handle) on the stack. 1.240 + * 1.241 + * @param aName the name of the element 1.242 + * @param aAttributes the attribute holder (ownership will be taken) or 1.243 + * nullptr for no attributes 1.244 + */ 1.245 + void Push(nsIAtom* aName, nsHtml5HtmlAttributes* aAttributes); 1.246 + 1.247 + /** 1.248 + * Pops the current node off the stack. 1.249 + */ 1.250 + void Pop(); 1.251 + 1.252 + /** 1.253 + * Appends text content to the current node. 1.254 + * 1.255 + * @param aBuffer the buffer to copy from 1.256 + * @param aStart the index of the first code unit to copy 1.257 + * @param aLength the number of code units to copy 1.258 + */ 1.259 + void AppendCharacters(const char16_t* aBuffer, 1.260 + int32_t aStart, 1.261 + int32_t aLength); 1.262 + 1.263 + /** 1.264 + * Enqueues a tree op for adding an href attribute with the view-source: 1.265 + * URL scheme to the current node. 1.266 + * 1.267 + * @param aValue the (potentially relative) URL to link to 1.268 + */ 1.269 + void AddViewSourceHref(const nsString& aValue); 1.270 + 1.271 + /** 1.272 + * The state we are transitioning away from. 1.273 + */ 1.274 + int32_t mState; 1.275 + 1.276 + /** 1.277 + * The index of the first UTF-16 code unit in mBuffer that hasn't been 1.278 + * flushed yet. 1.279 + */ 1.280 + int32_t mCStart; 1.281 + 1.282 + /** 1.283 + * The position of the code unit in mBuffer that caused the current 1.284 + * transition. 1.285 + */ 1.286 + int32_t mPos; 1.287 + 1.288 + /** 1.289 + * The current line number. 1.290 + */ 1.291 + int32_t mLineNumber; 1.292 + 1.293 + /** 1.294 + * The number of inline elements open inside the <pre> excluding the 1.295 + * span potentially wrapping a run of characters. 1.296 + */ 1.297 + int32_t mInlinesOpen; 1.298 + 1.299 + /** 1.300 + * Whether there's a span wrapping a run of characters (excluding CDATA 1.301 + * section) open. 1.302 + */ 1.303 + bool mInCharacters; 1.304 + 1.305 + /** 1.306 + * The current buffer being tokenized. 1.307 + */ 1.308 + nsHtml5UTF16Buffer* mBuffer; 1.309 + 1.310 + /** 1.311 + * Whether to highlight syntax visibly initially. 1.312 + */ 1.313 + bool mSyntaxHighlight; 1.314 + 1.315 + /** 1.316 + * The outgoing tree op queue. 1.317 + */ 1.318 + nsTArray<nsHtml5TreeOperation> mOpQueue; 1.319 + 1.320 + /** 1.321 + * The tree op stage for the tree op executor. 1.322 + */ 1.323 + nsAHtml5TreeOpSink* mOpSink; 1.324 + 1.325 + /** 1.326 + * The most recently opened markup declaration/tag or run of characters. 1.327 + */ 1.328 + nsIContent** mCurrentRun; 1.329 + 1.330 + /** 1.331 + * The most recent ampersand in a place where character references were 1.332 + * allowed. 1.333 + */ 1.334 + nsIContent** mAmpersand; 1.335 + 1.336 + /** 1.337 + * The most recent slash that might become a self-closing slash. 1.338 + */ 1.339 + nsIContent** mSlash; 1.340 + 1.341 + /** 1.342 + * Memory for element handles. 1.343 + */ 1.344 + nsAutoArrayPtr<nsIContent*> mHandles; 1.345 + 1.346 + /** 1.347 + * Number of handles used in mHandles 1.348 + */ 1.349 + int32_t mHandlesUsed; 1.350 + 1.351 + /** 1.352 + * A holder for old contents of mHandles 1.353 + */ 1.354 + nsTArray<nsAutoArrayPtr<nsIContent*> > mOldHandles; 1.355 + 1.356 + /** 1.357 + * The element stack. 1.358 + */ 1.359 + nsTArray<nsIContent**> mStack; 1.360 + 1.361 + /** 1.362 + * The string "comment" 1.363 + */ 1.364 + static char16_t sComment[]; 1.365 + 1.366 + /** 1.367 + * The string "cdata" 1.368 + */ 1.369 + static char16_t sCdata[]; 1.370 + 1.371 + /** 1.372 + * The string "start-tag" 1.373 + */ 1.374 + static char16_t sStartTag[]; 1.375 + 1.376 + /** 1.377 + * The string "attribute-name" 1.378 + */ 1.379 + static char16_t sAttributeName[]; 1.380 + 1.381 + /** 1.382 + * The string "attribute-value" 1.383 + */ 1.384 + static char16_t sAttributeValue[]; 1.385 + 1.386 + /** 1.387 + * The string "end-tag" 1.388 + */ 1.389 + static char16_t sEndTag[]; 1.390 + 1.391 + /** 1.392 + * The string "doctype" 1.393 + */ 1.394 + static char16_t sDoctype[]; 1.395 + 1.396 + /** 1.397 + * The string "entity" 1.398 + */ 1.399 + static char16_t sEntity[]; 1.400 + 1.401 + /** 1.402 + * The string "pi" 1.403 + */ 1.404 + static char16_t sPi[]; 1.405 +}; 1.406 + 1.407 +#endif // nsHtml5Highlighter_h