1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/layout/style/nsCSSScanner.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,378 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* tokenization of CSS style sheets */ 1.10 + 1.11 +#ifndef nsCSSScanner_h___ 1.12 +#define nsCSSScanner_h___ 1.13 + 1.14 +#include "nsString.h" 1.15 + 1.16 +namespace mozilla { 1.17 +namespace css { 1.18 +class ErrorReporter; 1.19 +} 1.20 +} 1.21 + 1.22 +// Token types; in close but not perfect correspondence to the token 1.23 +// categorization in section 4.1.1 of CSS2.1. (The deviations are all 1.24 +// the fault of css3-selectors, which has requirements that can only be 1.25 +// met by changing the generic tokenization.) The comment on each line 1.26 +// illustrates the form of each identifier. 1.27 + 1.28 +enum nsCSSTokenType { 1.29 + // White space of any kind. No value fields are used. Note that 1.30 + // comments do *not* count as white space; comments separate tokens 1.31 + // but are not themselves tokens. 1.32 + eCSSToken_Whitespace, // 1.33 + 1.34 + // Identifier-like tokens. mIdent is the text of the identifier. 1.35 + // The difference between ID and Hash is: if the text after the # 1.36 + // would have been a valid Ident if the # hadn't been there, the 1.37 + // scanner produces an ID token. Otherwise it produces a Hash token. 1.38 + // (This distinction is required by css3-selectors.) 1.39 + eCSSToken_Ident, // word 1.40 + eCSSToken_Function, // word( 1.41 + eCSSToken_AtKeyword, // @word 1.42 + eCSSToken_ID, // #word 1.43 + eCSSToken_Hash, // #0word 1.44 + 1.45 + // Numeric tokens. 
mNumber is the floating-point value of the 1.46 + // number, and mHasSign indicates whether there was an explicit sign 1.47 + // (+ or -) in front of the number. If mIntegerValid is true, the 1.48 + // number had the lexical form of an integer, and mInteger is its 1.49 + // integer value. Lexically integer values outside the range of a 1.50 + // 32-bit signed number are clamped to the maximum values; mNumber 1.51 + // will indicate a 'truer' value in that case. Percentage tokens 1.52 + // are always considered not to be integers, even if their numeric 1.53 + // value is integral (100% => mNumber = 1.0). For Dimension 1.54 + // tokens, mIdent holds the text of the unit. 1.55 + eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3 1.56 + eCSSToken_Dimension, // 24px 8.5in 1.57 + eCSSToken_Percentage, // 85% 1280.4% 1.58 + 1.59 + // String-like tokens. In all cases, mIdent holds the text 1.60 + // belonging to the string, and mSymbol holds the delimiter 1.61 + // character, which may be ', ", or zero (only for unquoted URLs). 1.62 + // Bad_String and Bad_URL tokens are emitted when the closing 1.63 + // delimiter or parenthesis was missing. 1.64 + eCSSToken_String, // 'foo bar' "foo bar" 1.65 + eCSSToken_Bad_String, // 'foo bar 1.66 + eCSSToken_URL, // url(foobar) url("foo bar") 1.67 + eCSSToken_Bad_URL, // url(foo 1.68 + 1.69 + // Any one-character symbol. mSymbol holds the character. 1.70 + eCSSToken_Symbol, // . ; { } ! * 1.71 + 1.72 + // Match operators. These are single tokens rather than pairs of 1.73 + // Symbol tokens because css3-selectors forbids the presence of 1.74 + // comments between the two characters. No value fields are used; 1.75 + // the token type indicates which operator. 1.76 + eCSSToken_Includes, // ~= 1.77 + eCSSToken_Dashmatch, // |= 1.78 + eCSSToken_Beginsmatch, // ^= 1.79 + eCSSToken_Endsmatch, // $= 1.80 + eCSSToken_Containsmatch, // *= 1.81 + 1.82 + // Unicode-range token: currently used only in @font-face. 
1.83 + // The lexical rule for this token includes several forms that are 1.84 + // semantically invalid. Therefore, mIdent always holds the 1.85 + // complete original text of the token (so we can print it 1.86 + // accurately in diagnostics), and mIntegerValid is true iff the 1.87 + // token is semantically valid. In that case, mInteger holds the 1.88 + // lowest value included in the range, and mInteger2 holds the 1.89 + // highest value included in the range. 1.90 + eCSSToken_URange, // U+007e U+01?? U+2000-206F 1.91 + 1.92 + // HTML comment delimiters, ignored as a unit when they appear at 1.93 + // the top level of a style sheet, for compatibility with websites 1.94 + // written for compatibility with pre-CSS browsers. This token type 1.95 + // subsumes the css2.1 CDO and CDC tokens, which are always treated 1.96 + // the same by the parser. mIdent holds the text of the token, for 1.97 + // diagnostics. 1.98 + eCSSToken_HTMLComment, // <!-- --> 1.99 +}; 1.100 + 1.101 +// Classification of tokens used to determine if a "/**/" string must be 1.102 +// inserted if pasting token streams together when serializing. We include 1.103 +// values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch, 1.104 +// as css-syntax does not treat these as whole tokens, but we will still 1.105 +// need to insert a "/**/" string between a '|' delim and a '|=' dashmatch 1.106 +// and between a '/' delim and a '*=' containsmatch. 
//
// https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
//
// Enumerators are implicitly numbered from zero in declaration order.
// Where an enumerator has a trailing comment, it shows the symbol
// character(s) that enumerator stands for.
enum nsCSSTokenSerializationType {
  eCSSTokenSerialization_Nothing,
  eCSSTokenSerialization_Whitespace,
  eCSSTokenSerialization_AtKeyword_or_Hash,
  eCSSTokenSerialization_Number,
  eCSSTokenSerialization_Dimension,
  eCSSTokenSerialization_Percentage,
  eCSSTokenSerialization_URange,
  eCSSTokenSerialization_URL_or_BadURL,
  eCSSTokenSerialization_Function,
  eCSSTokenSerialization_Ident,
  eCSSTokenSerialization_CDC,
  eCSSTokenSerialization_DashMatch,
  eCSSTokenSerialization_ContainsMatch,
  eCSSTokenSerialization_Symbol_Hash,         // '#'
  eCSSTokenSerialization_Symbol_At,           // '@'
  eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
  eCSSTokenSerialization_Symbol_Minus,        // '-'
  eCSSTokenSerialization_Symbol_OpenParen,    // '('
  eCSSTokenSerialization_Symbol_Question,     // '?'
  eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
  eCSSTokenSerialization_Symbol_Equals,       // '='
  eCSSTokenSerialization_Symbol_Bar,          // '|'
  eCSSTokenSerialization_Symbol_Slash,        // '/'
  eCSSTokenSerialization_Symbol_Asterisk,     // '*'
  eCSSTokenSerialization_Other                // anything else
};

// A single token returned from the scanner.  mType is always
// meaningful; comments above describe which other fields are
// meaningful for which token types.
1.140 +struct nsCSSToken { 1.141 + nsAutoString mIdent; 1.142 + float mNumber; 1.143 + int32_t mInteger; 1.144 + int32_t mInteger2; 1.145 + nsCSSTokenType mType; 1.146 + char16_t mSymbol; 1.147 + bool mIntegerValid; 1.148 + bool mHasSign; 1.149 + 1.150 + nsCSSToken() 1.151 + : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace), 1.152 + mSymbol('\0'), mIntegerValid(false), mHasSign(false) 1.153 + {} 1.154 + 1.155 + bool IsSymbol(char16_t aSymbol) const { 1.156 + return mType == eCSSToken_Symbol && mSymbol == aSymbol; 1.157 + } 1.158 + 1.159 + void AppendToString(nsString& aBuffer) const; 1.160 +}; 1.161 + 1.162 +// Represents an nsCSSScanner's saved position in the input buffer. 1.163 +class nsCSSScannerPosition { 1.164 + friend class nsCSSScanner; 1.165 +public: 1.166 + nsCSSScannerPosition() : mInitialized(false) { } 1.167 + 1.168 + uint32_t LineNumber() { 1.169 + MOZ_ASSERT(mInitialized); 1.170 + return mLineNumber; 1.171 + } 1.172 + 1.173 + uint32_t LineOffset() { 1.174 + MOZ_ASSERT(mInitialized); 1.175 + return mLineOffset; 1.176 + } 1.177 + 1.178 +private: 1.179 + uint32_t mOffset; 1.180 + uint32_t mLineNumber; 1.181 + uint32_t mLineOffset; 1.182 + uint32_t mTokenLineNumber; 1.183 + uint32_t mTokenLineOffset; 1.184 + uint32_t mTokenOffset; 1.185 + bool mInitialized; 1.186 +}; 1.187 + 1.188 +// nsCSSScanner tokenizes an input stream using the CSS2.1 forward 1.189 +// compatible tokenization rules. Used internally by nsCSSParser; 1.190 +// not available for use by other code. 1.191 +class nsCSSScanner { 1.192 + public: 1.193 + // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0| 1.194 + // when the line number is unknown. 
1.195 + nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber); 1.196 + ~nsCSSScanner(); 1.197 + 1.198 + void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) { 1.199 + mReporter = aReporter; 1.200 + } 1.201 + // Set whether or not we are processing SVG 1.202 + void SetSVGMode(bool aSVGMode) { 1.203 + mSVGMode = aSVGMode; 1.204 + } 1.205 + bool IsSVGMode() const { 1.206 + return mSVGMode; 1.207 + } 1.208 + 1.209 + // Reset or check whether a BAD_URL or BAD_STRING token has been seen. 1.210 + void ClearSeenBadToken() { mSeenBadToken = false; } 1.211 + bool SeenBadToken() const { return mSeenBadToken; } 1.212 + 1.213 + // Reset or check whether a "var(" FUNCTION token has been seen. 1.214 + void ClearSeenVariableReference() { mSeenVariableReference = false; } 1.215 + bool SeenVariableReference() const { return mSeenVariableReference; } 1.216 + 1.217 + // Get the 1-based line number of the last character of 1.218 + // the most recently processed token. 1.219 + uint32_t GetLineNumber() const { return mTokenLineNumber; } 1.220 + 1.221 + // Get the 0-based column number of the first character of 1.222 + // the most recently processed token. 1.223 + uint32_t GetColumnNumber() const 1.224 + { return mTokenOffset - mTokenLineOffset; } 1.225 + 1.226 + // Get the text of the line containing the first character of 1.227 + // the most recently processed token. 1.228 + nsDependentSubstring GetCurrentLine() const; 1.229 + 1.230 + // Get the next token. Return false on EOF. aTokenResult is filled 1.231 + // in with the data for the token. If aSkipWS is true, skip over 1.232 + // eCSSToken_Whitespace tokens rather than returning them. 1.233 + bool Next(nsCSSToken& aTokenResult, bool aSkipWS); 1.234 + 1.235 + // Get the body of an URL token (everything after the 'url('). 1.236 + // This is exposed for use by nsCSSParser::ParseMozDocumentRule, 1.237 + // which, for historical reasons, must make additional function 1.238 + // tokens behave like url(). 
Please do not add new uses to the 1.239 + // parser. 1.240 + bool NextURL(nsCSSToken& aTokenResult); 1.241 + 1.242 + // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg, 1.243 + // because "2n-1" is a single DIMENSION token, and "n-1" is a single 1.244 + // IDENT token, but the :nth() selector syntax wants to interpret 1.245 + // them the same as "2n -1" and "n -1" respectively. Please do not 1.246 + // add new uses to the parser. 1.247 + // 1.248 + // Note: this function may not be used to back up over a line boundary. 1.249 + void Backup(uint32_t n); 1.250 + 1.251 + // Starts recording the input stream from the current position. 1.252 + void StartRecording(); 1.253 + 1.254 + // Abandons recording of the input stream. 1.255 + void StopRecording(); 1.256 + 1.257 + // Stops recording of the input stream and appends the recorded 1.258 + // input to aBuffer. 1.259 + void StopRecording(nsString& aBuffer); 1.260 + 1.261 + // Returns the length of the current recording. 1.262 + uint32_t RecordingLength() const; 1.263 + 1.264 +#ifdef DEBUG 1.265 + bool IsRecording() const; 1.266 +#endif 1.267 + 1.268 + // Stores the current scanner offset into the specified object. 1.269 + void SavePosition(nsCSSScannerPosition& aState); 1.270 + 1.271 + // Resets the scanner offset to a position saved by SavePosition. 
1.272 + void RestoreSavedPosition(const nsCSSScannerPosition& aState); 1.273 + 1.274 + enum EOFCharacters { 1.275 + eEOFCharacters_None = 0x0000, 1.276 + 1.277 + // to handle \<EOF> inside strings 1.278 + eEOFCharacters_DropBackslash = 0x0001, 1.279 + 1.280 + // to handle \<EOF> outside strings 1.281 + eEOFCharacters_ReplacementChar = 0x0002, 1.282 + 1.283 + // to close comments 1.284 + eEOFCharacters_Asterisk = 0x0004, 1.285 + eEOFCharacters_Slash = 0x0008, 1.286 + 1.287 + // to close double-quoted strings 1.288 + eEOFCharacters_DoubleQuote = 0x0010, 1.289 + 1.290 + // to close single-quoted strings 1.291 + eEOFCharacters_SingleQuote = 0x0020, 1.292 + 1.293 + // to close URLs 1.294 + eEOFCharacters_CloseParen = 0x0040, 1.295 + }; 1.296 + 1.297 + // Appends any characters to the specified string the input stream to make the 1.298 + // last token not rely on special EOF handling behavior. 1.299 + // 1.300 + // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored. 1.301 + static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, 1.302 + nsAString& aString); 1.303 + 1.304 + EOFCharacters GetEOFCharacters() const { 1.305 +#ifdef DEBUG 1.306 + AssertEOFCharactersValid(mEOFCharacters); 1.307 +#endif 1.308 + return mEOFCharacters; 1.309 + } 1.310 + 1.311 +#ifdef DEBUG 1.312 + static void AssertEOFCharactersValid(uint32_t c); 1.313 +#endif 1.314 + 1.315 +protected: 1.316 + int32_t Peek(uint32_t n = 0); 1.317 + void Advance(uint32_t n = 1); 1.318 + void AdvanceLine(); 1.319 + 1.320 + void SkipWhitespace(); 1.321 + void SkipComment(); 1.322 + 1.323 + bool GatherEscape(nsString& aOutput, bool aInString); 1.324 + bool GatherText(uint8_t aClass, nsString& aIdent); 1.325 + 1.326 + bool ScanIdent(nsCSSToken& aResult); 1.327 + bool ScanAtKeyword(nsCSSToken& aResult); 1.328 + bool ScanHash(nsCSSToken& aResult); 1.329 + bool ScanNumber(nsCSSToken& aResult); 1.330 + bool ScanString(nsCSSToken& aResult); 1.331 + bool ScanURange(nsCSSToken& aResult); 1.332 + 
1.333 + void SetEOFCharacters(uint32_t aEOFCharacters); 1.334 + void AddEOFCharacters(uint32_t aEOFCharacters); 1.335 + 1.336 + const char16_t *mBuffer; 1.337 + uint32_t mOffset; 1.338 + uint32_t mCount; 1.339 + 1.340 + uint32_t mLineNumber; 1.341 + uint32_t mLineOffset; 1.342 + 1.343 + uint32_t mTokenLineNumber; 1.344 + uint32_t mTokenLineOffset; 1.345 + uint32_t mTokenOffset; 1.346 + 1.347 + uint32_t mRecordStartOffset; 1.348 + EOFCharacters mEOFCharacters; 1.349 + 1.350 + mozilla::css::ErrorReporter *mReporter; 1.351 + 1.352 + // True if we are in SVG mode; false in "normal" CSS 1.353 + bool mSVGMode; 1.354 + bool mRecording; 1.355 + bool mSeenBadToken; 1.356 + bool mSeenVariableReference; 1.357 +}; 1.358 + 1.359 +// Token for the grid-template-areas micro-syntax 1.360 +// http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas 1.361 +struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken { 1.362 + nsAutoString mName; // Empty for a null cell, non-empty for a named cell 1.363 + bool isTrash; // True for a trash token, mName is ignored in this case. 1.364 +}; 1.365 + 1.366 +// Scanner for the grid-template-areas micro-syntax 1.367 +class nsCSSGridTemplateAreaScanner { 1.368 +public: 1.369 + nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); 1.370 + 1.371 + // Get the next token. Return false on EOF. 1.372 + // aTokenResult is filled in with the data for the token. 1.373 + bool Next(nsCSSGridTemplateAreaToken& aTokenResult); 1.374 + 1.375 +private: 1.376 + const char16_t *mBuffer; 1.377 + uint32_t mOffset; 1.378 + uint32_t mCount; 1.379 +}; 1.380 + 1.381 +#endif /* nsCSSScanner_h___ */