layout/style/nsCSSScanner.h

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /* tokenization of CSS style sheets */
michael@0 7
michael@0 8 #ifndef nsCSSScanner_h___
michael@0 9 #define nsCSSScanner_h___
michael@0 10
michael@0 11 #include "nsString.h"
michael@0 12
michael@0 13 namespace mozilla {
michael@0 14 namespace css {
michael@0 15 class ErrorReporter;
michael@0 16 }
michael@0 17 }
michael@0 18
michael@0 19 // Token types; in close but not perfect correspondence to the token
michael@0 20 // categorization in section 4.1.1 of CSS2.1. (The deviations are all
michael@0 21 // the fault of css3-selectors, which has requirements that can only be
michael@0 22 // met by changing the generic tokenization.) The comment on each line
michael@0 23 // illustrates the form of each identifier.
michael@0 24
michael@0 25 enum nsCSSTokenType { // enumerator values are implicit, assigned in declaration order starting at 0
michael@0 26 // White space of any kind. No value fields are used. Note that
michael@0 27 // comments do *not* count as white space; comments separate tokens
michael@0 28 // but are not themselves tokens.
michael@0 29 eCSSToken_Whitespace, // (white space has no visible form to illustrate)
michael@0 30
michael@0 31 // Identifier-like tokens. mIdent is the text of the identifier.
michael@0 32 // The difference between ID and Hash is: if the text after the #
michael@0 33 // would have been a valid Ident if the # hadn't been there, the
michael@0 34 // scanner produces an ID token. Otherwise it produces a Hash token.
michael@0 35 // (This distinction is required by css3-selectors.)
michael@0 36 eCSSToken_Ident, // word
michael@0 37 eCSSToken_Function, // word(
michael@0 38 eCSSToken_AtKeyword, // @word
michael@0 39 eCSSToken_ID, // #word
michael@0 40 eCSSToken_Hash, // #0word
michael@0 41
michael@0 42 // Numeric tokens. mNumber is the floating-point value of the
michael@0 43 // number, and mHasSign indicates whether there was an explicit sign
michael@0 44 // (+ or -) in front of the number. If mIntegerValid is true, the
michael@0 45 // number had the lexical form of an integer, and mInteger is its
michael@0 46 // integer value. Lexically integer values outside the range of a
michael@0 47 // 32-bit signed number are clamped to the maximum values; mNumber
michael@0 48 // will indicate a 'truer' value in that case. Percentage tokens
michael@0 49 // are always considered not to be integers, even if their numeric
michael@0 50 // value is integral (100% => mNumber = 1.0). For Dimension
michael@0 51 // tokens, mIdent holds the text of the unit.
michael@0 52 eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3
michael@0 53 eCSSToken_Dimension, // 24px 8.5in
michael@0 54 eCSSToken_Percentage, // 85% 1280.4%
michael@0 55
michael@0 56 // String-like tokens. In all cases, mIdent holds the text
michael@0 57 // belonging to the string, and mSymbol holds the delimiter
michael@0 58 // character, which may be ', ", or zero (only for unquoted URLs).
michael@0 59 // Bad_String and Bad_URL tokens are emitted when the closing
michael@0 60 // delimiter or parenthesis was missing.
michael@0 61 eCSSToken_String, // 'foo bar' "foo bar"
michael@0 62 eCSSToken_Bad_String, // 'foo bar
michael@0 63 eCSSToken_URL, // url(foobar) url("foo bar")
michael@0 64 eCSSToken_Bad_URL, // url(foo
michael@0 65
michael@0 66 // Any one-character symbol. mSymbol holds the character.
michael@0 67 eCSSToken_Symbol, // . ; { } ! *
michael@0 68
michael@0 69 // Match operators. These are single tokens rather than pairs of
michael@0 70 // Symbol tokens because css3-selectors forbids the presence of
michael@0 71 // comments between the two characters. No value fields are used;
michael@0 72 // the token type indicates which operator.
michael@0 73 eCSSToken_Includes, // ~=
michael@0 74 eCSSToken_Dashmatch, // |=
michael@0 75 eCSSToken_Beginsmatch, // ^=
michael@0 76 eCSSToken_Endsmatch, // $=
michael@0 77 eCSSToken_Containsmatch, // *=
michael@0 78
michael@0 79 // Unicode-range token: currently used only in @font-face.
michael@0 80 // The lexical rule for this token includes several forms that are
michael@0 81 // semantically invalid. Therefore, mIdent always holds the
michael@0 82 // complete original text of the token (so we can print it
michael@0 83 // accurately in diagnostics), and mIntegerValid is true iff the
michael@0 84 // token is semantically valid. In that case, mInteger holds the
michael@0 85 // lowest value included in the range, and mInteger2 holds the
michael@0 86 // highest value included in the range.
michael@0 87 eCSSToken_URange, // U+007e U+01?? U+2000-206F
michael@0 88
michael@0 89 // HTML comment delimiters, ignored as a unit when they appear at
michael@0 90 // the top level of a style sheet, for compatibility with websites
michael@0 91 // that were written to work in pre-CSS browsers. This token type
michael@0 92 // subsumes the css2.1 CDO and CDC tokens, which are always treated
michael@0 93 // the same by the parser. mIdent holds the text of the token, for
michael@0 94 // diagnostics.
michael@0 95 eCSSToken_HTMLComment, // <!-- -->
michael@0 96 };
michael@0 97
michael@0 98 // Classification of tokens used to determine if a "/**/" string must be
michael@0 99 // inserted if pasting token streams together when serializing. We include
michael@0 100 // values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
michael@0 101 // as css-syntax does not treat these as whole tokens, but we will still
michael@0 102 // need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
michael@0 103 // and between a '/' delim and a '*=' containsmatch.
michael@0 104 //
michael@0 105 // https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
michael@0 106 enum nsCSSTokenSerializationType { // enumerator values are implicit, assigned in declaration order starting at 0
michael@0 107 eCSSTokenSerialization_Nothing,
michael@0 108 eCSSTokenSerialization_Whitespace,
michael@0 109 eCSSTokenSerialization_AtKeyword_or_Hash,
michael@0 110 eCSSTokenSerialization_Number,
michael@0 111 eCSSTokenSerialization_Dimension,
michael@0 112 eCSSTokenSerialization_Percentage,
michael@0 113 eCSSTokenSerialization_URange,
michael@0 114 eCSSTokenSerialization_URL_or_BadURL,
michael@0 115 eCSSTokenSerialization_Function,
michael@0 116 eCSSTokenSerialization_Ident,
michael@0 117 eCSSTokenSerialization_CDC, // NOTE(review): presumably the '-->' delimiter only — confirm against the serializer
michael@0 118 eCSSTokenSerialization_DashMatch, // '|='
michael@0 119 eCSSTokenSerialization_ContainsMatch, // '*='
michael@0 120 eCSSTokenSerialization_Symbol_Hash, // '#'
michael@0 121 eCSSTokenSerialization_Symbol_At, // '@'
michael@0 122 eCSSTokenSerialization_Symbol_Dot_or_Plus, // '.', '+'
michael@0 123 eCSSTokenSerialization_Symbol_Minus, // '-'
michael@0 124 eCSSTokenSerialization_Symbol_OpenParen, // '('
michael@0 125 eCSSTokenSerialization_Symbol_Question, // '?'
michael@0 126 eCSSTokenSerialization_Symbol_Assorted, // '$', '^', '~'
michael@0 127 eCSSTokenSerialization_Symbol_Equals, // '='
michael@0 128 eCSSTokenSerialization_Symbol_Bar, // '|'
michael@0 129 eCSSTokenSerialization_Symbol_Slash, // '/'
michael@0 130 eCSSTokenSerialization_Symbol_Asterisk, // '*'
michael@0 131 eCSSTokenSerialization_Other // anything else
michael@0 132 };
michael@0 133
michael@0 134 // A single token returned from the scanner. mType is always
michael@0 135 // meaningful; comments above describe which other fields are
michael@0 136 // meaningful for which token types.
michael@0 137 struct nsCSSToken { // one scanned token: a type tag plus the value fields that type uses
michael@0 138 nsAutoString mIdent; // text payload: identifier, Dimension unit, string/URL body, or original URange/HTMLComment text
michael@0 139 float mNumber; // floating-point value of a numeric token
michael@0 140 int32_t mInteger; // integer value of a lexically-integer number, or lowest value of a valid URange
michael@0 141 int32_t mInteger2; // highest value of a valid URange
michael@0 142 nsCSSTokenType mType; // always meaningful; selects which other fields apply
michael@0 143 char16_t mSymbol; // the Symbol character, or the string/URL delimiter (', ", or zero)
michael@0 144 bool mIntegerValid; // number had integer form, or URange is semantically valid
michael@0 145 bool mHasSign; // numeric token had an explicit + or - in front
michael@0 146
michael@0 147 nsCSSToken() // default state: Whitespace type, numeric fields zero, flags false
michael@0 148 : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
michael@0 149 mSymbol('\0'), mIntegerValid(false), mHasSign(false)
michael@0 150 {}
michael@0 151
michael@0 152 bool IsSymbol(char16_t aSymbol) const { // true only for a Symbol token whose character equals aSymbol
michael@0 153 return mType == eCSSToken_Symbol && mSymbol == aSymbol;
michael@0 154 }
michael@0 155
michael@0 156 void AppendToString(nsString& aBuffer) const; // defined out of line; NOTE(review): presumably appends this token's text form to aBuffer — confirm
michael@0 157 };
michael@0 158
michael@0 159 // Represents an nsCSSScanner's saved position in the input buffer.
michael@0 160 class nsCSSScannerPosition { // snapshot of scanner position fields; filled in and consumed by nsCSSScanner
michael@0 161 friend class nsCSSScanner; // grants nsCSSScanner access to the private fields below
michael@0 162 public:
michael@0 163 nsCSSScannerPosition() : mInitialized(false) { } // only mInitialized is set here; the other fields have no initializer
michael@0 164
michael@0 165 uint32_t LineNumber() const { // const: only reads state, so it is usable through a const reference
michael@0 166 MOZ_ASSERT(mInitialized);
michael@0 167 return mLineNumber;
michael@0 168 }
michael@0 169
michael@0 170 uint32_t LineOffset() const { // const for the same reason as LineNumber()
michael@0 171 MOZ_ASSERT(mInitialized);
michael@0 172 return mLineOffset;
michael@0 173 }
michael@0 174
michael@0 175 private:
michael@0 176 uint32_t mOffset; // NOTE(review): these six fields share names with nsCSSScanner members; presumably copies of them — confirm in SavePosition
michael@0 177 uint32_t mLineNumber;
michael@0 178 uint32_t mLineOffset;
michael@0 179 uint32_t mTokenLineNumber;
michael@0 180 uint32_t mTokenLineOffset;
michael@0 181 uint32_t mTokenOffset;
michael@0 182 bool mInitialized; // false after construction; the accessors assert that it is true
michael@0 183 };
michael@0 184
michael@0 185 // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
michael@0 186 // compatible tokenization rules. Used internally by nsCSSParser;
michael@0 187 // not available for use by other code.
michael@0 188 class nsCSSScanner {
michael@0 189 public:
michael@0 190 // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
michael@0 191 // when the line number is unknown.
michael@0 192 nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
michael@0 193 ~nsCSSScanner();
michael@0 194
michael@0 195 void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) { // stores the raw pointer in mReporter
michael@0 196 mReporter = aReporter;
michael@0 197 }
michael@0 198 // Set whether or not we are processing SVG
michael@0 199 void SetSVGMode(bool aSVGMode) {
michael@0 200 mSVGMode = aSVGMode;
michael@0 201 }
michael@0 202 bool IsSVGMode() const {
michael@0 203 return mSVGMode;
michael@0 204 }
michael@0 205
michael@0 206 // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
michael@0 207 void ClearSeenBadToken() { mSeenBadToken = false; }
michael@0 208 bool SeenBadToken() const { return mSeenBadToken; }
michael@0 209
michael@0 210 // Reset or check whether a "var(" FUNCTION token has been seen.
michael@0 211 void ClearSeenVariableReference() { mSeenVariableReference = false; }
michael@0 212 bool SeenVariableReference() const { return mSeenVariableReference; }
michael@0 213
michael@0 214 // Get the 1-based line number of the last character of
michael@0 215 // the most recently processed token.
michael@0 216 uint32_t GetLineNumber() const { return mTokenLineNumber; }
michael@0 217
michael@0 218 // Get the 0-based column number of the first character of
michael@0 219 // the most recently processed token.
michael@0 220 uint32_t GetColumnNumber() const
michael@0 221 { return mTokenOffset - mTokenLineOffset; } // NOTE(review): presumably token start offset minus the offset where its line starts — confirm
michael@0 222
michael@0 223 // Get the text of the line containing the first character of
michael@0 224 // the most recently processed token.
michael@0 225 nsDependentSubstring GetCurrentLine() const;
michael@0 226
michael@0 227 // Get the next token. Return false on EOF. aTokenResult is filled
michael@0 228 // in with the data for the token. If aSkipWS is true, skip over
michael@0 229 // eCSSToken_Whitespace tokens rather than returning them.
michael@0 230 bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
michael@0 231
michael@0 232 // Get the body of an URL token (everything after the 'url(').
michael@0 233 // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
michael@0 234 // which, for historical reasons, must make additional function
michael@0 235 // tokens behave like url(). Please do not add new uses to the
michael@0 236 // parser.
michael@0 237 bool NextURL(nsCSSToken& aTokenResult);
michael@0 238
michael@0 239 // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
michael@0 240 // because "2n-1" is a single DIMENSION token, and "n-1" is a single
michael@0 241 // IDENT token, but the :nth() selector syntax wants to interpret
michael@0 242 // them the same as "2n -1" and "n -1" respectively. Please do not
michael@0 243 // add new uses to the parser.
michael@0 244 //
michael@0 245 // Note: this function may not be used to back up over a line boundary.
michael@0 246 void Backup(uint32_t n);
michael@0 247
michael@0 248 // Starts recording the input stream from the current position.
michael@0 249 void StartRecording();
michael@0 250
michael@0 251 // Abandons recording of the input stream.
michael@0 252 void StopRecording();
michael@0 253
michael@0 254 // Stops recording of the input stream and appends the recorded
michael@0 255 // input to aBuffer.
michael@0 256 void StopRecording(nsString& aBuffer);
michael@0 257
michael@0 258 // Returns the length of the current recording.
michael@0 259 uint32_t RecordingLength() const;
michael@0 260
michael@0 261 #ifdef DEBUG
michael@0 262 bool IsRecording() const; // declared only when DEBUG is defined
michael@0 263 #endif
michael@0 264
michael@0 265 // Stores the current scanner offset into the specified object.
michael@0 266 void SavePosition(nsCSSScannerPosition& aState);
michael@0 267
michael@0 268 // Resets the scanner offset to a position saved by SavePosition.
michael@0 269 void RestoreSavedPosition(const nsCSSScannerPosition& aState);
michael@0 270
michael@0 271 enum EOFCharacters { // every non-zero value below is a distinct single bit
michael@0 272 eEOFCharacters_None = 0x0000,
michael@0 273
michael@0 274 // to handle \<EOF> inside strings
michael@0 275 eEOFCharacters_DropBackslash = 0x0001,
michael@0 276
michael@0 277 // to handle \<EOF> outside strings
michael@0 278 eEOFCharacters_ReplacementChar = 0x0002,
michael@0 279
michael@0 280 // to close comments
michael@0 281 eEOFCharacters_Asterisk = 0x0004,
michael@0 282 eEOFCharacters_Slash = 0x0008,
michael@0 283
michael@0 284 // to close double-quoted strings
michael@0 285 eEOFCharacters_DoubleQuote = 0x0010,
michael@0 286
michael@0 287 // to close single-quoted strings
michael@0 288 eEOFCharacters_SingleQuote = 0x0020,
michael@0 289
michael@0 290 // to close URLs
michael@0 291 eEOFCharacters_CloseParen = 0x0040,
michael@0 292 };
michael@0 293
michael@0 294 // Appends to the specified string any characters the input stream needs to
michael@0 295 // make the last token not rely on special EOF handling behavior.
michael@0 296 //
michael@0 297 // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
michael@0 298 static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
michael@0 299 nsAString& aString);
michael@0 300
michael@0 301 EOFCharacters GetEOFCharacters() const {
michael@0 302 #ifdef DEBUG
michael@0 303 AssertEOFCharactersValid(mEOFCharacters); // validity check is compiled in only when DEBUG is defined
michael@0 304 #endif
michael@0 305 return mEOFCharacters;
michael@0 306 }
michael@0 307
michael@0 308 #ifdef DEBUG
michael@0 309 static void AssertEOFCharactersValid(uint32_t c); // declared only when DEBUG is defined
michael@0 310 #endif
michael@0 311
michael@0 312 protected:
michael@0 313 int32_t Peek(uint32_t n = 0); // NOTE(review): signed return while the buffer is char16_t; presumably a negative value signals EOF — confirm
michael@0 314 void Advance(uint32_t n = 1);
michael@0 315 void AdvanceLine();
michael@0 316
michael@0 317 void SkipWhitespace();
michael@0 318 void SkipComment();
michael@0 319
michael@0 320 bool GatherEscape(nsString& aOutput, bool aInString);
michael@0 321 bool GatherText(uint8_t aClass, nsString& aIdent);
michael@0 322
michael@0 323 bool ScanIdent(nsCSSToken& aResult);
michael@0 324 bool ScanAtKeyword(nsCSSToken& aResult);
michael@0 325 bool ScanHash(nsCSSToken& aResult);
michael@0 326 bool ScanNumber(nsCSSToken& aResult);
michael@0 327 bool ScanString(nsCSSToken& aResult);
michael@0 328 bool ScanURange(nsCSSToken& aResult);
michael@0 329
michael@0 330 void SetEOFCharacters(uint32_t aEOFCharacters);
michael@0 331 void AddEOFCharacters(uint32_t aEOFCharacters);
michael@0 332
michael@0 333 const char16_t *mBuffer; // NOTE(review): presumably points at the characters of the constructor's aBuffer — confirm lifetime in the .cpp
michael@0 334 uint32_t mOffset;
michael@0 335 uint32_t mCount;
michael@0 336
michael@0 337 uint32_t mLineNumber;
michael@0 338 uint32_t mLineOffset;
michael@0 339
michael@0 340 uint32_t mTokenLineNumber; // returned by GetLineNumber()
michael@0 341 uint32_t mTokenLineOffset; // subtracted from mTokenOffset by GetColumnNumber()
michael@0 342 uint32_t mTokenOffset;
michael@0 343
michael@0 344 uint32_t mRecordStartOffset;
michael@0 345 EOFCharacters mEOFCharacters; // returned by GetEOFCharacters()
michael@0 346
michael@0 347 mozilla::css::ErrorReporter *mReporter; // assigned by SetErrorReporter()
michael@0 348
michael@0 349 // True if we are in SVG mode; false in "normal" CSS
michael@0 350 bool mSVGMode;
michael@0 351 bool mRecording;
michael@0 352 bool mSeenBadToken; // cleared by ClearSeenBadToken(), read by SeenBadToken()
michael@0 353 bool mSeenVariableReference; // cleared by ClearSeenVariableReference(), read by SeenVariableReference()
michael@0 354 };
michael@0 355
michael@0 356 // Token for the grid-template-areas micro-syntax
michael@0 357 // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
michael@0 358 struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken { // no constructor is declared, so isTrash has no initial value of its own
michael@0 359 nsAutoString mName; // Empty for a null cell, non-empty for a named cell
michael@0 360 bool isTrash; // True for a trash token, mName is ignored in this case. (Name lacks the m prefix used by mName.)
michael@0 361 };
michael@0 362
michael@0 363 // Scanner for the grid-template-areas micro-syntax
michael@0 364 class nsCSSGridTemplateAreaScanner { // tokenizer over a single grid-template-areas string
michael@0 365 public:
michael@0 366 explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); // explicit: a string must not convert into a scanner implicitly
michael@0 367
michael@0 368 // Get the next token. Return false on EOF.
michael@0 369 // aTokenResult is filled in with the data for the token.
michael@0 370 bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
michael@0 371
michael@0 372 private:
michael@0 373 const char16_t *mBuffer; // NOTE(review): presumably points into the constructor's aBuffer, which would then have to outlive the scanner — confirm
michael@0 374 uint32_t mOffset;
michael@0 375 uint32_t mCount;
michael@0 376 };
michael@0 377
michael@0 378 #endif /* nsCSSScanner_h___ */

mercurial