layout/style/nsCSSScanner.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/layout/style/nsCSSScanner.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,378 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/* tokenization of CSS style sheets */
    1.10 +
    1.11 +#ifndef nsCSSScanner_h___
    1.12 +#define nsCSSScanner_h___
    1.13 +
    1.14 +#include "nsString.h"
    1.15 +
    // Forward declaration only: this header refers to the error reporter
    // solely through a pointer, so the full class definition is not
    // required here.
    namespace mozilla {
    namespace css {

    class ErrorReporter;

    } // namespace css
    } // namespace mozilla
    1.21 +
    // Kinds of token the scanner can produce.  These follow the token
    // categories of CSS2.1 section 4.1.1 closely but not exactly; every
    // deviation exists because css3-selectors has requirements that can
    // only be satisfied by altering the generic tokenization.  The
    // trailing comment on each enumerator shows what a token of that
    // kind looks like in source text.
    enum nsCSSTokenType {
      // ---- White space --------------------------------------------------
      // Any kind of white space.  Carries no value fields.  Comments are
      // *not* white space: they separate tokens without being tokens
      // themselves.
      eCSSToken_Whitespace,

      // ---- Identifier-like tokens ---------------------------------------
      // mIdent holds the identifier text.  ID versus Hash: when the text
      // following the '#' would have been a valid Ident on its own, the
      // scanner emits ID; in every other case it emits Hash.
      // css3-selectors requires this distinction.
      eCSSToken_Ident,          // word
      eCSSToken_Function,       // word(
      eCSSToken_AtKeyword,      // @word
      eCSSToken_ID,             // #word
      eCSSToken_Hash,           // #0word

      // ---- Numeric tokens -----------------------------------------------
      // mNumber is the floating-point value, and mHasSign records whether
      // an explicit '+' or '-' preceded the number.  When mIntegerValid is
      // true the number was lexically an integer and mInteger is its
      // value; lexical integers that do not fit in a signed 32-bit number
      // are clamped to the maximum values, with mNumber giving a 'truer'
      // value in that situation.  A Percentage is never treated as an
      // integer, even when its numeric value is integral
      // (100% => mNumber = 1.0).  For a Dimension, mIdent holds the unit
      // text.
      eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
      eCSSToken_Dimension,      // 24px 8.5in
      eCSSToken_Percentage,     // 85% 1280.4%

      // ---- String-like tokens -------------------------------------------
      // mIdent always holds the text belonging to the string, and mSymbol
      // the delimiter character: ', ", or zero (zero only for unquoted
      // URLs).  The Bad_ variants are produced when the closing delimiter
      // or parenthesis is missing.
      eCSSToken_String,         // 'foo bar' "foo bar"
      eCSSToken_Bad_String,     // 'foo bar
      eCSSToken_URL,            // url(foobar) url("foo bar")
      eCSSToken_Bad_URL,        // url(foo

      // ---- Single-character symbols -------------------------------------
      // mSymbol holds the character.
      eCSSToken_Symbol,         // . ; { } ! *

      // ---- Match operators ----------------------------------------------
      // Each is one token, not a pair of Symbol tokens, because
      // css3-selectors does not permit a comment between the two
      // characters.  No value fields are used; the token type itself
      // identifies the operator.
      eCSSToken_Includes,       // ~=
      eCSSToken_Dashmatch,      // |=
      eCSSToken_Beginsmatch,    // ^=
      eCSSToken_Endsmatch,      // $=
      eCSSToken_Containsmatch,  // *=

      // ---- Unicode range ------------------------------------------------
      // Currently used only in @font-face.  The lexical rule accepts
      // several forms that are semantically invalid, so mIdent always
      // keeps the complete original text (allowing it to be printed
      // accurately in diagnostics) and mIntegerValid is true iff the token
      // is semantically valid.  When it is, mInteger is the lowest value
      // in the range and mInteger2 the highest.
      eCSSToken_URange,         // U+007e U+01?? U+2000-206F

      // ---- HTML comment delimiters --------------------------------------
      // Ignored as a unit at the top level of a style sheet, for
      // compatibility with websites written for compatibility with
      // pre-CSS browsers.  This one type covers both the css2.1 CDO and
      // CDC tokens, which the parser always treats identically.  mIdent
      // holds the token text, for diagnostics.
      eCSSToken_HTMLComment     // <!-- -->
    };
   1.100 +
   // Token classes used when serializing: they decide whether a "/**/"
   // string has to be inserted between two token streams that are being
   // pasted together.  Values corresponding to eCSSToken_Dashmatch and
   // eCSSToken_Containsmatch are included because, although css-syntax
   // does not treat those as whole tokens, a "/**/" string is still
   // needed between a '|' delim and a '|=' dashmatch, and between a '/'
   // delim and a '*=' containsmatch.
   //
   // https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
   enum nsCSSTokenSerializationType {
     // Non-symbol classes.
     eCSSTokenSerialization_Nothing,
     eCSSTokenSerialization_Whitespace,
     eCSSTokenSerialization_AtKeyword_or_Hash,
     eCSSTokenSerialization_Number,
     eCSSTokenSerialization_Dimension,
     eCSSTokenSerialization_Percentage,
     eCSSTokenSerialization_URange,
     eCSSTokenSerialization_URL_or_BadURL,
     eCSSTokenSerialization_Function,
     eCSSTokenSerialization_Ident,
     eCSSTokenSerialization_CDC,
     eCSSTokenSerialization_DashMatch,
     eCSSTokenSerialization_ContainsMatch,

     // Symbol classes; the trailing comment lists the characters covered.
     eCSSTokenSerialization_Symbol_Hash,         // '#'
     eCSSTokenSerialization_Symbol_At,           // '@'
     eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
     eCSSTokenSerialization_Symbol_Minus,        // '-'
     eCSSTokenSerialization_Symbol_OpenParen,    // '('
     eCSSTokenSerialization_Symbol_Question,     // '?'
     eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
     eCSSTokenSerialization_Symbol_Equals,       // '='
     eCSSTokenSerialization_Symbol_Bar,          // '|'
     eCSSTokenSerialization_Symbol_Slash,        // '/'
     eCSSTokenSerialization_Symbol_Asterisk,     // '*'

     // Everything not covered by one of the classes above.
     eCSSTokenSerialization_Other
   };
   1.136 +
   1.137 +// A single token returned from the scanner.  mType is always
   1.138 +// meaningful; comments above describe which other fields are
   1.139 +// meaningful for which token types.
   1.140 +struct nsCSSToken {
   1.141 +  nsAutoString    mIdent;
   1.142 +  float           mNumber;
   1.143 +  int32_t         mInteger;
   1.144 +  int32_t         mInteger2;
   1.145 +  nsCSSTokenType  mType;
   1.146 +  char16_t       mSymbol;
   1.147 +  bool            mIntegerValid;
   1.148 +  bool            mHasSign;
   1.149 +
   1.150 +  nsCSSToken()
   1.151 +    : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
   1.152 +      mSymbol('\0'), mIntegerValid(false), mHasSign(false)
   1.153 +  {}
   1.154 +
   1.155 +  bool IsSymbol(char16_t aSymbol) const {
   1.156 +    return mType == eCSSToken_Symbol && mSymbol == aSymbol;
   1.157 +  }
   1.158 +
   1.159 +  void AppendToString(nsString& aBuffer) const;
   1.160 +};
   1.161 +
   1.162 +// Represents an nsCSSScanner's saved position in the input buffer.
   1.163 +class nsCSSScannerPosition {
   1.164 +  friend class nsCSSScanner;
   1.165 +public:
   1.166 +  nsCSSScannerPosition() : mInitialized(false) { }
   1.167 +
   1.168 +  uint32_t LineNumber() {
   1.169 +    MOZ_ASSERT(mInitialized);
   1.170 +    return mLineNumber;
   1.171 +  }
   1.172 +
   1.173 +  uint32_t LineOffset() {
   1.174 +    MOZ_ASSERT(mInitialized);
   1.175 +    return mLineOffset;
   1.176 +  }
   1.177 +
   1.178 +private:
   1.179 +  uint32_t mOffset;
   1.180 +  uint32_t mLineNumber;
   1.181 +  uint32_t mLineOffset;
   1.182 +  uint32_t mTokenLineNumber;
   1.183 +  uint32_t mTokenLineOffset;
   1.184 +  uint32_t mTokenOffset;
   1.185 +  bool mInitialized;
   1.186 +};
   1.187 +
   1.188 +// nsCSSScanner tokenizes an input stream using the CSS2.1 forward
   1.189 +// compatible tokenization rules.  Used internally by nsCSSParser;
   1.190 +// not available for use by other code.
   1.191 +class nsCSSScanner {
   1.192 +  public:
   1.193 +  // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
   1.194 +  // when the line number is unknown.
   1.195 +  nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
   1.196 +  ~nsCSSScanner();
   1.197 +
   1.198 +  void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
   1.199 +    mReporter = aReporter;
   1.200 +  }
   1.201 +  // Set whether or not we are processing SVG
   1.202 +  void SetSVGMode(bool aSVGMode) {
   1.203 +    mSVGMode = aSVGMode;
   1.204 +  }
   1.205 +  bool IsSVGMode() const {
   1.206 +    return mSVGMode;
   1.207 +  }
   1.208 +
   1.209 +  // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
   1.210 +  void ClearSeenBadToken() { mSeenBadToken = false; }
   1.211 +  bool SeenBadToken() const { return mSeenBadToken; }
   1.212 +
   1.213 +  // Reset or check whether a "var(" FUNCTION token has been seen.
   1.214 +  void ClearSeenVariableReference() { mSeenVariableReference = false; }
   1.215 +  bool SeenVariableReference() const { return mSeenVariableReference; }
   1.216 +
   1.217 +  // Get the 1-based line number of the last character of
   1.218 +  // the most recently processed token.
   1.219 +  uint32_t GetLineNumber() const { return mTokenLineNumber; }
   1.220 +
   1.221 +  // Get the 0-based column number of the first character of
   1.222 +  // the most recently processed token.
   1.223 +  uint32_t GetColumnNumber() const
   1.224 +  { return mTokenOffset - mTokenLineOffset; }
   1.225 +
   1.226 +  // Get the text of the line containing the first character of
   1.227 +  // the most recently processed token.
   1.228 +  nsDependentSubstring GetCurrentLine() const;
   1.229 +
   1.230 +  // Get the next token.  Return false on EOF.  aTokenResult is filled
   1.231 +  // in with the data for the token.  If aSkipWS is true, skip over
   1.232 +  // eCSSToken_Whitespace tokens rather than returning them.
   1.233 +  bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
   1.234 +
   1.235 +  // Get the body of an URL token (everything after the 'url(').
   1.236 +  // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
   1.237 +  // which, for historical reasons, must make additional function
   1.238 +  // tokens behave like url().  Please do not add new uses to the
   1.239 +  // parser.
   1.240 +  bool NextURL(nsCSSToken& aTokenResult);
   1.241 +
   1.242 +  // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
   1.243 +  // because "2n-1" is a single DIMENSION token, and "n-1" is a single
   1.244 +  // IDENT token, but the :nth() selector syntax wants to interpret
   1.245 +  // them the same as "2n -1" and "n -1" respectively.  Please do not
   1.246 +  // add new uses to the parser.
   1.247 +  //
   1.248 +  // Note: this function may not be used to back up over a line boundary.
   1.249 +  void Backup(uint32_t n);
   1.250 +
   1.251 +  // Starts recording the input stream from the current position.
   1.252 +  void StartRecording();
   1.253 +
   1.254 +  // Abandons recording of the input stream.
   1.255 +  void StopRecording();
   1.256 +
   1.257 +  // Stops recording of the input stream and appends the recorded
   1.258 +  // input to aBuffer.
   1.259 +  void StopRecording(nsString& aBuffer);
   1.260 +
   1.261 +  // Returns the length of the current recording.
   1.262 +  uint32_t RecordingLength() const;
   1.263 +
   1.264 +#ifdef DEBUG
   1.265 +  bool IsRecording() const;
   1.266 +#endif
   1.267 +
   1.268 +  // Stores the current scanner offset into the specified object.
   1.269 +  void SavePosition(nsCSSScannerPosition& aState);
   1.270 +
   1.271 +  // Resets the scanner offset to a position saved by SavePosition.
   1.272 +  void RestoreSavedPosition(const nsCSSScannerPosition& aState);
   1.273 +
   1.274 +  enum EOFCharacters {
   1.275 +    eEOFCharacters_None =                    0x0000,
   1.276 +
   1.277 +    // to handle \<EOF> inside strings
   1.278 +    eEOFCharacters_DropBackslash =           0x0001,
   1.279 +
   1.280 +    // to handle \<EOF> outside strings
   1.281 +    eEOFCharacters_ReplacementChar =         0x0002,
   1.282 +
   1.283 +    // to close comments
   1.284 +    eEOFCharacters_Asterisk =                0x0004,
   1.285 +    eEOFCharacters_Slash =                   0x0008,
   1.286 +
   1.287 +    // to close double-quoted strings
   1.288 +    eEOFCharacters_DoubleQuote =             0x0010,
   1.289 +
   1.290 +    // to close single-quoted strings
   1.291 +    eEOFCharacters_SingleQuote =             0x0020,
   1.292 +
   1.293 +    // to close URLs
   1.294 +    eEOFCharacters_CloseParen =              0x0040,
   1.295 +  };
   1.296 +
   1.297 +  // Appends any characters to the specified string the input stream to make the
   1.298 +  // last token not rely on special EOF handling behavior.
   1.299 +  //
   1.300 +  // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
   1.301 +  static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
   1.302 +                                         nsAString& aString);
   1.303 +
   1.304 +  EOFCharacters GetEOFCharacters() const {
   1.305 +#ifdef DEBUG
   1.306 +    AssertEOFCharactersValid(mEOFCharacters);
   1.307 +#endif
   1.308 +    return mEOFCharacters;
   1.309 +  }
   1.310 +
   1.311 +#ifdef DEBUG
   1.312 +  static void AssertEOFCharactersValid(uint32_t c);
   1.313 +#endif
   1.314 +
   1.315 +protected:
   1.316 +  int32_t Peek(uint32_t n = 0);
   1.317 +  void Advance(uint32_t n = 1);
   1.318 +  void AdvanceLine();
   1.319 +
   1.320 +  void SkipWhitespace();
   1.321 +  void SkipComment();
   1.322 +
   1.323 +  bool GatherEscape(nsString& aOutput, bool aInString);
   1.324 +  bool GatherText(uint8_t aClass, nsString& aIdent);
   1.325 +
   1.326 +  bool ScanIdent(nsCSSToken& aResult);
   1.327 +  bool ScanAtKeyword(nsCSSToken& aResult);
   1.328 +  bool ScanHash(nsCSSToken& aResult);
   1.329 +  bool ScanNumber(nsCSSToken& aResult);
   1.330 +  bool ScanString(nsCSSToken& aResult);
   1.331 +  bool ScanURange(nsCSSToken& aResult);
   1.332 +
   1.333 +  void SetEOFCharacters(uint32_t aEOFCharacters);
   1.334 +  void AddEOFCharacters(uint32_t aEOFCharacters);
   1.335 +
   1.336 +  const char16_t *mBuffer;
   1.337 +  uint32_t mOffset;
   1.338 +  uint32_t mCount;
   1.339 +
   1.340 +  uint32_t mLineNumber;
   1.341 +  uint32_t mLineOffset;
   1.342 +
   1.343 +  uint32_t mTokenLineNumber;
   1.344 +  uint32_t mTokenLineOffset;
   1.345 +  uint32_t mTokenOffset;
   1.346 +
   1.347 +  uint32_t mRecordStartOffset;
   1.348 +  EOFCharacters mEOFCharacters;
   1.349 +
   1.350 +  mozilla::css::ErrorReporter *mReporter;
   1.351 +
   1.352 +  // True if we are in SVG mode; false in "normal" CSS
   1.353 +  bool mSVGMode;
   1.354 +  bool mRecording;
   1.355 +  bool mSeenBadToken;
   1.356 +  bool mSeenVariableReference;
   1.357 +};
   1.358 +
   1.359 +// Token for the grid-template-areas micro-syntax
   1.360 +// http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
   1.361 +struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
   1.362 +  nsAutoString mName;  // Empty for a null cell, non-empty for a named cell
   1.363 +  bool isTrash;  // True for a trash token, mName is ignored in this case.
   1.364 +};
   1.365 +
   1.366 +// Scanner for the grid-template-areas micro-syntax
   1.367 +class nsCSSGridTemplateAreaScanner {
   1.368 +public:
   1.369 +  nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
   1.370 +
   1.371 +  // Get the next token.  Return false on EOF.
   1.372 +  // aTokenResult is filled in with the data for the token.
   1.373 +  bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
   1.374 +
   1.375 +private:
   1.376 +  const char16_t *mBuffer;
   1.377 +  uint32_t mOffset;
   1.378 +  uint32_t mCount;
   1.379 +};
   1.380 +
   1.381 +#endif /* nsCSSScanner_h___ */

mercurial