layout/style/nsCSSScanner.h

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /* tokenization of CSS style sheets */
     8 #ifndef nsCSSScanner_h___
     9 #define nsCSSScanner_h___
    11 #include "nsString.h"
    // Forward declaration only: this header refers to ErrorReporter solely
    // through a pointer, so the full class definition is not needed here.
    namespace mozilla {
    namespace css {

    class ErrorReporter;

    } // namespace css
    } // namespace mozilla
    19 // Token types; in close but not perfect correspondence to the token
    20 // categorization in section 4.1.1 of CSS2.1.  (The deviations are all
    21 // the fault of css3-selectors, which has requirements that can only be
    22 // met by changing the generic tokenization.)  The comment on each line
    23 // illustrates the form of each identifier.
    enum nsCSSTokenType {
      // ---- White space --------------------------------------------------
      // A run of white space of any kind.  No value fields are used.
      // Comments do *not* count as white space: they separate tokens but
      // are not tokens themselves.
      eCSSToken_Whitespace,

      // ---- Identifier-like tokens ---------------------------------------
      // mIdent is the text of the identifier.  ID versus Hash: when the
      // text after the '#' would have been a valid Ident on its own, the
      // scanner produces an ID token; otherwise it produces a Hash token.
      // (css3-selectors requires this distinction.)
      eCSSToken_Ident,          // form: word
      eCSSToken_Function,       // form: word(
      eCSSToken_AtKeyword,      // form: @word
      eCSSToken_ID,             // form: #word
      eCSSToken_Hash,           // form: #0word

      // ---- Numeric tokens -----------------------------------------------
      // mNumber is the floating-point value; mHasSign records whether an
      // explicit sign (+ or -) preceded the number.  When mIntegerValid is
      // true the number had the lexical form of an integer and mInteger
      // is its integer value.  Lexically integer values outside the range
      // of a 32-bit signed number are clamped to the maximum values, and
      // mNumber then indicates a 'truer' value.  Percentage tokens are
      // never considered integers, even when their numeric value is
      // integral (100% => mNumber = 1.0).  For Dimension tokens, mIdent
      // holds the text of the unit.
      eCSSToken_Number,         // form: 1 -5 +2e3 3.14159 7.297352e-3
      eCSSToken_Dimension,      // form: 24px 8.5in
      eCSSToken_Percentage,     // form: 85% 1280.4%

      // ---- String-like tokens -------------------------------------------
      // mIdent always holds the text belonging to the string, and mSymbol
      // holds the delimiter character: ', ", or zero (zero only for
      // unquoted URLs).  The Bad_ variants are emitted when the closing
      // delimiter or parenthesis was missing.
      eCSSToken_String,         // form: 'foo bar' "foo bar"
      eCSSToken_Bad_String,     // form: 'foo bar
      eCSSToken_URL,            // form: url(foobar) url("foo bar")
      eCSSToken_Bad_URL,        // form: url(foo

      // ---- Single-character symbols -------------------------------------
      // mSymbol holds the character.
      eCSSToken_Symbol,         // form: . ; { } ! *

      // ---- Match operators ----------------------------------------------
      // Each is one token rather than a pair of Symbol tokens, because
      // css3-selectors forbids comments between the two characters.  No
      // value fields are used; the token type itself names the operator.
      eCSSToken_Includes,       // form: ~=
      eCSSToken_Dashmatch,      // form: |=
      eCSSToken_Beginsmatch,    // form: ^=
      eCSSToken_Endsmatch,      // form: $=
      eCSSToken_Containsmatch,  // form: *=

      // ---- Unicode-range ------------------------------------------------
      // Currently used only in @font-face.  The lexical rule admits
      // several forms that are semantically invalid, so mIdent always
      // holds the complete original text of the token (for accurate
      // diagnostics) and mIntegerValid is true iff the token is
      // semantically valid.  In that case mInteger is the lowest value in
      // the range and mInteger2 the highest.
      eCSSToken_URange,         // form: U+007e U+01?? U+2000-206F

      // ---- HTML comment delimiters --------------------------------------
      // Ignored as a unit when they appear at the top level of a style
      // sheet, for compatibility with websites written for compatibility
      // with pre-CSS browsers.  Subsumes the css2.1 CDO and CDC tokens,
      // which the parser always treats alike.  mIdent holds the text of
      // the token, for diagnostics.
      eCSSToken_HTMLComment,    // form: <!-- -->
    };
    98 // Classification of tokens used to determine if a "/**/" string must be
    99 // inserted if pasting token streams together when serializing.  We include
   100 // values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
   101 // as css-syntax does not treat these as whole tokens, but we will still
   102 // need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
   103 // and between a '/' delim and a '*=' containsmatch.
   104 //
   105 // https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
   enum nsCSSTokenSerializationType {
     // Classes that are not single-character symbols.
     eCSSTokenSerialization_Nothing,
     eCSSTokenSerialization_Whitespace,
     eCSSTokenSerialization_AtKeyword_or_Hash,
     eCSSTokenSerialization_Number,
     eCSSTokenSerialization_Dimension,
     eCSSTokenSerialization_Percentage,
     eCSSTokenSerialization_URange,
     eCSSTokenSerialization_URL_or_BadURL,
     eCSSTokenSerialization_Function,
     eCSSTokenSerialization_Ident,
     eCSSTokenSerialization_CDC,
     eCSSTokenSerialization_DashMatch,
     eCSSTokenSerialization_ContainsMatch,

     // Symbol classes; the trailing comment lists the character(s) each
     // class stands for.
     eCSSTokenSerialization_Symbol_Hash,         // character: '#'
     eCSSTokenSerialization_Symbol_At,           // character: '@'
     eCSSTokenSerialization_Symbol_Dot_or_Plus,  // characters: '.', '+'
     eCSSTokenSerialization_Symbol_Minus,        // character: '-'
     eCSSTokenSerialization_Symbol_OpenParen,    // character: '('
     eCSSTokenSerialization_Symbol_Question,     // character: '?'
     eCSSTokenSerialization_Symbol_Assorted,     // characters: '$', '^', '~'
     eCSSTokenSerialization_Symbol_Equals,       // character: '='
     eCSSTokenSerialization_Symbol_Bar,          // character: '|'
     eCSSTokenSerialization_Symbol_Slash,        // character: '/'
     eCSSTokenSerialization_Symbol_Asterisk,     // character: '*'

     // Catch-all for anything not listed above.
     eCSSTokenSerialization_Other
   };
   134 // A single token returned from the scanner.  mType is always
   135 // meaningful; comments above describe which other fields are
   136 // meaningful for which token types.
   137 struct nsCSSToken {
   138   nsAutoString    mIdent;
   139   float           mNumber;
   140   int32_t         mInteger;
   141   int32_t         mInteger2;
   142   nsCSSTokenType  mType;
   143   char16_t       mSymbol;
   144   bool            mIntegerValid;
   145   bool            mHasSign;
   147   nsCSSToken()
   148     : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
   149       mSymbol('\0'), mIntegerValid(false), mHasSign(false)
   150   {}
   152   bool IsSymbol(char16_t aSymbol) const {
   153     return mType == eCSSToken_Symbol && mSymbol == aSymbol;
   154   }
   156   void AppendToString(nsString& aBuffer) const;
   157 };
   159 // Represents an nsCSSScanner's saved position in the input buffer.
   160 class nsCSSScannerPosition {
   161   friend class nsCSSScanner;
   162 public:
   163   nsCSSScannerPosition() : mInitialized(false) { }
   165   uint32_t LineNumber() {
   166     MOZ_ASSERT(mInitialized);
   167     return mLineNumber;
   168   }
   170   uint32_t LineOffset() {
   171     MOZ_ASSERT(mInitialized);
   172     return mLineOffset;
   173   }
   175 private:
   176   uint32_t mOffset;
   177   uint32_t mLineNumber;
   178   uint32_t mLineOffset;
   179   uint32_t mTokenLineNumber;
   180   uint32_t mTokenLineOffset;
   181   uint32_t mTokenOffset;
   182   bool mInitialized;
   183 };
   185 // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
   186 // compatible tokenization rules.  Used internally by nsCSSParser;
   187 // not available for use by other code.
   188 class nsCSSScanner {
   189   public:
   190   // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
   191   // when the line number is unknown.
   192   nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
   193   ~nsCSSScanner();
   195   void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
   196     mReporter = aReporter;
   197   }
   198   // Set whether or not we are processing SVG
   199   void SetSVGMode(bool aSVGMode) {
   200     mSVGMode = aSVGMode;
   201   }
   202   bool IsSVGMode() const {
   203     return mSVGMode;
   204   }
   206   // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
   207   void ClearSeenBadToken() { mSeenBadToken = false; }
   208   bool SeenBadToken() const { return mSeenBadToken; }
   210   // Reset or check whether a "var(" FUNCTION token has been seen.
   211   void ClearSeenVariableReference() { mSeenVariableReference = false; }
   212   bool SeenVariableReference() const { return mSeenVariableReference; }
   214   // Get the 1-based line number of the last character of
   215   // the most recently processed token.
   216   uint32_t GetLineNumber() const { return mTokenLineNumber; }
   218   // Get the 0-based column number of the first character of
   219   // the most recently processed token.
   220   uint32_t GetColumnNumber() const
   221   { return mTokenOffset - mTokenLineOffset; }
   223   // Get the text of the line containing the first character of
   224   // the most recently processed token.
   225   nsDependentSubstring GetCurrentLine() const;
   227   // Get the next token.  Return false on EOF.  aTokenResult is filled
   228   // in with the data for the token.  If aSkipWS is true, skip over
   229   // eCSSToken_Whitespace tokens rather than returning them.
   230   bool Next(nsCSSToken& aTokenResult, bool aSkipWS);
   232   // Get the body of an URL token (everything after the 'url(').
   233   // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
   234   // which, for historical reasons, must make additional function
   235   // tokens behave like url().  Please do not add new uses to the
   236   // parser.
   237   bool NextURL(nsCSSToken& aTokenResult);
   239   // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
   240   // because "2n-1" is a single DIMENSION token, and "n-1" is a single
   241   // IDENT token, but the :nth() selector syntax wants to interpret
   242   // them the same as "2n -1" and "n -1" respectively.  Please do not
   243   // add new uses to the parser.
   244   //
   245   // Note: this function may not be used to back up over a line boundary.
   246   void Backup(uint32_t n);
   248   // Starts recording the input stream from the current position.
   249   void StartRecording();
   251   // Abandons recording of the input stream.
   252   void StopRecording();
   254   // Stops recording of the input stream and appends the recorded
   255   // input to aBuffer.
   256   void StopRecording(nsString& aBuffer);
   258   // Returns the length of the current recording.
   259   uint32_t RecordingLength() const;
   261 #ifdef DEBUG
   262   bool IsRecording() const;
   263 #endif
   265   // Stores the current scanner offset into the specified object.
   266   void SavePosition(nsCSSScannerPosition& aState);
   268   // Resets the scanner offset to a position saved by SavePosition.
   269   void RestoreSavedPosition(const nsCSSScannerPosition& aState);
   271   enum EOFCharacters {
   272     eEOFCharacters_None =                    0x0000,
   274     // to handle \<EOF> inside strings
   275     eEOFCharacters_DropBackslash =           0x0001,
   277     // to handle \<EOF> outside strings
   278     eEOFCharacters_ReplacementChar =         0x0002,
   280     // to close comments
   281     eEOFCharacters_Asterisk =                0x0004,
   282     eEOFCharacters_Slash =                   0x0008,
   284     // to close double-quoted strings
   285     eEOFCharacters_DoubleQuote =             0x0010,
   287     // to close single-quoted strings
   288     eEOFCharacters_SingleQuote =             0x0020,
   290     // to close URLs
   291     eEOFCharacters_CloseParen =              0x0040,
   292   };
   294   // Appends any characters to the specified string the input stream to make the
   295   // last token not rely on special EOF handling behavior.
   296   //
   297   // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
   298   static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
   299                                          nsAString& aString);
   301   EOFCharacters GetEOFCharacters() const {
   302 #ifdef DEBUG
   303     AssertEOFCharactersValid(mEOFCharacters);
   304 #endif
   305     return mEOFCharacters;
   306   }
   308 #ifdef DEBUG
   309   static void AssertEOFCharactersValid(uint32_t c);
   310 #endif
   312 protected:
   313   int32_t Peek(uint32_t n = 0);
   314   void Advance(uint32_t n = 1);
   315   void AdvanceLine();
   317   void SkipWhitespace();
   318   void SkipComment();
   320   bool GatherEscape(nsString& aOutput, bool aInString);
   321   bool GatherText(uint8_t aClass, nsString& aIdent);
   323   bool ScanIdent(nsCSSToken& aResult);
   324   bool ScanAtKeyword(nsCSSToken& aResult);
   325   bool ScanHash(nsCSSToken& aResult);
   326   bool ScanNumber(nsCSSToken& aResult);
   327   bool ScanString(nsCSSToken& aResult);
   328   bool ScanURange(nsCSSToken& aResult);
   330   void SetEOFCharacters(uint32_t aEOFCharacters);
   331   void AddEOFCharacters(uint32_t aEOFCharacters);
   333   const char16_t *mBuffer;
   334   uint32_t mOffset;
   335   uint32_t mCount;
   337   uint32_t mLineNumber;
   338   uint32_t mLineOffset;
   340   uint32_t mTokenLineNumber;
   341   uint32_t mTokenLineOffset;
   342   uint32_t mTokenOffset;
   344   uint32_t mRecordStartOffset;
   345   EOFCharacters mEOFCharacters;
   347   mozilla::css::ErrorReporter *mReporter;
   349   // True if we are in SVG mode; false in "normal" CSS
   350   bool mSVGMode;
   351   bool mRecording;
   352   bool mSeenBadToken;
   353   bool mSeenVariableReference;
   354 };
   356 // Token for the grid-template-areas micro-syntax
   357 // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
   358 struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
   359   nsAutoString mName;  // Empty for a null cell, non-empty for a named cell
   360   bool isTrash;  // True for a trash token, mName is ignored in this case.
   361 };
   363 // Scanner for the grid-template-areas micro-syntax
   364 class nsCSSGridTemplateAreaScanner {
   365 public:
   366   nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);
   368   // Get the next token.  Return false on EOF.
   369   // aTokenResult is filled in with the data for the token.
   370   bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
   372 private:
   373   const char16_t *mBuffer;
   374   uint32_t mOffset;
   375   uint32_t mCount;
   376 };
   378 #endif /* nsCSSScanner_h___ */

mercurial