michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* tokenization of CSS style sheets */ michael@0: michael@0: #ifndef nsCSSScanner_h___ michael@0: #define nsCSSScanner_h___ michael@0: michael@0: #include "nsString.h" michael@0: michael@0: namespace mozilla { michael@0: namespace css { michael@0: class ErrorReporter; michael@0: } michael@0: } michael@0: michael@0: // Token types; in close but not perfect correspondence to the token michael@0: // categorization in section 4.1.1 of CSS2.1. (The deviations are all michael@0: // the fault of css3-selectors, which has requirements that can only be michael@0: // met by changing the generic tokenization.) The comment on each line michael@0: // illustrates the form of each identifier. michael@0: michael@0: enum nsCSSTokenType { michael@0: // White space of any kind. No value fields are used. Note that michael@0: // comments do *not* count as white space; comments separate tokens michael@0: // but are not themselves tokens. michael@0: eCSSToken_Whitespace, // michael@0: michael@0: // Identifier-like tokens. mIdent is the text of the identifier. michael@0: // The difference between ID and Hash is: if the text after the # michael@0: // would have been a valid Ident if the # hadn't been there, the michael@0: // scanner produces an ID token. Otherwise it produces a Hash token. michael@0: // (This distinction is required by css3-selectors.) michael@0: eCSSToken_Ident, // word michael@0: eCSSToken_Function, // word( michael@0: eCSSToken_AtKeyword, // @word michael@0: eCSSToken_ID, // #word michael@0: eCSSToken_Hash, // #0word michael@0: michael@0: // Numeric tokens. 
mNumber is the floating-point value of the michael@0: // number, and mHasSign indicates whether there was an explicit sign michael@0: // (+ or -) in front of the number. If mIntegerValid is true, the michael@0: // number had the lexical form of an integer, and mInteger is its michael@0: // integer value. Lexically integer values outside the range of a michael@0: // 32-bit signed number are clamped to the maximum values; mNumber michael@0: // will indicate a 'truer' value in that case. Percentage tokens michael@0: // are always considered not to be integers, even if their numeric michael@0: // value is integral (100% => mNumber = 1.0). For Dimension michael@0: // tokens, mIdent holds the text of the unit. michael@0: eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3 michael@0: eCSSToken_Dimension, // 24px 8.5in michael@0: eCSSToken_Percentage, // 85% 1280.4% michael@0: michael@0: // String-like tokens. In all cases, mIdent holds the text michael@0: // belonging to the string, and mSymbol holds the delimiter michael@0: // character, which may be ', ", or zero (only for unquoted URLs). michael@0: // Bad_String and Bad_URL tokens are emitted when the closing michael@0: // delimiter or parenthesis was missing. michael@0: eCSSToken_String, // 'foo bar' "foo bar" michael@0: eCSSToken_Bad_String, // 'foo bar michael@0: eCSSToken_URL, // url(foobar) url("foo bar") michael@0: eCSSToken_Bad_URL, // url(foo michael@0: michael@0: // Any one-character symbol. mSymbol holds the character. michael@0: eCSSToken_Symbol, // . ; { } ! * michael@0: michael@0: // Match operators. These are single tokens rather than pairs of michael@0: // Symbol tokens because css3-selectors forbids the presence of michael@0: // comments between the two characters. No value fields are used; michael@0: // the token type indicates which operator. 
michael@0: eCSSToken_Includes, // ~= michael@0: eCSSToken_Dashmatch, // |= michael@0: eCSSToken_Beginsmatch, // ^= michael@0: eCSSToken_Endsmatch, // $= michael@0: eCSSToken_Containsmatch, // *= michael@0: michael@0: // Unicode-range token: currently used only in @font-face. michael@0: // The lexical rule for this token includes several forms that are michael@0: // semantically invalid. Therefore, mIdent always holds the michael@0: // complete original text of the token (so we can print it michael@0: // accurately in diagnostics), and mIntegerValid is true iff the michael@0: // token is semantically valid. In that case, mInteger holds the michael@0: // lowest value included in the range, and mInteger2 holds the michael@0: // highest value included in the range. michael@0: eCSSToken_URange, // U+007e U+01?? U+2000-206F michael@0: michael@0: // HTML comment delimiters, ignored as a unit when they appear at michael@0: // the top level of a style sheet, for compatibility with websites michael@0: // written for compatibility with pre-CSS browsers. This token type michael@0: // subsumes the css2.1 CDO and CDC tokens, which are always treated michael@0: // the same by the parser. mIdent holds the text of the token, for michael@0: // diagnostics. michael@0: eCSSToken_HTMLComment, // michael@0: }; michael@0: michael@0: // Classification of tokens used to determine if a "/**/" string must be michael@0: // inserted if pasting token streams together when serializing. We include michael@0: // values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch, michael@0: // as css-syntax does not treat these as whole tokens, but we will still michael@0: // need to insert a "/**/" string between a '|' delim and a '|=' dashmatch michael@0: // and between a '/' delim and a '*=' containsmatch. 
//
// https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
//
// The enumerators are declared without explicit values, so each one's
// numeric value is its position in this list, starting from zero.
enum nsCSSTokenSerializationType {
  eCSSTokenSerialization_Nothing,
  eCSSTokenSerialization_Whitespace,
  eCSSTokenSerialization_AtKeyword_or_Hash,
  eCSSTokenSerialization_Number,
  eCSSTokenSerialization_Dimension,
  eCSSTokenSerialization_Percentage,
  eCSSTokenSerialization_URange,
  eCSSTokenSerialization_URL_or_BadURL,
  eCSSTokenSerialization_Function,
  eCSSTokenSerialization_Ident,
  eCSSTokenSerialization_CDC,
  eCSSTokenSerialization_DashMatch,
  eCSSTokenSerialization_ContainsMatch,
  eCSSTokenSerialization_Symbol_Hash,         // '#'
  eCSSTokenSerialization_Symbol_At,           // '@'
  eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
  eCSSTokenSerialization_Symbol_Minus,        // '-'
  eCSSTokenSerialization_Symbol_OpenParen,    // '('
  eCSSTokenSerialization_Symbol_Question,     // '?'
  eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
  eCSSTokenSerialization_Symbol_Equals,       // '='
  eCSSTokenSerialization_Symbol_Bar,          // '|'
  eCSSTokenSerialization_Symbol_Slash,        // '/'
  eCSSTokenSerialization_Symbol_Asterisk,     // '*'
  eCSSTokenSerialization_Other                // anything else
};

// A single token returned from the scanner.  mType is always
// meaningful; comments above describe which other fields are
// meaningful for which token types.
michael@0: struct nsCSSToken { michael@0: nsAutoString mIdent; michael@0: float mNumber; michael@0: int32_t mInteger; michael@0: int32_t mInteger2; michael@0: nsCSSTokenType mType; michael@0: char16_t mSymbol; michael@0: bool mIntegerValid; michael@0: bool mHasSign; michael@0: michael@0: nsCSSToken() michael@0: : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace), michael@0: mSymbol('\0'), mIntegerValid(false), mHasSign(false) michael@0: {} michael@0: michael@0: bool IsSymbol(char16_t aSymbol) const { michael@0: return mType == eCSSToken_Symbol && mSymbol == aSymbol; michael@0: } michael@0: michael@0: void AppendToString(nsString& aBuffer) const; michael@0: }; michael@0: michael@0: // Represents an nsCSSScanner's saved position in the input buffer. michael@0: class nsCSSScannerPosition { michael@0: friend class nsCSSScanner; michael@0: public: michael@0: nsCSSScannerPosition() : mInitialized(false) { } michael@0: michael@0: uint32_t LineNumber() { michael@0: MOZ_ASSERT(mInitialized); michael@0: return mLineNumber; michael@0: } michael@0: michael@0: uint32_t LineOffset() { michael@0: MOZ_ASSERT(mInitialized); michael@0: return mLineOffset; michael@0: } michael@0: michael@0: private: michael@0: uint32_t mOffset; michael@0: uint32_t mLineNumber; michael@0: uint32_t mLineOffset; michael@0: uint32_t mTokenLineNumber; michael@0: uint32_t mTokenLineOffset; michael@0: uint32_t mTokenOffset; michael@0: bool mInitialized; michael@0: }; michael@0: michael@0: // nsCSSScanner tokenizes an input stream using the CSS2.1 forward michael@0: // compatible tokenization rules. Used internally by nsCSSParser; michael@0: // not available for use by other code. michael@0: class nsCSSScanner { michael@0: public: michael@0: // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0| michael@0: // when the line number is unknown. 
michael@0: nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber); michael@0: ~nsCSSScanner(); michael@0: michael@0: void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) { michael@0: mReporter = aReporter; michael@0: } michael@0: // Set whether or not we are processing SVG michael@0: void SetSVGMode(bool aSVGMode) { michael@0: mSVGMode = aSVGMode; michael@0: } michael@0: bool IsSVGMode() const { michael@0: return mSVGMode; michael@0: } michael@0: michael@0: // Reset or check whether a BAD_URL or BAD_STRING token has been seen. michael@0: void ClearSeenBadToken() { mSeenBadToken = false; } michael@0: bool SeenBadToken() const { return mSeenBadToken; } michael@0: michael@0: // Reset or check whether a "var(" FUNCTION token has been seen. michael@0: void ClearSeenVariableReference() { mSeenVariableReference = false; } michael@0: bool SeenVariableReference() const { return mSeenVariableReference; } michael@0: michael@0: // Get the 1-based line number of the last character of michael@0: // the most recently processed token. michael@0: uint32_t GetLineNumber() const { return mTokenLineNumber; } michael@0: michael@0: // Get the 0-based column number of the first character of michael@0: // the most recently processed token. michael@0: uint32_t GetColumnNumber() const michael@0: { return mTokenOffset - mTokenLineOffset; } michael@0: michael@0: // Get the text of the line containing the first character of michael@0: // the most recently processed token. michael@0: nsDependentSubstring GetCurrentLine() const; michael@0: michael@0: // Get the next token. Return false on EOF. aTokenResult is filled michael@0: // in with the data for the token. If aSkipWS is true, skip over michael@0: // eCSSToken_Whitespace tokens rather than returning them. michael@0: bool Next(nsCSSToken& aTokenResult, bool aSkipWS); michael@0: michael@0: // Get the body of an URL token (everything after the 'url('). 
michael@0: // This is exposed for use by nsCSSParser::ParseMozDocumentRule, michael@0: // which, for historical reasons, must make additional function michael@0: // tokens behave like url(). Please do not add new uses to the michael@0: // parser. michael@0: bool NextURL(nsCSSToken& aTokenResult); michael@0: michael@0: // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg, michael@0: // because "2n-1" is a single DIMENSION token, and "n-1" is a single michael@0: // IDENT token, but the :nth() selector syntax wants to interpret michael@0: // them the same as "2n -1" and "n -1" respectively. Please do not michael@0: // add new uses to the parser. michael@0: // michael@0: // Note: this function may not be used to back up over a line boundary. michael@0: void Backup(uint32_t n); michael@0: michael@0: // Starts recording the input stream from the current position. michael@0: void StartRecording(); michael@0: michael@0: // Abandons recording of the input stream. michael@0: void StopRecording(); michael@0: michael@0: // Stops recording of the input stream and appends the recorded michael@0: // input to aBuffer. michael@0: void StopRecording(nsString& aBuffer); michael@0: michael@0: // Returns the length of the current recording. michael@0: uint32_t RecordingLength() const; michael@0: michael@0: #ifdef DEBUG michael@0: bool IsRecording() const; michael@0: #endif michael@0: michael@0: // Stores the current scanner offset into the specified object. michael@0: void SavePosition(nsCSSScannerPosition& aState); michael@0: michael@0: // Resets the scanner offset to a position saved by SavePosition. 
michael@0: void RestoreSavedPosition(const nsCSSScannerPosition& aState); michael@0: michael@0: enum EOFCharacters { michael@0: eEOFCharacters_None = 0x0000, michael@0: michael@0: // to handle \ inside strings michael@0: eEOFCharacters_DropBackslash = 0x0001, michael@0: michael@0: // to handle \ outside strings michael@0: eEOFCharacters_ReplacementChar = 0x0002, michael@0: michael@0: // to close comments michael@0: eEOFCharacters_Asterisk = 0x0004, michael@0: eEOFCharacters_Slash = 0x0008, michael@0: michael@0: // to close double-quoted strings michael@0: eEOFCharacters_DoubleQuote = 0x0010, michael@0: michael@0: // to close single-quoted strings michael@0: eEOFCharacters_SingleQuote = 0x0020, michael@0: michael@0: // to close URLs michael@0: eEOFCharacters_CloseParen = 0x0040, michael@0: }; michael@0: michael@0: // Appends any characters to the specified string the input stream to make the michael@0: // last token not rely on special EOF handling behavior. michael@0: // michael@0: // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored. 
michael@0: static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, michael@0: nsAString& aString); michael@0: michael@0: EOFCharacters GetEOFCharacters() const { michael@0: #ifdef DEBUG michael@0: AssertEOFCharactersValid(mEOFCharacters); michael@0: #endif michael@0: return mEOFCharacters; michael@0: } michael@0: michael@0: #ifdef DEBUG michael@0: static void AssertEOFCharactersValid(uint32_t c); michael@0: #endif michael@0: michael@0: protected: michael@0: int32_t Peek(uint32_t n = 0); michael@0: void Advance(uint32_t n = 1); michael@0: void AdvanceLine(); michael@0: michael@0: void SkipWhitespace(); michael@0: void SkipComment(); michael@0: michael@0: bool GatherEscape(nsString& aOutput, bool aInString); michael@0: bool GatherText(uint8_t aClass, nsString& aIdent); michael@0: michael@0: bool ScanIdent(nsCSSToken& aResult); michael@0: bool ScanAtKeyword(nsCSSToken& aResult); michael@0: bool ScanHash(nsCSSToken& aResult); michael@0: bool ScanNumber(nsCSSToken& aResult); michael@0: bool ScanString(nsCSSToken& aResult); michael@0: bool ScanURange(nsCSSToken& aResult); michael@0: michael@0: void SetEOFCharacters(uint32_t aEOFCharacters); michael@0: void AddEOFCharacters(uint32_t aEOFCharacters); michael@0: michael@0: const char16_t *mBuffer; michael@0: uint32_t mOffset; michael@0: uint32_t mCount; michael@0: michael@0: uint32_t mLineNumber; michael@0: uint32_t mLineOffset; michael@0: michael@0: uint32_t mTokenLineNumber; michael@0: uint32_t mTokenLineOffset; michael@0: uint32_t mTokenOffset; michael@0: michael@0: uint32_t mRecordStartOffset; michael@0: EOFCharacters mEOFCharacters; michael@0: michael@0: mozilla::css::ErrorReporter *mReporter; michael@0: michael@0: // True if we are in SVG mode; false in "normal" CSS michael@0: bool mSVGMode; michael@0: bool mRecording; michael@0: bool mSeenBadToken; michael@0: bool mSeenVariableReference; michael@0: }; michael@0: michael@0: // Token for the grid-template-areas micro-syntax michael@0: // 
http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas michael@0: struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken { michael@0: nsAutoString mName; // Empty for a null cell, non-empty for a named cell michael@0: bool isTrash; // True for a trash token, mName is ignored in this case. michael@0: }; michael@0: michael@0: // Scanner for the grid-template-areas micro-syntax michael@0: class nsCSSGridTemplateAreaScanner { michael@0: public: michael@0: nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); michael@0: michael@0: // Get the next token. Return false on EOF. michael@0: // aTokenResult is filled in with the data for the token. michael@0: bool Next(nsCSSGridTemplateAreaToken& aTokenResult); michael@0: michael@0: private: michael@0: const char16_t *mBuffer; michael@0: uint32_t mOffset; michael@0: uint32_t mCount; michael@0: }; michael@0: michael@0: #endif /* nsCSSScanner_h___ */