michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: michael@0: /* tokenization of CSS style sheets */ michael@0: michael@0: #include "nsCSSScanner.h" michael@0: #include "nsStyleUtil.h" michael@0: #include "nsISupportsImpl.h" michael@0: #include "mozilla/ArrayUtils.h" michael@0: #include "mozilla/css/ErrorReporter.h" michael@0: #include "mozilla/Likely.h" michael@0: #include michael@0: michael@0: /* Character class tables and related helper functions. */ michael@0: michael@0: static const uint8_t IS_HEX_DIGIT = 0x01; michael@0: static const uint8_t IS_IDSTART = 0x02; michael@0: static const uint8_t IS_IDCHAR = 0x04; michael@0: static const uint8_t IS_URL_CHAR = 0x08; michael@0: static const uint8_t IS_HSPACE = 0x10; michael@0: static const uint8_t IS_VSPACE = 0x20; michael@0: static const uint8_t IS_SPACE = IS_HSPACE|IS_VSPACE; michael@0: static const uint8_t IS_STRING = 0x40; michael@0: michael@0: #define H IS_HSPACE michael@0: #define V IS_VSPACE michael@0: #define I IS_IDCHAR michael@0: #define J IS_IDSTART michael@0: #define U IS_URL_CHAR michael@0: #define S IS_STRING michael@0: #define X IS_HEX_DIGIT michael@0: michael@0: #define SH S|H michael@0: #define SU S|U michael@0: #define SUI S|U|I michael@0: #define SUIJ S|U|I|J michael@0: #define SUIX S|U|I|X michael@0: #define SUIJX S|U|I|J|X michael@0: michael@0: static const uint8_t gLexTable[] = { michael@0: // 00 01 02 03 04 05 06 07 michael@0: 0, S, S, S, S, S, S, S, michael@0: // 08 TAB LF 0B FF CR 0E 0F michael@0: S, SH, V, S, V, V, S, S, michael@0: // 10 11 12 13 14 15 16 17 michael@0: S, S, S, S, S, S, S, S, michael@0: // 18 19 1A 1B 1C 1D 1E 1F michael@0: S, S, S, S, S, S, S, S, michael@0: //SPC ! " # $ % & ' michael@0: SH, SU, 0, SU, SU, SU, SU, 0, michael@0: // ( ) * + , - . / michael@0: S, S, SU, SU, SU, SUI, SU, SU, michael@0: // 0 1 2 3 4 5 6 7 michael@0: SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, michael@0: // 8 9 : ; < = > ? michael@0: SUIX, SUIX, SU, SU, SU, SU, SU, SU, michael@0: // @ A B C D E F G michael@0: SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ, michael@0: // H I J K L M N O michael@0: SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, michael@0: // P Q R S T U V W michael@0: SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, michael@0: // X Y Z [ \ ] ^ _ michael@0: SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ, michael@0: // ` a b c d e f g michael@0: SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ, michael@0: // h i j k l m n o michael@0: SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, michael@0: // p q r s t u v w michael@0: SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, michael@0: // x y z { | } ~ 7F michael@0: SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S, michael@0: }; michael@0: michael@0: static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128, michael@0: "gLexTable expected to cover all 128 ASCII characters"); michael@0: michael@0: #undef I michael@0: #undef J michael@0: #undef U michael@0: #undef S michael@0: #undef X michael@0: #undef SH michael@0: #undef SU michael@0: #undef SUI michael@0: #undef SUIJ michael@0: #undef SUIX michael@0: #undef SUIJX michael@0: michael@0: /** michael@0: * True if 'ch' is in character class 'cls', which should be one of michael@0: * the constants above or some combination of them. All characters michael@0: * above U+007F are considered to be in 'cls'. EOF is never in 'cls'. michael@0: */ michael@0: static inline bool michael@0: IsOpenCharClass(int32_t ch, uint8_t cls) { michael@0: return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0); michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is in character class 'cls', which should be one of michael@0: * the constants above or some combination of them. No characters michael@0: * above U+007F are considered to be in 'cls'. EOF is never in 'cls'. michael@0: */ michael@0: static inline bool michael@0: IsClosedCharClass(int32_t ch, uint8_t cls) { michael@0: return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0; michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters michael@0: * TAB, LF, FF, CR, or SPC. michael@0: */ michael@0: static inline bool michael@0: IsWhitespace(int32_t ch) { michael@0: return IsClosedCharClass(ch, IS_SPACE); michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is horizontal whitespace, i.e. TAB or SPC. michael@0: */ michael@0: static inline bool michael@0: IsHorzSpace(int32_t ch) { michael@0: return IsClosedCharClass(ch, IS_HSPACE); michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical michael@0: * whitespace requires special handling when consumed, see AdvanceLine. michael@0: */ michael@0: static inline bool michael@0: IsVertSpace(int32_t ch) { michael@0: return IsClosedCharClass(ch, IS_VSPACE); michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is a character that can appear in the middle of an identifier. michael@0: * This includes U+0000 since it is handled as U+FFFD, but for purposes of michael@0: * GatherText it should not be included in IsOpenCharClass. michael@0: */ michael@0: static inline bool michael@0: IsIdentChar(int32_t ch) { michael@0: return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0; michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is a character that by itself begins an identifier. michael@0: * This includes U+0000 since it is handled as U+FFFD, but for purposes of michael@0: * GatherText it should not be included in IsOpenCharClass. michael@0: * (This is a subset of IsIdentChar.) michael@0: */ michael@0: static inline bool michael@0: IsIdentStart(int32_t ch) { michael@0: return IsOpenCharClass(ch, IS_IDSTART) || ch == 0; michael@0: } michael@0: michael@0: /** michael@0: * True if the two-character sequence aFirstChar+aSecondChar begins an michael@0: * identifier. michael@0: */ michael@0: static inline bool michael@0: StartsIdent(int32_t aFirstChar, int32_t aSecondChar) michael@0: { michael@0: return IsIdentStart(aFirstChar) || michael@0: (aFirstChar == '-' && IsIdentStart(aSecondChar)); michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is a decimal digit. michael@0: */ michael@0: static inline bool michael@0: IsDigit(int32_t ch) { michael@0: return (ch >= '0') && (ch <= '9'); michael@0: } michael@0: michael@0: /** michael@0: * True if 'ch' is a hexadecimal digit. michael@0: */ michael@0: static inline bool michael@0: IsHexDigit(int32_t ch) { michael@0: return IsClosedCharClass(ch, IS_HEX_DIGIT); michael@0: } michael@0: michael@0: /** michael@0: * Assuming that 'ch' is a decimal digit, return its numeric value. michael@0: */ michael@0: static inline uint32_t michael@0: DecimalDigitValue(int32_t ch) michael@0: { michael@0: return ch - '0'; michael@0: } michael@0: michael@0: /** michael@0: * Assuming that 'ch' is a hexadecimal digit, return its numeric value. michael@0: */ michael@0: static inline uint32_t michael@0: HexDigitValue(int32_t ch) michael@0: { michael@0: if (IsDigit(ch)) { michael@0: return DecimalDigitValue(ch); michael@0: } else { michael@0: // Note: c&7 just keeps the low three bits which causes michael@0: // upper and lower case alphabetics to both yield their michael@0: // "relative to 10" value for computing the hex value. michael@0: return (ch & 0x7) + 9; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * If 'ch' can be the first character of a two-character match operator michael@0: * token, return the token type code for that token, otherwise return michael@0: * eCSSToken_Symbol to indicate that it can't. michael@0: */ michael@0: static inline nsCSSTokenType michael@0: MatchOperatorType(int32_t ch) michael@0: { michael@0: switch (ch) { michael@0: case '~': return eCSSToken_Includes; michael@0: case '|': return eCSSToken_Dashmatch; michael@0: case '^': return eCSSToken_Beginsmatch; michael@0: case '$': return eCSSToken_Endsmatch; michael@0: case '*': return eCSSToken_Containsmatch; michael@0: default: return eCSSToken_Symbol; michael@0: } michael@0: } michael@0: michael@0: /* Out-of-line nsCSSToken methods. */ michael@0: michael@0: /** michael@0: * Append the textual representation of |this| to |aBuffer|. michael@0: */ michael@0: void michael@0: nsCSSToken::AppendToString(nsString& aBuffer) const michael@0: { michael@0: switch (mType) { michael@0: case eCSSToken_Ident: michael@0: nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); michael@0: break; michael@0: michael@0: case eCSSToken_AtKeyword: michael@0: aBuffer.Append('@'); michael@0: nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); michael@0: break; michael@0: michael@0: case eCSSToken_ID: michael@0: case eCSSToken_Hash: michael@0: aBuffer.Append('#'); michael@0: nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); michael@0: break; michael@0: michael@0: case eCSSToken_Function: michael@0: nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); michael@0: aBuffer.Append('('); michael@0: break; michael@0: michael@0: case eCSSToken_URL: michael@0: case eCSSToken_Bad_URL: michael@0: aBuffer.AppendLiteral("url("); michael@0: if (mSymbol != char16_t(0)) { michael@0: nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol); michael@0: } else { michael@0: aBuffer.Append(mIdent); michael@0: } michael@0: if (mType == eCSSToken_URL) { michael@0: aBuffer.Append(char16_t(')')); michael@0: } michael@0: break; michael@0: michael@0: case eCSSToken_Number: michael@0: if (mIntegerValid) { michael@0: aBuffer.AppendInt(mInteger, 10); michael@0: } else { michael@0: aBuffer.AppendFloat(mNumber); michael@0: } michael@0: break; michael@0: michael@0: case eCSSToken_Percentage: michael@0: aBuffer.AppendFloat(mNumber * 100.0f); michael@0: aBuffer.Append(char16_t('%')); michael@0: break; michael@0: michael@0: case eCSSToken_Dimension: michael@0: if (mIntegerValid) { michael@0: aBuffer.AppendInt(mInteger, 10); michael@0: } else { michael@0: aBuffer.AppendFloat(mNumber); michael@0: } michael@0: nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); michael@0: break; michael@0: michael@0: case eCSSToken_Bad_String: michael@0: nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol); michael@0: // remove the trailing quote character michael@0: aBuffer.Truncate(aBuffer.Length() - 1); michael@0: break; michael@0: michael@0: case eCSSToken_String: michael@0: nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol); michael@0: break; michael@0: michael@0: case eCSSToken_Symbol: michael@0: aBuffer.Append(mSymbol); michael@0: break; michael@0: michael@0: case eCSSToken_Whitespace: michael@0: aBuffer.Append(' '); michael@0: break; michael@0: michael@0: case eCSSToken_HTMLComment: michael@0: case eCSSToken_URange: michael@0: aBuffer.Append(mIdent); michael@0: break; michael@0: michael@0: case eCSSToken_Includes: michael@0: aBuffer.AppendLiteral("~="); michael@0: break; michael@0: case eCSSToken_Dashmatch: michael@0: aBuffer.AppendLiteral("|="); michael@0: break; michael@0: case eCSSToken_Beginsmatch: michael@0: aBuffer.AppendLiteral("^="); michael@0: break; michael@0: case eCSSToken_Endsmatch: michael@0: aBuffer.AppendLiteral("$="); michael@0: break; michael@0: case eCSSToken_Containsmatch: michael@0: aBuffer.AppendLiteral("*="); michael@0: break; michael@0: michael@0: default: michael@0: NS_ERROR("invalid token type"); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: /* nsCSSScanner methods. */ michael@0: michael@0: nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber) michael@0: : mBuffer(aBuffer.BeginReading()) michael@0: , mOffset(0) michael@0: , mCount(aBuffer.Length()) michael@0: , mLineNumber(aLineNumber) michael@0: , mLineOffset(0) michael@0: , mTokenLineNumber(aLineNumber) michael@0: , mTokenLineOffset(0) michael@0: , mTokenOffset(0) michael@0: , mRecordStartOffset(0) michael@0: , mEOFCharacters(eEOFCharacters_None) michael@0: , mReporter(nullptr) michael@0: , mSVGMode(false) michael@0: , mRecording(false) michael@0: , mSeenBadToken(false) michael@0: , mSeenVariableReference(false) michael@0: { michael@0: MOZ_COUNT_CTOR(nsCSSScanner); michael@0: } michael@0: michael@0: nsCSSScanner::~nsCSSScanner() michael@0: { michael@0: MOZ_COUNT_DTOR(nsCSSScanner); michael@0: } michael@0: michael@0: void michael@0: nsCSSScanner::StartRecording() michael@0: { michael@0: MOZ_ASSERT(!mRecording, "already started recording"); michael@0: mRecording = true; michael@0: mRecordStartOffset = mOffset; michael@0: } michael@0: michael@0: void michael@0: nsCSSScanner::StopRecording() michael@0: { michael@0: MOZ_ASSERT(mRecording, "haven't started recording"); michael@0: mRecording = false; michael@0: } michael@0: michael@0: void michael@0: nsCSSScanner::StopRecording(nsString& aBuffer) michael@0: { michael@0: MOZ_ASSERT(mRecording, "haven't started recording"); michael@0: mRecording = false; michael@0: aBuffer.Append(mBuffer + mRecordStartOffset, michael@0: mOffset - mRecordStartOffset); michael@0: } michael@0: michael@0: uint32_t michael@0: nsCSSScanner::RecordingLength() const michael@0: { michael@0: MOZ_ASSERT(mRecording, "haven't started recording"); michael@0: return mOffset - mRecordStartOffset; michael@0: } michael@0: michael@0: #ifdef DEBUG michael@0: bool michael@0: nsCSSScanner::IsRecording() const michael@0: { michael@0: return mRecording; michael@0: } michael@0: #endif michael@0: michael@0: nsDependentSubstring michael@0: nsCSSScanner::GetCurrentLine() const michael@0: { michael@0: uint32_t end = mTokenOffset; michael@0: while (end < mCount && !IsVertSpace(mBuffer[end])) { michael@0: end++; michael@0: } michael@0: return nsDependentSubstring(mBuffer + mTokenLineOffset, michael@0: mBuffer + end); michael@0: } michael@0: michael@0: /** michael@0: * Return the raw UTF-16 code unit at position |mOffset + n| within michael@0: * the read buffer. If that is beyond the end of the buffer, returns michael@0: * -1 to indicate end of input. michael@0: */ michael@0: inline int32_t michael@0: nsCSSScanner::Peek(uint32_t n) michael@0: { michael@0: if (mOffset + n >= mCount) { michael@0: return -1; michael@0: } michael@0: return mBuffer[mOffset + n]; michael@0: } michael@0: michael@0: /** michael@0: * Advance |mOffset| over |n| code units. Advance(0) is a no-op. michael@0: * If |n| is greater than the distance to end of input, will silently michael@0: * stop at the end. May not be used to advance over a line boundary; michael@0: * AdvanceLine() must be used instead. michael@0: */ michael@0: inline void michael@0: nsCSSScanner::Advance(uint32_t n) michael@0: { michael@0: #ifdef DEBUG michael@0: while (mOffset < mCount && n > 0) { michael@0: MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]), michael@0: "may not Advance() over a line boundary"); michael@0: mOffset++; michael@0: n--; michael@0: } michael@0: #else michael@0: if (mOffset + n >= mCount || mOffset + n < mOffset) michael@0: mOffset = mCount; michael@0: else michael@0: mOffset += n; michael@0: #endif michael@0: } michael@0: michael@0: /** michael@0: * Advance |mOffset| over a line boundary. michael@0: */ michael@0: void michael@0: nsCSSScanner::AdvanceLine() michael@0: { michael@0: MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]), michael@0: "may not AdvanceLine() over a horizontal character"); michael@0: // Advance over \r\n as a unit. michael@0: if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount && michael@0: mBuffer[mOffset+1] == '\n') michael@0: mOffset += 2; michael@0: else michael@0: mOffset += 1; michael@0: // 0 is a magical line number meaning that we don't know (i.e., script) michael@0: if (mLineNumber != 0) michael@0: mLineNumber++; michael@0: mLineOffset = mOffset; michael@0: } michael@0: michael@0: /** michael@0: * Back up |mOffset| over |n| code units. Backup(0) is a no-op. michael@0: * If |n| is greater than the distance to beginning of input, will michael@0: * silently stop at the beginning. May not be used to back up over a michael@0: * line boundary. michael@0: */ michael@0: void michael@0: nsCSSScanner::Backup(uint32_t n) michael@0: { michael@0: #ifdef DEBUG michael@0: while (mOffset > 0 && n > 0) { michael@0: MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]), michael@0: "may not Backup() over a line boundary"); michael@0: mOffset--; michael@0: n--; michael@0: } michael@0: #else michael@0: if (mOffset < n) michael@0: mOffset = 0; michael@0: else michael@0: mOffset -= n; michael@0: #endif michael@0: } michael@0: michael@0: void michael@0: nsCSSScanner::SavePosition(nsCSSScannerPosition& aState) michael@0: { michael@0: aState.mOffset = mOffset; michael@0: aState.mLineNumber = mLineNumber; michael@0: aState.mLineOffset = mLineOffset; michael@0: aState.mTokenLineNumber = mTokenLineNumber; michael@0: aState.mTokenLineOffset = mTokenLineOffset; michael@0: aState.mTokenOffset = mTokenOffset; michael@0: aState.mInitialized = true; michael@0: } michael@0: michael@0: void michael@0: nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState) michael@0: { michael@0: MOZ_ASSERT(aState.mInitialized, "have not saved state"); michael@0: if (aState.mInitialized) { michael@0: mOffset = aState.mOffset; michael@0: mLineNumber = aState.mLineNumber; michael@0: mLineOffset = aState.mLineOffset; michael@0: mTokenLineNumber = aState.mTokenLineNumber; michael@0: mTokenLineOffset = aState.mTokenLineOffset; michael@0: mTokenOffset = aState.mTokenOffset; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Skip over a sequence of whitespace characters (vertical or michael@0: * horizontal) starting at the current read position. michael@0: */ michael@0: void michael@0: nsCSSScanner::SkipWhitespace() michael@0: { michael@0: for (;;) { michael@0: int32_t ch = Peek(); michael@0: if (!IsWhitespace(ch)) { // EOF counts as non-whitespace michael@0: break; michael@0: } michael@0: if (IsVertSpace(ch)) { michael@0: AdvanceLine(); michael@0: } else { michael@0: Advance(); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Skip over one CSS comment starting at the current read position. michael@0: */ michael@0: void michael@0: nsCSSScanner::SkipComment() michael@0: { michael@0: MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called"); michael@0: Advance(2); michael@0: for (;;) { michael@0: int32_t ch = Peek(); michael@0: if (ch < 0) { michael@0: mReporter->ReportUnexpectedEOF("PECommentEOF"); michael@0: SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash); michael@0: return; michael@0: } michael@0: if (ch == '*') { michael@0: Advance(); michael@0: ch = Peek(); michael@0: if (ch < 0) { michael@0: mReporter->ReportUnexpectedEOF("PECommentEOF"); michael@0: SetEOFCharacters(eEOFCharacters_Slash); michael@0: return; michael@0: } michael@0: if (ch == '/') { michael@0: Advance(); michael@0: return; michael@0: } michael@0: } else if (IsVertSpace(ch)) { michael@0: AdvanceLine(); michael@0: } else { michael@0: Advance(); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * If there is a valid escape sequence starting at the current read michael@0: * position, consume it, decode it, append the result to |aOutput|, michael@0: * and return true. Otherwise, consume nothing, leave |aOutput| michael@0: * unmodified, and return false. If |aInString| is true, accept the michael@0: * additional form of escape sequence allowed within string-like tokens. michael@0: */ michael@0: bool michael@0: nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString) michael@0: { michael@0: MOZ_ASSERT(Peek() == '\\', "should not have been called"); michael@0: int32_t ch = Peek(1); michael@0: if (ch < 0) { michael@0: // If we are in a string (or a url() containing a string), we want to drop michael@0: // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD michael@0: // character. michael@0: Advance(); michael@0: if (aInString) { michael@0: SetEOFCharacters(eEOFCharacters_DropBackslash); michael@0: } else { michael@0: aOutput.Append(UCS2_REPLACEMENT_CHAR); michael@0: SetEOFCharacters(eEOFCharacters_ReplacementChar); michael@0: } michael@0: return true; michael@0: } michael@0: if (IsVertSpace(ch)) { michael@0: if (aInString) { michael@0: // In strings (and in url() containing a string), escaped michael@0: // newlines are completely removed, to allow splitting over michael@0: // multiple lines. michael@0: Advance(); michael@0: AdvanceLine(); michael@0: return true; michael@0: } michael@0: // Outside of strings, backslash followed by a newline is not an escape. michael@0: return false; michael@0: } michael@0: michael@0: if (!IsHexDigit(ch)) { michael@0: // "Any character (except a hexadecimal digit, linefeed, carriage michael@0: // return, or form feed) can be escaped with a backslash to remove michael@0: // its special meaning." -- CSS2.1 section 4.1.3 michael@0: Advance(2); michael@0: if (ch == 0) { michael@0: aOutput.Append(UCS2_REPLACEMENT_CHAR); michael@0: } else { michael@0: aOutput.Append(ch); michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // "[at most six hexadecimal digits following a backslash] stand michael@0: // for the ISO 10646 character with that number, which must not be michael@0: // zero. (It is undefined in CSS 2.1 what happens if a style sheet michael@0: // does contain a character with Unicode codepoint zero.)" michael@0: // -- CSS2.1 section 4.1.3 michael@0: michael@0: // At this point we know we have \ followed by at least one michael@0: // hexadecimal digit, therefore the escape sequence is valid and we michael@0: // can go ahead and consume the backslash. michael@0: Advance(); michael@0: uint32_t val = 0; michael@0: int i = 0; michael@0: do { michael@0: val = val * 16 + HexDigitValue(ch); michael@0: i++; michael@0: Advance(); michael@0: ch = Peek(); michael@0: } while (i < 6 && IsHexDigit(ch)); michael@0: michael@0: // "Interpret the hex digits as a hexadecimal number. If this number is zero, michael@0: // or is greater than the maximum allowed codepoint, return U+FFFD michael@0: // REPLACEMENT CHARACTER" -- CSS Syntax Level 3 michael@0: if (MOZ_UNLIKELY(val == 0)) { michael@0: aOutput.Append(UCS2_REPLACEMENT_CHAR); michael@0: } else { michael@0: AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput); michael@0: } michael@0: michael@0: // Consume exactly one whitespace character after a michael@0: // hexadecimal escape sequence. michael@0: if (IsVertSpace(ch)) { michael@0: AdvanceLine(); michael@0: } else if (IsHorzSpace(ch)) { michael@0: Advance(); michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Consume a run of "text" beginning with the current read position, michael@0: * consisting of characters in the class |aClass| (which must be a michael@0: * suitable argument to IsOpenCharClass) plus escape sequences. michael@0: * Append the text to |aText|, after decoding escape sequences. michael@0: * michael@0: * Returns true if at least one character was appended to |aText|, michael@0: * false otherwise. michael@0: */ michael@0: bool michael@0: nsCSSScanner::GatherText(uint8_t aClass, nsString& aText) michael@0: { michael@0: // This is all of the character classes currently used with michael@0: // GatherText. If you have a need to use this function with a michael@0: // different class, go ahead and add it. michael@0: MOZ_ASSERT(aClass == IS_STRING || michael@0: aClass == IS_IDCHAR || michael@0: aClass == IS_URL_CHAR, michael@0: "possibly-inappropriate character class"); michael@0: michael@0: uint32_t start = mOffset; michael@0: bool inString = aClass == IS_STRING; michael@0: michael@0: for (;;) { michael@0: // Consume runs of unescaped characters in one go. michael@0: uint32_t n = mOffset; michael@0: while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) { michael@0: n++; michael@0: } michael@0: if (n > mOffset) { michael@0: aText.Append(&mBuffer[mOffset], n - mOffset); michael@0: mOffset = n; michael@0: } michael@0: if (n == mCount) { michael@0: break; michael@0: } michael@0: michael@0: int32_t ch = Peek(); michael@0: MOZ_ASSERT(!IsOpenCharClass(ch, aClass), michael@0: "should not have exited the inner loop"); michael@0: if (ch == 0) { michael@0: Advance(); michael@0: aText.Append(UCS2_REPLACEMENT_CHAR); michael@0: continue; michael@0: } michael@0: michael@0: if (ch != '\\') { michael@0: break; michael@0: } michael@0: if (!GatherEscape(aText, inString)) { michael@0: break; michael@0: } michael@0: } michael@0: michael@0: return mOffset > start; michael@0: } michael@0: michael@0: /** michael@0: * Scan an Ident token. This also handles Function and URL tokens, michael@0: * both of which begin indistinguishably from an identifier. It can michael@0: * produce a Symbol token when an apparent identifier actually led michael@0: * into an invalid escape sequence. michael@0: */ michael@0: bool michael@0: nsCSSScanner::ScanIdent(nsCSSToken& aToken) michael@0: { michael@0: if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) { michael@0: MOZ_ASSERT(Peek() == '\\', michael@0: "unexpected IsIdentStart character that did not begin an ident"); michael@0: aToken.mSymbol = Peek(); michael@0: Advance(); michael@0: return true; michael@0: } michael@0: michael@0: if (MOZ_LIKELY(Peek() != '(')) { michael@0: aToken.mType = eCSSToken_Ident; michael@0: return true; michael@0: } michael@0: michael@0: Advance(); michael@0: aToken.mType = eCSSToken_Function; michael@0: if (aToken.mIdent.LowerCaseEqualsLiteral("url")) { michael@0: NextURL(aToken); michael@0: } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) { michael@0: mSeenVariableReference = true; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Scan an AtKeyword token. Also handles production of Symbol when michael@0: * an '@' is not followed by an identifier. michael@0: */ michael@0: bool michael@0: nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken) michael@0: { michael@0: MOZ_ASSERT(Peek() == '@', "should not have been called"); michael@0: michael@0: // Fall back for when '@' isn't followed by an identifier. michael@0: aToken.mSymbol = '@'; michael@0: Advance(); michael@0: michael@0: int32_t ch = Peek(); michael@0: if (StartsIdent(ch, Peek(1))) { michael@0: if (GatherText(IS_IDCHAR, aToken.mIdent)) { michael@0: aToken.mType = eCSSToken_AtKeyword; michael@0: } michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Scan a Hash token. Handles the distinction between eCSSToken_ID michael@0: * and eCSSToken_Hash, and handles production of Symbol when a '#' michael@0: * is not followed by identifier characters. michael@0: */ michael@0: bool michael@0: nsCSSScanner::ScanHash(nsCSSToken& aToken) michael@0: { michael@0: MOZ_ASSERT(Peek() == '#', "should not have been called"); michael@0: michael@0: // Fall back for when '#' isn't followed by identifier characters. michael@0: aToken.mSymbol = '#'; michael@0: Advance(); michael@0: michael@0: int32_t ch = Peek(); michael@0: if (IsIdentChar(ch) || ch == '\\') { michael@0: nsCSSTokenType type = michael@0: StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash; michael@0: aToken.mIdent.SetLength(0); michael@0: if (GatherText(IS_IDCHAR, aToken.mIdent)) { michael@0: aToken.mType = type; michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Scan a Number, Percentage, or Dimension token (all of which begin michael@0: * like a Number). Can produce a Symbol when a '.' is not followed by michael@0: * digits, or when '+' or '-' are not followed by either a digit or a michael@0: * '.' and then a digit. Can also produce a HTMLComment when it michael@0: * encounters '-->'. michael@0: */ michael@0: bool michael@0: nsCSSScanner::ScanNumber(nsCSSToken& aToken) michael@0: { michael@0: int32_t c = Peek(); michael@0: #ifdef DEBUG michael@0: { michael@0: int32_t c2 = Peek(1); michael@0: int32_t c3 = Peek(2); michael@0: MOZ_ASSERT(IsDigit(c) || michael@0: (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) || michael@0: (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'), michael@0: "should not have been called"); michael@0: } michael@0: #endif michael@0: michael@0: // Sign of the mantissa (-1 or 1). michael@0: int32_t sign = c == '-' ? -1 : 1; michael@0: // Absolute value of the integer part of the mantissa. This is a double so michael@0: // we don't run into overflow issues for consumers that only care about our michael@0: // floating-point value while still being able to express the full int32_t michael@0: // range for consumers who want integers. michael@0: double intPart = 0; michael@0: // Fractional part of the mantissa. This is a double so that when we convert michael@0: // to float at the end we'll end up rounding to nearest float instead of michael@0: // truncating down (as we would if fracPart were a float and we just michael@0: // effectively lost the last several digits). michael@0: double fracPart = 0; michael@0: // Absolute value of the power of 10 that we should multiply by (only michael@0: // relevant for numbers in scientific notation). Has to be a signed integer, michael@0: // because multiplication of signed by unsigned converts the unsigned to michael@0: // signed, so if we plan to actually multiply by expSign... michael@0: int32_t exponent = 0; michael@0: // Sign of the exponent. michael@0: int32_t expSign = 1; michael@0: michael@0: aToken.mHasSign = (c == '+' || c == '-'); michael@0: if (aToken.mHasSign) { michael@0: Advance(); michael@0: c = Peek(); michael@0: } michael@0: michael@0: bool gotDot = (c == '.'); michael@0: michael@0: if (!gotDot) { michael@0: // Scan the integer part of the mantissa. michael@0: MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above"); michael@0: do { michael@0: intPart = 10*intPart + DecimalDigitValue(c); michael@0: Advance(); michael@0: c = Peek(); michael@0: } while (IsDigit(c)); michael@0: michael@0: gotDot = (c == '.') && IsDigit(Peek(1)); michael@0: } michael@0: michael@0: if (gotDot) { michael@0: // Scan the fractional part of the mantissa. michael@0: Advance(); michael@0: c = Peek(); michael@0: MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above"); michael@0: // Power of ten by which we need to divide our next digit michael@0: double divisor = 10; michael@0: do { michael@0: fracPart += DecimalDigitValue(c) / divisor; michael@0: divisor *= 10; michael@0: Advance(); michael@0: c = Peek(); michael@0: } while (IsDigit(c)); michael@0: } michael@0: michael@0: bool gotE = false; michael@0: if (c == 'e' || c == 'E') { michael@0: int32_t expSignChar = Peek(1); michael@0: int32_t nextChar = Peek(2); michael@0: if (IsDigit(expSignChar) || michael@0: ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) { michael@0: gotE = true; michael@0: if (expSignChar == '-') { michael@0: expSign = -1; michael@0: } michael@0: Advance(); // consumes the E michael@0: if (expSignChar == '-' || expSignChar == '+') { michael@0: Advance(); michael@0: c = nextChar; michael@0: } else { michael@0: c = expSignChar; michael@0: } michael@0: MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above"); michael@0: do { michael@0: exponent = 10*exponent + DecimalDigitValue(c); michael@0: Advance(); michael@0: c = Peek(); michael@0: } while (IsDigit(c)); michael@0: } michael@0: } michael@0: michael@0: nsCSSTokenType type = eCSSToken_Number; michael@0: michael@0: // Set mIntegerValid for all cases (except %, below) because we need michael@0: // it for the "2n" in :nth-child(2n). michael@0: aToken.mIntegerValid = false; michael@0: michael@0: // Time to reassemble our number. michael@0: // Do all the math in double precision so it's truncated only once. michael@0: double value = sign * (intPart + fracPart); michael@0: if (gotE) { michael@0: // Explicitly cast expSign*exponent to double to avoid issues with michael@0: // overloaded pow() on Windows. michael@0: value *= pow(10.0, double(expSign * exponent)); michael@0: } else if (!gotDot) { michael@0: // Clamp values outside of integer range. michael@0: if (sign > 0) { michael@0: aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX))); michael@0: } else { michael@0: aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN))); michael@0: } michael@0: aToken.mIntegerValid = true; michael@0: } michael@0: michael@0: nsString& ident = aToken.mIdent; michael@0: michael@0: // Check for Dimension and Percentage tokens. michael@0: if (c >= 0) { michael@0: if (StartsIdent(c, Peek(1))) { michael@0: if (GatherText(IS_IDCHAR, ident)) { michael@0: type = eCSSToken_Dimension; michael@0: } michael@0: } else if (c == '%') { michael@0: Advance(); michael@0: type = eCSSToken_Percentage; michael@0: value = value / 100.0f; michael@0: aToken.mIntegerValid = false; michael@0: } michael@0: } michael@0: aToken.mNumber = value; michael@0: aToken.mType = type; michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Scan a string constant ('foo' or "foo"). Will always produce michael@0: * either a String or a Bad_String token; the latter occurs when the michael@0: * close quote is missing. Always returns true (for convenience in Next()). michael@0: */ michael@0: bool michael@0: nsCSSScanner::ScanString(nsCSSToken& aToken) michael@0: { michael@0: int32_t aStop = Peek(); michael@0: MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called"); michael@0: aToken.mType = eCSSToken_String; michael@0: aToken.mSymbol = char16_t(aStop); // Remember how it's quoted. michael@0: Advance(); michael@0: michael@0: for (;;) { michael@0: GatherText(IS_STRING, aToken.mIdent); michael@0: michael@0: int32_t ch = Peek(); michael@0: if (ch == -1) { michael@0: AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote : michael@0: eEOFCharacters_SingleQuote); michael@0: break; // EOF ends a string token with no error. michael@0: } michael@0: if (ch == aStop) { michael@0: Advance(); michael@0: break; michael@0: } michael@0: // Both " and ' are excluded from IS_STRING. michael@0: if (ch == '"' || ch == '\'') { michael@0: aToken.mIdent.Append(ch); michael@0: Advance(); michael@0: continue; michael@0: } michael@0: michael@0: mSeenBadToken = true; michael@0: aToken.mType = eCSSToken_Bad_String; michael@0: mReporter->ReportUnexpected("SEUnterminatedString", aToken); michael@0: break; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Scan a unicode-range token. These match the regular expression michael@0: * michael@0: * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? michael@0: * michael@0: * However, some such tokens are "invalid". There are three valid forms: michael@0: * michael@0: * u+[0-9a-f]{x} 1 <= x <= 6 michael@0: * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6 michael@0: * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6 michael@0: * michael@0: * All unicode-range tokens have their text recorded in mIdent; valid ones michael@0: * are also decoded into mInteger and mInteger2, and mIntegerValid is set. michael@0: * Note that this does not validate the numeric range, only the syntactic michael@0: * form. michael@0: */ michael@0: bool michael@0: nsCSSScanner::ScanURange(nsCSSToken& aResult) michael@0: { michael@0: int32_t intro1 = Peek(); michael@0: int32_t intro2 = Peek(1); michael@0: int32_t ch = Peek(2); michael@0: michael@0: MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') && michael@0: intro2 == '+' && michael@0: (IsHexDigit(ch) || ch == '?'), michael@0: "should not have been called"); michael@0: michael@0: aResult.mIdent.Append(intro1); michael@0: aResult.mIdent.Append(intro2); michael@0: Advance(2); michael@0: michael@0: bool valid = true; michael@0: bool haveQues = false; michael@0: uint32_t low = 0; michael@0: uint32_t high = 0; michael@0: int i = 0; michael@0: michael@0: do { michael@0: aResult.mIdent.Append(ch); michael@0: if (IsHexDigit(ch)) { michael@0: if (haveQues) { michael@0: valid = false; // All question marks should be at the end. michael@0: } michael@0: low = low*16 + HexDigitValue(ch); michael@0: high = high*16 + HexDigitValue(ch); michael@0: } else { michael@0: haveQues = true; michael@0: low = low*16 + 0x0; michael@0: high = high*16 + 0xF; michael@0: } michael@0: michael@0: i++; michael@0: Advance(); michael@0: ch = Peek(); michael@0: } while (i < 6 && (IsHexDigit(ch) || ch == '?')); michael@0: michael@0: if (ch == '-' && IsHexDigit(Peek(1))) { michael@0: if (haveQues) { michael@0: valid = false; michael@0: } michael@0: michael@0: aResult.mIdent.Append(ch); michael@0: Advance(); michael@0: ch = Peek(); michael@0: high = 0; michael@0: i = 0; michael@0: do { michael@0: aResult.mIdent.Append(ch); michael@0: high = high*16 + HexDigitValue(ch); michael@0: michael@0: i++; michael@0: Advance(); michael@0: ch = Peek(); michael@0: } while (i < 6 && IsHexDigit(ch)); michael@0: } michael@0: michael@0: aResult.mInteger = low; michael@0: aResult.mInteger2 = high; michael@0: aResult.mIntegerValid = valid; michael@0: aResult.mType = eCSSToken_URange; michael@0: return true; michael@0: } michael@0: michael@0: #ifdef DEBUG michael@0: /* static */ void michael@0: nsCSSScanner::AssertEOFCharactersValid(uint32_t c) michael@0: { michael@0: MOZ_ASSERT(c == eEOFCharacters_None || michael@0: c == eEOFCharacters_ReplacementChar || michael@0: c == eEOFCharacters_Slash || michael@0: c == (eEOFCharacters_Asterisk | michael@0: eEOFCharacters_Slash) || michael@0: c == eEOFCharacters_DoubleQuote || michael@0: c == eEOFCharacters_SingleQuote || michael@0: c == (eEOFCharacters_DropBackslash | michael@0: eEOFCharacters_DoubleQuote) || michael@0: c == (eEOFCharacters_DropBackslash | michael@0: eEOFCharacters_SingleQuote) || michael@0: c == eEOFCharacters_CloseParen || michael@0: c == (eEOFCharacters_ReplacementChar | michael@0: eEOFCharacters_CloseParen) || michael@0: c == (eEOFCharacters_DoubleQuote | michael@0: eEOFCharacters_CloseParen) || michael@0: c == (eEOFCharacters_SingleQuote | michael@0: eEOFCharacters_CloseParen) || michael@0: c == (eEOFCharacters_DropBackslash | michael@0: eEOFCharacters_DoubleQuote | michael@0: eEOFCharacters_CloseParen) || michael@0: c == (eEOFCharacters_DropBackslash | michael@0: eEOFCharacters_SingleQuote | michael@0: eEOFCharacters_CloseParen), michael@0: "invalid EOFCharacters value"); michael@0: } michael@0: #endif michael@0: michael@0: void michael@0: nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters) michael@0: { michael@0: mEOFCharacters = EOFCharacters(aEOFCharacters); michael@0: } michael@0: michael@0: void michael@0: nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters) michael@0: { michael@0: mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters); michael@0: } michael@0: michael@0: static const char16_t kImpliedEOFCharacters[] = { michael@0: UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0 michael@0: }; michael@0: michael@0: /* static */ void michael@0: nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, michael@0: nsAString& aResult) michael@0: { michael@0: // First, ignore eEOFCharacters_DropBackslash. michael@0: uint32_t c = aEOFCharacters >> 1; michael@0: michael@0: // All of the remaining EOFCharacters bits represent appended characters, michael@0: // and the bits are in the order that they need appending. michael@0: for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) { michael@0: if (c & 1) { michael@0: aResult.Append(*p); michael@0: } michael@0: } michael@0: michael@0: MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters"); michael@0: } michael@0: michael@0: /** michael@0: * Consume the part of an URL token after the initial 'url('. Caller michael@0: * is assumed to have consumed 'url(' already. Will always produce michael@0: * either an URL or a Bad_URL token. michael@0: * michael@0: * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies michael@0: * the special lexical rules for URL tokens in a nonstandard context. michael@0: */ michael@0: bool michael@0: nsCSSScanner::NextURL(nsCSSToken& aToken) michael@0: { michael@0: SkipWhitespace(); michael@0: michael@0: int32_t ch = Peek(); michael@0: if (ch < 0) { michael@0: return false; michael@0: } michael@0: michael@0: // aToken.mIdent may be "url" at this point; clear that out michael@0: aToken.mIdent.Truncate(); michael@0: michael@0: // Do we have a string? michael@0: if (ch == '"' || ch == '\'') { michael@0: ScanString(aToken); michael@0: if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) { michael@0: aToken.mType = eCSSToken_Bad_URL; michael@0: return true; michael@0: } michael@0: MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type"); michael@0: michael@0: } else { michael@0: // Otherwise, this is the start of a non-quoted url (which may be empty). michael@0: aToken.mSymbol = char16_t(0); michael@0: GatherText(IS_URL_CHAR, aToken.mIdent); michael@0: } michael@0: michael@0: // Consume trailing whitespace and then look for a close parenthesis. michael@0: SkipWhitespace(); michael@0: ch = Peek(); michael@0: if (MOZ_LIKELY(ch < 0 || ch == ')')) { michael@0: Advance(); michael@0: aToken.mType = eCSSToken_URL; michael@0: if (ch < 0) { michael@0: AddEOFCharacters(eEOFCharacters_CloseParen); michael@0: } michael@0: } else { michael@0: mSeenBadToken = true; michael@0: aToken.mType = eCSSToken_Bad_URL; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: /** michael@0: * Primary scanner entry point. Consume one token and fill in michael@0: * |aToken| accordingly. Will skip over any number of comments first, michael@0: * and will also skip over rather than return whitespace tokens if michael@0: * |aSkipWS| is true. michael@0: * michael@0: * Returns true if it successfully consumed a token, false if EOF has michael@0: * been reached. Will always advance the current read position by at michael@0: * least one character unless called when already at EOF. michael@0: */ michael@0: bool michael@0: nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS) michael@0: { michael@0: int32_t ch; michael@0: michael@0: // do this here so we don't have to do it in dozens of other places michael@0: aToken.mIdent.Truncate(); michael@0: aToken.mType = eCSSToken_Symbol; michael@0: michael@0: for (;;) { michael@0: // Consume any number of comments, and possibly also whitespace tokens, michael@0: // in between other tokens. michael@0: mTokenOffset = mOffset; michael@0: mTokenLineOffset = mLineOffset; michael@0: mTokenLineNumber = mLineNumber; michael@0: michael@0: ch = Peek(); michael@0: if (IsWhitespace(ch)) { michael@0: SkipWhitespace(); michael@0: if (!aSkipWS) { michael@0: aToken.mType = eCSSToken_Whitespace; michael@0: return true; michael@0: } michael@0: continue; // start again at the beginning michael@0: } michael@0: if (ch == '/' && !IsSVGMode() && Peek(1) == '*') { michael@0: // FIXME: Editor wants comments to be preserved (bug 60290). michael@0: SkipComment(); michael@0: continue; // start again at the beginning michael@0: } michael@0: break; michael@0: } michael@0: michael@0: // EOF michael@0: if (ch < 0) { michael@0: return false; michael@0: } michael@0: michael@0: // 'u' could be UNICODE-RANGE or an identifier-family token michael@0: if (ch == 'u' || ch == 'U') { michael@0: int32_t c2 = Peek(1); michael@0: int32_t c3 = Peek(2); michael@0: if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) { michael@0: return ScanURange(aToken); michael@0: } michael@0: return ScanIdent(aToken); michael@0: } michael@0: michael@0: // identifier family michael@0: if (IsIdentStart(ch)) { michael@0: return ScanIdent(aToken); michael@0: } michael@0: michael@0: // number family michael@0: if (IsDigit(ch)) { michael@0: return ScanNumber(aToken); michael@0: } michael@0: michael@0: if (ch == '.' && IsDigit(Peek(1))) { michael@0: return ScanNumber(aToken); michael@0: } michael@0: michael@0: if (ch == '+') { michael@0: int32_t c2 = Peek(1); michael@0: if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) { michael@0: return ScanNumber(aToken); michael@0: } michael@0: } michael@0: michael@0: // '-' can start an identifier-family token, a number-family token, michael@0: // or an HTML-comment michael@0: if (ch == '-') { michael@0: int32_t c2 = Peek(1); michael@0: int32_t c3 = Peek(2); michael@0: if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) { michael@0: return ScanIdent(aToken); michael@0: } michael@0: if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) { michael@0: return ScanNumber(aToken); michael@0: } michael@0: if (c2 == '-' && c3 == '>') { michael@0: Advance(3); michael@0: aToken.mType = eCSSToken_HTMLComment; michael@0: aToken.mIdent.AssignLiteral("-->"); michael@0: return true; michael@0: } michael@0: } michael@0: michael@0: // the other HTML-comment token michael@0: if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') { michael@0: Advance(4); michael@0: aToken.mType = eCSSToken_HTMLComment; michael@0: aToken.mIdent.AssignLiteral("