1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/layout/style/nsCSSScanner.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1370 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 + 1.10 +/* tokenization of CSS style sheets */ 1.11 + 1.12 +#include "nsCSSScanner.h" 1.13 +#include "nsStyleUtil.h" 1.14 +#include "nsISupportsImpl.h" 1.15 +#include "mozilla/ArrayUtils.h" 1.16 +#include "mozilla/css/ErrorReporter.h" 1.17 +#include "mozilla/Likely.h" 1.18 +#include <algorithm> 1.19 + 1.20 +/* Character class tables and related helper functions. */ 1.21 + 1.22 +static const uint8_t IS_HEX_DIGIT = 0x01; 1.23 +static const uint8_t IS_IDSTART = 0x02; 1.24 +static const uint8_t IS_IDCHAR = 0x04; 1.25 +static const uint8_t IS_URL_CHAR = 0x08; 1.26 +static const uint8_t IS_HSPACE = 0x10; 1.27 +static const uint8_t IS_VSPACE = 0x20; 1.28 +static const uint8_t IS_SPACE = IS_HSPACE|IS_VSPACE; 1.29 +static const uint8_t IS_STRING = 0x40; 1.30 + 1.31 +#define H IS_HSPACE 1.32 +#define V IS_VSPACE 1.33 +#define I IS_IDCHAR 1.34 +#define J IS_IDSTART 1.35 +#define U IS_URL_CHAR 1.36 +#define S IS_STRING 1.37 +#define X IS_HEX_DIGIT 1.38 + 1.39 +#define SH S|H 1.40 +#define SU S|U 1.41 +#define SUI S|U|I 1.42 +#define SUIJ S|U|I|J 1.43 +#define SUIX S|U|I|X 1.44 +#define SUIJX S|U|I|J|X 1.45 + 1.46 +static const uint8_t gLexTable[] = { 1.47 +// 00 01 02 03 04 05 06 07 1.48 + 0, S, S, S, S, S, S, S, 1.49 +// 08 TAB LF 0B FF CR 0E 0F 1.50 + S, SH, V, S, V, V, S, S, 1.51 +// 10 11 12 13 14 15 16 17 1.52 + S, S, S, S, S, S, S, S, 1.53 +// 18 19 1A 1B 1C 1D 1E 1F 1.54 + S, S, S, S, S, S, S, S, 1.55 +//SPC ! " # $ % & ' 1.56 + SH, SU, 0, SU, SU, SU, SU, 0, 1.57 +// ( ) * + , - . / 1.58 + S, S, SU, SU, SU, SUI, SU, SU, 1.59 +// 0 1 2 3 4 5 6 7 1.60 + SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, 1.61 +// 8 9 : ; < = > ? 1.62 + SUIX, SUIX, SU, SU, SU, SU, SU, SU, 1.63 +// @ A B C D E F G 1.64 + SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ, 1.65 +// H I J K L M N O 1.66 + SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, 1.67 +// P Q R S T U V W 1.68 + SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, 1.69 +// X Y Z [ \ ] ^ _ 1.70 + SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ, 1.71 +// ` a b c d e f g 1.72 + SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ, 1.73 +// h i j k l m n o 1.74 + SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, 1.75 +// p q r s t u v w 1.76 + SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, 1.77 +// x y z { | } ~ 7F 1.78 + SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S, 1.79 +}; 1.80 + 1.81 +static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128, 1.82 + "gLexTable expected to cover all 128 ASCII characters"); 1.83 + 1.84 +#undef I 1.85 +#undef J 1.86 +#undef U 1.87 +#undef S 1.88 +#undef X 1.89 +#undef SH 1.90 +#undef SU 1.91 +#undef SUI 1.92 +#undef SUIJ 1.93 +#undef SUIX 1.94 +#undef SUIJX 1.95 + 1.96 +/** 1.97 + * True if 'ch' is in character class 'cls', which should be one of 1.98 + * the constants above or some combination of them. All characters 1.99 + * above U+007F are considered to be in 'cls'. EOF is never in 'cls'. 1.100 + */ 1.101 +static inline bool 1.102 +IsOpenCharClass(int32_t ch, uint8_t cls) { 1.103 + return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0); 1.104 +} 1.105 + 1.106 +/** 1.107 + * True if 'ch' is in character class 'cls', which should be one of 1.108 + * the constants above or some combination of them. No characters 1.109 + * above U+007F are considered to be in 'cls'. EOF is never in 'cls'. 1.110 + */ 1.111 +static inline bool 1.112 +IsClosedCharClass(int32_t ch, uint8_t cls) { 1.113 + return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0; 1.114 +} 1.115 + 1.116 +/** 1.117 + * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters 1.118 + * TAB, LF, FF, CR, or SPC. 1.119 + */ 1.120 +static inline bool 1.121 +IsWhitespace(int32_t ch) { 1.122 + return IsClosedCharClass(ch, IS_SPACE); 1.123 +} 1.124 + 1.125 +/** 1.126 + * True if 'ch' is horizontal whitespace, i.e. TAB or SPC. 1.127 + */ 1.128 +static inline bool 1.129 +IsHorzSpace(int32_t ch) { 1.130 + return IsClosedCharClass(ch, IS_HSPACE); 1.131 +} 1.132 + 1.133 +/** 1.134 + * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical 1.135 + * whitespace requires special handling when consumed, see AdvanceLine. 1.136 + */ 1.137 +static inline bool 1.138 +IsVertSpace(int32_t ch) { 1.139 + return IsClosedCharClass(ch, IS_VSPACE); 1.140 +} 1.141 + 1.142 +/** 1.143 + * True if 'ch' is a character that can appear in the middle of an identifier. 1.144 + * This includes U+0000 since it is handled as U+FFFD, but for purposes of 1.145 + * GatherText it should not be included in IsOpenCharClass. 1.146 + */ 1.147 +static inline bool 1.148 +IsIdentChar(int32_t ch) { 1.149 + return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0; 1.150 +} 1.151 + 1.152 +/** 1.153 + * True if 'ch' is a character that by itself begins an identifier. 1.154 + * This includes U+0000 since it is handled as U+FFFD, but for purposes of 1.155 + * GatherText it should not be included in IsOpenCharClass. 1.156 + * (This is a subset of IsIdentChar.) 1.157 + */ 1.158 +static inline bool 1.159 +IsIdentStart(int32_t ch) { 1.160 + return IsOpenCharClass(ch, IS_IDSTART) || ch == 0; 1.161 +} 1.162 + 1.163 +/** 1.164 + * True if the two-character sequence aFirstChar+aSecondChar begins an 1.165 + * identifier. 1.166 + */ 1.167 +static inline bool 1.168 +StartsIdent(int32_t aFirstChar, int32_t aSecondChar) 1.169 +{ 1.170 + return IsIdentStart(aFirstChar) || 1.171 + (aFirstChar == '-' && IsIdentStart(aSecondChar)); 1.172 +} 1.173 + 1.174 +/** 1.175 + * True if 'ch' is a decimal digit. 1.176 + */ 1.177 +static inline bool 1.178 +IsDigit(int32_t ch) { 1.179 + return (ch >= '0') && (ch <= '9'); 1.180 +} 1.181 + 1.182 +/** 1.183 + * True if 'ch' is a hexadecimal digit. 1.184 + */ 1.185 +static inline bool 1.186 +IsHexDigit(int32_t ch) { 1.187 + return IsClosedCharClass(ch, IS_HEX_DIGIT); 1.188 +} 1.189 + 1.190 +/** 1.191 + * Assuming that 'ch' is a decimal digit, return its numeric value. 1.192 + */ 1.193 +static inline uint32_t 1.194 +DecimalDigitValue(int32_t ch) 1.195 +{ 1.196 + return ch - '0'; 1.197 +} 1.198 + 1.199 +/** 1.200 + * Assuming that 'ch' is a hexadecimal digit, return its numeric value. 1.201 + */ 1.202 +static inline uint32_t 1.203 +HexDigitValue(int32_t ch) 1.204 +{ 1.205 + if (IsDigit(ch)) { 1.206 + return DecimalDigitValue(ch); 1.207 + } else { 1.208 + // Note: c&7 just keeps the low three bits which causes 1.209 + // upper and lower case alphabetics to both yield their 1.210 + // "relative to 10" value for computing the hex value. 1.211 + return (ch & 0x7) + 9; 1.212 + } 1.213 +} 1.214 + 1.215 +/** 1.216 + * If 'ch' can be the first character of a two-character match operator 1.217 + * token, return the token type code for that token, otherwise return 1.218 + * eCSSToken_Symbol to indicate that it can't. 1.219 + */ 1.220 +static inline nsCSSTokenType 1.221 +MatchOperatorType(int32_t ch) 1.222 +{ 1.223 + switch (ch) { 1.224 + case '~': return eCSSToken_Includes; 1.225 + case '|': return eCSSToken_Dashmatch; 1.226 + case '^': return eCSSToken_Beginsmatch; 1.227 + case '$': return eCSSToken_Endsmatch; 1.228 + case '*': return eCSSToken_Containsmatch; 1.229 + default: return eCSSToken_Symbol; 1.230 + } 1.231 +} 1.232 + 1.233 +/* Out-of-line nsCSSToken methods. */ 1.234 + 1.235 +/** 1.236 + * Append the textual representation of |this| to |aBuffer|. 1.237 + */ 1.238 +void 1.239 +nsCSSToken::AppendToString(nsString& aBuffer) const 1.240 +{ 1.241 + switch (mType) { 1.242 + case eCSSToken_Ident: 1.243 + nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); 1.244 + break; 1.245 + 1.246 + case eCSSToken_AtKeyword: 1.247 + aBuffer.Append('@'); 1.248 + nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); 1.249 + break; 1.250 + 1.251 + case eCSSToken_ID: 1.252 + case eCSSToken_Hash: 1.253 + aBuffer.Append('#'); 1.254 + nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); 1.255 + break; 1.256 + 1.257 + case eCSSToken_Function: 1.258 + nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); 1.259 + aBuffer.Append('('); 1.260 + break; 1.261 + 1.262 + case eCSSToken_URL: 1.263 + case eCSSToken_Bad_URL: 1.264 + aBuffer.AppendLiteral("url("); 1.265 + if (mSymbol != char16_t(0)) { 1.266 + nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol); 1.267 + } else { 1.268 + aBuffer.Append(mIdent); 1.269 + } 1.270 + if (mType == eCSSToken_URL) { 1.271 + aBuffer.Append(char16_t(')')); 1.272 + } 1.273 + break; 1.274 + 1.275 + case eCSSToken_Number: 1.276 + if (mIntegerValid) { 1.277 + aBuffer.AppendInt(mInteger, 10); 1.278 + } else { 1.279 + aBuffer.AppendFloat(mNumber); 1.280 + } 1.281 + break; 1.282 + 1.283 + case eCSSToken_Percentage: 1.284 + aBuffer.AppendFloat(mNumber * 100.0f); 1.285 + aBuffer.Append(char16_t('%')); 1.286 + break; 1.287 + 1.288 + case eCSSToken_Dimension: 1.289 + if (mIntegerValid) { 1.290 + aBuffer.AppendInt(mInteger, 10); 1.291 + } else { 1.292 + aBuffer.AppendFloat(mNumber); 1.293 + } 1.294 + nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer); 1.295 + break; 1.296 + 1.297 + case eCSSToken_Bad_String: 1.298 + nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol); 1.299 + // remove the trailing quote character 1.300 + aBuffer.Truncate(aBuffer.Length() - 1); 1.301 + break; 1.302 + 1.303 + case eCSSToken_String: 1.304 + nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol); 1.305 + break; 1.306 + 1.307 + case eCSSToken_Symbol: 1.308 + aBuffer.Append(mSymbol); 1.309 + break; 1.310 + 1.311 + case eCSSToken_Whitespace: 1.312 + aBuffer.Append(' '); 1.313 + break; 1.314 + 1.315 + case eCSSToken_HTMLComment: 1.316 + case eCSSToken_URange: 1.317 + aBuffer.Append(mIdent); 1.318 + break; 1.319 + 1.320 + case eCSSToken_Includes: 1.321 + aBuffer.AppendLiteral("~="); 1.322 + break; 1.323 + case eCSSToken_Dashmatch: 1.324 + aBuffer.AppendLiteral("|="); 1.325 + break; 1.326 + case eCSSToken_Beginsmatch: 1.327 + aBuffer.AppendLiteral("^="); 1.328 + break; 1.329 + case eCSSToken_Endsmatch: 1.330 + aBuffer.AppendLiteral("$="); 1.331 + break; 1.332 + case eCSSToken_Containsmatch: 1.333 + aBuffer.AppendLiteral("*="); 1.334 + break; 1.335 + 1.336 + default: 1.337 + NS_ERROR("invalid token type"); 1.338 + break; 1.339 + } 1.340 +} 1.341 + 1.342 +/* nsCSSScanner methods. */ 1.343 + 1.344 +nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber) 1.345 + : mBuffer(aBuffer.BeginReading()) 1.346 + , mOffset(0) 1.347 + , mCount(aBuffer.Length()) 1.348 + , mLineNumber(aLineNumber) 1.349 + , mLineOffset(0) 1.350 + , mTokenLineNumber(aLineNumber) 1.351 + , mTokenLineOffset(0) 1.352 + , mTokenOffset(0) 1.353 + , mRecordStartOffset(0) 1.354 + , mEOFCharacters(eEOFCharacters_None) 1.355 + , mReporter(nullptr) 1.356 + , mSVGMode(false) 1.357 + , mRecording(false) 1.358 + , mSeenBadToken(false) 1.359 + , mSeenVariableReference(false) 1.360 +{ 1.361 + MOZ_COUNT_CTOR(nsCSSScanner); 1.362 +} 1.363 + 1.364 +nsCSSScanner::~nsCSSScanner() 1.365 +{ 1.366 + MOZ_COUNT_DTOR(nsCSSScanner); 1.367 +} 1.368 + 1.369 +void 1.370 +nsCSSScanner::StartRecording() 1.371 +{ 1.372 + MOZ_ASSERT(!mRecording, "already started recording"); 1.373 + mRecording = true; 1.374 + mRecordStartOffset = mOffset; 1.375 +} 1.376 + 1.377 +void 1.378 +nsCSSScanner::StopRecording() 1.379 +{ 1.380 + MOZ_ASSERT(mRecording, "haven't started recording"); 1.381 + mRecording = false; 1.382 +} 1.383 + 1.384 +void 1.385 +nsCSSScanner::StopRecording(nsString& aBuffer) 1.386 +{ 1.387 + MOZ_ASSERT(mRecording, "haven't started recording"); 1.388 + mRecording = false; 1.389 + aBuffer.Append(mBuffer + mRecordStartOffset, 1.390 + mOffset - mRecordStartOffset); 1.391 +} 1.392 + 1.393 +uint32_t 1.394 +nsCSSScanner::RecordingLength() const 1.395 +{ 1.396 + MOZ_ASSERT(mRecording, "haven't started recording"); 1.397 + return mOffset - mRecordStartOffset; 1.398 +} 1.399 + 1.400 +#ifdef DEBUG 1.401 +bool 1.402 +nsCSSScanner::IsRecording() const 1.403 +{ 1.404 + return mRecording; 1.405 +} 1.406 +#endif 1.407 + 1.408 +nsDependentSubstring 1.409 +nsCSSScanner::GetCurrentLine() const 1.410 +{ 1.411 + uint32_t end = mTokenOffset; 1.412 + while (end < mCount && !IsVertSpace(mBuffer[end])) { 1.413 + end++; 1.414 + } 1.415 + return nsDependentSubstring(mBuffer + mTokenLineOffset, 1.416 + mBuffer + end); 1.417 +} 1.418 + 1.419 +/** 1.420 + * Return the raw UTF-16 code unit at position |mOffset + n| within 1.421 + * the read buffer. If that is beyond the end of the buffer, returns 1.422 + * -1 to indicate end of input. 1.423 + */ 1.424 +inline int32_t 1.425 +nsCSSScanner::Peek(uint32_t n) 1.426 +{ 1.427 + if (mOffset + n >= mCount) { 1.428 + return -1; 1.429 + } 1.430 + return mBuffer[mOffset + n]; 1.431 +} 1.432 + 1.433 +/** 1.434 + * Advance |mOffset| over |n| code units. Advance(0) is a no-op. 1.435 + * If |n| is greater than the distance to end of input, will silently 1.436 + * stop at the end. May not be used to advance over a line boundary; 1.437 + * AdvanceLine() must be used instead. 1.438 + */ 1.439 +inline void 1.440 +nsCSSScanner::Advance(uint32_t n) 1.441 +{ 1.442 +#ifdef DEBUG 1.443 + while (mOffset < mCount && n > 0) { 1.444 + MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]), 1.445 + "may not Advance() over a line boundary"); 1.446 + mOffset++; 1.447 + n--; 1.448 + } 1.449 +#else 1.450 + if (mOffset + n >= mCount || mOffset + n < mOffset) 1.451 + mOffset = mCount; 1.452 + else 1.453 + mOffset += n; 1.454 +#endif 1.455 +} 1.456 + 1.457 +/** 1.458 + * Advance |mOffset| over a line boundary. 1.459 + */ 1.460 +void 1.461 +nsCSSScanner::AdvanceLine() 1.462 +{ 1.463 + MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]), 1.464 + "may not AdvanceLine() over a horizontal character"); 1.465 + // Advance over \r\n as a unit. 1.466 + if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount && 1.467 + mBuffer[mOffset+1] == '\n') 1.468 + mOffset += 2; 1.469 + else 1.470 + mOffset += 1; 1.471 + // 0 is a magical line number meaning that we don't know (i.e., script) 1.472 + if (mLineNumber != 0) 1.473 + mLineNumber++; 1.474 + mLineOffset = mOffset; 1.475 +} 1.476 + 1.477 +/** 1.478 + * Back up |mOffset| over |n| code units. Backup(0) is a no-op. 1.479 + * If |n| is greater than the distance to beginning of input, will 1.480 + * silently stop at the beginning. May not be used to back up over a 1.481 + * line boundary. 1.482 + */ 1.483 +void 1.484 +nsCSSScanner::Backup(uint32_t n) 1.485 +{ 1.486 +#ifdef DEBUG 1.487 + while (mOffset > 0 && n > 0) { 1.488 + MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]), 1.489 + "may not Backup() over a line boundary"); 1.490 + mOffset--; 1.491 + n--; 1.492 + } 1.493 +#else 1.494 + if (mOffset < n) 1.495 + mOffset = 0; 1.496 + else 1.497 + mOffset -= n; 1.498 +#endif 1.499 +} 1.500 + 1.501 +void 1.502 +nsCSSScanner::SavePosition(nsCSSScannerPosition& aState) 1.503 +{ 1.504 + aState.mOffset = mOffset; 1.505 + aState.mLineNumber = mLineNumber; 1.506 + aState.mLineOffset = mLineOffset; 1.507 + aState.mTokenLineNumber = mTokenLineNumber; 1.508 + aState.mTokenLineOffset = mTokenLineOffset; 1.509 + aState.mTokenOffset = mTokenOffset; 1.510 + aState.mInitialized = true; 1.511 +} 1.512 + 1.513 +void 1.514 +nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState) 1.515 +{ 1.516 + MOZ_ASSERT(aState.mInitialized, "have not saved state"); 1.517 + if (aState.mInitialized) { 1.518 + mOffset = aState.mOffset; 1.519 + mLineNumber = aState.mLineNumber; 1.520 + mLineOffset = aState.mLineOffset; 1.521 + mTokenLineNumber = aState.mTokenLineNumber; 1.522 + mTokenLineOffset = aState.mTokenLineOffset; 1.523 + mTokenOffset = aState.mTokenOffset; 1.524 + } 1.525 +} 1.526 + 1.527 +/** 1.528 + * Skip over a sequence of whitespace characters (vertical or 1.529 + * horizontal) starting at the current read position. 1.530 + */ 1.531 +void 1.532 +nsCSSScanner::SkipWhitespace() 1.533 +{ 1.534 + for (;;) { 1.535 + int32_t ch = Peek(); 1.536 + if (!IsWhitespace(ch)) { // EOF counts as non-whitespace 1.537 + break; 1.538 + } 1.539 + if (IsVertSpace(ch)) { 1.540 + AdvanceLine(); 1.541 + } else { 1.542 + Advance(); 1.543 + } 1.544 + } 1.545 +} 1.546 + 1.547 +/** 1.548 + * Skip over one CSS comment starting at the current read position. 1.549 + */ 1.550 +void 1.551 +nsCSSScanner::SkipComment() 1.552 +{ 1.553 + MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called"); 1.554 + Advance(2); 1.555 + for (;;) { 1.556 + int32_t ch = Peek(); 1.557 + if (ch < 0) { 1.558 + mReporter->ReportUnexpectedEOF("PECommentEOF"); 1.559 + SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash); 1.560 + return; 1.561 + } 1.562 + if (ch == '*') { 1.563 + Advance(); 1.564 + ch = Peek(); 1.565 + if (ch < 0) { 1.566 + mReporter->ReportUnexpectedEOF("PECommentEOF"); 1.567 + SetEOFCharacters(eEOFCharacters_Slash); 1.568 + return; 1.569 + } 1.570 + if (ch == '/') { 1.571 + Advance(); 1.572 + return; 1.573 + } 1.574 + } else if (IsVertSpace(ch)) { 1.575 + AdvanceLine(); 1.576 + } else { 1.577 + Advance(); 1.578 + } 1.579 + } 1.580 +} 1.581 + 1.582 +/** 1.583 + * If there is a valid escape sequence starting at the current read 1.584 + * position, consume it, decode it, append the result to |aOutput|, 1.585 + * and return true. Otherwise, consume nothing, leave |aOutput| 1.586 + * unmodified, and return false. If |aInString| is true, accept the 1.587 + * additional form of escape sequence allowed within string-like tokens. 1.588 + */ 1.589 +bool 1.590 +nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString) 1.591 +{ 1.592 + MOZ_ASSERT(Peek() == '\\', "should not have been called"); 1.593 + int32_t ch = Peek(1); 1.594 + if (ch < 0) { 1.595 + // If we are in a string (or a url() containing a string), we want to drop 1.596 + // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD 1.597 + // character. 1.598 + Advance(); 1.599 + if (aInString) { 1.600 + SetEOFCharacters(eEOFCharacters_DropBackslash); 1.601 + } else { 1.602 + aOutput.Append(UCS2_REPLACEMENT_CHAR); 1.603 + SetEOFCharacters(eEOFCharacters_ReplacementChar); 1.604 + } 1.605 + return true; 1.606 + } 1.607 + if (IsVertSpace(ch)) { 1.608 + if (aInString) { 1.609 + // In strings (and in url() containing a string), escaped 1.610 + // newlines are completely removed, to allow splitting over 1.611 + // multiple lines. 1.612 + Advance(); 1.613 + AdvanceLine(); 1.614 + return true; 1.615 + } 1.616 + // Outside of strings, backslash followed by a newline is not an escape. 1.617 + return false; 1.618 + } 1.619 + 1.620 + if (!IsHexDigit(ch)) { 1.621 + // "Any character (except a hexadecimal digit, linefeed, carriage 1.622 + // return, or form feed) can be escaped with a backslash to remove 1.623 + // its special meaning." -- CSS2.1 section 4.1.3 1.624 + Advance(2); 1.625 + if (ch == 0) { 1.626 + aOutput.Append(UCS2_REPLACEMENT_CHAR); 1.627 + } else { 1.628 + aOutput.Append(ch); 1.629 + } 1.630 + return true; 1.631 + } 1.632 + 1.633 + // "[at most six hexadecimal digits following a backslash] stand 1.634 + // for the ISO 10646 character with that number, which must not be 1.635 + // zero. (It is undefined in CSS 2.1 what happens if a style sheet 1.636 + // does contain a character with Unicode codepoint zero.)" 1.637 + // -- CSS2.1 section 4.1.3 1.638 + 1.639 + // At this point we know we have \ followed by at least one 1.640 + // hexadecimal digit, therefore the escape sequence is valid and we 1.641 + // can go ahead and consume the backslash. 1.642 + Advance(); 1.643 + uint32_t val = 0; 1.644 + int i = 0; 1.645 + do { 1.646 + val = val * 16 + HexDigitValue(ch); 1.647 + i++; 1.648 + Advance(); 1.649 + ch = Peek(); 1.650 + } while (i < 6 && IsHexDigit(ch)); 1.651 + 1.652 + // "Interpret the hex digits as a hexadecimal number. If this number is zero, 1.653 + // or is greater than the maximum allowed codepoint, return U+FFFD 1.654 + // REPLACEMENT CHARACTER" -- CSS Syntax Level 3 1.655 + if (MOZ_UNLIKELY(val == 0)) { 1.656 + aOutput.Append(UCS2_REPLACEMENT_CHAR); 1.657 + } else { 1.658 + AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput); 1.659 + } 1.660 + 1.661 + // Consume exactly one whitespace character after a 1.662 + // hexadecimal escape sequence. 1.663 + if (IsVertSpace(ch)) { 1.664 + AdvanceLine(); 1.665 + } else if (IsHorzSpace(ch)) { 1.666 + Advance(); 1.667 + } 1.668 + return true; 1.669 +} 1.670 + 1.671 +/** 1.672 + * Consume a run of "text" beginning with the current read position, 1.673 + * consisting of characters in the class |aClass| (which must be a 1.674 + * suitable argument to IsOpenCharClass) plus escape sequences. 1.675 + * Append the text to |aText|, after decoding escape sequences. 1.676 + * 1.677 + * Returns true if at least one character was appended to |aText|, 1.678 + * false otherwise. 1.679 + */ 1.680 +bool 1.681 +nsCSSScanner::GatherText(uint8_t aClass, nsString& aText) 1.682 +{ 1.683 + // This is all of the character classes currently used with 1.684 + // GatherText. If you have a need to use this function with a 1.685 + // different class, go ahead and add it. 1.686 + MOZ_ASSERT(aClass == IS_STRING || 1.687 + aClass == IS_IDCHAR || 1.688 + aClass == IS_URL_CHAR, 1.689 + "possibly-inappropriate character class"); 1.690 + 1.691 + uint32_t start = mOffset; 1.692 + bool inString = aClass == IS_STRING; 1.693 + 1.694 + for (;;) { 1.695 + // Consume runs of unescaped characters in one go. 1.696 + uint32_t n = mOffset; 1.697 + while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) { 1.698 + n++; 1.699 + } 1.700 + if (n > mOffset) { 1.701 + aText.Append(&mBuffer[mOffset], n - mOffset); 1.702 + mOffset = n; 1.703 + } 1.704 + if (n == mCount) { 1.705 + break; 1.706 + } 1.707 + 1.708 + int32_t ch = Peek(); 1.709 + MOZ_ASSERT(!IsOpenCharClass(ch, aClass), 1.710 + "should not have exited the inner loop"); 1.711 + if (ch == 0) { 1.712 + Advance(); 1.713 + aText.Append(UCS2_REPLACEMENT_CHAR); 1.714 + continue; 1.715 + } 1.716 + 1.717 + if (ch != '\\') { 1.718 + break; 1.719 + } 1.720 + if (!GatherEscape(aText, inString)) { 1.721 + break; 1.722 + } 1.723 + } 1.724 + 1.725 + return mOffset > start; 1.726 +} 1.727 + 1.728 +/** 1.729 + * Scan an Ident token. This also handles Function and URL tokens, 1.730 + * both of which begin indistinguishably from an identifier. It can 1.731 + * produce a Symbol token when an apparent identifier actually led 1.732 + * into an invalid escape sequence. 1.733 + */ 1.734 +bool 1.735 +nsCSSScanner::ScanIdent(nsCSSToken& aToken) 1.736 +{ 1.737 + if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) { 1.738 + MOZ_ASSERT(Peek() == '\\', 1.739 + "unexpected IsIdentStart character that did not begin an ident"); 1.740 + aToken.mSymbol = Peek(); 1.741 + Advance(); 1.742 + return true; 1.743 + } 1.744 + 1.745 + if (MOZ_LIKELY(Peek() != '(')) { 1.746 + aToken.mType = eCSSToken_Ident; 1.747 + return true; 1.748 + } 1.749 + 1.750 + Advance(); 1.751 + aToken.mType = eCSSToken_Function; 1.752 + if (aToken.mIdent.LowerCaseEqualsLiteral("url")) { 1.753 + NextURL(aToken); 1.754 + } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) { 1.755 + mSeenVariableReference = true; 1.756 + } 1.757 + return true; 1.758 +} 1.759 + 1.760 +/** 1.761 + * Scan an AtKeyword token. Also handles production of Symbol when 1.762 + * an '@' is not followed by an identifier. 1.763 + */ 1.764 +bool 1.765 +nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken) 1.766 +{ 1.767 + MOZ_ASSERT(Peek() == '@', "should not have been called"); 1.768 + 1.769 + // Fall back for when '@' isn't followed by an identifier. 1.770 + aToken.mSymbol = '@'; 1.771 + Advance(); 1.772 + 1.773 + int32_t ch = Peek(); 1.774 + if (StartsIdent(ch, Peek(1))) { 1.775 + if (GatherText(IS_IDCHAR, aToken.mIdent)) { 1.776 + aToken.mType = eCSSToken_AtKeyword; 1.777 + } 1.778 + } 1.779 + return true; 1.780 +} 1.781 + 1.782 +/** 1.783 + * Scan a Hash token. Handles the distinction between eCSSToken_ID 1.784 + * and eCSSToken_Hash, and handles production of Symbol when a '#' 1.785 + * is not followed by identifier characters. 1.786 + */ 1.787 +bool 1.788 +nsCSSScanner::ScanHash(nsCSSToken& aToken) 1.789 +{ 1.790 + MOZ_ASSERT(Peek() == '#', "should not have been called"); 1.791 + 1.792 + // Fall back for when '#' isn't followed by identifier characters. 1.793 + aToken.mSymbol = '#'; 1.794 + Advance(); 1.795 + 1.796 + int32_t ch = Peek(); 1.797 + if (IsIdentChar(ch) || ch == '\\') { 1.798 + nsCSSTokenType type = 1.799 + StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash; 1.800 + aToken.mIdent.SetLength(0); 1.801 + if (GatherText(IS_IDCHAR, aToken.mIdent)) { 1.802 + aToken.mType = type; 1.803 + } 1.804 + } 1.805 + 1.806 + return true; 1.807 +} 1.808 + 1.809 +/** 1.810 + * Scan a Number, Percentage, or Dimension token (all of which begin 1.811 + * like a Number). Can produce a Symbol when a '.' is not followed by 1.812 + * digits, or when '+' or '-' are not followed by either a digit or a 1.813 + * '.' and then a digit. Can also produce a HTMLComment when it 1.814 + * encounters '-->'. 1.815 + */ 1.816 +bool 1.817 +nsCSSScanner::ScanNumber(nsCSSToken& aToken) 1.818 +{ 1.819 + int32_t c = Peek(); 1.820 +#ifdef DEBUG 1.821 + { 1.822 + int32_t c2 = Peek(1); 1.823 + int32_t c3 = Peek(2); 1.824 + MOZ_ASSERT(IsDigit(c) || 1.825 + (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) || 1.826 + (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'), 1.827 + "should not have been called"); 1.828 + } 1.829 +#endif 1.830 + 1.831 + // Sign of the mantissa (-1 or 1). 1.832 + int32_t sign = c == '-' ? -1 : 1; 1.833 + // Absolute value of the integer part of the mantissa. This is a double so 1.834 + // we don't run into overflow issues for consumers that only care about our 1.835 + // floating-point value while still being able to express the full int32_t 1.836 + // range for consumers who want integers. 1.837 + double intPart = 0; 1.838 + // Fractional part of the mantissa. This is a double so that when we convert 1.839 + // to float at the end we'll end up rounding to nearest float instead of 1.840 + // truncating down (as we would if fracPart were a float and we just 1.841 + // effectively lost the last several digits). 1.842 + double fracPart = 0; 1.843 + // Absolute value of the power of 10 that we should multiply by (only 1.844 + // relevant for numbers in scientific notation). Has to be a signed integer, 1.845 + // because multiplication of signed by unsigned converts the unsigned to 1.846 + // signed, so if we plan to actually multiply by expSign... 1.847 + int32_t exponent = 0; 1.848 + // Sign of the exponent. 1.849 + int32_t expSign = 1; 1.850 + 1.851 + aToken.mHasSign = (c == '+' || c == '-'); 1.852 + if (aToken.mHasSign) { 1.853 + Advance(); 1.854 + c = Peek(); 1.855 + } 1.856 + 1.857 + bool gotDot = (c == '.'); 1.858 + 1.859 + if (!gotDot) { 1.860 + // Scan the integer part of the mantissa. 1.861 + MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above"); 1.862 + do { 1.863 + intPart = 10*intPart + DecimalDigitValue(c); 1.864 + Advance(); 1.865 + c = Peek(); 1.866 + } while (IsDigit(c)); 1.867 + 1.868 + gotDot = (c == '.') && IsDigit(Peek(1)); 1.869 + } 1.870 + 1.871 + if (gotDot) { 1.872 + // Scan the fractional part of the mantissa. 1.873 + Advance(); 1.874 + c = Peek(); 1.875 + MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above"); 1.876 + // Power of ten by which we need to divide our next digit 1.877 + double divisor = 10; 1.878 + do { 1.879 + fracPart += DecimalDigitValue(c) / divisor; 1.880 + divisor *= 10; 1.881 + Advance(); 1.882 + c = Peek(); 1.883 + } while (IsDigit(c)); 1.884 + } 1.885 + 1.886 + bool gotE = false; 1.887 + if (c == 'e' || c == 'E') { 1.888 + int32_t expSignChar = Peek(1); 1.889 + int32_t nextChar = Peek(2); 1.890 + if (IsDigit(expSignChar) || 1.891 + ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) { 1.892 + gotE = true; 1.893 + if (expSignChar == '-') { 1.894 + expSign = -1; 1.895 + } 1.896 + Advance(); // consumes the E 1.897 + if (expSignChar == '-' || expSignChar == '+') { 1.898 + Advance(); 1.899 + c = nextChar; 1.900 + } else { 1.901 + c = expSignChar; 1.902 + } 1.903 + MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above"); 1.904 + do { 1.905 + exponent = 10*exponent + DecimalDigitValue(c); 1.906 + Advance(); 1.907 + c = Peek(); 1.908 + } while (IsDigit(c)); 1.909 + } 1.910 + } 1.911 + 1.912 + nsCSSTokenType type = eCSSToken_Number; 1.913 + 1.914 + // Set mIntegerValid for all cases (except %, below) because we need 1.915 + // it for the "2n" in :nth-child(2n). 1.916 + aToken.mIntegerValid = false; 1.917 + 1.918 + // Time to reassemble our number. 1.919 + // Do all the math in double precision so it's truncated only once. 1.920 + double value = sign * (intPart + fracPart); 1.921 + if (gotE) { 1.922 + // Explicitly cast expSign*exponent to double to avoid issues with 1.923 + // overloaded pow() on Windows. 1.924 + value *= pow(10.0, double(expSign * exponent)); 1.925 + } else if (!gotDot) { 1.926 + // Clamp values outside of integer range. 1.927 + if (sign > 0) { 1.928 + aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX))); 1.929 + } else { 1.930 + aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN))); 1.931 + } 1.932 + aToken.mIntegerValid = true; 1.933 + } 1.934 + 1.935 + nsString& ident = aToken.mIdent; 1.936 + 1.937 + // Check for Dimension and Percentage tokens. 1.938 + if (c >= 0) { 1.939 + if (StartsIdent(c, Peek(1))) { 1.940 + if (GatherText(IS_IDCHAR, ident)) { 1.941 + type = eCSSToken_Dimension; 1.942 + } 1.943 + } else if (c == '%') { 1.944 + Advance(); 1.945 + type = eCSSToken_Percentage; 1.946 + value = value / 100.0f; 1.947 + aToken.mIntegerValid = false; 1.948 + } 1.949 + } 1.950 + aToken.mNumber = value; 1.951 + aToken.mType = type; 1.952 + return true; 1.953 +} 1.954 + 1.955 +/** 1.956 + * Scan a string constant ('foo' or "foo"). Will always produce 1.957 + * either a String or a Bad_String token; the latter occurs when the 1.958 + * close quote is missing. Always returns true (for convenience in Next()). 1.959 + */ 1.960 +bool 1.961 +nsCSSScanner::ScanString(nsCSSToken& aToken) 1.962 +{ 1.963 + int32_t aStop = Peek(); 1.964 + MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called"); 1.965 + aToken.mType = eCSSToken_String; 1.966 + aToken.mSymbol = char16_t(aStop); // Remember how it's quoted. 1.967 + Advance(); 1.968 + 1.969 + for (;;) { 1.970 + GatherText(IS_STRING, aToken.mIdent); 1.971 + 1.972 + int32_t ch = Peek(); 1.973 + if (ch == -1) { 1.974 + AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote : 1.975 + eEOFCharacters_SingleQuote); 1.976 + break; // EOF ends a string token with no error. 1.977 + } 1.978 + if (ch == aStop) { 1.979 + Advance(); 1.980 + break; 1.981 + } 1.982 + // Both " and ' are excluded from IS_STRING. 1.983 + if (ch == '"' || ch == '\'') { 1.984 + aToken.mIdent.Append(ch); 1.985 + Advance(); 1.986 + continue; 1.987 + } 1.988 + 1.989 + mSeenBadToken = true; 1.990 + aToken.mType = eCSSToken_Bad_String; 1.991 + mReporter->ReportUnexpected("SEUnterminatedString", aToken); 1.992 + break; 1.993 + } 1.994 + return true; 1.995 +} 1.996 + 1.997 +/** 1.998 + * Scan a unicode-range token. These match the regular expression 1.999 + * 1.1000 + * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? 1.1001 + * 1.1002 + * However, some such tokens are "invalid". There are three valid forms: 1.1003 + * 1.1004 + * u+[0-9a-f]{x} 1 <= x <= 6 1.1005 + * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6 1.1006 + * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6 1.1007 + * 1.1008 + * All unicode-range tokens have their text recorded in mIdent; valid ones 1.1009 + * are also decoded into mInteger and mInteger2, and mIntegerValid is set. 1.1010 + * Note that this does not validate the numeric range, only the syntactic 1.1011 + * form. 1.1012 + */ 1.1013 +bool 1.1014 +nsCSSScanner::ScanURange(nsCSSToken& aResult) 1.1015 +{ 1.1016 + int32_t intro1 = Peek(); 1.1017 + int32_t intro2 = Peek(1); 1.1018 + int32_t ch = Peek(2); 1.1019 + 1.1020 + MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') && 1.1021 + intro2 == '+' && 1.1022 + (IsHexDigit(ch) || ch == '?'), 1.1023 + "should not have been called"); 1.1024 + 1.1025 + aResult.mIdent.Append(intro1); 1.1026 + aResult.mIdent.Append(intro2); 1.1027 + Advance(2); 1.1028 + 1.1029 + bool valid = true; 1.1030 + bool haveQues = false; 1.1031 + uint32_t low = 0; 1.1032 + uint32_t high = 0; 1.1033 + int i = 0; 1.1034 + 1.1035 + do { 1.1036 + aResult.mIdent.Append(ch); 1.1037 + if (IsHexDigit(ch)) { 1.1038 + if (haveQues) { 1.1039 + valid = false; // All question marks should be at the end. 1.1040 + } 1.1041 + low = low*16 + HexDigitValue(ch); 1.1042 + high = high*16 + HexDigitValue(ch); 1.1043 + } else { 1.1044 + haveQues = true; 1.1045 + low = low*16 + 0x0; 1.1046 + high = high*16 + 0xF; 1.1047 + } 1.1048 + 1.1049 + i++; 1.1050 + Advance(); 1.1051 + ch = Peek(); 1.1052 + } while (i < 6 && (IsHexDigit(ch) || ch == '?')); 1.1053 + 1.1054 + if (ch == '-' && IsHexDigit(Peek(1))) { 1.1055 + if (haveQues) { 1.1056 + valid = false; 1.1057 + } 1.1058 + 1.1059 + aResult.mIdent.Append(ch); 1.1060 + Advance(); 1.1061 + ch = Peek(); 1.1062 + high = 0; 1.1063 + i = 0; 1.1064 + do { 1.1065 + aResult.mIdent.Append(ch); 1.1066 + high = high*16 + HexDigitValue(ch); 1.1067 + 1.1068 + i++; 1.1069 + Advance(); 1.1070 + ch = Peek(); 1.1071 + } while (i < 6 && IsHexDigit(ch)); 1.1072 + } 1.1073 + 1.1074 + aResult.mInteger = low; 1.1075 + aResult.mInteger2 = high; 1.1076 + aResult.mIntegerValid = valid; 1.1077 + aResult.mType = eCSSToken_URange; 1.1078 + return true; 1.1079 +} 1.1080 + 1.1081 +#ifdef DEBUG 1.1082 +/* static */ void 1.1083 +nsCSSScanner::AssertEOFCharactersValid(uint32_t c) 1.1084 +{ 1.1085 + MOZ_ASSERT(c == eEOFCharacters_None || 1.1086 + c == eEOFCharacters_ReplacementChar || 1.1087 + c == eEOFCharacters_Slash || 1.1088 + c == (eEOFCharacters_Asterisk | 1.1089 + eEOFCharacters_Slash) || 1.1090 + c == eEOFCharacters_DoubleQuote || 1.1091 + c == eEOFCharacters_SingleQuote || 1.1092 + c == (eEOFCharacters_DropBackslash | 1.1093 + eEOFCharacters_DoubleQuote) || 1.1094 + c == (eEOFCharacters_DropBackslash | 1.1095 + eEOFCharacters_SingleQuote) || 1.1096 + c == eEOFCharacters_CloseParen || 1.1097 + c == (eEOFCharacters_ReplacementChar | 1.1098 + eEOFCharacters_CloseParen) || 1.1099 + c == (eEOFCharacters_DoubleQuote | 1.1100 + eEOFCharacters_CloseParen) || 1.1101 + c == (eEOFCharacters_SingleQuote | 1.1102 + eEOFCharacters_CloseParen) || 1.1103 + c == (eEOFCharacters_DropBackslash | 1.1104 + eEOFCharacters_DoubleQuote | 1.1105 + eEOFCharacters_CloseParen) || 1.1106 + c == (eEOFCharacters_DropBackslash | 1.1107 + eEOFCharacters_SingleQuote | 1.1108 + eEOFCharacters_CloseParen), 1.1109 + "invalid EOFCharacters value"); 1.1110 +} 1.1111 +#endif 1.1112 + 1.1113 +void 1.1114 +nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters) 1.1115 +{ 1.1116 + mEOFCharacters = EOFCharacters(aEOFCharacters); 1.1117 +} 1.1118 + 1.1119 +void 1.1120 +nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters) 1.1121 +{ 1.1122 + mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters); 1.1123 +} 1.1124 + 1.1125 +static const char16_t kImpliedEOFCharacters[] = { 1.1126 + UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0 1.1127 +}; 1.1128 + 1.1129 +/* static */ void 1.1130 +nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, 1.1131 + nsAString& aResult) 1.1132 +{ 1.1133 + // First, ignore eEOFCharacters_DropBackslash. 1.1134 + uint32_t c = aEOFCharacters >> 1; 1.1135 + 1.1136 + // All of the remaining EOFCharacters bits represent appended characters, 1.1137 + // and the bits are in the order that they need appending. 1.1138 + for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) { 1.1139 + if (c & 1) { 1.1140 + aResult.Append(*p); 1.1141 + } 1.1142 + } 1.1143 + 1.1144 + MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters"); 1.1145 +} 1.1146 + 1.1147 +/** 1.1148 + * Consume the part of an URL token after the initial 'url('. Caller 1.1149 + * is assumed to have consumed 'url(' already. Will always produce 1.1150 + * either an URL or a Bad_URL token. 1.1151 + * 1.1152 + * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies 1.1153 + * the special lexical rules for URL tokens in a nonstandard context. 1.1154 + */ 1.1155 +bool 1.1156 +nsCSSScanner::NextURL(nsCSSToken& aToken) 1.1157 +{ 1.1158 + SkipWhitespace(); 1.1159 + 1.1160 + int32_t ch = Peek(); 1.1161 + if (ch < 0) { 1.1162 + return false; 1.1163 + } 1.1164 + 1.1165 + // aToken.mIdent may be "url" at this point; clear that out 1.1166 + aToken.mIdent.Truncate(); 1.1167 + 1.1168 + // Do we have a string? 1.1169 + if (ch == '"' || ch == '\'') { 1.1170 + ScanString(aToken); 1.1171 + if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) { 1.1172 + aToken.mType = eCSSToken_Bad_URL; 1.1173 + return true; 1.1174 + } 1.1175 + MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type"); 1.1176 + 1.1177 + } else { 1.1178 + // Otherwise, this is the start of a non-quoted url (which may be empty). 1.1179 + aToken.mSymbol = char16_t(0); 1.1180 + GatherText(IS_URL_CHAR, aToken.mIdent); 1.1181 + } 1.1182 + 1.1183 + // Consume trailing whitespace and then look for a close parenthesis. 1.1184 + SkipWhitespace(); 1.1185 + ch = Peek(); 1.1186 + if (MOZ_LIKELY(ch < 0 || ch == ')')) { 1.1187 + Advance(); 1.1188 + aToken.mType = eCSSToken_URL; 1.1189 + if (ch < 0) { 1.1190 + AddEOFCharacters(eEOFCharacters_CloseParen); 1.1191 + } 1.1192 + } else { 1.1193 + mSeenBadToken = true; 1.1194 + aToken.mType = eCSSToken_Bad_URL; 1.1195 + } 1.1196 + return true; 1.1197 +} 1.1198 + 1.1199 +/** 1.1200 + * Primary scanner entry point. Consume one token and fill in 1.1201 + * |aToken| accordingly. Will skip over any number of comments first, 1.1202 + * and will also skip over rather than return whitespace tokens if 1.1203 + * |aSkipWS| is true. 1.1204 + * 1.1205 + * Returns true if it successfully consumed a token, false if EOF has 1.1206 + * been reached. Will always advance the current read position by at 1.1207 + * least one character unless called when already at EOF. 1.1208 + */ 1.1209 +bool 1.1210 +nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS) 1.1211 +{ 1.1212 + int32_t ch; 1.1213 + 1.1214 + // do this here so we don't have to do it in dozens of other places 1.1215 + aToken.mIdent.Truncate(); 1.1216 + aToken.mType = eCSSToken_Symbol; 1.1217 + 1.1218 + for (;;) { 1.1219 + // Consume any number of comments, and possibly also whitespace tokens, 1.1220 + // in between other tokens. 1.1221 + mTokenOffset = mOffset; 1.1222 + mTokenLineOffset = mLineOffset; 1.1223 + mTokenLineNumber = mLineNumber; 1.1224 + 1.1225 + ch = Peek(); 1.1226 + if (IsWhitespace(ch)) { 1.1227 + SkipWhitespace(); 1.1228 + if (!aSkipWS) { 1.1229 + aToken.mType = eCSSToken_Whitespace; 1.1230 + return true; 1.1231 + } 1.1232 + continue; // start again at the beginning 1.1233 + } 1.1234 + if (ch == '/' && !IsSVGMode() && Peek(1) == '*') { 1.1235 + // FIXME: Editor wants comments to be preserved (bug 60290). 1.1236 + SkipComment(); 1.1237 + continue; // start again at the beginning 1.1238 + } 1.1239 + break; 1.1240 + } 1.1241 + 1.1242 + // EOF 1.1243 + if (ch < 0) { 1.1244 + return false; 1.1245 + } 1.1246 + 1.1247 + // 'u' could be UNICODE-RANGE or an identifier-family token 1.1248 + if (ch == 'u' || ch == 'U') { 1.1249 + int32_t c2 = Peek(1); 1.1250 + int32_t c3 = Peek(2); 1.1251 + if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) { 1.1252 + return ScanURange(aToken); 1.1253 + } 1.1254 + return ScanIdent(aToken); 1.1255 + } 1.1256 + 1.1257 + // identifier family 1.1258 + if (IsIdentStart(ch)) { 1.1259 + return ScanIdent(aToken); 1.1260 + } 1.1261 + 1.1262 + // number family 1.1263 + if (IsDigit(ch)) { 1.1264 + return ScanNumber(aToken); 1.1265 + } 1.1266 + 1.1267 + if (ch == '.' && IsDigit(Peek(1))) { 1.1268 + return ScanNumber(aToken); 1.1269 + } 1.1270 + 1.1271 + if (ch == '+') { 1.1272 + int32_t c2 = Peek(1); 1.1273 + if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) { 1.1274 + return ScanNumber(aToken); 1.1275 + } 1.1276 + } 1.1277 + 1.1278 + // '-' can start an identifier-family token, a number-family token, 1.1279 + // or an HTML-comment 1.1280 + if (ch == '-') { 1.1281 + int32_t c2 = Peek(1); 1.1282 + int32_t c3 = Peek(2); 1.1283 + if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) { 1.1284 + return ScanIdent(aToken); 1.1285 + } 1.1286 + if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) { 1.1287 + return ScanNumber(aToken); 1.1288 + } 1.1289 + if (c2 == '-' && c3 == '>') { 1.1290 + Advance(3); 1.1291 + aToken.mType = eCSSToken_HTMLComment; 1.1292 + aToken.mIdent.AssignLiteral("-->"); 1.1293 + return true; 1.1294 + } 1.1295 + } 1.1296 + 1.1297 + // the other HTML-comment token 1.1298 + if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') { 1.1299 + Advance(4); 1.1300 + aToken.mType = eCSSToken_HTMLComment; 1.1301 + aToken.mIdent.AssignLiteral("<!--"); 1.1302 + return true; 1.1303 + } 1.1304 + 1.1305 + // AT_KEYWORD 1.1306 + if (ch == '@') { 1.1307 + return ScanAtKeyword(aToken); 1.1308 + } 1.1309 + 1.1310 + // HASH 1.1311 + if (ch == '#') { 1.1312 + return ScanHash(aToken); 1.1313 + } 1.1314 + 1.1315 + // STRING 1.1316 + if (ch == '"' || ch == '\'') { 1.1317 + return ScanString(aToken); 1.1318 + } 1.1319 + 1.1320 + // Match operators: ~= |= ^= $= *= 1.1321 + nsCSSTokenType opType = MatchOperatorType(ch); 1.1322 + if (opType != eCSSToken_Symbol && Peek(1) == '=') { 1.1323 + aToken.mType = opType; 1.1324 + Advance(2); 1.1325 + return true; 1.1326 + } 1.1327 + 1.1328 + // Otherwise, a symbol (DELIM). 1.1329 + aToken.mSymbol = ch; 1.1330 + Advance(); 1.1331 + return true; 1.1332 +} 1.1333 + 1.1334 +/* nsCSSGridTemplateAreaScanner methods. */ 1.1335 + 1.1336 +nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer) 1.1337 + : mBuffer(aBuffer.BeginReading()) 1.1338 + , mOffset(0) 1.1339 + , mCount(aBuffer.Length()) 1.1340 +{ 1.1341 +} 1.1342 + 1.1343 +bool 1.1344 +nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult) 1.1345 +{ 1.1346 + int32_t ch; 1.1347 + // Skip whitespace 1.1348 + do { 1.1349 + if (mOffset >= mCount) { 1.1350 + return false; 1.1351 + } 1.1352 + ch = mBuffer[mOffset]; 1.1353 + mOffset++; 1.1354 + } while (IsWhitespace(ch)); 1.1355 + 1.1356 + if (IsOpenCharClass(ch, IS_IDCHAR)) { 1.1357 + // Named cell token 1.1358 + uint32_t start = mOffset - 1; // offset of |ch| 1.1359 + while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) { 1.1360 + mOffset++; 1.1361 + } 1.1362 + aTokenResult.mName.Assign(&mBuffer[start], mOffset - start); 1.1363 + aTokenResult.isTrash = false; 1.1364 + } else if (ch == '.') { 1.1365 + // Null cell token 1.1366 + aTokenResult.mName.Truncate(); 1.1367 + aTokenResult.isTrash = false; 1.1368 + } else { 1.1369 + // Trash token 1.1370 + aTokenResult.isTrash = true; 1.1371 + } 1.1372 + return true; 1.1373 +}