layout/style/nsCSSScanner.cpp

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
this includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6
michael@0 7 /* tokenization of CSS style sheets */
michael@0 8
michael@0 9 #include "nsCSSScanner.h"
michael@0 10 #include "nsStyleUtil.h"
michael@0 11 #include "nsISupportsImpl.h"
michael@0 12 #include "mozilla/ArrayUtils.h"
michael@0 13 #include "mozilla/css/ErrorReporter.h"
michael@0 14 #include "mozilla/Likely.h"
michael@0 15 #include <algorithm>
michael@0 16
michael@0 17 /* Character class tables and related helper functions. */
michael@0 18
// Character class flags. Each gLexTable entry is the bitwise OR of the
// classes that the corresponding ASCII character belongs to.
static const uint8_t IS_HEX_DIGIT = 1 << 0;
static const uint8_t IS_IDSTART   = 1 << 1;
static const uint8_t IS_IDCHAR    = 1 << 2;
static const uint8_t IS_URL_CHAR  = 1 << 3;
static const uint8_t IS_HSPACE    = 1 << 4;
static const uint8_t IS_VSPACE    = 1 << 5;
// Any whitespace, horizontal or vertical.
static const uint8_t IS_SPACE     = IS_HSPACE | IS_VSPACE;
static const uint8_t IS_STRING    = 1 << 6;
michael@0 27
michael@0 28 #define H IS_HSPACE
michael@0 29 #define V IS_VSPACE
michael@0 30 #define I IS_IDCHAR
michael@0 31 #define J IS_IDSTART
michael@0 32 #define U IS_URL_CHAR
michael@0 33 #define S IS_STRING
michael@0 34 #define X IS_HEX_DIGIT
michael@0 35
michael@0 36 #define SH S|H
michael@0 37 #define SU S|U
michael@0 38 #define SUI S|U|I
michael@0 39 #define SUIJ S|U|I|J
michael@0 40 #define SUIX S|U|I|X
michael@0 41 #define SUIJX S|U|I|J|X
michael@0 42
michael@0 43 static const uint8_t gLexTable[] = {
michael@0 44 // 00 01 02 03 04 05 06 07
michael@0 45 0, S, S, S, S, S, S, S,
michael@0 46 // 08 TAB LF 0B FF CR 0E 0F
michael@0 47 S, SH, V, S, V, V, S, S,
michael@0 48 // 10 11 12 13 14 15 16 17
michael@0 49 S, S, S, S, S, S, S, S,
michael@0 50 // 18 19 1A 1B 1C 1D 1E 1F
michael@0 51 S, S, S, S, S, S, S, S,
michael@0 52 //SPC ! " # $ % & '
michael@0 53 SH, SU, 0, SU, SU, SU, SU, 0,
michael@0 54 // ( ) * + , - . /
michael@0 55 S, S, SU, SU, SU, SUI, SU, SU,
michael@0 56 // 0 1 2 3 4 5 6 7
michael@0 57 SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
michael@0 58 // 8 9 : ; < = > ?
michael@0 59 SUIX, SUIX, SU, SU, SU, SU, SU, SU,
michael@0 60 // @ A B C D E F G
michael@0 61 SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
michael@0 62 // H I J K L M N O
michael@0 63 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
michael@0 64 // P Q R S T U V W
michael@0 65 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
michael@0 66 // X Y Z [ \ ] ^ _
michael@0 67 SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
michael@0 68 // ` a b c d e f g
michael@0 69 SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
michael@0 70 // h i j k l m n o
michael@0 71 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
michael@0 72 // p q r s t u v w
michael@0 73 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
michael@0 74 // x y z { | } ~ 7F
michael@0 75 SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
michael@0 76 };
michael@0 77
michael@0 78 static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
michael@0 79 "gLexTable expected to cover all 128 ASCII characters");
michael@0 80
michael@0 81 #undef I
michael@0 82 #undef J
michael@0 83 #undef U
michael@0 84 #undef S
michael@0 85 #undef X
michael@0 86 #undef SH
michael@0 87 #undef SU
michael@0 88 #undef SUI
michael@0 89 #undef SUIJ
michael@0 90 #undef SUIX
michael@0 91 #undef SUIJX
michael@0 92
michael@0 93 /**
michael@0 94 * True if 'ch' is in character class 'cls', which should be one of
michael@0 95 * the constants above or some combination of them. All characters
michael@0 96 * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
michael@0 97 */
michael@0 98 static inline bool
michael@0 99 IsOpenCharClass(int32_t ch, uint8_t cls) {
michael@0 100 return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
michael@0 101 }
michael@0 102
michael@0 103 /**
michael@0 104 * True if 'ch' is in character class 'cls', which should be one of
michael@0 105 * the constants above or some combination of them. No characters
michael@0 106 * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
michael@0 107 */
michael@0 108 static inline bool
michael@0 109 IsClosedCharClass(int32_t ch, uint8_t cls) {
michael@0 110 return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
michael@0 111 }
michael@0 112
michael@0 113 /**
michael@0 114 * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
michael@0 115 * TAB, LF, FF, CR, or SPC.
michael@0 116 */
michael@0 117 static inline bool
michael@0 118 IsWhitespace(int32_t ch) {
michael@0 119 return IsClosedCharClass(ch, IS_SPACE);
michael@0 120 }
michael@0 121
michael@0 122 /**
michael@0 123 * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
michael@0 124 */
michael@0 125 static inline bool
michael@0 126 IsHorzSpace(int32_t ch) {
michael@0 127 return IsClosedCharClass(ch, IS_HSPACE);
michael@0 128 }
michael@0 129
michael@0 130 /**
michael@0 131 * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
michael@0 132 * whitespace requires special handling when consumed, see AdvanceLine.
michael@0 133 */
michael@0 134 static inline bool
michael@0 135 IsVertSpace(int32_t ch) {
michael@0 136 return IsClosedCharClass(ch, IS_VSPACE);
michael@0 137 }
michael@0 138
michael@0 139 /**
michael@0 140 * True if 'ch' is a character that can appear in the middle of an identifier.
michael@0 141 * This includes U+0000 since it is handled as U+FFFD, but for purposes of
michael@0 142 * GatherText it should not be included in IsOpenCharClass.
michael@0 143 */
michael@0 144 static inline bool
michael@0 145 IsIdentChar(int32_t ch) {
michael@0 146 return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
michael@0 147 }
michael@0 148
michael@0 149 /**
michael@0 150 * True if 'ch' is a character that by itself begins an identifier.
michael@0 151 * This includes U+0000 since it is handled as U+FFFD, but for purposes of
michael@0 152 * GatherText it should not be included in IsOpenCharClass.
michael@0 153 * (This is a subset of IsIdentChar.)
michael@0 154 */
michael@0 155 static inline bool
michael@0 156 IsIdentStart(int32_t ch) {
michael@0 157 return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
michael@0 158 }
michael@0 159
michael@0 160 /**
michael@0 161 * True if the two-character sequence aFirstChar+aSecondChar begins an
michael@0 162 * identifier.
michael@0 163 */
michael@0 164 static inline bool
michael@0 165 StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
michael@0 166 {
michael@0 167 return IsIdentStart(aFirstChar) ||
michael@0 168 (aFirstChar == '-' && IsIdentStart(aSecondChar));
michael@0 169 }
michael@0 170
/**
 * True if 'ch' is one of the ASCII decimal digits '0' through '9'.
 */
static inline bool
IsDigit(int32_t ch) {
  if (ch < '0') {
    return false;
  }
  return ch <= '9';
}
michael@0 178
michael@0 179 /**
michael@0 180 * True if 'ch' is a hexadecimal digit.
michael@0 181 */
michael@0 182 static inline bool
michael@0 183 IsHexDigit(int32_t ch) {
michael@0 184 return IsClosedCharClass(ch, IS_HEX_DIGIT);
michael@0 185 }
michael@0 186
/**
 * Numeric value of the decimal digit 'ch'. The caller must have
 * established that 'ch' is a decimal digit; no check is made here.
 */
static inline uint32_t
DecimalDigitValue(int32_t ch)
{
  const int32_t offsetFromZero = ch - '0';
  return offsetFromZero;
}
michael@0 195
michael@0 196 /**
michael@0 197 * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
michael@0 198 */
michael@0 199 static inline uint32_t
michael@0 200 HexDigitValue(int32_t ch)
michael@0 201 {
michael@0 202 if (IsDigit(ch)) {
michael@0 203 return DecimalDigitValue(ch);
michael@0 204 } else {
michael@0 205 // Note: c&7 just keeps the low three bits which causes
michael@0 206 // upper and lower case alphabetics to both yield their
michael@0 207 // "relative to 10" value for computing the hex value.
michael@0 208 return (ch & 0x7) + 9;
michael@0 209 }
michael@0 210 }
michael@0 211
michael@0 212 /**
michael@0 213 * If 'ch' can be the first character of a two-character match operator
michael@0 214 * token, return the token type code for that token, otherwise return
michael@0 215 * eCSSToken_Symbol to indicate that it can't.
michael@0 216 */
michael@0 217 static inline nsCSSTokenType
michael@0 218 MatchOperatorType(int32_t ch)
michael@0 219 {
michael@0 220 switch (ch) {
michael@0 221 case '~': return eCSSToken_Includes;
michael@0 222 case '|': return eCSSToken_Dashmatch;
michael@0 223 case '^': return eCSSToken_Beginsmatch;
michael@0 224 case '$': return eCSSToken_Endsmatch;
michael@0 225 case '*': return eCSSToken_Containsmatch;
michael@0 226 default: return eCSSToken_Symbol;
michael@0 227 }
michael@0 228 }
michael@0 229
michael@0 230 /* Out-of-line nsCSSToken methods. */
michael@0 231
michael@0 232 /**
michael@0 233 * Append the textual representation of |this| to |aBuffer|.
michael@0 234 */
michael@0 235 void
michael@0 236 nsCSSToken::AppendToString(nsString& aBuffer) const
michael@0 237 {
michael@0 238 switch (mType) {
michael@0 239 case eCSSToken_Ident:
michael@0 240 nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
michael@0 241 break;
michael@0 242
michael@0 243 case eCSSToken_AtKeyword:
michael@0 244 aBuffer.Append('@');
michael@0 245 nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
michael@0 246 break;
michael@0 247
michael@0 248 case eCSSToken_ID:
michael@0 249 case eCSSToken_Hash:
michael@0 250 aBuffer.Append('#');
michael@0 251 nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
michael@0 252 break;
michael@0 253
michael@0 254 case eCSSToken_Function:
michael@0 255 nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
michael@0 256 aBuffer.Append('(');
michael@0 257 break;
michael@0 258
michael@0 259 case eCSSToken_URL:
michael@0 260 case eCSSToken_Bad_URL:
michael@0 261 aBuffer.AppendLiteral("url(");
michael@0 262 if (mSymbol != char16_t(0)) {
michael@0 263 nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
michael@0 264 } else {
michael@0 265 aBuffer.Append(mIdent);
michael@0 266 }
michael@0 267 if (mType == eCSSToken_URL) {
michael@0 268 aBuffer.Append(char16_t(')'));
michael@0 269 }
michael@0 270 break;
michael@0 271
michael@0 272 case eCSSToken_Number:
michael@0 273 if (mIntegerValid) {
michael@0 274 aBuffer.AppendInt(mInteger, 10);
michael@0 275 } else {
michael@0 276 aBuffer.AppendFloat(mNumber);
michael@0 277 }
michael@0 278 break;
michael@0 279
michael@0 280 case eCSSToken_Percentage:
michael@0 281 aBuffer.AppendFloat(mNumber * 100.0f);
michael@0 282 aBuffer.Append(char16_t('%'));
michael@0 283 break;
michael@0 284
michael@0 285 case eCSSToken_Dimension:
michael@0 286 if (mIntegerValid) {
michael@0 287 aBuffer.AppendInt(mInteger, 10);
michael@0 288 } else {
michael@0 289 aBuffer.AppendFloat(mNumber);
michael@0 290 }
michael@0 291 nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
michael@0 292 break;
michael@0 293
michael@0 294 case eCSSToken_Bad_String:
michael@0 295 nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
michael@0 296 // remove the trailing quote character
michael@0 297 aBuffer.Truncate(aBuffer.Length() - 1);
michael@0 298 break;
michael@0 299
michael@0 300 case eCSSToken_String:
michael@0 301 nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
michael@0 302 break;
michael@0 303
michael@0 304 case eCSSToken_Symbol:
michael@0 305 aBuffer.Append(mSymbol);
michael@0 306 break;
michael@0 307
michael@0 308 case eCSSToken_Whitespace:
michael@0 309 aBuffer.Append(' ');
michael@0 310 break;
michael@0 311
michael@0 312 case eCSSToken_HTMLComment:
michael@0 313 case eCSSToken_URange:
michael@0 314 aBuffer.Append(mIdent);
michael@0 315 break;
michael@0 316
michael@0 317 case eCSSToken_Includes:
michael@0 318 aBuffer.AppendLiteral("~=");
michael@0 319 break;
michael@0 320 case eCSSToken_Dashmatch:
michael@0 321 aBuffer.AppendLiteral("|=");
michael@0 322 break;
michael@0 323 case eCSSToken_Beginsmatch:
michael@0 324 aBuffer.AppendLiteral("^=");
michael@0 325 break;
michael@0 326 case eCSSToken_Endsmatch:
michael@0 327 aBuffer.AppendLiteral("$=");
michael@0 328 break;
michael@0 329 case eCSSToken_Containsmatch:
michael@0 330 aBuffer.AppendLiteral("*=");
michael@0 331 break;
michael@0 332
michael@0 333 default:
michael@0 334 NS_ERROR("invalid token type");
michael@0 335 break;
michael@0 336 }
michael@0 337 }
michael@0 338
michael@0 339 /* nsCSSScanner methods. */
michael@0 340
michael@0 341 nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
michael@0 342 : mBuffer(aBuffer.BeginReading())
michael@0 343 , mOffset(0)
michael@0 344 , mCount(aBuffer.Length())
michael@0 345 , mLineNumber(aLineNumber)
michael@0 346 , mLineOffset(0)
michael@0 347 , mTokenLineNumber(aLineNumber)
michael@0 348 , mTokenLineOffset(0)
michael@0 349 , mTokenOffset(0)
michael@0 350 , mRecordStartOffset(0)
michael@0 351 , mEOFCharacters(eEOFCharacters_None)
michael@0 352 , mReporter(nullptr)
michael@0 353 , mSVGMode(false)
michael@0 354 , mRecording(false)
michael@0 355 , mSeenBadToken(false)
michael@0 356 , mSeenVariableReference(false)
michael@0 357 {
michael@0 358 MOZ_COUNT_CTOR(nsCSSScanner);
michael@0 359 }
michael@0 360
michael@0 361 nsCSSScanner::~nsCSSScanner()
michael@0 362 {
michael@0 363 MOZ_COUNT_DTOR(nsCSSScanner);
michael@0 364 }
michael@0 365
michael@0 366 void
michael@0 367 nsCSSScanner::StartRecording()
michael@0 368 {
michael@0 369 MOZ_ASSERT(!mRecording, "already started recording");
michael@0 370 mRecording = true;
michael@0 371 mRecordStartOffset = mOffset;
michael@0 372 }
michael@0 373
michael@0 374 void
michael@0 375 nsCSSScanner::StopRecording()
michael@0 376 {
michael@0 377 MOZ_ASSERT(mRecording, "haven't started recording");
michael@0 378 mRecording = false;
michael@0 379 }
michael@0 380
michael@0 381 void
michael@0 382 nsCSSScanner::StopRecording(nsString& aBuffer)
michael@0 383 {
michael@0 384 MOZ_ASSERT(mRecording, "haven't started recording");
michael@0 385 mRecording = false;
michael@0 386 aBuffer.Append(mBuffer + mRecordStartOffset,
michael@0 387 mOffset - mRecordStartOffset);
michael@0 388 }
michael@0 389
michael@0 390 uint32_t
michael@0 391 nsCSSScanner::RecordingLength() const
michael@0 392 {
michael@0 393 MOZ_ASSERT(mRecording, "haven't started recording");
michael@0 394 return mOffset - mRecordStartOffset;
michael@0 395 }
michael@0 396
#ifdef DEBUG
// Debug-only accessor: true between StartRecording() and StopRecording().
bool
nsCSSScanner::IsRecording() const
{
  return mRecording;
}
#endif
michael@0 404
michael@0 405 nsDependentSubstring
michael@0 406 nsCSSScanner::GetCurrentLine() const
michael@0 407 {
michael@0 408 uint32_t end = mTokenOffset;
michael@0 409 while (end < mCount && !IsVertSpace(mBuffer[end])) {
michael@0 410 end++;
michael@0 411 }
michael@0 412 return nsDependentSubstring(mBuffer + mTokenLineOffset,
michael@0 413 mBuffer + end);
michael@0 414 }
michael@0 415
michael@0 416 /**
michael@0 417 * Return the raw UTF-16 code unit at position |mOffset + n| within
michael@0 418 * the read buffer. If that is beyond the end of the buffer, returns
michael@0 419 * -1 to indicate end of input.
michael@0 420 */
michael@0 421 inline int32_t
michael@0 422 nsCSSScanner::Peek(uint32_t n)
michael@0 423 {
michael@0 424 if (mOffset + n >= mCount) {
michael@0 425 return -1;
michael@0 426 }
michael@0 427 return mBuffer[mOffset + n];
michael@0 428 }
michael@0 429
michael@0 430 /**
michael@0 431 * Advance |mOffset| over |n| code units. Advance(0) is a no-op.
michael@0 432 * If |n| is greater than the distance to end of input, will silently
michael@0 433 * stop at the end. May not be used to advance over a line boundary;
michael@0 434 * AdvanceLine() must be used instead.
michael@0 435 */
michael@0 436 inline void
michael@0 437 nsCSSScanner::Advance(uint32_t n)
michael@0 438 {
michael@0 439 #ifdef DEBUG
michael@0 440 while (mOffset < mCount && n > 0) {
michael@0 441 MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
michael@0 442 "may not Advance() over a line boundary");
michael@0 443 mOffset++;
michael@0 444 n--;
michael@0 445 }
michael@0 446 #else
michael@0 447 if (mOffset + n >= mCount || mOffset + n < mOffset)
michael@0 448 mOffset = mCount;
michael@0 449 else
michael@0 450 mOffset += n;
michael@0 451 #endif
michael@0 452 }
michael@0 453
michael@0 454 /**
michael@0 455 * Advance |mOffset| over a line boundary.
michael@0 456 */
michael@0 457 void
michael@0 458 nsCSSScanner::AdvanceLine()
michael@0 459 {
michael@0 460 MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
michael@0 461 "may not AdvanceLine() over a horizontal character");
michael@0 462 // Advance over \r\n as a unit.
michael@0 463 if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount &&
michael@0 464 mBuffer[mOffset+1] == '\n')
michael@0 465 mOffset += 2;
michael@0 466 else
michael@0 467 mOffset += 1;
michael@0 468 // 0 is a magical line number meaning that we don't know (i.e., script)
michael@0 469 if (mLineNumber != 0)
michael@0 470 mLineNumber++;
michael@0 471 mLineOffset = mOffset;
michael@0 472 }
michael@0 473
michael@0 474 /**
michael@0 475 * Back up |mOffset| over |n| code units. Backup(0) is a no-op.
michael@0 476 * If |n| is greater than the distance to beginning of input, will
michael@0 477 * silently stop at the beginning. May not be used to back up over a
michael@0 478 * line boundary.
michael@0 479 */
michael@0 480 void
michael@0 481 nsCSSScanner::Backup(uint32_t n)
michael@0 482 {
michael@0 483 #ifdef DEBUG
michael@0 484 while (mOffset > 0 && n > 0) {
michael@0 485 MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
michael@0 486 "may not Backup() over a line boundary");
michael@0 487 mOffset--;
michael@0 488 n--;
michael@0 489 }
michael@0 490 #else
michael@0 491 if (mOffset < n)
michael@0 492 mOffset = 0;
michael@0 493 else
michael@0 494 mOffset -= n;
michael@0 495 #endif
michael@0 496 }
michael@0 497
michael@0 498 void
michael@0 499 nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
michael@0 500 {
michael@0 501 aState.mOffset = mOffset;
michael@0 502 aState.mLineNumber = mLineNumber;
michael@0 503 aState.mLineOffset = mLineOffset;
michael@0 504 aState.mTokenLineNumber = mTokenLineNumber;
michael@0 505 aState.mTokenLineOffset = mTokenLineOffset;
michael@0 506 aState.mTokenOffset = mTokenOffset;
michael@0 507 aState.mInitialized = true;
michael@0 508 }
michael@0 509
michael@0 510 void
michael@0 511 nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
michael@0 512 {
michael@0 513 MOZ_ASSERT(aState.mInitialized, "have not saved state");
michael@0 514 if (aState.mInitialized) {
michael@0 515 mOffset = aState.mOffset;
michael@0 516 mLineNumber = aState.mLineNumber;
michael@0 517 mLineOffset = aState.mLineOffset;
michael@0 518 mTokenLineNumber = aState.mTokenLineNumber;
michael@0 519 mTokenLineOffset = aState.mTokenLineOffset;
michael@0 520 mTokenOffset = aState.mTokenOffset;
michael@0 521 }
michael@0 522 }
michael@0 523
michael@0 524 /**
michael@0 525 * Skip over a sequence of whitespace characters (vertical or
michael@0 526 * horizontal) starting at the current read position.
michael@0 527 */
michael@0 528 void
michael@0 529 nsCSSScanner::SkipWhitespace()
michael@0 530 {
michael@0 531 for (;;) {
michael@0 532 int32_t ch = Peek();
michael@0 533 if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
michael@0 534 break;
michael@0 535 }
michael@0 536 if (IsVertSpace(ch)) {
michael@0 537 AdvanceLine();
michael@0 538 } else {
michael@0 539 Advance();
michael@0 540 }
michael@0 541 }
michael@0 542 }
michael@0 543
michael@0 544 /**
michael@0 545 * Skip over one CSS comment starting at the current read position.
michael@0 546 */
michael@0 547 void
michael@0 548 nsCSSScanner::SkipComment()
michael@0 549 {
michael@0 550 MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
michael@0 551 Advance(2);
michael@0 552 for (;;) {
michael@0 553 int32_t ch = Peek();
michael@0 554 if (ch < 0) {
michael@0 555 mReporter->ReportUnexpectedEOF("PECommentEOF");
michael@0 556 SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
michael@0 557 return;
michael@0 558 }
michael@0 559 if (ch == '*') {
michael@0 560 Advance();
michael@0 561 ch = Peek();
michael@0 562 if (ch < 0) {
michael@0 563 mReporter->ReportUnexpectedEOF("PECommentEOF");
michael@0 564 SetEOFCharacters(eEOFCharacters_Slash);
michael@0 565 return;
michael@0 566 }
michael@0 567 if (ch == '/') {
michael@0 568 Advance();
michael@0 569 return;
michael@0 570 }
michael@0 571 } else if (IsVertSpace(ch)) {
michael@0 572 AdvanceLine();
michael@0 573 } else {
michael@0 574 Advance();
michael@0 575 }
michael@0 576 }
michael@0 577 }
michael@0 578
michael@0 579 /**
michael@0 580 * If there is a valid escape sequence starting at the current read
michael@0 581 * position, consume it, decode it, append the result to |aOutput|,
michael@0 582 * and return true. Otherwise, consume nothing, leave |aOutput|
michael@0 583 * unmodified, and return false. If |aInString| is true, accept the
michael@0 584 * additional form of escape sequence allowed within string-like tokens.
michael@0 585 */
michael@0 586 bool
michael@0 587 nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
michael@0 588 {
michael@0 589 MOZ_ASSERT(Peek() == '\\', "should not have been called");
michael@0 590 int32_t ch = Peek(1);
michael@0 591 if (ch < 0) {
michael@0 592 // If we are in a string (or a url() containing a string), we want to drop
michael@0 593 // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
michael@0 594 // character.
michael@0 595 Advance();
michael@0 596 if (aInString) {
michael@0 597 SetEOFCharacters(eEOFCharacters_DropBackslash);
michael@0 598 } else {
michael@0 599 aOutput.Append(UCS2_REPLACEMENT_CHAR);
michael@0 600 SetEOFCharacters(eEOFCharacters_ReplacementChar);
michael@0 601 }
michael@0 602 return true;
michael@0 603 }
michael@0 604 if (IsVertSpace(ch)) {
michael@0 605 if (aInString) {
michael@0 606 // In strings (and in url() containing a string), escaped
michael@0 607 // newlines are completely removed, to allow splitting over
michael@0 608 // multiple lines.
michael@0 609 Advance();
michael@0 610 AdvanceLine();
michael@0 611 return true;
michael@0 612 }
michael@0 613 // Outside of strings, backslash followed by a newline is not an escape.
michael@0 614 return false;
michael@0 615 }
michael@0 616
michael@0 617 if (!IsHexDigit(ch)) {
michael@0 618 // "Any character (except a hexadecimal digit, linefeed, carriage
michael@0 619 // return, or form feed) can be escaped with a backslash to remove
michael@0 620 // its special meaning." -- CSS2.1 section 4.1.3
michael@0 621 Advance(2);
michael@0 622 if (ch == 0) {
michael@0 623 aOutput.Append(UCS2_REPLACEMENT_CHAR);
michael@0 624 } else {
michael@0 625 aOutput.Append(ch);
michael@0 626 }
michael@0 627 return true;
michael@0 628 }
michael@0 629
michael@0 630 // "[at most six hexadecimal digits following a backslash] stand
michael@0 631 // for the ISO 10646 character with that number, which must not be
michael@0 632 // zero. (It is undefined in CSS 2.1 what happens if a style sheet
michael@0 633 // does contain a character with Unicode codepoint zero.)"
michael@0 634 // -- CSS2.1 section 4.1.3
michael@0 635
michael@0 636 // At this point we know we have \ followed by at least one
michael@0 637 // hexadecimal digit, therefore the escape sequence is valid and we
michael@0 638 // can go ahead and consume the backslash.
michael@0 639 Advance();
michael@0 640 uint32_t val = 0;
michael@0 641 int i = 0;
michael@0 642 do {
michael@0 643 val = val * 16 + HexDigitValue(ch);
michael@0 644 i++;
michael@0 645 Advance();
michael@0 646 ch = Peek();
michael@0 647 } while (i < 6 && IsHexDigit(ch));
michael@0 648
michael@0 649 // "Interpret the hex digits as a hexadecimal number. If this number is zero,
michael@0 650 // or is greater than the maximum allowed codepoint, return U+FFFD
michael@0 651 // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
michael@0 652 if (MOZ_UNLIKELY(val == 0)) {
michael@0 653 aOutput.Append(UCS2_REPLACEMENT_CHAR);
michael@0 654 } else {
michael@0 655 AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
michael@0 656 }
michael@0 657
michael@0 658 // Consume exactly one whitespace character after a
michael@0 659 // hexadecimal escape sequence.
michael@0 660 if (IsVertSpace(ch)) {
michael@0 661 AdvanceLine();
michael@0 662 } else if (IsHorzSpace(ch)) {
michael@0 663 Advance();
michael@0 664 }
michael@0 665 return true;
michael@0 666 }
michael@0 667
michael@0 668 /**
michael@0 669 * Consume a run of "text" beginning with the current read position,
michael@0 670 * consisting of characters in the class |aClass| (which must be a
michael@0 671 * suitable argument to IsOpenCharClass) plus escape sequences.
michael@0 672 * Append the text to |aText|, after decoding escape sequences.
michael@0 673 *
michael@0 674 * Returns true if at least one character was appended to |aText|,
michael@0 675 * false otherwise.
michael@0 676 */
michael@0 677 bool
michael@0 678 nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
michael@0 679 {
michael@0 680 // This is all of the character classes currently used with
michael@0 681 // GatherText. If you have a need to use this function with a
michael@0 682 // different class, go ahead and add it.
michael@0 683 MOZ_ASSERT(aClass == IS_STRING ||
michael@0 684 aClass == IS_IDCHAR ||
michael@0 685 aClass == IS_URL_CHAR,
michael@0 686 "possibly-inappropriate character class");
michael@0 687
michael@0 688 uint32_t start = mOffset;
michael@0 689 bool inString = aClass == IS_STRING;
michael@0 690
michael@0 691 for (;;) {
michael@0 692 // Consume runs of unescaped characters in one go.
michael@0 693 uint32_t n = mOffset;
michael@0 694 while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
michael@0 695 n++;
michael@0 696 }
michael@0 697 if (n > mOffset) {
michael@0 698 aText.Append(&mBuffer[mOffset], n - mOffset);
michael@0 699 mOffset = n;
michael@0 700 }
michael@0 701 if (n == mCount) {
michael@0 702 break;
michael@0 703 }
michael@0 704
michael@0 705 int32_t ch = Peek();
michael@0 706 MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
michael@0 707 "should not have exited the inner loop");
michael@0 708 if (ch == 0) {
michael@0 709 Advance();
michael@0 710 aText.Append(UCS2_REPLACEMENT_CHAR);
michael@0 711 continue;
michael@0 712 }
michael@0 713
michael@0 714 if (ch != '\\') {
michael@0 715 break;
michael@0 716 }
michael@0 717 if (!GatherEscape(aText, inString)) {
michael@0 718 break;
michael@0 719 }
michael@0 720 }
michael@0 721
michael@0 722 return mOffset > start;
michael@0 723 }
michael@0 724
michael@0 725 /**
michael@0 726 * Scan an Ident token. This also handles Function and URL tokens,
michael@0 727 * both of which begin indistinguishably from an identifier. It can
michael@0 728 * produce a Symbol token when an apparent identifier actually led
michael@0 729 * into an invalid escape sequence.
michael@0 730 */
michael@0 731 bool
michael@0 732 nsCSSScanner::ScanIdent(nsCSSToken& aToken)
michael@0 733 {
michael@0 734 if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
michael@0 735 MOZ_ASSERT(Peek() == '\\',
michael@0 736 "unexpected IsIdentStart character that did not begin an ident");
michael@0 737 aToken.mSymbol = Peek();
michael@0 738 Advance();
michael@0 739 return true;
michael@0 740 }
michael@0 741
michael@0 742 if (MOZ_LIKELY(Peek() != '(')) {
michael@0 743 aToken.mType = eCSSToken_Ident;
michael@0 744 return true;
michael@0 745 }
michael@0 746
michael@0 747 Advance();
michael@0 748 aToken.mType = eCSSToken_Function;
michael@0 749 if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
michael@0 750 NextURL(aToken);
michael@0 751 } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
michael@0 752 mSeenVariableReference = true;
michael@0 753 }
michael@0 754 return true;
michael@0 755 }
michael@0 756
michael@0 757 /**
michael@0 758 * Scan an AtKeyword token. Also handles production of Symbol when
michael@0 759 * an '@' is not followed by an identifier.
michael@0 760 */
michael@0 761 bool
michael@0 762 nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
michael@0 763 {
michael@0 764 MOZ_ASSERT(Peek() == '@', "should not have been called");
michael@0 765
michael@0 766 // Fall back for when '@' isn't followed by an identifier.
michael@0 767 aToken.mSymbol = '@';
michael@0 768 Advance();
michael@0 769
michael@0 770 int32_t ch = Peek();
michael@0 771 if (StartsIdent(ch, Peek(1))) {
michael@0 772 if (GatherText(IS_IDCHAR, aToken.mIdent)) {
michael@0 773 aToken.mType = eCSSToken_AtKeyword;
michael@0 774 }
michael@0 775 }
michael@0 776 return true;
michael@0 777 }
michael@0 778
michael@0 779 /**
michael@0 780 * Scan a Hash token. Handles the distinction between eCSSToken_ID
michael@0 781 * and eCSSToken_Hash, and handles production of Symbol when a '#'
michael@0 782 * is not followed by identifier characters.
michael@0 783 */
michael@0 784 bool
michael@0 785 nsCSSScanner::ScanHash(nsCSSToken& aToken)
michael@0 786 {
michael@0 787 MOZ_ASSERT(Peek() == '#', "should not have been called");
michael@0 788
michael@0 789 // Fall back for when '#' isn't followed by identifier characters.
michael@0 790 aToken.mSymbol = '#';
michael@0 791 Advance();
michael@0 792
michael@0 793 int32_t ch = Peek();
michael@0 794 if (IsIdentChar(ch) || ch == '\\') {
michael@0 795 nsCSSTokenType type =
michael@0 796 StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
michael@0 797 aToken.mIdent.SetLength(0);
michael@0 798 if (GatherText(IS_IDCHAR, aToken.mIdent)) {
michael@0 799 aToken.mType = type;
michael@0 800 }
michael@0 801 }
michael@0 802
michael@0 803 return true;
michael@0 804 }
michael@0 805
michael@0 806 /**
michael@0 807 * Scan a Number, Percentage, or Dimension token (all of which begin
michael@0 808 * like a Number). Can produce a Symbol when a '.' is not followed by
michael@0 809 * digits, or when '+' or '-' are not followed by either a digit or a
michael@0 810 * '.' and then a digit. Can also produce a HTMLComment when it
michael@0 811 * encounters '-->'.
michael@0 812 */
michael@0 813 bool
michael@0 814 nsCSSScanner::ScanNumber(nsCSSToken& aToken)
michael@0 815 {
michael@0 816 int32_t c = Peek();
michael@0 817 #ifdef DEBUG
michael@0 818 {
michael@0 819 int32_t c2 = Peek(1);
michael@0 820 int32_t c3 = Peek(2);
michael@0 821 MOZ_ASSERT(IsDigit(c) ||
michael@0 822 (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
michael@0 823 (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
michael@0 824 "should not have been called");
michael@0 825 }
michael@0 826 #endif
michael@0 827
michael@0 828 // Sign of the mantissa (-1 or 1).
michael@0 829 int32_t sign = c == '-' ? -1 : 1;
michael@0 830 // Absolute value of the integer part of the mantissa. This is a double so
michael@0 831 // we don't run into overflow issues for consumers that only care about our
michael@0 832 // floating-point value while still being able to express the full int32_t
michael@0 833 // range for consumers who want integers.
michael@0 834 double intPart = 0;
michael@0 835 // Fractional part of the mantissa. This is a double so that when we convert
michael@0 836 // to float at the end we'll end up rounding to nearest float instead of
michael@0 837 // truncating down (as we would if fracPart were a float and we just
michael@0 838 // effectively lost the last several digits).
michael@0 839 double fracPart = 0;
michael@0 840 // Absolute value of the power of 10 that we should multiply by (only
michael@0 841 // relevant for numbers in scientific notation). Has to be a signed integer,
michael@0 842 // because multiplication of signed by unsigned converts the unsigned to
michael@0 843 // signed, so if we plan to actually multiply by expSign...
michael@0 844 int32_t exponent = 0;
michael@0 845 // Sign of the exponent.
michael@0 846 int32_t expSign = 1;
michael@0 847
michael@0 848 aToken.mHasSign = (c == '+' || c == '-');
michael@0 849 if (aToken.mHasSign) {
michael@0 850 Advance();
michael@0 851 c = Peek();
michael@0 852 }
michael@0 853
michael@0 854 bool gotDot = (c == '.');
michael@0 855
michael@0 856 if (!gotDot) {
michael@0 857 // Scan the integer part of the mantissa.
michael@0 858 MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
michael@0 859 do {
michael@0 860 intPart = 10*intPart + DecimalDigitValue(c);
michael@0 861 Advance();
michael@0 862 c = Peek();
michael@0 863 } while (IsDigit(c));
michael@0 864
michael@0 865 gotDot = (c == '.') && IsDigit(Peek(1));
michael@0 866 }
michael@0 867
michael@0 868 if (gotDot) {
michael@0 869 // Scan the fractional part of the mantissa.
michael@0 870 Advance();
michael@0 871 c = Peek();
michael@0 872 MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
michael@0 873 // Power of ten by which we need to divide our next digit
michael@0 874 double divisor = 10;
michael@0 875 do {
michael@0 876 fracPart += DecimalDigitValue(c) / divisor;
michael@0 877 divisor *= 10;
michael@0 878 Advance();
michael@0 879 c = Peek();
michael@0 880 } while (IsDigit(c));
michael@0 881 }
michael@0 882
michael@0 883 bool gotE = false;
michael@0 884 if (c == 'e' || c == 'E') {
michael@0 885 int32_t expSignChar = Peek(1);
michael@0 886 int32_t nextChar = Peek(2);
michael@0 887 if (IsDigit(expSignChar) ||
michael@0 888 ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
michael@0 889 gotE = true;
michael@0 890 if (expSignChar == '-') {
michael@0 891 expSign = -1;
michael@0 892 }
michael@0 893 Advance(); // consumes the E
michael@0 894 if (expSignChar == '-' || expSignChar == '+') {
michael@0 895 Advance();
michael@0 896 c = nextChar;
michael@0 897 } else {
michael@0 898 c = expSignChar;
michael@0 899 }
michael@0 900 MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
michael@0 901 do {
michael@0 902 exponent = 10*exponent + DecimalDigitValue(c);
michael@0 903 Advance();
michael@0 904 c = Peek();
michael@0 905 } while (IsDigit(c));
michael@0 906 }
michael@0 907 }
michael@0 908
michael@0 909 nsCSSTokenType type = eCSSToken_Number;
michael@0 910
michael@0 911 // Set mIntegerValid for all cases (except %, below) because we need
michael@0 912 // it for the "2n" in :nth-child(2n).
michael@0 913 aToken.mIntegerValid = false;
michael@0 914
michael@0 915 // Time to reassemble our number.
michael@0 916 // Do all the math in double precision so it's truncated only once.
michael@0 917 double value = sign * (intPart + fracPart);
michael@0 918 if (gotE) {
michael@0 919 // Explicitly cast expSign*exponent to double to avoid issues with
michael@0 920 // overloaded pow() on Windows.
michael@0 921 value *= pow(10.0, double(expSign * exponent));
michael@0 922 } else if (!gotDot) {
michael@0 923 // Clamp values outside of integer range.
michael@0 924 if (sign > 0) {
michael@0 925 aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
michael@0 926 } else {
michael@0 927 aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
michael@0 928 }
michael@0 929 aToken.mIntegerValid = true;
michael@0 930 }
michael@0 931
michael@0 932 nsString& ident = aToken.mIdent;
michael@0 933
michael@0 934 // Check for Dimension and Percentage tokens.
michael@0 935 if (c >= 0) {
michael@0 936 if (StartsIdent(c, Peek(1))) {
michael@0 937 if (GatherText(IS_IDCHAR, ident)) {
michael@0 938 type = eCSSToken_Dimension;
michael@0 939 }
michael@0 940 } else if (c == '%') {
michael@0 941 Advance();
michael@0 942 type = eCSSToken_Percentage;
michael@0 943 value = value / 100.0f;
michael@0 944 aToken.mIntegerValid = false;
michael@0 945 }
michael@0 946 }
michael@0 947 aToken.mNumber = value;
michael@0 948 aToken.mType = type;
michael@0 949 return true;
michael@0 950 }
michael@0 951
michael@0 952 /**
michael@0 953 * Scan a string constant ('foo' or "foo"). Will always produce
michael@0 954 * either a String or a Bad_String token; the latter occurs when the
michael@0 955 * close quote is missing. Always returns true (for convenience in Next()).
michael@0 956 */
michael@0 957 bool
michael@0 958 nsCSSScanner::ScanString(nsCSSToken& aToken)
michael@0 959 {
michael@0 960 int32_t aStop = Peek();
michael@0 961 MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
michael@0 962 aToken.mType = eCSSToken_String;
michael@0 963 aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
michael@0 964 Advance();
michael@0 965
michael@0 966 for (;;) {
michael@0 967 GatherText(IS_STRING, aToken.mIdent);
michael@0 968
michael@0 969 int32_t ch = Peek();
michael@0 970 if (ch == -1) {
michael@0 971 AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
michael@0 972 eEOFCharacters_SingleQuote);
michael@0 973 break; // EOF ends a string token with no error.
michael@0 974 }
michael@0 975 if (ch == aStop) {
michael@0 976 Advance();
michael@0 977 break;
michael@0 978 }
michael@0 979 // Both " and ' are excluded from IS_STRING.
michael@0 980 if (ch == '"' || ch == '\'') {
michael@0 981 aToken.mIdent.Append(ch);
michael@0 982 Advance();
michael@0 983 continue;
michael@0 984 }
michael@0 985
michael@0 986 mSeenBadToken = true;
michael@0 987 aToken.mType = eCSSToken_Bad_String;
michael@0 988 mReporter->ReportUnexpected("SEUnterminatedString", aToken);
michael@0 989 break;
michael@0 990 }
michael@0 991 return true;
michael@0 992 }
michael@0 993
michael@0 994 /**
michael@0 995 * Scan a unicode-range token. These match the regular expression
michael@0 996 *
michael@0 997 * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
michael@0 998 *
michael@0 999 * However, some such tokens are "invalid". There are three valid forms:
michael@0 1000 *
michael@0 1001 * u+[0-9a-f]{x} 1 <= x <= 6
michael@0 1002 * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
michael@0 1003 * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
michael@0 1004 *
michael@0 1005 * All unicode-range tokens have their text recorded in mIdent; valid ones
michael@0 1006 * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
michael@0 1007 * Note that this does not validate the numeric range, only the syntactic
michael@0 1008 * form.
michael@0 1009 */
michael@0 1010 bool
michael@0 1011 nsCSSScanner::ScanURange(nsCSSToken& aResult)
michael@0 1012 {
michael@0 1013 int32_t intro1 = Peek();
michael@0 1014 int32_t intro2 = Peek(1);
michael@0 1015 int32_t ch = Peek(2);
michael@0 1016
michael@0 1017 MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
michael@0 1018 intro2 == '+' &&
michael@0 1019 (IsHexDigit(ch) || ch == '?'),
michael@0 1020 "should not have been called");
michael@0 1021
michael@0 1022 aResult.mIdent.Append(intro1);
michael@0 1023 aResult.mIdent.Append(intro2);
michael@0 1024 Advance(2);
michael@0 1025
michael@0 1026 bool valid = true;
michael@0 1027 bool haveQues = false;
michael@0 1028 uint32_t low = 0;
michael@0 1029 uint32_t high = 0;
michael@0 1030 int i = 0;
michael@0 1031
michael@0 1032 do {
michael@0 1033 aResult.mIdent.Append(ch);
michael@0 1034 if (IsHexDigit(ch)) {
michael@0 1035 if (haveQues) {
michael@0 1036 valid = false; // All question marks should be at the end.
michael@0 1037 }
michael@0 1038 low = low*16 + HexDigitValue(ch);
michael@0 1039 high = high*16 + HexDigitValue(ch);
michael@0 1040 } else {
michael@0 1041 haveQues = true;
michael@0 1042 low = low*16 + 0x0;
michael@0 1043 high = high*16 + 0xF;
michael@0 1044 }
michael@0 1045
michael@0 1046 i++;
michael@0 1047 Advance();
michael@0 1048 ch = Peek();
michael@0 1049 } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
michael@0 1050
michael@0 1051 if (ch == '-' && IsHexDigit(Peek(1))) {
michael@0 1052 if (haveQues) {
michael@0 1053 valid = false;
michael@0 1054 }
michael@0 1055
michael@0 1056 aResult.mIdent.Append(ch);
michael@0 1057 Advance();
michael@0 1058 ch = Peek();
michael@0 1059 high = 0;
michael@0 1060 i = 0;
michael@0 1061 do {
michael@0 1062 aResult.mIdent.Append(ch);
michael@0 1063 high = high*16 + HexDigitValue(ch);
michael@0 1064
michael@0 1065 i++;
michael@0 1066 Advance();
michael@0 1067 ch = Peek();
michael@0 1068 } while (i < 6 && IsHexDigit(ch));
michael@0 1069 }
michael@0 1070
michael@0 1071 aResult.mInteger = low;
michael@0 1072 aResult.mInteger2 = high;
michael@0 1073 aResult.mIntegerValid = valid;
michael@0 1074 aResult.mType = eCSSToken_URange;
michael@0 1075 return true;
michael@0 1076 }
michael@0 1077
#ifdef DEBUG
/**
 * Debug-only check that |c| is one of the combinations of EOFCharacters
 * bits that this assertion accepts.  Asserts otherwise.
 */
/* static */ void
nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
{
  bool recognized;
  switch (c) {
    case eEOFCharacters_None:
    case eEOFCharacters_ReplacementChar:
    case eEOFCharacters_Slash:
    case eEOFCharacters_Asterisk | eEOFCharacters_Slash:
    case eEOFCharacters_DoubleQuote:
    case eEOFCharacters_SingleQuote:
    case eEOFCharacters_DropBackslash | eEOFCharacters_DoubleQuote:
    case eEOFCharacters_DropBackslash | eEOFCharacters_SingleQuote:
    case eEOFCharacters_CloseParen:
    case eEOFCharacters_ReplacementChar | eEOFCharacters_CloseParen:
    case eEOFCharacters_DoubleQuote | eEOFCharacters_CloseParen:
    case eEOFCharacters_SingleQuote | eEOFCharacters_CloseParen:
    case eEOFCharacters_DropBackslash |
         eEOFCharacters_DoubleQuote |
         eEOFCharacters_CloseParen:
    case eEOFCharacters_DropBackslash |
         eEOFCharacters_SingleQuote |
         eEOFCharacters_CloseParen:
      recognized = true;
      break;
    default:
      recognized = false;
      break;
  }
  MOZ_ASSERT(recognized, "invalid EOFCharacters value");
}
#endif
michael@0 1109
michael@0 1110 void
michael@0 1111 nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
michael@0 1112 {
michael@0 1113 mEOFCharacters = EOFCharacters(aEOFCharacters);
michael@0 1114 }
michael@0 1115
michael@0 1116 void
michael@0 1117 nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
michael@0 1118 {
michael@0 1119 mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters);
michael@0 1120 }
michael@0 1121
michael@0 1122 static const char16_t kImpliedEOFCharacters[] = {
michael@0 1123 UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
michael@0 1124 };
michael@0 1125
michael@0 1126 /* static */ void
michael@0 1127 nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
michael@0 1128 nsAString& aResult)
michael@0 1129 {
michael@0 1130 // First, ignore eEOFCharacters_DropBackslash.
michael@0 1131 uint32_t c = aEOFCharacters >> 1;
michael@0 1132
michael@0 1133 // All of the remaining EOFCharacters bits represent appended characters,
michael@0 1134 // and the bits are in the order that they need appending.
michael@0 1135 for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) {
michael@0 1136 if (c & 1) {
michael@0 1137 aResult.Append(*p);
michael@0 1138 }
michael@0 1139 }
michael@0 1140
michael@0 1141 MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters");
michael@0 1142 }
michael@0 1143
michael@0 1144 /**
michael@0 1145 * Consume the part of an URL token after the initial 'url('. Caller
michael@0 1146 * is assumed to have consumed 'url(' already. Will always produce
michael@0 1147 * either an URL or a Bad_URL token.
michael@0 1148 *
michael@0 1149 * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
michael@0 1150 * the special lexical rules for URL tokens in a nonstandard context.
michael@0 1151 */
michael@0 1152 bool
michael@0 1153 nsCSSScanner::NextURL(nsCSSToken& aToken)
michael@0 1154 {
michael@0 1155 SkipWhitespace();
michael@0 1156
michael@0 1157 int32_t ch = Peek();
michael@0 1158 if (ch < 0) {
michael@0 1159 return false;
michael@0 1160 }
michael@0 1161
michael@0 1162 // aToken.mIdent may be "url" at this point; clear that out
michael@0 1163 aToken.mIdent.Truncate();
michael@0 1164
michael@0 1165 // Do we have a string?
michael@0 1166 if (ch == '"' || ch == '\'') {
michael@0 1167 ScanString(aToken);
michael@0 1168 if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
michael@0 1169 aToken.mType = eCSSToken_Bad_URL;
michael@0 1170 return true;
michael@0 1171 }
michael@0 1172 MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
michael@0 1173
michael@0 1174 } else {
michael@0 1175 // Otherwise, this is the start of a non-quoted url (which may be empty).
michael@0 1176 aToken.mSymbol = char16_t(0);
michael@0 1177 GatherText(IS_URL_CHAR, aToken.mIdent);
michael@0 1178 }
michael@0 1179
michael@0 1180 // Consume trailing whitespace and then look for a close parenthesis.
michael@0 1181 SkipWhitespace();
michael@0 1182 ch = Peek();
michael@0 1183 if (MOZ_LIKELY(ch < 0 || ch == ')')) {
michael@0 1184 Advance();
michael@0 1185 aToken.mType = eCSSToken_URL;
michael@0 1186 if (ch < 0) {
michael@0 1187 AddEOFCharacters(eEOFCharacters_CloseParen);
michael@0 1188 }
michael@0 1189 } else {
michael@0 1190 mSeenBadToken = true;
michael@0 1191 aToken.mType = eCSSToken_Bad_URL;
michael@0 1192 }
michael@0 1193 return true;
michael@0 1194 }
michael@0 1195
michael@0 1196 /**
michael@0 1197 * Primary scanner entry point. Consume one token and fill in
michael@0 1198 * |aToken| accordingly. Will skip over any number of comments first,
michael@0 1199 * and will also skip over rather than return whitespace tokens if
michael@0 1200 * |aSkipWS| is true.
michael@0 1201 *
michael@0 1202 * Returns true if it successfully consumed a token, false if EOF has
michael@0 1203 * been reached. Will always advance the current read position by at
michael@0 1204 * least one character unless called when already at EOF.
michael@0 1205 */
michael@0 1206 bool
michael@0 1207 nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
michael@0 1208 {
michael@0 1209 int32_t ch;
michael@0 1210
michael@0 1211 // do this here so we don't have to do it in dozens of other places
michael@0 1212 aToken.mIdent.Truncate();
michael@0 1213 aToken.mType = eCSSToken_Symbol;
michael@0 1214
michael@0 1215 for (;;) {
michael@0 1216 // Consume any number of comments, and possibly also whitespace tokens,
michael@0 1217 // in between other tokens.
michael@0 1218 mTokenOffset = mOffset;
michael@0 1219 mTokenLineOffset = mLineOffset;
michael@0 1220 mTokenLineNumber = mLineNumber;
michael@0 1221
michael@0 1222 ch = Peek();
michael@0 1223 if (IsWhitespace(ch)) {
michael@0 1224 SkipWhitespace();
michael@0 1225 if (!aSkipWS) {
michael@0 1226 aToken.mType = eCSSToken_Whitespace;
michael@0 1227 return true;
michael@0 1228 }
michael@0 1229 continue; // start again at the beginning
michael@0 1230 }
michael@0 1231 if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
michael@0 1232 // FIXME: Editor wants comments to be preserved (bug 60290).
michael@0 1233 SkipComment();
michael@0 1234 continue; // start again at the beginning
michael@0 1235 }
michael@0 1236 break;
michael@0 1237 }
michael@0 1238
michael@0 1239 // EOF
michael@0 1240 if (ch < 0) {
michael@0 1241 return false;
michael@0 1242 }
michael@0 1243
michael@0 1244 // 'u' could be UNICODE-RANGE or an identifier-family token
michael@0 1245 if (ch == 'u' || ch == 'U') {
michael@0 1246 int32_t c2 = Peek(1);
michael@0 1247 int32_t c3 = Peek(2);
michael@0 1248 if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
michael@0 1249 return ScanURange(aToken);
michael@0 1250 }
michael@0 1251 return ScanIdent(aToken);
michael@0 1252 }
michael@0 1253
michael@0 1254 // identifier family
michael@0 1255 if (IsIdentStart(ch)) {
michael@0 1256 return ScanIdent(aToken);
michael@0 1257 }
michael@0 1258
michael@0 1259 // number family
michael@0 1260 if (IsDigit(ch)) {
michael@0 1261 return ScanNumber(aToken);
michael@0 1262 }
michael@0 1263
michael@0 1264 if (ch == '.' && IsDigit(Peek(1))) {
michael@0 1265 return ScanNumber(aToken);
michael@0 1266 }
michael@0 1267
michael@0 1268 if (ch == '+') {
michael@0 1269 int32_t c2 = Peek(1);
michael@0 1270 if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
michael@0 1271 return ScanNumber(aToken);
michael@0 1272 }
michael@0 1273 }
michael@0 1274
michael@0 1275 // '-' can start an identifier-family token, a number-family token,
michael@0 1276 // or an HTML-comment
michael@0 1277 if (ch == '-') {
michael@0 1278 int32_t c2 = Peek(1);
michael@0 1279 int32_t c3 = Peek(2);
michael@0 1280 if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) {
michael@0 1281 return ScanIdent(aToken);
michael@0 1282 }
michael@0 1283 if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
michael@0 1284 return ScanNumber(aToken);
michael@0 1285 }
michael@0 1286 if (c2 == '-' && c3 == '>') {
michael@0 1287 Advance(3);
michael@0 1288 aToken.mType = eCSSToken_HTMLComment;
michael@0 1289 aToken.mIdent.AssignLiteral("-->");
michael@0 1290 return true;
michael@0 1291 }
michael@0 1292 }
michael@0 1293
michael@0 1294 // the other HTML-comment token
michael@0 1295 if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
michael@0 1296 Advance(4);
michael@0 1297 aToken.mType = eCSSToken_HTMLComment;
michael@0 1298 aToken.mIdent.AssignLiteral("<!--");
michael@0 1299 return true;
michael@0 1300 }
michael@0 1301
michael@0 1302 // AT_KEYWORD
michael@0 1303 if (ch == '@') {
michael@0 1304 return ScanAtKeyword(aToken);
michael@0 1305 }
michael@0 1306
michael@0 1307 // HASH
michael@0 1308 if (ch == '#') {
michael@0 1309 return ScanHash(aToken);
michael@0 1310 }
michael@0 1311
michael@0 1312 // STRING
michael@0 1313 if (ch == '"' || ch == '\'') {
michael@0 1314 return ScanString(aToken);
michael@0 1315 }
michael@0 1316
michael@0 1317 // Match operators: ~= |= ^= $= *=
michael@0 1318 nsCSSTokenType opType = MatchOperatorType(ch);
michael@0 1319 if (opType != eCSSToken_Symbol && Peek(1) == '=') {
michael@0 1320 aToken.mType = opType;
michael@0 1321 Advance(2);
michael@0 1322 return true;
michael@0 1323 }
michael@0 1324
michael@0 1325 // Otherwise, a symbol (DELIM).
michael@0 1326 aToken.mSymbol = ch;
michael@0 1327 Advance();
michael@0 1328 return true;
michael@0 1329 }
michael@0 1330
michael@0 1331 /* nsCSSGridTemplateAreaScanner methods. */
michael@0 1332
michael@0 1333 nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer)
michael@0 1334 : mBuffer(aBuffer.BeginReading())
michael@0 1335 , mOffset(0)
michael@0 1336 , mCount(aBuffer.Length())
michael@0 1337 {
michael@0 1338 }
michael@0 1339
michael@0 1340 bool
michael@0 1341 nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
michael@0 1342 {
michael@0 1343 int32_t ch;
michael@0 1344 // Skip whitespace
michael@0 1345 do {
michael@0 1346 if (mOffset >= mCount) {
michael@0 1347 return false;
michael@0 1348 }
michael@0 1349 ch = mBuffer[mOffset];
michael@0 1350 mOffset++;
michael@0 1351 } while (IsWhitespace(ch));
michael@0 1352
michael@0 1353 if (IsOpenCharClass(ch, IS_IDCHAR)) {
michael@0 1354 // Named cell token
michael@0 1355 uint32_t start = mOffset - 1; // offset of |ch|
michael@0 1356 while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
michael@0 1357 mOffset++;
michael@0 1358 }
michael@0 1359 aTokenResult.mName.Assign(&mBuffer[start], mOffset - start);
michael@0 1360 aTokenResult.isTrash = false;
michael@0 1361 } else if (ch == '.') {
michael@0 1362 // Null cell token
michael@0 1363 aTokenResult.mName.Truncate();
michael@0 1364 aTokenResult.isTrash = false;
michael@0 1365 } else {
michael@0 1366 // Trash token
michael@0 1367 aTokenResult.isTrash = true;
michael@0 1368 }
michael@0 1369 return true;
michael@0 1370 }

mercurial