layout/style/nsCSSScanner.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/layout/style/nsCSSScanner.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1370 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +
    1.10 +/* tokenization of CSS style sheets */
    1.11 +
    1.12 +#include "nsCSSScanner.h"
    1.13 +#include "nsStyleUtil.h"
    1.14 +#include "nsISupportsImpl.h"
    1.15 +#include "mozilla/ArrayUtils.h"
    1.16 +#include "mozilla/css/ErrorReporter.h"
    1.17 +#include "mozilla/Likely.h"
    1.18 +#include <algorithm>
    1.19 +
    1.20 +/* Character class tables and related helper functions. */
    1.21 +
// Bit flags describing the lexical classes an ASCII character can belong to.
// Every gLexTable entry is an OR of these flags; they are tested through
// IsOpenCharClass() and IsClosedCharClass().
static const uint8_t IS_HEX_DIGIT  = 0x01;  // 0-9, A-F, a-f
static const uint8_t IS_IDSTART    = 0x02;  // may begin an identifier
static const uint8_t IS_IDCHAR     = 0x04;  // may continue an identifier
static const uint8_t IS_URL_CHAR   = 0x08;  // one of the classes GatherText accepts
static const uint8_t IS_HSPACE     = 0x10;  // horizontal whitespace: TAB, SPC
static const uint8_t IS_VSPACE     = 0x20;  // vertical whitespace: LF, FF, CR
static const uint8_t IS_SPACE      = IS_HSPACE|IS_VSPACE;  // any CSS whitespace
static const uint8_t IS_STRING     = 0x40;  // one of the classes GatherText accepts
    1.30 +
// Single-letter aliases for the individual class flags, so that each
// gLexTable row below fits on one line.
#define H    IS_HSPACE
#define V    IS_VSPACE
#define I    IS_IDCHAR
#define J    IS_IDSTART
#define U    IS_URL_CHAR
#define S    IS_STRING
#define X    IS_HEX_DIGIT

// Aliases for the flag combinations that actually occur in gLexTable.
#define SH    S|H
#define SU    S|U
#define SUI   S|U|I
#define SUIJ  S|U|I|J
#define SUIX  S|U|I|X
#define SUIJX S|U|I|J|X
    1.45 +
// Lexical class flags for each of the 128 ASCII characters, indexed by code
// unit value.  Points worth noticing when reading the table:
//  - NUL, '"' and '\'' belong to no class at all.
//  - 0B (vertical tab) is not whitespace; only TAB, LF, FF, CR and SPC are.
//  - '\\' is flagged as an identifier start only, since it may introduce an
//    escape sequence.
//  - '-' and the digits may continue an identifier but not start one.
static const uint8_t gLexTable[] = {
// 00    01    02    03    04    05    06    07
    0,    S,    S,    S,    S,    S,    S,    S,
// 08   TAB    LF    0B    FF    CR    0E    0F
    S,   SH,    V,    S,    V,    V,    S,    S,
// 10    11    12    13    14    15    16    17
    S,    S,    S,    S,    S,    S,    S,    S,
// 18    19    1A    1B    1C    1D    1E    1F
    S,    S,    S,    S,    S,    S,    S,    S,
//SPC     !     "     #     $     %     &     '
   SH,   SU,    0,   SU,   SU,   SU,   SU,    0,
//  (     )     *     +     ,     -     .     /
    S,    S,   SU,   SU,   SU,  SUI,   SU,   SU,
//  0     1     2     3     4     5     6     7
 SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
//  8     9     :     ;     <     =     >     ?
 SUIX, SUIX,   SU,   SU,   SU,   SU,   SU,   SU,
//  @     A     B     C     D     E     F     G
   SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
//  H     I     J     K     L     M     N     O
 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
//  P     Q     R     S     T     U     V     W
 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
//  X     Y     Z     [     \     ]     ^     _
 SUIJ, SUIJ, SUIJ,   SU,    J,   SU,   SU, SUIJ,
//  `     a     b     c     d     e     f     g
   SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
//  h     i     j     k     l     m     n     o
 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
//  p     q     r     s     t     u     v     w
 SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
//  x     y     z     {     |     }     ~    7F
 SUIJ, SUIJ, SUIJ,   SU,   SU,   SU,   SU,    S,
};

// The character class helpers index this table with any value below 128,
// so it must have exactly one entry per ASCII character.
static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
              "gLexTable expected to cover all 128 ASCII characters");
    1.83 +
    1.84 +#undef I
    1.85 +#undef J
    1.86 +#undef U
    1.87 +#undef S
    1.88 +#undef X
    1.89 +#undef SH
    1.90 +#undef SU
    1.91 +#undef SUI
    1.92 +#undef SUIJ
    1.93 +#undef SUIX
    1.94 +#undef SUIJX
    1.95 +
    1.96 +/**
    1.97 + * True if 'ch' is in character class 'cls', which should be one of
    1.98 + * the constants above or some combination of them.  All characters
    1.99 + * above U+007F are considered to be in 'cls'.  EOF is never in 'cls'.
   1.100 + */
   1.101 +static inline bool
   1.102 +IsOpenCharClass(int32_t ch, uint8_t cls) {
   1.103 +  return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
   1.104 +}
   1.105 +
   1.106 +/**
   1.107 + * True if 'ch' is in character class 'cls', which should be one of
   1.108 + * the constants above or some combination of them.  No characters
   1.109 + * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
   1.110 + */
   1.111 +static inline bool
   1.112 +IsClosedCharClass(int32_t ch, uint8_t cls) {
   1.113 +  return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
   1.114 +}
   1.115 +
/**
 * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
 * TAB, LF, FF, CR, or SPC.  EOF and non-ASCII characters are never
 * whitespace, because the test uses the closed form of the class.
 */
static inline bool
IsWhitespace(int32_t ch) {
  return IsClosedCharClass(ch, IS_SPACE);
}
   1.124 +
/**
 * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.  EOF and
 * non-ASCII characters are never horizontal whitespace.
 */
static inline bool
IsHorzSpace(int32_t ch) {
  return IsClosedCharClass(ch, IS_HSPACE);
}
   1.132 +
/**
 * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR.  Vertical
 * whitespace requires special handling when consumed, see AdvanceLine.
 * EOF and non-ASCII characters are never vertical whitespace.
 */
static inline bool
IsVertSpace(int32_t ch) {
  return IsClosedCharClass(ch, IS_VSPACE);
}
   1.141 +
   1.142 +/**
   1.143 + * True if 'ch' is a character that can appear in the middle of an identifier.
   1.144 + * This includes U+0000 since it is handled as U+FFFD, but for purposes of
   1.145 + * GatherText it should not be included in IsOpenCharClass.
   1.146 + */
   1.147 +static inline bool
   1.148 +IsIdentChar(int32_t ch) {
   1.149 +  return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
   1.150 +}
   1.151 +
   1.152 +/**
   1.153 + * True if 'ch' is a character that by itself begins an identifier.
   1.154 + * This includes U+0000 since it is handled as U+FFFD, but for purposes of
   1.155 + * GatherText it should not be included in IsOpenCharClass.
   1.156 + * (This is a subset of IsIdentChar.)
   1.157 + */
   1.158 +static inline bool
   1.159 +IsIdentStart(int32_t ch) {
   1.160 +  return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
   1.161 +}
   1.162 +
   1.163 +/**
   1.164 + * True if the two-character sequence aFirstChar+aSecondChar begins an
   1.165 + * identifier.
   1.166 + */
   1.167 +static inline bool
   1.168 +StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
   1.169 +{
   1.170 +  return IsIdentStart(aFirstChar) ||
   1.171 +    (aFirstChar == '-' && IsIdentStart(aSecondChar));
   1.172 +}
   1.173 +
   1.174 +/**
   1.175 + * True if 'ch' is a decimal digit.
   1.176 + */
   1.177 +static inline bool
   1.178 +IsDigit(int32_t ch) {
   1.179 +  return (ch >= '0') && (ch <= '9');
   1.180 +}
   1.181 +
/**
 * True if 'ch' is a hexadecimal digit (0-9, A-F, or a-f, per the
 * IS_HEX_DIGIT entries of gLexTable).  EOF and non-ASCII characters are
 * never hexadecimal digits.
 */
static inline bool
IsHexDigit(int32_t ch) {
  return IsClosedCharClass(ch, IS_HEX_DIGIT);
}
   1.189 +
   1.190 +/**
   1.191 + * Assuming that 'ch' is a decimal digit, return its numeric value.
   1.192 + */
   1.193 +static inline uint32_t
   1.194 +DecimalDigitValue(int32_t ch)
   1.195 +{
   1.196 +  return ch - '0';
   1.197 +}
   1.198 +
   1.199 +/**
   1.200 + * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
   1.201 + */
   1.202 +static inline uint32_t
   1.203 +HexDigitValue(int32_t ch)
   1.204 +{
   1.205 +  if (IsDigit(ch)) {
   1.206 +    return DecimalDigitValue(ch);
   1.207 +  } else {
   1.208 +    // Note: c&7 just keeps the low three bits which causes
   1.209 +    // upper and lower case alphabetics to both yield their
   1.210 +    // "relative to 10" value for computing the hex value.
   1.211 +    return (ch & 0x7) + 9;
   1.212 +  }
   1.213 +}
   1.214 +
   1.215 +/**
   1.216 + * If 'ch' can be the first character of a two-character match operator
   1.217 + * token, return the token type code for that token, otherwise return
   1.218 + * eCSSToken_Symbol to indicate that it can't.
   1.219 + */
   1.220 +static inline nsCSSTokenType
   1.221 +MatchOperatorType(int32_t ch)
   1.222 +{
   1.223 +  switch (ch) {
   1.224 +  case '~': return eCSSToken_Includes;
   1.225 +  case '|': return eCSSToken_Dashmatch;
   1.226 +  case '^': return eCSSToken_Beginsmatch;
   1.227 +  case '$': return eCSSToken_Endsmatch;
   1.228 +  case '*': return eCSSToken_Containsmatch;
   1.229 +  default:  return eCSSToken_Symbol;
   1.230 +  }
   1.231 +}
   1.232 +
   1.233 +/* Out-of-line nsCSSToken methods. */
   1.234 +
   1.235 +/**
   1.236 + * Append the textual representation of |this| to |aBuffer|.
   1.237 + */
   1.238 +void
   1.239 +nsCSSToken::AppendToString(nsString& aBuffer) const
   1.240 +{
   1.241 +  switch (mType) {
   1.242 +    case eCSSToken_Ident:
   1.243 +      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
   1.244 +      break;
   1.245 +
   1.246 +    case eCSSToken_AtKeyword:
   1.247 +      aBuffer.Append('@');
   1.248 +      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
   1.249 +      break;
   1.250 +
   1.251 +    case eCSSToken_ID:
   1.252 +    case eCSSToken_Hash:
   1.253 +      aBuffer.Append('#');
   1.254 +      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
   1.255 +      break;
   1.256 +
   1.257 +    case eCSSToken_Function:
   1.258 +      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
   1.259 +      aBuffer.Append('(');
   1.260 +      break;
   1.261 +
   1.262 +    case eCSSToken_URL:
   1.263 +    case eCSSToken_Bad_URL:
   1.264 +      aBuffer.AppendLiteral("url(");
   1.265 +      if (mSymbol != char16_t(0)) {
   1.266 +        nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
   1.267 +      } else {
   1.268 +        aBuffer.Append(mIdent);
   1.269 +      }
   1.270 +      if (mType == eCSSToken_URL) {
   1.271 +        aBuffer.Append(char16_t(')'));
   1.272 +      }
   1.273 +      break;
   1.274 +
   1.275 +    case eCSSToken_Number:
   1.276 +      if (mIntegerValid) {
   1.277 +        aBuffer.AppendInt(mInteger, 10);
   1.278 +      } else {
   1.279 +        aBuffer.AppendFloat(mNumber);
   1.280 +      }
   1.281 +      break;
   1.282 +
   1.283 +    case eCSSToken_Percentage:
   1.284 +      aBuffer.AppendFloat(mNumber * 100.0f);
   1.285 +      aBuffer.Append(char16_t('%'));
   1.286 +      break;
   1.287 +
   1.288 +    case eCSSToken_Dimension:
   1.289 +      if (mIntegerValid) {
   1.290 +        aBuffer.AppendInt(mInteger, 10);
   1.291 +      } else {
   1.292 +        aBuffer.AppendFloat(mNumber);
   1.293 +      }
   1.294 +      nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
   1.295 +      break;
   1.296 +
   1.297 +    case eCSSToken_Bad_String:
   1.298 +      nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
   1.299 +      // remove the trailing quote character
   1.300 +      aBuffer.Truncate(aBuffer.Length() - 1);
   1.301 +      break;
   1.302 +
   1.303 +    case eCSSToken_String:
   1.304 +      nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
   1.305 +      break;
   1.306 +
   1.307 +    case eCSSToken_Symbol:
   1.308 +      aBuffer.Append(mSymbol);
   1.309 +      break;
   1.310 +
   1.311 +    case eCSSToken_Whitespace:
   1.312 +      aBuffer.Append(' ');
   1.313 +      break;
   1.314 +
   1.315 +    case eCSSToken_HTMLComment:
   1.316 +    case eCSSToken_URange:
   1.317 +      aBuffer.Append(mIdent);
   1.318 +      break;
   1.319 +
   1.320 +    case eCSSToken_Includes:
   1.321 +      aBuffer.AppendLiteral("~=");
   1.322 +      break;
   1.323 +    case eCSSToken_Dashmatch:
   1.324 +      aBuffer.AppendLiteral("|=");
   1.325 +      break;
   1.326 +    case eCSSToken_Beginsmatch:
   1.327 +      aBuffer.AppendLiteral("^=");
   1.328 +      break;
   1.329 +    case eCSSToken_Endsmatch:
   1.330 +      aBuffer.AppendLiteral("$=");
   1.331 +      break;
   1.332 +    case eCSSToken_Containsmatch:
   1.333 +      aBuffer.AppendLiteral("*=");
   1.334 +      break;
   1.335 +
   1.336 +    default:
   1.337 +      NS_ERROR("invalid token type");
   1.338 +      break;
   1.339 +  }
   1.340 +}
   1.341 +
   1.342 +/* nsCSSScanner methods. */
   1.343 +
/**
 * Set up a scanner that reads from |aBuffer|, starting at offset 0 and
 * reporting |aLineNumber| as the number of the first line.  The scanner
 * keeps the pointer returned by aBuffer.BeginReading() rather than a
 * copy of the text.  It starts with no error reporter attached, not in
 * SVG mode, and not recording.
 */
nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
  : mBuffer(aBuffer.BeginReading())
  , mOffset(0)
  , mCount(aBuffer.Length())
  , mLineNumber(aLineNumber)
  , mLineOffset(0)
  , mTokenLineNumber(aLineNumber)
  , mTokenLineOffset(0)
  , mTokenOffset(0)
  , mRecordStartOffset(0)
  , mEOFCharacters(eEOFCharacters_None)
  , mReporter(nullptr)
  , mSVGMode(false)
  , mRecording(false)
  , mSeenBadToken(false)
  , mSeenVariableReference(false)
{
  MOZ_COUNT_CTOR(nsCSSScanner);
}
   1.363 +
/**
 * Destructor.  Does nothing beyond balancing the MOZ_COUNT_CTOR in the
 * constructor.
 */
nsCSSScanner::~nsCSSScanner()
{
  MOZ_COUNT_DTOR(nsCSSScanner);
}
   1.368 +
   1.369 +void
   1.370 +nsCSSScanner::StartRecording()
   1.371 +{
   1.372 +  MOZ_ASSERT(!mRecording, "already started recording");
   1.373 +  mRecording = true;
   1.374 +  mRecordStartOffset = mOffset;
   1.375 +}
   1.376 +
/**
 * End the active recording and discard it.  A recording must have been
 * started with StartRecording().
 */
void
nsCSSScanner::StopRecording()
{
  MOZ_ASSERT(mRecording, "haven't started recording");
  mRecording = false;
}
   1.383 +
   1.384 +void
   1.385 +nsCSSScanner::StopRecording(nsString& aBuffer)
   1.386 +{
   1.387 +  MOZ_ASSERT(mRecording, "haven't started recording");
   1.388 +  mRecording = false;
   1.389 +  aBuffer.Append(mBuffer + mRecordStartOffset,
   1.390 +                 mOffset - mRecordStartOffset);
   1.391 +}
   1.392 +
   1.393 +uint32_t
   1.394 +nsCSSScanner::RecordingLength() const
   1.395 +{
   1.396 +  MOZ_ASSERT(mRecording, "haven't started recording");
   1.397 +  return mOffset - mRecordStartOffset;
   1.398 +}
   1.399 +
#ifdef DEBUG
/**
 * True while a recording started by StartRecording() is active.  Only
 * compiled into DEBUG builds.
 */
bool
nsCSSScanner::IsRecording() const
{
  return mRecording;
}
#endif
   1.407 +
   1.408 +nsDependentSubstring
   1.409 +nsCSSScanner::GetCurrentLine() const
   1.410 +{
   1.411 +  uint32_t end = mTokenOffset;
   1.412 +  while (end < mCount && !IsVertSpace(mBuffer[end])) {
   1.413 +    end++;
   1.414 +  }
   1.415 +  return nsDependentSubstring(mBuffer + mTokenLineOffset,
   1.416 +                              mBuffer + end);
   1.417 +}
   1.418 +
   1.419 +/**
   1.420 + * Return the raw UTF-16 code unit at position |mOffset + n| within
   1.421 + * the read buffer.  If that is beyond the end of the buffer, returns
   1.422 + * -1 to indicate end of input.
   1.423 + */
   1.424 +inline int32_t
   1.425 +nsCSSScanner::Peek(uint32_t n)
   1.426 +{
   1.427 +  if (mOffset + n >= mCount) {
   1.428 +    return -1;
   1.429 +  }
   1.430 +  return mBuffer[mOffset + n];
   1.431 +}
   1.432 +
   1.433 +/**
   1.434 + * Advance |mOffset| over |n| code units.  Advance(0) is a no-op.
   1.435 + * If |n| is greater than the distance to end of input, will silently
   1.436 + * stop at the end.  May not be used to advance over a line boundary;
   1.437 + * AdvanceLine() must be used instead.
   1.438 + */
   1.439 +inline void
   1.440 +nsCSSScanner::Advance(uint32_t n)
   1.441 +{
   1.442 +#ifdef DEBUG
   1.443 +  while (mOffset < mCount && n > 0) {
   1.444 +    MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
   1.445 +               "may not Advance() over a line boundary");
   1.446 +    mOffset++;
   1.447 +    n--;
   1.448 +  }
   1.449 +#else
   1.450 +  if (mOffset + n >= mCount || mOffset + n < mOffset)
   1.451 +    mOffset = mCount;
   1.452 +  else
   1.453 +    mOffset += n;
   1.454 +#endif
   1.455 +}
   1.456 +
   1.457 +/**
   1.458 + * Advance |mOffset| over a line boundary.
   1.459 + */
   1.460 +void
   1.461 +nsCSSScanner::AdvanceLine()
   1.462 +{
   1.463 +  MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
   1.464 +             "may not AdvanceLine() over a horizontal character");
   1.465 +  // Advance over \r\n as a unit.
   1.466 +  if (mBuffer[mOffset]   == '\r' && mOffset + 1 < mCount &&
   1.467 +      mBuffer[mOffset+1] == '\n')
   1.468 +    mOffset += 2;
   1.469 +  else
   1.470 +    mOffset += 1;
   1.471 +  // 0 is a magical line number meaning that we don't know (i.e., script)
   1.472 +  if (mLineNumber != 0)
   1.473 +    mLineNumber++;
   1.474 +  mLineOffset = mOffset;
   1.475 +}
   1.476 +
   1.477 +/**
   1.478 + * Back up |mOffset| over |n| code units.  Backup(0) is a no-op.
   1.479 + * If |n| is greater than the distance to beginning of input, will
   1.480 + * silently stop at the beginning.  May not be used to back up over a
   1.481 + * line boundary.
   1.482 + */
   1.483 +void
   1.484 +nsCSSScanner::Backup(uint32_t n)
   1.485 +{
   1.486 +#ifdef DEBUG
   1.487 +  while (mOffset > 0 && n > 0) {
   1.488 +    MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
   1.489 +               "may not Backup() over a line boundary");
   1.490 +    mOffset--;
   1.491 +    n--;
   1.492 +  }
   1.493 +#else
   1.494 +  if (mOffset < n)
   1.495 +    mOffset = 0;
   1.496 +  else
   1.497 +    mOffset -= n;
   1.498 +#endif
   1.499 +}
   1.500 +
   1.501 +void
   1.502 +nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
   1.503 +{
   1.504 +  aState.mOffset = mOffset;
   1.505 +  aState.mLineNumber = mLineNumber;
   1.506 +  aState.mLineOffset = mLineOffset;
   1.507 +  aState.mTokenLineNumber = mTokenLineNumber;
   1.508 +  aState.mTokenLineOffset = mTokenLineOffset;
   1.509 +  aState.mTokenOffset = mTokenOffset;
   1.510 +  aState.mInitialized = true;
   1.511 +}
   1.512 +
   1.513 +void
   1.514 +nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
   1.515 +{
   1.516 +  MOZ_ASSERT(aState.mInitialized, "have not saved state");
   1.517 +  if (aState.mInitialized) {
   1.518 +    mOffset = aState.mOffset;
   1.519 +    mLineNumber = aState.mLineNumber;
   1.520 +    mLineOffset = aState.mLineOffset;
   1.521 +    mTokenLineNumber = aState.mTokenLineNumber;
   1.522 +    mTokenLineOffset = aState.mTokenLineOffset;
   1.523 +    mTokenOffset = aState.mTokenOffset;
   1.524 +  }
   1.525 +}
   1.526 +
   1.527 +/**
   1.528 + * Skip over a sequence of whitespace characters (vertical or
   1.529 + * horizontal) starting at the current read position.
   1.530 + */
   1.531 +void
   1.532 +nsCSSScanner::SkipWhitespace()
   1.533 +{
   1.534 +  for (;;) {
   1.535 +    int32_t ch = Peek();
   1.536 +    if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
   1.537 +      break;
   1.538 +    }
   1.539 +    if (IsVertSpace(ch)) {
   1.540 +      AdvanceLine();
   1.541 +    } else {
   1.542 +      Advance();
   1.543 +    }
   1.544 +  }
   1.545 +}
   1.546 +
   1.547 +/**
   1.548 + * Skip over one CSS comment starting at the current read position.
   1.549 + */
   1.550 +void
   1.551 +nsCSSScanner::SkipComment()
   1.552 +{
   1.553 +  MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
   1.554 +  Advance(2);
   1.555 +  for (;;) {
   1.556 +    int32_t ch = Peek();
   1.557 +    if (ch < 0) {
   1.558 +      mReporter->ReportUnexpectedEOF("PECommentEOF");
   1.559 +      SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
   1.560 +      return;
   1.561 +    }
   1.562 +    if (ch == '*') {
   1.563 +      Advance();
   1.564 +      ch = Peek();
   1.565 +      if (ch < 0) {
   1.566 +        mReporter->ReportUnexpectedEOF("PECommentEOF");
   1.567 +        SetEOFCharacters(eEOFCharacters_Slash);
   1.568 +        return;
   1.569 +      }
   1.570 +      if (ch == '/') {
   1.571 +        Advance();
   1.572 +        return;
   1.573 +      }
   1.574 +    } else if (IsVertSpace(ch)) {
   1.575 +      AdvanceLine();
   1.576 +    } else {
   1.577 +      Advance();
   1.578 +    }
   1.579 +  }
   1.580 +}
   1.581 +
   1.582 +/**
   1.583 + * If there is a valid escape sequence starting at the current read
   1.584 + * position, consume it, decode it, append the result to |aOutput|,
   1.585 + * and return true.  Otherwise, consume nothing, leave |aOutput|
   1.586 + * unmodified, and return false.  If |aInString| is true, accept the
   1.587 + * additional form of escape sequence allowed within string-like tokens.
   1.588 + */
   1.589 +bool
   1.590 +nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
   1.591 +{
   1.592 +  MOZ_ASSERT(Peek() == '\\', "should not have been called");
   1.593 +  int32_t ch = Peek(1);
   1.594 +  if (ch < 0) {
   1.595 +    // If we are in a string (or a url() containing a string), we want to drop
   1.596 +    // the backslash on the floor.  Otherwise, we want to treat it as a U+FFFD
   1.597 +    // character.
   1.598 +    Advance();
   1.599 +    if (aInString) {
   1.600 +      SetEOFCharacters(eEOFCharacters_DropBackslash);
   1.601 +    } else {
   1.602 +      aOutput.Append(UCS2_REPLACEMENT_CHAR);
   1.603 +      SetEOFCharacters(eEOFCharacters_ReplacementChar);
   1.604 +    }
   1.605 +    return true;
   1.606 +  }
   1.607 +  if (IsVertSpace(ch)) {
   1.608 +    if (aInString) {
   1.609 +      // In strings (and in url() containing a string), escaped
   1.610 +      // newlines are completely removed, to allow splitting over
   1.611 +      // multiple lines.
   1.612 +      Advance();
   1.613 +      AdvanceLine();
   1.614 +      return true;
   1.615 +    }
   1.616 +    // Outside of strings, backslash followed by a newline is not an escape.
   1.617 +    return false;
   1.618 +  }
   1.619 +
   1.620 +  if (!IsHexDigit(ch)) {
   1.621 +    // "Any character (except a hexadecimal digit, linefeed, carriage
   1.622 +    // return, or form feed) can be escaped with a backslash to remove
   1.623 +    // its special meaning." -- CSS2.1 section 4.1.3
   1.624 +    Advance(2);
   1.625 +    if (ch == 0) {
   1.626 +      aOutput.Append(UCS2_REPLACEMENT_CHAR);
   1.627 +    } else {
   1.628 +      aOutput.Append(ch);
   1.629 +    }
   1.630 +    return true;
   1.631 +  }
   1.632 +
   1.633 +  // "[at most six hexadecimal digits following a backslash] stand
   1.634 +  // for the ISO 10646 character with that number, which must not be
   1.635 +  // zero. (It is undefined in CSS 2.1 what happens if a style sheet
   1.636 +  // does contain a character with Unicode codepoint zero.)"
   1.637 +  //   -- CSS2.1 section 4.1.3
   1.638 +
   1.639 +  // At this point we know we have \ followed by at least one
   1.640 +  // hexadecimal digit, therefore the escape sequence is valid and we
   1.641 +  // can go ahead and consume the backslash.
   1.642 +  Advance();
   1.643 +  uint32_t val = 0;
   1.644 +  int i = 0;
   1.645 +  do {
   1.646 +    val = val * 16 + HexDigitValue(ch);
   1.647 +    i++;
   1.648 +    Advance();
   1.649 +    ch = Peek();
   1.650 +  } while (i < 6 && IsHexDigit(ch));
   1.651 +
   1.652 +  // "Interpret the hex digits as a hexadecimal number. If this number is zero,
   1.653 +  // or is greater than the maximum allowed codepoint, return U+FFFD
   1.654 +  // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
   1.655 +  if (MOZ_UNLIKELY(val == 0)) {
   1.656 +    aOutput.Append(UCS2_REPLACEMENT_CHAR);
   1.657 +  } else {
   1.658 +    AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
   1.659 +  }
   1.660 +
   1.661 +  // Consume exactly one whitespace character after a
   1.662 +  // hexadecimal escape sequence.
   1.663 +  if (IsVertSpace(ch)) {
   1.664 +    AdvanceLine();
   1.665 +  } else if (IsHorzSpace(ch)) {
   1.666 +    Advance();
   1.667 +  }
   1.668 +  return true;
   1.669 +}
   1.670 +
   1.671 +/**
   1.672 + * Consume a run of "text" beginning with the current read position,
   1.673 + * consisting of characters in the class |aClass| (which must be a
   1.674 + * suitable argument to IsOpenCharClass) plus escape sequences.
   1.675 + * Append the text to |aText|, after decoding escape sequences.
   1.676 + *
   1.677 + * Returns true if at least one character was appended to |aText|,
   1.678 + * false otherwise.
   1.679 + */
   1.680 +bool
   1.681 +nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
   1.682 +{
   1.683 +  // This is all of the character classes currently used with
   1.684 +  // GatherText.  If you have a need to use this function with a
   1.685 +  // different class, go ahead and add it.
   1.686 +  MOZ_ASSERT(aClass == IS_STRING ||
   1.687 +             aClass == IS_IDCHAR ||
   1.688 +             aClass == IS_URL_CHAR,
   1.689 +             "possibly-inappropriate character class");
   1.690 +
   1.691 +  uint32_t start = mOffset;
   1.692 +  bool inString = aClass == IS_STRING;
   1.693 +
   1.694 +  for (;;) {
   1.695 +    // Consume runs of unescaped characters in one go.
   1.696 +    uint32_t n = mOffset;
   1.697 +    while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
   1.698 +      n++;
   1.699 +    }
   1.700 +    if (n > mOffset) {
   1.701 +      aText.Append(&mBuffer[mOffset], n - mOffset);
   1.702 +      mOffset = n;
   1.703 +    }
   1.704 +    if (n == mCount) {
   1.705 +      break;
   1.706 +    }
   1.707 +
   1.708 +    int32_t ch = Peek();
   1.709 +    MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
   1.710 +               "should not have exited the inner loop");
   1.711 +    if (ch == 0) {
   1.712 +      Advance();
   1.713 +      aText.Append(UCS2_REPLACEMENT_CHAR);
   1.714 +      continue;
   1.715 +    }
   1.716 +
   1.717 +    if (ch != '\\') {
   1.718 +      break;
   1.719 +    }
   1.720 +    if (!GatherEscape(aText, inString)) {
   1.721 +      break;
   1.722 +    }
   1.723 +  }
   1.724 +
   1.725 +  return mOffset > start;
   1.726 +}
   1.727 +
   1.728 +/**
   1.729 + * Scan an Ident token.  This also handles Function and URL tokens,
   1.730 + * both of which begin indistinguishably from an identifier.  It can
   1.731 + * produce a Symbol token when an apparent identifier actually led
   1.732 + * into an invalid escape sequence.
   1.733 + */
   1.734 +bool
   1.735 +nsCSSScanner::ScanIdent(nsCSSToken& aToken)
   1.736 +{
   1.737 +  if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
   1.738 +    MOZ_ASSERT(Peek() == '\\',
   1.739 +               "unexpected IsIdentStart character that did not begin an ident");
   1.740 +    aToken.mSymbol = Peek();
   1.741 +    Advance();
   1.742 +    return true;
   1.743 +  }
   1.744 +
   1.745 +  if (MOZ_LIKELY(Peek() != '(')) {
   1.746 +    aToken.mType = eCSSToken_Ident;
   1.747 +    return true;
   1.748 +  }
   1.749 +
   1.750 +  Advance();
   1.751 +  aToken.mType = eCSSToken_Function;
   1.752 +  if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
   1.753 +    NextURL(aToken);
   1.754 +  } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
   1.755 +    mSeenVariableReference = true;
   1.756 +  }
   1.757 +  return true;
   1.758 +}
   1.759 +
   1.760 +/**
   1.761 + * Scan an AtKeyword token.  Also handles production of Symbol when
   1.762 + * an '@' is not followed by an identifier.
   1.763 + */
   1.764 +bool
   1.765 +nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
   1.766 +{
   1.767 +  MOZ_ASSERT(Peek() == '@', "should not have been called");
   1.768 +
   1.769 +  // Fall back for when '@' isn't followed by an identifier.
   1.770 +  aToken.mSymbol = '@';
   1.771 +  Advance();
   1.772 +
   1.773 +  int32_t ch = Peek();
   1.774 +  if (StartsIdent(ch, Peek(1))) {
   1.775 +    if (GatherText(IS_IDCHAR, aToken.mIdent)) {
   1.776 +       aToken.mType = eCSSToken_AtKeyword;
   1.777 +     }
   1.778 +  }
   1.779 +  return true;
   1.780 +}
   1.781 +
   1.782 +/**
   1.783 + * Scan a Hash token.  Handles the distinction between eCSSToken_ID
   1.784 + * and eCSSToken_Hash, and handles production of Symbol when a '#'
   1.785 + * is not followed by identifier characters.
   1.786 + */
   1.787 +bool
   1.788 +nsCSSScanner::ScanHash(nsCSSToken& aToken)
   1.789 +{
   1.790 +  MOZ_ASSERT(Peek() == '#', "should not have been called");
   1.791 +
   1.792 +  // Fall back for when '#' isn't followed by identifier characters.
   1.793 +  aToken.mSymbol = '#';
   1.794 +  Advance();
   1.795 +
   1.796 +  int32_t ch = Peek();
   1.797 +  if (IsIdentChar(ch) || ch == '\\') {
   1.798 +    nsCSSTokenType type =
   1.799 +      StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
   1.800 +    aToken.mIdent.SetLength(0);
   1.801 +    if (GatherText(IS_IDCHAR, aToken.mIdent)) {
   1.802 +      aToken.mType = type;
   1.803 +    }
   1.804 +  }
   1.805 +
   1.806 +  return true;
   1.807 +}
   1.808 +
   1.809 +/**
   1.810 + * Scan a Number, Percentage, or Dimension token (all of which begin
   1.811 + * like a Number).  Can produce a Symbol when a '.' is not followed by
   1.812 + * digits, or when '+' or '-' are not followed by either a digit or a
   1.813 + * '.' and then a digit.  Can also produce a HTMLComment when it
   1.814 + * encounters '-->'.
   1.815 + */
   1.816 +bool
   1.817 +nsCSSScanner::ScanNumber(nsCSSToken& aToken)
   1.818 +{
   1.819 +  int32_t c = Peek();
   1.820 +#ifdef DEBUG
   1.821 +  {
   1.822 +    int32_t c2 = Peek(1);
   1.823 +    int32_t c3 = Peek(2);
   1.824 +    MOZ_ASSERT(IsDigit(c) ||
   1.825 +               (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
   1.826 +               (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
   1.827 +               "should not have been called");
   1.828 +  }
   1.829 +#endif
   1.830 +
   1.831 +  // Sign of the mantissa (-1 or 1).
   1.832 +  int32_t sign = c == '-' ? -1 : 1;
   1.833 +  // Absolute value of the integer part of the mantissa.  This is a double so
   1.834 +  // we don't run into overflow issues for consumers that only care about our
   1.835 +  // floating-point value while still being able to express the full int32_t
   1.836 +  // range for consumers who want integers.
   1.837 +  double intPart = 0;
   1.838 +  // Fractional part of the mantissa.  This is a double so that when we convert
   1.839 +  // to float at the end we'll end up rounding to nearest float instead of
   1.840 +  // truncating down (as we would if fracPart were a float and we just
   1.841 +  // effectively lost the last several digits).
   1.842 +  double fracPart = 0;
   1.843 +  // Absolute value of the power of 10 that we should multiply by (only
   1.844 +  // relevant for numbers in scientific notation).  Has to be a signed integer,
   1.845 +  // because multiplication of signed by unsigned converts the unsigned to
   1.846 +  // signed, so if we plan to actually multiply by expSign...
   1.847 +  int32_t exponent = 0;
   1.848 +  // Sign of the exponent.
   1.849 +  int32_t expSign = 1;
   1.850 +
   1.851 +  aToken.mHasSign = (c == '+' || c == '-');
   1.852 +  if (aToken.mHasSign) {
   1.853 +    Advance();
   1.854 +    c = Peek();
   1.855 +  }
   1.856 +
   1.857 +  bool gotDot = (c == '.');
   1.858 +
   1.859 +  if (!gotDot) {
   1.860 +    // Scan the integer part of the mantissa.
   1.861 +    MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
   1.862 +    do {
   1.863 +      intPart = 10*intPart + DecimalDigitValue(c);
   1.864 +      Advance();
   1.865 +      c = Peek();
   1.866 +    } while (IsDigit(c));
   1.867 +
   1.868 +    gotDot = (c == '.') && IsDigit(Peek(1));
   1.869 +  }
   1.870 +
   1.871 +  if (gotDot) {
   1.872 +    // Scan the fractional part of the mantissa.
   1.873 +    Advance();
   1.874 +    c = Peek();
   1.875 +    MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
   1.876 +    // Power of ten by which we need to divide our next digit
   1.877 +    double divisor = 10;
   1.878 +    do {
   1.879 +      fracPart += DecimalDigitValue(c) / divisor;
   1.880 +      divisor *= 10;
   1.881 +      Advance();
   1.882 +      c = Peek();
   1.883 +    } while (IsDigit(c));
   1.884 +  }
   1.885 +
   1.886 +  bool gotE = false;
   1.887 +  if (c == 'e' || c == 'E') {
   1.888 +    int32_t expSignChar = Peek(1);
   1.889 +    int32_t nextChar = Peek(2);
   1.890 +    if (IsDigit(expSignChar) ||
   1.891 +        ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
   1.892 +      gotE = true;
   1.893 +      if (expSignChar == '-') {
   1.894 +        expSign = -1;
   1.895 +      }
   1.896 +      Advance(); // consumes the E
   1.897 +      if (expSignChar == '-' || expSignChar == '+') {
   1.898 +        Advance();
   1.899 +        c = nextChar;
   1.900 +      } else {
   1.901 +        c = expSignChar;
   1.902 +      }
   1.903 +      MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
   1.904 +      do {
   1.905 +        exponent = 10*exponent + DecimalDigitValue(c);
   1.906 +        Advance();
   1.907 +        c = Peek();
   1.908 +      } while (IsDigit(c));
   1.909 +    }
   1.910 +  }
   1.911 +
   1.912 +  nsCSSTokenType type = eCSSToken_Number;
   1.913 +
   1.914 +  // Set mIntegerValid for all cases (except %, below) because we need
   1.915 +  // it for the "2n" in :nth-child(2n).
   1.916 +  aToken.mIntegerValid = false;
   1.917 +
   1.918 +  // Time to reassemble our number.
   1.919 +  // Do all the math in double precision so it's truncated only once.
   1.920 +  double value = sign * (intPart + fracPart);
   1.921 +  if (gotE) {
   1.922 +    // Explicitly cast expSign*exponent to double to avoid issues with
   1.923 +    // overloaded pow() on Windows.
   1.924 +    value *= pow(10.0, double(expSign * exponent));
   1.925 +  } else if (!gotDot) {
   1.926 +    // Clamp values outside of integer range.
   1.927 +    if (sign > 0) {
   1.928 +      aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
   1.929 +    } else {
   1.930 +      aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
   1.931 +    }
   1.932 +    aToken.mIntegerValid = true;
   1.933 +  }
   1.934 +
   1.935 +  nsString& ident = aToken.mIdent;
   1.936 +
   1.937 +  // Check for Dimension and Percentage tokens.
   1.938 +  if (c >= 0) {
   1.939 +    if (StartsIdent(c, Peek(1))) {
   1.940 +      if (GatherText(IS_IDCHAR, ident)) {
   1.941 +        type = eCSSToken_Dimension;
   1.942 +      }
   1.943 +    } else if (c == '%') {
   1.944 +      Advance();
   1.945 +      type = eCSSToken_Percentage;
   1.946 +      value = value / 100.0f;
   1.947 +      aToken.mIntegerValid = false;
   1.948 +    }
   1.949 +  }
   1.950 +  aToken.mNumber = value;
   1.951 +  aToken.mType = type;
   1.952 +  return true;
   1.953 +}
   1.954 +
   1.955 +/**
   1.956 + * Scan a string constant ('foo' or "foo").  Will always produce
   1.957 + * either a String or a Bad_String token; the latter occurs when the
   1.958 + * close quote is missing.  Always returns true (for convenience in Next()).
   1.959 + */
   1.960 +bool
   1.961 +nsCSSScanner::ScanString(nsCSSToken& aToken)
   1.962 +{
   1.963 +  int32_t aStop = Peek();
   1.964 +  MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
   1.965 +  aToken.mType = eCSSToken_String;
   1.966 +  aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
   1.967 +  Advance();
   1.968 +
   1.969 +  for (;;) {
   1.970 +    GatherText(IS_STRING, aToken.mIdent);
   1.971 +
   1.972 +    int32_t ch = Peek();
   1.973 +    if (ch == -1) {
   1.974 +      AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
   1.975 +                                      eEOFCharacters_SingleQuote);
   1.976 +      break; // EOF ends a string token with no error.
   1.977 +    }
   1.978 +    if (ch == aStop) {
   1.979 +      Advance();
   1.980 +      break;
   1.981 +    }
   1.982 +    // Both " and ' are excluded from IS_STRING.
   1.983 +    if (ch == '"' || ch == '\'') {
   1.984 +      aToken.mIdent.Append(ch);
   1.985 +      Advance();
   1.986 +      continue;
   1.987 +    }
   1.988 +
   1.989 +    mSeenBadToken = true;
   1.990 +    aToken.mType = eCSSToken_Bad_String;
   1.991 +    mReporter->ReportUnexpected("SEUnterminatedString", aToken);
   1.992 +    break;
   1.993 +  }
   1.994 +  return true;
   1.995 +}
   1.996 +
   1.997 +/**
   1.998 + * Scan a unicode-range token.  These match the regular expression
   1.999 + *
  1.1000 + *     u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
  1.1001 + *
  1.1002 + * However, some such tokens are "invalid".  There are three valid forms:
  1.1003 + *
  1.1004 + *     u+[0-9a-f]{x}              1 <= x <= 6
  1.1005 + *     u+[0-9a-f]{x}\?{y}         1 <= x+y <= 6
  1.1006 + *     u+[0-9a-f]{x}-[0-9a-f]{y}  1 <= x <= 6, 1 <= y <= 6
  1.1007 + *
  1.1008 + * All unicode-range tokens have their text recorded in mIdent; valid ones
  1.1009 + * are also decoded into mInteger and mInteger2, and mIntegerValid is set.
  1.1010 + * Note that this does not validate the numeric range, only the syntactic
  1.1011 + * form.
  1.1012 + */
  1.1013 +bool
  1.1014 +nsCSSScanner::ScanURange(nsCSSToken& aResult)
  1.1015 +{
  1.1016 +  int32_t intro1 = Peek();
  1.1017 +  int32_t intro2 = Peek(1);
  1.1018 +  int32_t ch = Peek(2);
  1.1019 +
  1.1020 +  MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
  1.1021 +             intro2 == '+' &&
  1.1022 +             (IsHexDigit(ch) || ch == '?'),
  1.1023 +             "should not have been called");
  1.1024 +
  1.1025 +  aResult.mIdent.Append(intro1);
  1.1026 +  aResult.mIdent.Append(intro2);
  1.1027 +  Advance(2);
  1.1028 +
  1.1029 +  bool valid = true;
  1.1030 +  bool haveQues = false;
  1.1031 +  uint32_t low = 0;
  1.1032 +  uint32_t high = 0;
  1.1033 +  int i = 0;
  1.1034 +
  1.1035 +  do {
  1.1036 +    aResult.mIdent.Append(ch);
  1.1037 +    if (IsHexDigit(ch)) {
  1.1038 +      if (haveQues) {
  1.1039 +        valid = false; // All question marks should be at the end.
  1.1040 +      }
  1.1041 +      low = low*16 + HexDigitValue(ch);
  1.1042 +      high = high*16 + HexDigitValue(ch);
  1.1043 +    } else {
  1.1044 +      haveQues = true;
  1.1045 +      low = low*16 + 0x0;
  1.1046 +      high = high*16 + 0xF;
  1.1047 +    }
  1.1048 +
  1.1049 +    i++;
  1.1050 +    Advance();
  1.1051 +    ch = Peek();
  1.1052 +  } while (i < 6 && (IsHexDigit(ch) || ch == '?'));
  1.1053 +
  1.1054 +  if (ch == '-' && IsHexDigit(Peek(1))) {
  1.1055 +    if (haveQues) {
  1.1056 +      valid = false;
  1.1057 +    }
  1.1058 +
  1.1059 +    aResult.mIdent.Append(ch);
  1.1060 +    Advance();
  1.1061 +    ch = Peek();
  1.1062 +    high = 0;
  1.1063 +    i = 0;
  1.1064 +    do {
  1.1065 +      aResult.mIdent.Append(ch);
  1.1066 +      high = high*16 + HexDigitValue(ch);
  1.1067 +
  1.1068 +      i++;
  1.1069 +      Advance();
  1.1070 +      ch = Peek();
  1.1071 +    } while (i < 6 && IsHexDigit(ch));
  1.1072 +  }
  1.1073 +
  1.1074 +  aResult.mInteger = low;
  1.1075 +  aResult.mInteger2 = high;
  1.1076 +  aResult.mIntegerValid = valid;
  1.1077 +  aResult.mType = eCSSToken_URange;
  1.1078 +  return true;
  1.1079 +}
  1.1080 +
  1.1081 +#ifdef DEBUG
  1.1082 +/* static */ void
  1.1083 +nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
  1.1084 +{
  1.1085 +  MOZ_ASSERT(c == eEOFCharacters_None ||
  1.1086 +             c == eEOFCharacters_ReplacementChar ||
  1.1087 +             c == eEOFCharacters_Slash ||
  1.1088 +             c == (eEOFCharacters_Asterisk |
  1.1089 +                   eEOFCharacters_Slash) ||
  1.1090 +             c == eEOFCharacters_DoubleQuote ||
  1.1091 +             c == eEOFCharacters_SingleQuote ||
  1.1092 +             c == (eEOFCharacters_DropBackslash |
  1.1093 +                   eEOFCharacters_DoubleQuote) ||
  1.1094 +             c == (eEOFCharacters_DropBackslash |
  1.1095 +                   eEOFCharacters_SingleQuote) ||
  1.1096 +             c == eEOFCharacters_CloseParen ||
  1.1097 +             c == (eEOFCharacters_ReplacementChar |
  1.1098 +                   eEOFCharacters_CloseParen) ||
  1.1099 +             c == (eEOFCharacters_DoubleQuote |
  1.1100 +                   eEOFCharacters_CloseParen) ||
  1.1101 +             c == (eEOFCharacters_SingleQuote |
  1.1102 +                   eEOFCharacters_CloseParen) ||
  1.1103 +             c == (eEOFCharacters_DropBackslash |
  1.1104 +                   eEOFCharacters_DoubleQuote |
  1.1105 +                   eEOFCharacters_CloseParen) ||
  1.1106 +             c == (eEOFCharacters_DropBackslash |
  1.1107 +                   eEOFCharacters_SingleQuote |
  1.1108 +                   eEOFCharacters_CloseParen),
  1.1109 +             "invalid EOFCharacters value");
  1.1110 +}
  1.1111 +#endif
  1.1112 +
  1.1113 +void
  1.1114 +nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
  1.1115 +{
  1.1116 +  mEOFCharacters = EOFCharacters(aEOFCharacters);
  1.1117 +}
  1.1118 +
  1.1119 +void
  1.1120 +nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
  1.1121 +{
  1.1122 +  mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters);
  1.1123 +}
  1.1124 +
  1.1125 +static const char16_t kImpliedEOFCharacters[] = {
  1.1126 +  UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
  1.1127 +};
  1.1128 +
  1.1129 +/* static */ void
  1.1130 +nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
  1.1131 +                                         nsAString& aResult)
  1.1132 +{
  1.1133 +  // First, ignore eEOFCharacters_DropBackslash.
  1.1134 +  uint32_t c = aEOFCharacters >> 1;
  1.1135 +
  1.1136 +  // All of the remaining EOFCharacters bits represent appended characters,
  1.1137 +  // and the bits are in the order that they need appending.
  1.1138 +  for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) {
  1.1139 +    if (c & 1) {
  1.1140 +      aResult.Append(*p);
  1.1141 +    }
  1.1142 +  }
  1.1143 +
  1.1144 +  MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters");
  1.1145 +}
  1.1146 +
  1.1147 +/**
  1.1148 + * Consume the part of an URL token after the initial 'url('.  Caller
  1.1149 + * is assumed to have consumed 'url(' already.  Will always produce
  1.1150 + * either an URL or a Bad_URL token.
  1.1151 + *
  1.1152 + * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
  1.1153 + * the special lexical rules for URL tokens in a nonstandard context.
  1.1154 + */
  1.1155 +bool
  1.1156 +nsCSSScanner::NextURL(nsCSSToken& aToken)
  1.1157 +{
  1.1158 +  SkipWhitespace();
  1.1159 +
  1.1160 +  int32_t ch = Peek();
  1.1161 +  if (ch < 0) {
  1.1162 +    return false;
  1.1163 +  }
  1.1164 +
  1.1165 +  // aToken.mIdent may be "url" at this point; clear that out
  1.1166 +  aToken.mIdent.Truncate();
  1.1167 +
  1.1168 +  // Do we have a string?
  1.1169 +  if (ch == '"' || ch == '\'') {
  1.1170 +    ScanString(aToken);
  1.1171 +    if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
  1.1172 +      aToken.mType = eCSSToken_Bad_URL;
  1.1173 +      return true;
  1.1174 +    }
  1.1175 +    MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
  1.1176 +
  1.1177 +  } else {
  1.1178 +    // Otherwise, this is the start of a non-quoted url (which may be empty).
  1.1179 +    aToken.mSymbol = char16_t(0);
  1.1180 +    GatherText(IS_URL_CHAR, aToken.mIdent);
  1.1181 +  }
  1.1182 +
  1.1183 +  // Consume trailing whitespace and then look for a close parenthesis.
  1.1184 +  SkipWhitespace();
  1.1185 +  ch = Peek();
  1.1186 +  if (MOZ_LIKELY(ch < 0 || ch == ')')) {
  1.1187 +    Advance();
  1.1188 +    aToken.mType = eCSSToken_URL;
  1.1189 +    if (ch < 0) {
  1.1190 +      AddEOFCharacters(eEOFCharacters_CloseParen);
  1.1191 +    }
  1.1192 +  } else {
  1.1193 +    mSeenBadToken = true;
  1.1194 +    aToken.mType = eCSSToken_Bad_URL;
  1.1195 +  }
  1.1196 +  return true;
  1.1197 +}
  1.1198 +
  1.1199 +/**
  1.1200 + * Primary scanner entry point.  Consume one token and fill in
  1.1201 + * |aToken| accordingly.  Will skip over any number of comments first,
  1.1202 + * and will also skip over rather than return whitespace tokens if
  1.1203 + * |aSkipWS| is true.
  1.1204 + *
  1.1205 + * Returns true if it successfully consumed a token, false if EOF has
  1.1206 + * been reached.  Will always advance the current read position by at
  1.1207 + * least one character unless called when already at EOF.
  1.1208 + */
  1.1209 +bool
  1.1210 +nsCSSScanner::Next(nsCSSToken& aToken, bool aSkipWS)
  1.1211 +{
  1.1212 +  int32_t ch;
  1.1213 +
  1.1214 +  // do this here so we don't have to do it in dozens of other places
  1.1215 +  aToken.mIdent.Truncate();
  1.1216 +  aToken.mType = eCSSToken_Symbol;
  1.1217 +
  1.1218 +  for (;;) {
  1.1219 +    // Consume any number of comments, and possibly also whitespace tokens,
  1.1220 +    // in between other tokens.
  1.1221 +    mTokenOffset = mOffset;
  1.1222 +    mTokenLineOffset = mLineOffset;
  1.1223 +    mTokenLineNumber = mLineNumber;
  1.1224 +
  1.1225 +    ch = Peek();
  1.1226 +    if (IsWhitespace(ch)) {
  1.1227 +      SkipWhitespace();
  1.1228 +      if (!aSkipWS) {
  1.1229 +        aToken.mType = eCSSToken_Whitespace;
  1.1230 +        return true;
  1.1231 +      }
  1.1232 +      continue; // start again at the beginning
  1.1233 +    }
  1.1234 +    if (ch == '/' && !IsSVGMode() && Peek(1) == '*') {
  1.1235 +      // FIXME: Editor wants comments to be preserved (bug 60290).
  1.1236 +      SkipComment();
  1.1237 +      continue; // start again at the beginning
  1.1238 +    }
  1.1239 +    break;
  1.1240 +  }
  1.1241 +
  1.1242 +  // EOF
  1.1243 +  if (ch < 0) {
  1.1244 +    return false;
  1.1245 +  }
  1.1246 +
  1.1247 +  // 'u' could be UNICODE-RANGE or an identifier-family token
  1.1248 +  if (ch == 'u' || ch == 'U') {
  1.1249 +    int32_t c2 = Peek(1);
  1.1250 +    int32_t c3 = Peek(2);
  1.1251 +    if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
  1.1252 +      return ScanURange(aToken);
  1.1253 +    }
  1.1254 +    return ScanIdent(aToken);
  1.1255 +  }
  1.1256 +
  1.1257 +  // identifier family
  1.1258 +  if (IsIdentStart(ch)) {
  1.1259 +    return ScanIdent(aToken);
  1.1260 +  }
  1.1261 +
  1.1262 +  // number family
  1.1263 +  if (IsDigit(ch)) {
  1.1264 +    return ScanNumber(aToken);
  1.1265 +  }
  1.1266 +
  1.1267 +  if (ch == '.' && IsDigit(Peek(1))) {
  1.1268 +    return ScanNumber(aToken);
  1.1269 +  }
  1.1270 +
  1.1271 +  if (ch == '+') {
  1.1272 +    int32_t c2 = Peek(1);
  1.1273 +    if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
  1.1274 +      return ScanNumber(aToken);
  1.1275 +    }
  1.1276 +  }
  1.1277 +
  1.1278 +  // '-' can start an identifier-family token, a number-family token,
  1.1279 +  // or an HTML-comment
  1.1280 +  if (ch == '-') {
  1.1281 +    int32_t c2 = Peek(1);
  1.1282 +    int32_t c3 = Peek(2);
  1.1283 +    if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) {
  1.1284 +      return ScanIdent(aToken);
  1.1285 +    }
  1.1286 +    if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
  1.1287 +      return ScanNumber(aToken);
  1.1288 +    }
  1.1289 +    if (c2 == '-' && c3 == '>') {
  1.1290 +      Advance(3);
  1.1291 +      aToken.mType = eCSSToken_HTMLComment;
  1.1292 +      aToken.mIdent.AssignLiteral("-->");
  1.1293 +      return true;
  1.1294 +    }
  1.1295 +  }
  1.1296 +
  1.1297 +  // the other HTML-comment token
  1.1298 +  if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
  1.1299 +    Advance(4);
  1.1300 +    aToken.mType = eCSSToken_HTMLComment;
  1.1301 +    aToken.mIdent.AssignLiteral("<!--");
  1.1302 +    return true;
  1.1303 +  }
  1.1304 +
  1.1305 +  // AT_KEYWORD
  1.1306 +  if (ch == '@') {
  1.1307 +    return ScanAtKeyword(aToken);
  1.1308 +  }
  1.1309 +
  1.1310 +  // HASH
  1.1311 +  if (ch == '#') {
  1.1312 +    return ScanHash(aToken);
  1.1313 +  }
  1.1314 +
  1.1315 +  // STRING
  1.1316 +  if (ch == '"' || ch == '\'') {
  1.1317 +    return ScanString(aToken);
  1.1318 +  }
  1.1319 +
  1.1320 +  // Match operators: ~= |= ^= $= *=
  1.1321 +  nsCSSTokenType opType = MatchOperatorType(ch);
  1.1322 +  if (opType != eCSSToken_Symbol && Peek(1) == '=') {
  1.1323 +    aToken.mType = opType;
  1.1324 +    Advance(2);
  1.1325 +    return true;
  1.1326 +  }
  1.1327 +
  1.1328 +  // Otherwise, a symbol (DELIM).
  1.1329 +  aToken.mSymbol = ch;
  1.1330 +  Advance();
  1.1331 +  return true;
  1.1332 +}
  1.1333 +
  1.1334 +/* nsCSSGridTemplateAreaScanner methods. */
  1.1335 +
  1.1336 +nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer)
  1.1337 +  : mBuffer(aBuffer.BeginReading())
  1.1338 +  , mOffset(0)
  1.1339 +  , mCount(aBuffer.Length())
  1.1340 +{
  1.1341 +}
  1.1342 +
  1.1343 +bool
  1.1344 +nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
  1.1345 +{
  1.1346 +  int32_t ch;
  1.1347 +  // Skip whitespace
  1.1348 +  do {
  1.1349 +    if (mOffset >= mCount) {
  1.1350 +      return false;
  1.1351 +    }
  1.1352 +    ch = mBuffer[mOffset];
  1.1353 +    mOffset++;
  1.1354 +  } while (IsWhitespace(ch));
  1.1355 +
  1.1356 +  if (IsOpenCharClass(ch, IS_IDCHAR)) {
  1.1357 +    // Named cell token
  1.1358 +    uint32_t start = mOffset - 1;  // offset of |ch|
  1.1359 +    while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
  1.1360 +      mOffset++;
  1.1361 +    }
  1.1362 +    aTokenResult.mName.Assign(&mBuffer[start], mOffset - start);
  1.1363 +    aTokenResult.isTrash = false;
  1.1364 +  } else if (ch == '.') {
  1.1365 +    // Null cell token
  1.1366 +    aTokenResult.mName.Truncate();
  1.1367 +    aTokenResult.isTrash = false;
  1.1368 +  } else {
  1.1369 +    // Trash token
  1.1370 +    aTokenResult.isTrash = true;
  1.1371 +  }
  1.1372 +  return true;
  1.1373 +}

mercurial