|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* tokenization of CSS style sheets */ |
|
7 |
|
8 #ifndef nsCSSScanner_h___ |
|
9 #define nsCSSScanner_h___ |
|
10 |
|
11 #include "nsString.h" |
|
12 |
|
// Forward declaration only: this header declares ErrorReporter so that
// it can be named through a pointer without pulling in its definition.
namespace mozilla {
namespace css {

class ErrorReporter;

} // namespace css
} // namespace mozilla
|
18 |
|
// Token types.  These correspond closely, though not exactly, to the
// token categories in section 4.1.1 of CSS2.1.  Every deviation is
// caused by css3-selectors, whose requirements can only be satisfied
// by changing the generic tokenization.  The trailing comment on each
// enumerator illustrates the form that kind of token takes.
enum nsCSSTokenType {
  // Any kind of white space.  No value fields are used.  Comments do
  // *not* count as white space: they separate tokens, but they are not
  // tokens themselves.
  eCSSToken_Whitespace,     //

  // Identifier-like tokens.  mIdent is the identifier's text.
  // ID versus Hash: when the text following the '#' would have been a
  // valid Ident on its own, the scanner produces an ID token; in every
  // other case it produces a Hash token.  (css3-selectors requires
  // this distinction.)
  eCSSToken_Ident,          // word
  eCSSToken_Function,       // word(
  eCSSToken_AtKeyword,      // @word
  eCSSToken_ID,             // #word
  eCSSToken_Hash,           // #0word

  // Numeric tokens.  mNumber is the floating-point value of the
  // number; mHasSign records whether an explicit sign (+ or -)
  // preceded it.  When mIntegerValid is true the number had the
  // lexical form of an integer and mInteger is its integer value.
  // A lexically-integer value outside the range of a 32-bit signed
  // number is clamped to the maximum values, and mNumber then gives a
  // 'truer' value.  Percentage tokens are never treated as integers,
  // even if their numeric value is integral (100% => mNumber = 1.0).
  // For Dimension tokens, mIdent holds the text of the unit.
  eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
  eCSSToken_Dimension,      // 24px 8.5in
  eCSSToken_Percentage,     // 85% 1280.4%

  // String-like tokens.  mIdent always holds the text belonging to
  // the string, and mSymbol holds the delimiter character: ', ", or
  // zero (zero only for unquoted URLs).  Bad_String and Bad_URL are
  // emitted when the closing delimiter or parenthesis was missing.
  eCSSToken_String,         // 'foo bar' "foo bar"
  eCSSToken_Bad_String,     // 'foo bar
  eCSSToken_URL,            // url(foobar) url("foo bar")
  eCSSToken_Bad_URL,        // url(foo

  // Any one-character symbol.  mSymbol holds the character.
  eCSSToken_Symbol,         // . ; { } ! *

  // Match operators.  Each is a single token, not a pair of Symbol
  // tokens, because css3-selectors forbids comments between the two
  // characters.  No value fields are used; the token type itself
  // identifies the operator.
  eCSSToken_Includes,       // ~=
  eCSSToken_Dashmatch,      // |=
  eCSSToken_Beginsmatch,    // ^=
  eCSSToken_Endsmatch,      // $=
  eCSSToken_Containsmatch,  // *=

  // Unicode-range token; currently used only in @font-face.
  // The lexical rule for this token admits several forms that are
  // semantically invalid.  mIdent therefore always holds the complete
  // original text of the token (so diagnostics can print it
  // accurately), and mIntegerValid is true iff the token is
  // semantically valid.  When it is, mInteger holds the lowest value
  // included in the range and mInteger2 the highest.
  eCSSToken_URange,         // U+007e U+01?? U+2000-206F

  // HTML comment delimiters.  They are ignored as a unit when they
  // appear at the top level of a style sheet, for compatibility with
  // websites written for compatibility with pre-CSS browsers.  This
  // token type subsumes the css2.1 CDO and CDC tokens, which the
  // parser always treats identically.  mIdent holds the text of the
  // token, for diagnostics.
  eCSSToken_HTMLComment     // <!-- -->
};
|
97 |
|
// Classification of tokens, used when serializing to decide whether a
// "/**/" string has to be inserted where two token streams are pasted
// together.  css-syntax does not treat dashmatch and containsmatch as
// whole tokens, but values corresponding to eCSSToken_Dashmatch and
// eCSSToken_Containsmatch are included anyway, because a "/**/" string
// is still needed between a '|' delim and a '|=' dashmatch, and between
// a '/' delim and a '*=' containsmatch.
//
// https://dvcs.w3.org/hg/csswg/raw-file/372e659027a0/css-syntax/Overview.html#serialization
enum nsCSSTokenSerializationType {
  eCSSTokenSerialization_Nothing,
  eCSSTokenSerialization_Whitespace,
  eCSSTokenSerialization_AtKeyword_or_Hash,
  eCSSTokenSerialization_Number,
  eCSSTokenSerialization_Dimension,
  eCSSTokenSerialization_Percentage,
  eCSSTokenSerialization_URange,
  eCSSTokenSerialization_URL_or_BadURL,
  eCSSTokenSerialization_Function,
  eCSSTokenSerialization_Ident,
  eCSSTokenSerialization_CDC,
  eCSSTokenSerialization_DashMatch,
  eCSSTokenSerialization_ContainsMatch,

  // Single-character symbols, grouped by the character(s) they cover.
  eCSSTokenSerialization_Symbol_Hash,         // '#'
  eCSSTokenSerialization_Symbol_At,           // '@'
  eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
  eCSSTokenSerialization_Symbol_Minus,        // '-'
  eCSSTokenSerialization_Symbol_OpenParen,    // '('
  eCSSTokenSerialization_Symbol_Question,     // '?'
  eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
  eCSSTokenSerialization_Symbol_Equals,       // '='
  eCSSTokenSerialization_Symbol_Bar,          // '|'
  eCSSTokenSerialization_Symbol_Slash,        // '/'
  eCSSTokenSerialization_Symbol_Asterisk,     // '*'

  eCSSTokenSerialization_Other                // anything else
};
|
133 |
|
134 // A single token returned from the scanner. mType is always |
|
135 // meaningful; comments above describe which other fields are |
|
136 // meaningful for which token types. |
|
137 struct nsCSSToken { |
|
138 nsAutoString mIdent; |
|
139 float mNumber; |
|
140 int32_t mInteger; |
|
141 int32_t mInteger2; |
|
142 nsCSSTokenType mType; |
|
143 char16_t mSymbol; |
|
144 bool mIntegerValid; |
|
145 bool mHasSign; |
|
146 |
|
147 nsCSSToken() |
|
148 : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace), |
|
149 mSymbol('\0'), mIntegerValid(false), mHasSign(false) |
|
150 {} |
|
151 |
|
152 bool IsSymbol(char16_t aSymbol) const { |
|
153 return mType == eCSSToken_Symbol && mSymbol == aSymbol; |
|
154 } |
|
155 |
|
156 void AppendToString(nsString& aBuffer) const; |
|
157 }; |
|
158 |
|
159 // Represents an nsCSSScanner's saved position in the input buffer. |
|
160 class nsCSSScannerPosition { |
|
161 friend class nsCSSScanner; |
|
162 public: |
|
163 nsCSSScannerPosition() : mInitialized(false) { } |
|
164 |
|
165 uint32_t LineNumber() { |
|
166 MOZ_ASSERT(mInitialized); |
|
167 return mLineNumber; |
|
168 } |
|
169 |
|
170 uint32_t LineOffset() { |
|
171 MOZ_ASSERT(mInitialized); |
|
172 return mLineOffset; |
|
173 } |
|
174 |
|
175 private: |
|
176 uint32_t mOffset; |
|
177 uint32_t mLineNumber; |
|
178 uint32_t mLineOffset; |
|
179 uint32_t mTokenLineNumber; |
|
180 uint32_t mTokenLineOffset; |
|
181 uint32_t mTokenOffset; |
|
182 bool mInitialized; |
|
183 }; |
|
184 |
|
185 // nsCSSScanner tokenizes an input stream using the CSS2.1 forward |
|
186 // compatible tokenization rules. Used internally by nsCSSParser; |
|
187 // not available for use by other code. |
|
188 class nsCSSScanner { |
|
189 public: |
|
190 // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0| |
|
191 // when the line number is unknown. |
|
192 nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber); |
|
193 ~nsCSSScanner(); |
|
194 |
|
195 void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) { |
|
196 mReporter = aReporter; |
|
197 } |
|
198 // Set whether or not we are processing SVG |
|
199 void SetSVGMode(bool aSVGMode) { |
|
200 mSVGMode = aSVGMode; |
|
201 } |
|
202 bool IsSVGMode() const { |
|
203 return mSVGMode; |
|
204 } |
|
205 |
|
206 // Reset or check whether a BAD_URL or BAD_STRING token has been seen. |
|
207 void ClearSeenBadToken() { mSeenBadToken = false; } |
|
208 bool SeenBadToken() const { return mSeenBadToken; } |
|
209 |
|
210 // Reset or check whether a "var(" FUNCTION token has been seen. |
|
211 void ClearSeenVariableReference() { mSeenVariableReference = false; } |
|
212 bool SeenVariableReference() const { return mSeenVariableReference; } |
|
213 |
|
214 // Get the 1-based line number of the last character of |
|
215 // the most recently processed token. |
|
216 uint32_t GetLineNumber() const { return mTokenLineNumber; } |
|
217 |
|
218 // Get the 0-based column number of the first character of |
|
219 // the most recently processed token. |
|
220 uint32_t GetColumnNumber() const |
|
221 { return mTokenOffset - mTokenLineOffset; } |
|
222 |
|
223 // Get the text of the line containing the first character of |
|
224 // the most recently processed token. |
|
225 nsDependentSubstring GetCurrentLine() const; |
|
226 |
|
227 // Get the next token. Return false on EOF. aTokenResult is filled |
|
228 // in with the data for the token. If aSkipWS is true, skip over |
|
229 // eCSSToken_Whitespace tokens rather than returning them. |
|
230 bool Next(nsCSSToken& aTokenResult, bool aSkipWS); |
|
231 |
|
232 // Get the body of an URL token (everything after the 'url('). |
|
233 // This is exposed for use by nsCSSParser::ParseMozDocumentRule, |
|
234 // which, for historical reasons, must make additional function |
|
235 // tokens behave like url(). Please do not add new uses to the |
|
236 // parser. |
|
237 bool NextURL(nsCSSToken& aTokenResult); |
|
238 |
|
239 // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg, |
|
240 // because "2n-1" is a single DIMENSION token, and "n-1" is a single |
|
241 // IDENT token, but the :nth() selector syntax wants to interpret |
|
242 // them the same as "2n -1" and "n -1" respectively. Please do not |
|
243 // add new uses to the parser. |
|
244 // |
|
245 // Note: this function may not be used to back up over a line boundary. |
|
246 void Backup(uint32_t n); |
|
247 |
|
248 // Starts recording the input stream from the current position. |
|
249 void StartRecording(); |
|
250 |
|
251 // Abandons recording of the input stream. |
|
252 void StopRecording(); |
|
253 |
|
254 // Stops recording of the input stream and appends the recorded |
|
255 // input to aBuffer. |
|
256 void StopRecording(nsString& aBuffer); |
|
257 |
|
258 // Returns the length of the current recording. |
|
259 uint32_t RecordingLength() const; |
|
260 |
|
261 #ifdef DEBUG |
|
262 bool IsRecording() const; |
|
263 #endif |
|
264 |
|
265 // Stores the current scanner offset into the specified object. |
|
266 void SavePosition(nsCSSScannerPosition& aState); |
|
267 |
|
268 // Resets the scanner offset to a position saved by SavePosition. |
|
269 void RestoreSavedPosition(const nsCSSScannerPosition& aState); |
|
270 |
|
271 enum EOFCharacters { |
|
272 eEOFCharacters_None = 0x0000, |
|
273 |
|
274 // to handle \<EOF> inside strings |
|
275 eEOFCharacters_DropBackslash = 0x0001, |
|
276 |
|
277 // to handle \<EOF> outside strings |
|
278 eEOFCharacters_ReplacementChar = 0x0002, |
|
279 |
|
280 // to close comments |
|
281 eEOFCharacters_Asterisk = 0x0004, |
|
282 eEOFCharacters_Slash = 0x0008, |
|
283 |
|
284 // to close double-quoted strings |
|
285 eEOFCharacters_DoubleQuote = 0x0010, |
|
286 |
|
287 // to close single-quoted strings |
|
288 eEOFCharacters_SingleQuote = 0x0020, |
|
289 |
|
290 // to close URLs |
|
291 eEOFCharacters_CloseParen = 0x0040, |
|
292 }; |
|
293 |
|
294 // Appends any characters to the specified string the input stream to make the |
|
295 // last token not rely on special EOF handling behavior. |
|
296 // |
|
297 // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored. |
|
298 static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters, |
|
299 nsAString& aString); |
|
300 |
|
301 EOFCharacters GetEOFCharacters() const { |
|
302 #ifdef DEBUG |
|
303 AssertEOFCharactersValid(mEOFCharacters); |
|
304 #endif |
|
305 return mEOFCharacters; |
|
306 } |
|
307 |
|
308 #ifdef DEBUG |
|
309 static void AssertEOFCharactersValid(uint32_t c); |
|
310 #endif |
|
311 |
|
312 protected: |
|
313 int32_t Peek(uint32_t n = 0); |
|
314 void Advance(uint32_t n = 1); |
|
315 void AdvanceLine(); |
|
316 |
|
317 void SkipWhitespace(); |
|
318 void SkipComment(); |
|
319 |
|
320 bool GatherEscape(nsString& aOutput, bool aInString); |
|
321 bool GatherText(uint8_t aClass, nsString& aIdent); |
|
322 |
|
323 bool ScanIdent(nsCSSToken& aResult); |
|
324 bool ScanAtKeyword(nsCSSToken& aResult); |
|
325 bool ScanHash(nsCSSToken& aResult); |
|
326 bool ScanNumber(nsCSSToken& aResult); |
|
327 bool ScanString(nsCSSToken& aResult); |
|
328 bool ScanURange(nsCSSToken& aResult); |
|
329 |
|
330 void SetEOFCharacters(uint32_t aEOFCharacters); |
|
331 void AddEOFCharacters(uint32_t aEOFCharacters); |
|
332 |
|
333 const char16_t *mBuffer; |
|
334 uint32_t mOffset; |
|
335 uint32_t mCount; |
|
336 |
|
337 uint32_t mLineNumber; |
|
338 uint32_t mLineOffset; |
|
339 |
|
340 uint32_t mTokenLineNumber; |
|
341 uint32_t mTokenLineOffset; |
|
342 uint32_t mTokenOffset; |
|
343 |
|
344 uint32_t mRecordStartOffset; |
|
345 EOFCharacters mEOFCharacters; |
|
346 |
|
347 mozilla::css::ErrorReporter *mReporter; |
|
348 |
|
349 // True if we are in SVG mode; false in "normal" CSS |
|
350 bool mSVGMode; |
|
351 bool mRecording; |
|
352 bool mSeenBadToken; |
|
353 bool mSeenVariableReference; |
|
354 }; |
|
355 |
|
356 // Token for the grid-template-areas micro-syntax |
|
357 // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas |
|
358 struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken { |
|
359 nsAutoString mName; // Empty for a null cell, non-empty for a named cell |
|
360 bool isTrash; // True for a trash token, mName is ignored in this case. |
|
361 }; |
|
362 |
|
363 // Scanner for the grid-template-areas micro-syntax |
|
364 class nsCSSGridTemplateAreaScanner { |
|
365 public: |
|
366 nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); |
|
367 |
|
368 // Get the next token. Return false on EOF. |
|
369 // aTokenResult is filled in with the data for the token. |
|
370 bool Next(nsCSSGridTemplateAreaToken& aTokenResult); |
|
371 |
|
372 private: |
|
373 const char16_t *mBuffer; |
|
374 uint32_t mOffset; |
|
375 uint32_t mCount; |
|
376 }; |
|
377 |
|
378 #endif /* nsCSSScanner_h___ */ |