js/src/yarr/YarrPattern.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 *
michael@0 4 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
michael@0 5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
michael@0 6 *
michael@0 7 * Redistribution and use in source and binary forms, with or without
michael@0 8 * modification, are permitted provided that the following conditions
michael@0 9 * are met:
michael@0 10 * 1. Redistributions of source code must retain the above copyright
michael@0 11 * notice, this list of conditions and the following disclaimer.
michael@0 12 * 2. Redistributions in binary form must reproduce the above copyright
michael@0 13 * notice, this list of conditions and the following disclaimer in the
michael@0 14 * documentation and/or other materials provided with the distribution.
michael@0 15 *
michael@0 16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
michael@0 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
michael@0 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
michael@0 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
michael@0 20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
michael@0 21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
michael@0 22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
michael@0 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
michael@0 24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 27 */
michael@0 28
michael@0 29 #ifndef yarr_YarrPattern_h
michael@0 30 #define yarr_YarrPattern_h
michael@0 31
michael@0 32 #include "yarr/wtfbridge.h"
michael@0 33 #include "yarr/ASCIICType.h"
michael@0 34
michael@0 35 namespace JSC { namespace Yarr {
michael@0 36
michael@0 37 struct PatternDisjunction;
michael@0 38
michael@0 39 enum ErrorCode {
michael@0 40 NoError,
michael@0 41 PatternTooLarge,
michael@0 42 QuantifierOutOfOrder,
michael@0 43 QuantifierWithoutAtom,
michael@0 44 QuantifierTooLarge,
michael@0 45 MissingParentheses,
michael@0 46 ParenthesesUnmatched,
michael@0 47 ParenthesesTypeInvalid,
michael@0 48 CharacterClassUnmatched,
michael@0 49 CharacterClassOutOfOrder,
michael@0 50 CharacterClassInvalidRange,
michael@0 51 EscapeUnterminated,
michael@0 52 RuntimeError,
michael@0 53 NumberOfErrorCodes
michael@0 54 };
michael@0 55
michael@0 56 static inline const char* errorMessage(ErrorCode code)
michael@0 57 {
michael@0 58
michael@0 59 #define REGEXP_ERROR_PREFIX "Invalid regular expression: "
michael@0 60 // The order of this array must match the ErrorCode enum.
michael@0 61 static const char* errorMessages[NumberOfErrorCodes] = {
michael@0 62 0, // NoError
michael@0 63 REGEXP_ERROR_PREFIX "regular expression too large",
michael@0 64 REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier",
michael@0 65 REGEXP_ERROR_PREFIX "nothing to repeat",
michael@0 66 REGEXP_ERROR_PREFIX "number too large in {} quantifier",
michael@0 67 REGEXP_ERROR_PREFIX "missing )",
michael@0 68 REGEXP_ERROR_PREFIX "unmatched parentheses",
michael@0 69 REGEXP_ERROR_PREFIX "unrecognized character after (?",
michael@0 70 REGEXP_ERROR_PREFIX "missing terminating ] for character class",
michael@0 71 REGEXP_ERROR_PREFIX "character class out of order"
michael@0 72 REGEXP_ERROR_PREFIX "range out of order in character class",
michael@0 73 REGEXP_ERROR_PREFIX "\\ at end of pattern"
michael@0 74 };
michael@0 75 #undef REGEXP_ERROR_PREFIX
michael@0 76
michael@0 77 return errorMessages[code];
michael@0 78 }
michael@0 79
michael@0 80 struct CharacterRange {
michael@0 81 UChar begin;
michael@0 82 UChar end;
michael@0 83
michael@0 84 CharacterRange(UChar begin, UChar end)
michael@0 85 : begin(begin)
michael@0 86 , end(end)
michael@0 87 {
michael@0 88 }
michael@0 89 };
michael@0 90
michael@0 91 struct CharacterClass {
michael@0 92 WTF_MAKE_FAST_ALLOCATED;
michael@0 93 public:
michael@0 94 // All CharacterClass instances have to have the full set of matches and ranges,
michael@0 95 // they may have an optional m_table for faster lookups (which must match the
michael@0 96 // specified matches and ranges)
michael@0 97 CharacterClass()
michael@0 98 : m_table(0)
michael@0 99 {
michael@0 100 }
michael@0 101 CharacterClass(const char* table, bool inverted)
michael@0 102 : m_table(table)
michael@0 103 , m_tableInverted(inverted)
michael@0 104 {
michael@0 105 }
michael@0 106 Vector<UChar> m_matches;
michael@0 107 Vector<CharacterRange> m_ranges;
michael@0 108 Vector<UChar> m_matchesUnicode;
michael@0 109 Vector<CharacterRange> m_rangesUnicode;
michael@0 110
michael@0 111 const char* m_table;
michael@0 112 bool m_tableInverted;
michael@0 113 };
michael@0 114
michael@0 115 enum QuantifierType {
michael@0 116 QuantifierFixedCount,
michael@0 117 QuantifierGreedy,
michael@0 118 QuantifierNonGreedy
michael@0 119 };
michael@0 120
michael@0 121 struct PatternTerm {
michael@0 122 enum Type {
michael@0 123 TypeAssertionBOL,
michael@0 124 TypeAssertionEOL,
michael@0 125 TypeAssertionWordBoundary,
michael@0 126 TypePatternCharacter,
michael@0 127 TypeCharacterClass,
michael@0 128 TypeBackReference,
michael@0 129 TypeForwardReference,
michael@0 130 TypeParenthesesSubpattern,
michael@0 131 TypeParentheticalAssertion,
michael@0 132 TypeDotStarEnclosure
michael@0 133 } type;
michael@0 134 bool m_capture :1;
michael@0 135 bool m_invert :1;
michael@0 136 union {
michael@0 137 UChar patternCharacter;
michael@0 138 CharacterClass* characterClass;
michael@0 139 unsigned backReferenceSubpatternId;
michael@0 140 struct {
michael@0 141 PatternDisjunction* disjunction;
michael@0 142 unsigned subpatternId;
michael@0 143 unsigned lastSubpatternId;
michael@0 144 bool isCopy;
michael@0 145 bool isTerminal;
michael@0 146 } parentheses;
michael@0 147 struct {
michael@0 148 bool bolAnchor : 1;
michael@0 149 bool eolAnchor : 1;
michael@0 150 } anchors;
michael@0 151 };
michael@0 152 QuantifierType quantityType;
michael@0 153 Checked<unsigned> quantityCount;
michael@0 154 int inputPosition;
michael@0 155 unsigned frameLocation;
michael@0 156
michael@0 157 PatternTerm(UChar ch)
michael@0 158 : type(PatternTerm::TypePatternCharacter)
michael@0 159 , m_capture(false)
michael@0 160 , m_invert(false)
michael@0 161 {
michael@0 162 patternCharacter = ch;
michael@0 163 quantityType = QuantifierFixedCount;
michael@0 164 quantityCount = 1;
michael@0 165 }
michael@0 166
michael@0 167 PatternTerm(CharacterClass* charClass, bool invert)
michael@0 168 : type(PatternTerm::TypeCharacterClass)
michael@0 169 , m_capture(false)
michael@0 170 , m_invert(invert)
michael@0 171 {
michael@0 172 characterClass = charClass;
michael@0 173 quantityType = QuantifierFixedCount;
michael@0 174 quantityCount = 1;
michael@0 175 }
michael@0 176
michael@0 177 PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
michael@0 178 : type(type)
michael@0 179 , m_capture(capture)
michael@0 180 , m_invert(invert)
michael@0 181 {
michael@0 182 parentheses.disjunction = disjunction;
michael@0 183 parentheses.subpatternId = subpatternId;
michael@0 184 parentheses.isCopy = false;
michael@0 185 parentheses.isTerminal = false;
michael@0 186 quantityType = QuantifierFixedCount;
michael@0 187 quantityCount = 1;
michael@0 188 }
michael@0 189
michael@0 190 PatternTerm(Type type, bool invert = false)
michael@0 191 : type(type)
michael@0 192 , m_capture(false)
michael@0 193 , m_invert(invert)
michael@0 194 {
michael@0 195 quantityType = QuantifierFixedCount;
michael@0 196 quantityCount = 1;
michael@0 197 }
michael@0 198
michael@0 199 PatternTerm(unsigned spatternId)
michael@0 200 : type(TypeBackReference)
michael@0 201 , m_capture(false)
michael@0 202 , m_invert(false)
michael@0 203 {
michael@0 204 backReferenceSubpatternId = spatternId;
michael@0 205 quantityType = QuantifierFixedCount;
michael@0 206 quantityCount = 1;
michael@0 207 }
michael@0 208
michael@0 209 PatternTerm(bool bolAnchor, bool eolAnchor)
michael@0 210 : type(TypeDotStarEnclosure)
michael@0 211 , m_capture(false)
michael@0 212 , m_invert(false)
michael@0 213 {
michael@0 214 anchors.bolAnchor = bolAnchor;
michael@0 215 anchors.eolAnchor = eolAnchor;
michael@0 216 quantityType = QuantifierFixedCount;
michael@0 217 quantityCount = 1;
michael@0 218 }
michael@0 219
michael@0 220 // No-argument constructor for js::Vector.
michael@0 221 PatternTerm()
michael@0 222 : type(PatternTerm::TypePatternCharacter)
michael@0 223 , m_capture(false)
michael@0 224 , m_invert(false)
michael@0 225 {
michael@0 226 patternCharacter = 0;
michael@0 227 quantityType = QuantifierFixedCount;
michael@0 228 quantityCount = 1;
michael@0 229 }
michael@0 230
michael@0 231 static PatternTerm ForwardReference()
michael@0 232 {
michael@0 233 return PatternTerm(TypeForwardReference);
michael@0 234 }
michael@0 235
michael@0 236 static PatternTerm BOL()
michael@0 237 {
michael@0 238 return PatternTerm(TypeAssertionBOL);
michael@0 239 }
michael@0 240
michael@0 241 static PatternTerm EOL()
michael@0 242 {
michael@0 243 return PatternTerm(TypeAssertionEOL);
michael@0 244 }
michael@0 245
michael@0 246 static PatternTerm WordBoundary(bool invert)
michael@0 247 {
michael@0 248 return PatternTerm(TypeAssertionWordBoundary, invert);
michael@0 249 }
michael@0 250
michael@0 251 bool invert()
michael@0 252 {
michael@0 253 return m_invert;
michael@0 254 }
michael@0 255
michael@0 256 bool capture()
michael@0 257 {
michael@0 258 return m_capture;
michael@0 259 }
michael@0 260
michael@0 261 void quantify(unsigned count, QuantifierType type)
michael@0 262 {
michael@0 263 quantityCount = count;
michael@0 264 quantityType = type;
michael@0 265 }
michael@0 266 };
michael@0 267
michael@0 268 struct PatternAlternative {
michael@0 269 WTF_MAKE_FAST_ALLOCATED;
michael@0 270 public:
michael@0 271 PatternAlternative(PatternDisjunction* disjunction)
michael@0 272 : m_parent(disjunction)
michael@0 273 , m_onceThrough(false)
michael@0 274 , m_hasFixedSize(false)
michael@0 275 , m_startsWithBOL(false)
michael@0 276 , m_containsBOL(false)
michael@0 277 {
michael@0 278 }
michael@0 279
michael@0 280 PatternTerm& lastTerm()
michael@0 281 {
michael@0 282 ASSERT(m_terms.size());
michael@0 283 return m_terms[m_terms.size() - 1];
michael@0 284 }
michael@0 285
michael@0 286 void removeLastTerm()
michael@0 287 {
michael@0 288 ASSERT(m_terms.size());
michael@0 289 m_terms.shrink(m_terms.size() - 1);
michael@0 290 }
michael@0 291
michael@0 292 void setOnceThrough()
michael@0 293 {
michael@0 294 m_onceThrough = true;
michael@0 295 }
michael@0 296
michael@0 297 bool onceThrough()
michael@0 298 {
michael@0 299 return m_onceThrough;
michael@0 300 }
michael@0 301
michael@0 302 Vector<PatternTerm> m_terms;
michael@0 303 PatternDisjunction* m_parent;
michael@0 304 unsigned m_minimumSize;
michael@0 305 bool m_onceThrough : 1;
michael@0 306 bool m_hasFixedSize : 1;
michael@0 307 bool m_startsWithBOL : 1;
michael@0 308 bool m_containsBOL : 1;
michael@0 309 };
michael@0 310
michael@0 311 struct PatternDisjunction {
michael@0 312 WTF_MAKE_FAST_ALLOCATED;
michael@0 313 public:
michael@0 314 PatternDisjunction(PatternAlternative* parent = 0)
michael@0 315 : m_parent(parent)
michael@0 316 , m_hasFixedSize(false)
michael@0 317 {
michael@0 318 }
michael@0 319
michael@0 320 ~PatternDisjunction()
michael@0 321 {
michael@0 322 deleteAllValues(m_alternatives);
michael@0 323 }
michael@0 324
michael@0 325 PatternAlternative* addNewAlternative()
michael@0 326 {
michael@0 327 PatternAlternative* alternative = newOrCrash<PatternAlternative>(this);
michael@0 328 m_alternatives.append(alternative);
michael@0 329 return alternative;
michael@0 330 }
michael@0 331
michael@0 332 Vector<PatternAlternative*> m_alternatives;
michael@0 333 PatternAlternative* m_parent;
michael@0 334 unsigned m_minimumSize;
michael@0 335 unsigned m_callFrameSize;
michael@0 336 bool m_hasFixedSize;
michael@0 337 };
michael@0 338
michael@0 339 // You probably don't want to be calling these functions directly
michael@0 340 // (please to be calling newlineCharacterClass() et al on your
michael@0 341 // friendly neighborhood YarrPattern instance to get nicely
michael@0 342 // cached copies).
michael@0 343 CharacterClass* newlineCreate();
michael@0 344 CharacterClass* digitsCreate();
michael@0 345 CharacterClass* spacesCreate();
michael@0 346 CharacterClass* wordcharCreate();
michael@0 347 CharacterClass* nondigitsCreate();
michael@0 348 CharacterClass* nonspacesCreate();
michael@0 349 CharacterClass* nonwordcharCreate();
michael@0 350
michael@0 351 struct TermChain {
michael@0 352 TermChain(PatternTerm term)
michael@0 353 : term(term)
michael@0 354 {}
michael@0 355
michael@0 356 PatternTerm term;
michael@0 357 Vector<TermChain> hotTerms;
michael@0 358 };
michael@0 359
michael@0 360 struct YarrPattern {
michael@0 361 YarrPattern(const String& pattern, bool ignoreCase, bool multiline, ErrorCode* error);
michael@0 362
michael@0 363 ~YarrPattern()
michael@0 364 {
michael@0 365 deleteAllValues(m_disjunctions);
michael@0 366 deleteAllValues(m_userCharacterClasses);
michael@0 367 }
michael@0 368
michael@0 369 void reset()
michael@0 370 {
michael@0 371 m_numSubpatterns = 0;
michael@0 372 m_maxBackReference = 0;
michael@0 373
michael@0 374 m_containsBackreferences = false;
michael@0 375 m_containsBOL = false;
michael@0 376
michael@0 377 newlineCached = 0;
michael@0 378 digitsCached = 0;
michael@0 379 spacesCached = 0;
michael@0 380 wordcharCached = 0;
michael@0 381 nondigitsCached = 0;
michael@0 382 nonspacesCached = 0;
michael@0 383 nonwordcharCached = 0;
michael@0 384
michael@0 385 deleteAllValues(m_disjunctions);
michael@0 386 m_disjunctions.clear();
michael@0 387 deleteAllValues(m_userCharacterClasses);
michael@0 388 m_userCharacterClasses.clear();
michael@0 389 }
michael@0 390
michael@0 391 bool containsIllegalBackReference()
michael@0 392 {
michael@0 393 return m_maxBackReference > m_numSubpatterns;
michael@0 394 }
michael@0 395
michael@0 396 CharacterClass* newlineCharacterClass()
michael@0 397 {
michael@0 398 if (!newlineCached)
michael@0 399 m_userCharacterClasses.append(newlineCached = newlineCreate());
michael@0 400 return newlineCached;
michael@0 401 }
michael@0 402 CharacterClass* digitsCharacterClass()
michael@0 403 {
michael@0 404 if (!digitsCached)
michael@0 405 m_userCharacterClasses.append(digitsCached = digitsCreate());
michael@0 406 return digitsCached;
michael@0 407 }
michael@0 408 CharacterClass* spacesCharacterClass()
michael@0 409 {
michael@0 410 if (!spacesCached)
michael@0 411 m_userCharacterClasses.append(spacesCached = spacesCreate());
michael@0 412 return spacesCached;
michael@0 413 }
michael@0 414 CharacterClass* wordcharCharacterClass()
michael@0 415 {
michael@0 416 if (!wordcharCached)
michael@0 417 m_userCharacterClasses.append(wordcharCached = wordcharCreate());
michael@0 418 return wordcharCached;
michael@0 419 }
michael@0 420 CharacterClass* nondigitsCharacterClass()
michael@0 421 {
michael@0 422 if (!nondigitsCached)
michael@0 423 m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
michael@0 424 return nondigitsCached;
michael@0 425 }
michael@0 426 CharacterClass* nonspacesCharacterClass()
michael@0 427 {
michael@0 428 if (!nonspacesCached)
michael@0 429 m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
michael@0 430 return nonspacesCached;
michael@0 431 }
michael@0 432 CharacterClass* nonwordcharCharacterClass()
michael@0 433 {
michael@0 434 if (!nonwordcharCached)
michael@0 435 m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
michael@0 436 return nonwordcharCached;
michael@0 437 }
michael@0 438
michael@0 439 bool m_ignoreCase : 1;
michael@0 440 bool m_multiline : 1;
michael@0 441 bool m_containsBackreferences : 1;
michael@0 442 bool m_containsBOL : 1;
michael@0 443 unsigned m_numSubpatterns;
michael@0 444 unsigned m_maxBackReference;
michael@0 445 PatternDisjunction* m_body;
michael@0 446 Vector<PatternDisjunction*, 4> m_disjunctions;
michael@0 447 Vector<CharacterClass*> m_userCharacterClasses;
michael@0 448
michael@0 449 private:
michael@0 450 ErrorCode compile(const String& patternString);
michael@0 451
michael@0 452 CharacterClass* newlineCached;
michael@0 453 CharacterClass* digitsCached;
michael@0 454 CharacterClass* spacesCached;
michael@0 455 CharacterClass* wordcharCached;
michael@0 456 CharacterClass* nondigitsCached;
michael@0 457 CharacterClass* nonspacesCached;
michael@0 458 CharacterClass* nonwordcharCached;
michael@0 459 };
michael@0 460
michael@0 461 } } // namespace JSC::Yarr
michael@0 462
michael@0 463 #endif /* yarr_YarrPattern_h */

mercurial