js/src/yarr/YarrInterpreter.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 *
michael@0 4 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
michael@0 5 *
michael@0 6 * Redistribution and use in source and binary forms, with or without
michael@0 7 * modification, are permitted provided that the following conditions
michael@0 8 * are met:
michael@0 9 * 1. Redistributions of source code must retain the above copyright
michael@0 10 * notice, this list of conditions and the following disclaimer.
michael@0 11 * 2. Redistributions in binary form must reproduce the above copyright
michael@0 12 * notice, this list of conditions and the following disclaimer in the
michael@0 13 * documentation and/or other materials provided with the distribution.
michael@0 14 *
michael@0 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
michael@0 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
michael@0 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
michael@0 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
michael@0 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
michael@0 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
michael@0 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
michael@0 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
michael@0 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 26 */
michael@0 27
michael@0 28 #ifndef yarr_YarrInterpreter_h
michael@0 29 #define yarr_YarrInterpreter_h
michael@0 30
michael@0 31 #include "yarr/YarrPattern.h"
michael@0 32
// Only pointers to the allocator appear in this header, so a forward
// declaration is enough; the using-declaration lets the code below name the
// type without the WTF:: qualifier.
namespace WTF { class BumpPointerAllocator; }
using WTF::BumpPointerAllocator;
michael@0 37
michael@0 38 namespace JSC { namespace Yarr {
michael@0 39
michael@0 40 class ByteDisjunction;
michael@0 41
michael@0 42 struct ByteTerm {
michael@0 43 enum Type {
michael@0 44 TypeBodyAlternativeBegin,
michael@0 45 TypeBodyAlternativeDisjunction,
michael@0 46 TypeBodyAlternativeEnd,
michael@0 47 TypeAlternativeBegin,
michael@0 48 TypeAlternativeDisjunction,
michael@0 49 TypeAlternativeEnd,
michael@0 50 TypeSubpatternBegin,
michael@0 51 TypeSubpatternEnd,
michael@0 52 TypeAssertionBOL,
michael@0 53 TypeAssertionEOL,
michael@0 54 TypeAssertionWordBoundary,
michael@0 55 TypePatternCharacterOnce,
michael@0 56 TypePatternCharacterFixed,
michael@0 57 TypePatternCharacterGreedy,
michael@0 58 TypePatternCharacterNonGreedy,
michael@0 59 TypePatternCasedCharacterOnce,
michael@0 60 TypePatternCasedCharacterFixed,
michael@0 61 TypePatternCasedCharacterGreedy,
michael@0 62 TypePatternCasedCharacterNonGreedy,
michael@0 63 TypeCharacterClass,
michael@0 64 TypeBackReference,
michael@0 65 TypeParenthesesSubpattern,
michael@0 66 TypeParenthesesSubpatternOnceBegin,
michael@0 67 TypeParenthesesSubpatternOnceEnd,
michael@0 68 TypeParenthesesSubpatternTerminalBegin,
michael@0 69 TypeParenthesesSubpatternTerminalEnd,
michael@0 70 TypeParentheticalAssertionBegin,
michael@0 71 TypeParentheticalAssertionEnd,
michael@0 72 TypeCheckInput,
michael@0 73 TypeUncheckInput,
michael@0 74 TypeDotStarEnclosure
michael@0 75 } type;
michael@0 76 union {
michael@0 77 struct {
michael@0 78 union {
michael@0 79 UChar patternCharacter;
michael@0 80 struct {
michael@0 81 UChar lo;
michael@0 82 UChar hi;
michael@0 83 } casedCharacter;
michael@0 84 CharacterClass* characterClass;
michael@0 85 unsigned subpatternId;
michael@0 86 };
michael@0 87 union {
michael@0 88 ByteDisjunction* parenthesesDisjunction;
michael@0 89 unsigned parenthesesWidth;
michael@0 90 };
michael@0 91 QuantifierType quantityType;
michael@0 92 unsigned quantityCount;
michael@0 93 } atom;
michael@0 94 struct {
michael@0 95 int next;
michael@0 96 int end;
michael@0 97 bool onceThrough;
michael@0 98 } alternative;
michael@0 99 struct {
michael@0 100 bool m_bol : 1;
michael@0 101 bool m_eol : 1;
michael@0 102 } anchors;
michael@0 103 unsigned checkInputCount;
michael@0 104 };
michael@0 105 unsigned frameLocation;
michael@0 106 bool m_capture : 1;
michael@0 107 bool m_invert : 1;
michael@0 108 unsigned inputPosition;
michael@0 109
michael@0 110 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 111 : frameLocation(frameLocation)
michael@0 112 , m_capture(false)
michael@0 113 , m_invert(false)
michael@0 114 {
michael@0 115 switch (quantityType) {
michael@0 116 case QuantifierFixedCount:
michael@0 117 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
michael@0 118 break;
michael@0 119 case QuantifierGreedy:
michael@0 120 type = ByteTerm::TypePatternCharacterGreedy;
michael@0 121 break;
michael@0 122 case QuantifierNonGreedy:
michael@0 123 type = ByteTerm::TypePatternCharacterNonGreedy;
michael@0 124 break;
michael@0 125 }
michael@0 126
michael@0 127 atom.patternCharacter = ch;
michael@0 128 atom.quantityType = quantityType;
michael@0 129 atom.quantityCount = quantityCount.unsafeGet();
michael@0 130 inputPosition = inputPos;
michael@0 131 }
michael@0 132
michael@0 133 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 134 : frameLocation(frameLocation)
michael@0 135 , m_capture(false)
michael@0 136 , m_invert(false)
michael@0 137 {
michael@0 138 switch (quantityType) {
michael@0 139 case QuantifierFixedCount:
michael@0 140 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
michael@0 141 break;
michael@0 142 case QuantifierGreedy:
michael@0 143 type = ByteTerm::TypePatternCasedCharacterGreedy;
michael@0 144 break;
michael@0 145 case QuantifierNonGreedy:
michael@0 146 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
michael@0 147 break;
michael@0 148 }
michael@0 149
michael@0 150 atom.casedCharacter.lo = lo;
michael@0 151 atom.casedCharacter.hi = hi;
michael@0 152 atom.quantityType = quantityType;
michael@0 153 atom.quantityCount = quantityCount.unsafeGet();
michael@0 154 inputPosition = inputPos;
michael@0 155 }
michael@0 156
michael@0 157 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
michael@0 158 : type(ByteTerm::TypeCharacterClass)
michael@0 159 , m_capture(false)
michael@0 160 , m_invert(invert)
michael@0 161 {
michael@0 162 atom.characterClass = characterClass;
michael@0 163 atom.quantityType = QuantifierFixedCount;
michael@0 164 atom.quantityCount = 1;
michael@0 165 inputPosition = inputPos;
michael@0 166 }
michael@0 167
michael@0 168 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
michael@0 169 : type(type)
michael@0 170 , m_capture(capture)
michael@0 171 , m_invert(false)
michael@0 172 {
michael@0 173 atom.subpatternId = subpatternId;
michael@0 174 atom.parenthesesDisjunction = parenthesesInfo;
michael@0 175 atom.quantityType = QuantifierFixedCount;
michael@0 176 atom.quantityCount = 1;
michael@0 177 inputPosition = inputPos;
michael@0 178 }
michael@0 179
michael@0 180 ByteTerm(Type type, bool invert = false)
michael@0 181 : type(type)
michael@0 182 , m_capture(false)
michael@0 183 , m_invert(invert)
michael@0 184 {
michael@0 185 atom.quantityType = QuantifierFixedCount;
michael@0 186 atom.quantityCount = 1;
michael@0 187 }
michael@0 188
michael@0 189 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
michael@0 190 : type(type)
michael@0 191 , m_capture(capture)
michael@0 192 , m_invert(invert)
michael@0 193 {
michael@0 194 atom.subpatternId = subpatternId;
michael@0 195 atom.quantityType = QuantifierFixedCount;
michael@0 196 atom.quantityCount = 1;
michael@0 197 inputPosition = inputPos;
michael@0 198 }
michael@0 199
michael@0 200 // For js::Vector. Does not create a valid object.
michael@0 201 ByteTerm()
michael@0 202 {
michael@0 203 }
michael@0 204
michael@0 205 static ByteTerm BOL(int inputPos)
michael@0 206 {
michael@0 207 ByteTerm term(TypeAssertionBOL);
michael@0 208 term.inputPosition = inputPos;
michael@0 209 return term;
michael@0 210 }
michael@0 211
michael@0 212 static ByteTerm CheckInput(Checked<unsigned> count)
michael@0 213 {
michael@0 214 ByteTerm term(TypeCheckInput);
michael@0 215 term.checkInputCount = count.unsafeGet();
michael@0 216 return term;
michael@0 217 }
michael@0 218
michael@0 219 static ByteTerm UncheckInput(Checked<unsigned> count)
michael@0 220 {
michael@0 221 ByteTerm term(TypeUncheckInput);
michael@0 222 term.checkInputCount = count.unsafeGet();
michael@0 223 return term;
michael@0 224 }
michael@0 225
michael@0 226 static ByteTerm EOL(int inputPos)
michael@0 227 {
michael@0 228 ByteTerm term(TypeAssertionEOL);
michael@0 229 term.inputPosition = inputPos;
michael@0 230 return term;
michael@0 231 }
michael@0 232
michael@0 233 static ByteTerm WordBoundary(bool invert, int inputPos)
michael@0 234 {
michael@0 235 ByteTerm term(TypeAssertionWordBoundary, invert);
michael@0 236 term.inputPosition = inputPos;
michael@0 237 return term;
michael@0 238 }
michael@0 239
michael@0 240 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
michael@0 241 {
michael@0 242 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
michael@0 243 }
michael@0 244
michael@0 245 static ByteTerm BodyAlternativeBegin(bool onceThrough)
michael@0 246 {
michael@0 247 ByteTerm term(TypeBodyAlternativeBegin);
michael@0 248 term.alternative.next = 0;
michael@0 249 term.alternative.end = 0;
michael@0 250 term.alternative.onceThrough = onceThrough;
michael@0 251 return term;
michael@0 252 }
michael@0 253
michael@0 254 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
michael@0 255 {
michael@0 256 ByteTerm term(TypeBodyAlternativeDisjunction);
michael@0 257 term.alternative.next = 0;
michael@0 258 term.alternative.end = 0;
michael@0 259 term.alternative.onceThrough = onceThrough;
michael@0 260 return term;
michael@0 261 }
michael@0 262
michael@0 263 static ByteTerm BodyAlternativeEnd()
michael@0 264 {
michael@0 265 ByteTerm term(TypeBodyAlternativeEnd);
michael@0 266 term.alternative.next = 0;
michael@0 267 term.alternative.end = 0;
michael@0 268 term.alternative.onceThrough = false;
michael@0 269 return term;
michael@0 270 }
michael@0 271
michael@0 272 static ByteTerm AlternativeBegin()
michael@0 273 {
michael@0 274 ByteTerm term(TypeAlternativeBegin);
michael@0 275 term.alternative.next = 0;
michael@0 276 term.alternative.end = 0;
michael@0 277 term.alternative.onceThrough = false;
michael@0 278 return term;
michael@0 279 }
michael@0 280
michael@0 281 static ByteTerm AlternativeDisjunction()
michael@0 282 {
michael@0 283 ByteTerm term(TypeAlternativeDisjunction);
michael@0 284 term.alternative.next = 0;
michael@0 285 term.alternative.end = 0;
michael@0 286 term.alternative.onceThrough = false;
michael@0 287 return term;
michael@0 288 }
michael@0 289
michael@0 290 static ByteTerm AlternativeEnd()
michael@0 291 {
michael@0 292 ByteTerm term(TypeAlternativeEnd);
michael@0 293 term.alternative.next = 0;
michael@0 294 term.alternative.end = 0;
michael@0 295 term.alternative.onceThrough = false;
michael@0 296 return term;
michael@0 297 }
michael@0 298
michael@0 299 static ByteTerm SubpatternBegin()
michael@0 300 {
michael@0 301 return ByteTerm(TypeSubpatternBegin);
michael@0 302 }
michael@0 303
michael@0 304 static ByteTerm SubpatternEnd()
michael@0 305 {
michael@0 306 return ByteTerm(TypeSubpatternEnd);
michael@0 307 }
michael@0 308
michael@0 309 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
michael@0 310 {
michael@0 311 ByteTerm term(TypeDotStarEnclosure);
michael@0 312 term.anchors.m_bol = bolAnchor;
michael@0 313 term.anchors.m_eol = eolAnchor;
michael@0 314 return term;
michael@0 315 }
michael@0 316
michael@0 317 bool invert()
michael@0 318 {
michael@0 319 return m_invert;
michael@0 320 }
michael@0 321
michael@0 322 bool capture()
michael@0 323 {
michael@0 324 return m_capture;
michael@0 325 }
michael@0 326 };
michael@0 327
michael@0 328 class ByteDisjunction {
michael@0 329 WTF_MAKE_FAST_ALLOCATED;
michael@0 330 public:
michael@0 331 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
michael@0 332 : m_numSubpatterns(numSubpatterns)
michael@0 333 , m_frameSize(frameSize)
michael@0 334 {
michael@0 335 }
michael@0 336
michael@0 337 Vector<ByteTerm> terms;
michael@0 338 unsigned m_numSubpatterns;
michael@0 339 unsigned m_frameSize;
michael@0 340 };
michael@0 341
michael@0 342 struct BytecodePattern {
michael@0 343 WTF_MAKE_FAST_ALLOCATED;
michael@0 344 public:
michael@0 345 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
michael@0 346 : m_body(body)
michael@0 347 , m_ignoreCase(pattern.m_ignoreCase)
michael@0 348 , m_multiline(pattern.m_multiline)
michael@0 349 , m_allocator(allocator)
michael@0 350 {
michael@0 351 newlineCharacterClass = pattern.newlineCharacterClass();
michael@0 352 wordcharCharacterClass = pattern.wordcharCharacterClass();
michael@0 353
michael@0 354 // Trick: 'Steal' the YarrPattern's ParenthesesInfo!
michael@0 355 // The input vector isn't used afterwards anymore,
michael@0 356 // that way we don't have to copy the input.
michael@0 357 JS_ASSERT(m_allParenthesesInfo.size() == 0);
michael@0 358 m_allParenthesesInfo.swap(allParenthesesInfo);
michael@0 359
michael@0 360 // Trick: 'Steal' the YarrPattern's CharacterClasses!
michael@0 361 // The input vector isn't used afterwards anymore,
michael@0 362 // that way we don't have to copy the input.
michael@0 363 JS_ASSERT(m_userCharacterClasses.size() == 0);
michael@0 364 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
michael@0 365 }
michael@0 366
michael@0 367 ~BytecodePattern()
michael@0 368 {
michael@0 369 deleteAllValues(m_allParenthesesInfo);
michael@0 370 deleteAllValues(m_userCharacterClasses);
michael@0 371 }
michael@0 372
michael@0 373 OwnPtr<ByteDisjunction> m_body;
michael@0 374 bool m_ignoreCase;
michael@0 375 bool m_multiline;
michael@0 376 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
michael@0 377 // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
michael@0 378 BumpPointerAllocator* m_allocator;
michael@0 379
michael@0 380 CharacterClass* newlineCharacterClass;
michael@0 381 CharacterClass* wordcharCharacterClass;
michael@0 382
michael@0 383 private:
michael@0 384 Vector<ByteDisjunction*> m_allParenthesesInfo;
michael@0 385 Vector<CharacterClass*> m_userCharacterClasses;
michael@0 386 };
michael@0 387
michael@0 388 JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
michael@0 389 JS_EXPORT_PRIVATE unsigned interpret(JSContext *cx, BytecodePattern*, const String& input, unsigned start, unsigned* output);
michael@0 390 unsigned interpret(JSContext *cx, BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
michael@0 391 unsigned interpret(JSContext *cx, BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
michael@0 392
michael@0 393 } } // namespace JSC::Yarr
michael@0 394
michael@0 395 #endif /* yarr_YarrInterpreter_h */

mercurial