Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
michael@0 | 2 | * vim: set ts=8 sts=4 et sw=4 tw=99: |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. |
michael@0 | 5 | * |
michael@0 | 6 | * Redistribution and use in source and binary forms, with or without |
michael@0 | 7 | * modification, are permitted provided that the following conditions |
michael@0 | 8 | * are met: |
michael@0 | 9 | * 1. Redistributions of source code must retain the above copyright |
michael@0 | 10 | * notice, this list of conditions and the following disclaimer. |
michael@0 | 11 | * 2. Redistributions in binary form must reproduce the above copyright |
michael@0 | 12 | * notice, this list of conditions and the following disclaimer in the |
michael@0 | 13 | * documentation and/or other materials provided with the distribution. |
michael@0 | 14 | * |
michael@0 | 15 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
michael@0 | 16 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
michael@0 | 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
michael@0 | 18 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
michael@0 | 19 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
michael@0 | 20 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
michael@0 | 21 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
michael@0 | 22 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
michael@0 | 23 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 25 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 26 | */ |
michael@0 | 27 | |
michael@0 | 28 | #ifndef yarr_YarrInterpreter_h |
michael@0 | 29 | #define yarr_YarrInterpreter_h |
michael@0 | 30 | |
michael@0 | 31 | #include "yarr/YarrPattern.h" |
michael@0 | 32 | |
// Forward declaration only: this header refers to BumpPointerAllocator solely
// through pointers, so the full definition is not needed here.
michael@0 | 33 | namespace WTF { |
michael@0 | 34 | class BumpPointerAllocator; |
michael@0 | 35 | } |
// Global-scope using-declaration so the unqualified name can be used in the
// declarations that follow in this header.
michael@0 | 36 | using WTF::BumpPointerAllocator; |
michael@0 | 37 | |
michael@0 | 38 | namespace JSC { namespace Yarr { |
michael@0 | 39 | |
michael@0 | 40 | class ByteDisjunction; |
michael@0 | 41 | |
// ByteTerm is a tagged record: `type` says what kind of term this is and the
// anonymous union below carries the payload. Judging by the constructors and
// factories in this struct, `atom` is used by character, character-class,
// back-reference and parentheses terms, `alternative` by the
// (Body)Alternative* terms, `anchors` by TypeDotStarEnclosure, and
// `checkInputCount` by TypeCheckInput / TypeUncheckInput.
// NOTE(review): presumably these are the terms emitted by byteCompile() and
// executed by interpret() -- confirm against the interpreter implementation.
michael@0 | 42 | struct ByteTerm { |
michael@0 | 43 | enum Type { |
michael@0 | 44 | TypeBodyAlternativeBegin, |
michael@0 | 45 | TypeBodyAlternativeDisjunction, |
michael@0 | 46 | TypeBodyAlternativeEnd, |
michael@0 | 47 | TypeAlternativeBegin, |
michael@0 | 48 | TypeAlternativeDisjunction, |
michael@0 | 49 | TypeAlternativeEnd, |
michael@0 | 50 | TypeSubpatternBegin, |
michael@0 | 51 | TypeSubpatternEnd, |
michael@0 | 52 | TypeAssertionBOL, |
michael@0 | 53 | TypeAssertionEOL, |
michael@0 | 54 | TypeAssertionWordBoundary, |
michael@0 | 55 | TypePatternCharacterOnce, |
michael@0 | 56 | TypePatternCharacterFixed, |
michael@0 | 57 | TypePatternCharacterGreedy, |
michael@0 | 58 | TypePatternCharacterNonGreedy, |
michael@0 | 59 | TypePatternCasedCharacterOnce, |
michael@0 | 60 | TypePatternCasedCharacterFixed, |
michael@0 | 61 | TypePatternCasedCharacterGreedy, |
michael@0 | 62 | TypePatternCasedCharacterNonGreedy, |
michael@0 | 63 | TypeCharacterClass, |
michael@0 | 64 | TypeBackReference, |
michael@0 | 65 | TypeParenthesesSubpattern, |
michael@0 | 66 | TypeParenthesesSubpatternOnceBegin, |
michael@0 | 67 | TypeParenthesesSubpatternOnceEnd, |
michael@0 | 68 | TypeParenthesesSubpatternTerminalBegin, |
michael@0 | 69 | TypeParenthesesSubpatternTerminalEnd, |
michael@0 | 70 | TypeParentheticalAssertionBegin, |
michael@0 | 71 | TypeParentheticalAssertionEnd, |
michael@0 | 72 | TypeCheckInput, |
michael@0 | 73 | TypeUncheckInput, |
michael@0 | 74 | TypeDotStarEnclosure |
michael@0 | 75 | } type; |
// Payload. All four members share storage, so only the one matching `type`
// may be read back.
michael@0 | 76 | union { |
michael@0 | 77 | struct { |
// What the atom matches: a single character, a lo/hi character pair, a
// character class, or a subpattern id (these alternatives share storage too).
michael@0 | 78 | union { |
michael@0 | 79 | UChar patternCharacter; |
michael@0 | 80 | struct { |
michael@0 | 81 | UChar lo; |
michael@0 | 82 | UChar hi; |
michael@0 | 83 | } casedCharacter; |
michael@0 | 84 | CharacterClass* characterClass; |
michael@0 | 85 | unsigned subpatternId; |
michael@0 | 86 | }; |
michael@0 | 87 | union { |
michael@0 | 88 | ByteDisjunction* parenthesesDisjunction; |
michael@0 | 89 | unsigned parenthesesWidth; |
michael@0 | 90 | }; |
michael@0 | 91 | QuantifierType quantityType; |
michael@0 | 92 | unsigned quantityCount; |
michael@0 | 93 | } atom; |
michael@0 | 94 | struct { |
michael@0 | 95 | int next; |
michael@0 | 96 | int end; |
michael@0 | 97 | bool onceThrough; |
michael@0 | 98 | } alternative; |
michael@0 | 99 | struct { |
michael@0 | 100 | bool m_bol : 1; |
michael@0 | 101 | bool m_eol : 1; |
michael@0 | 102 | } anchors; |
michael@0 | 103 | unsigned checkInputCount; |
michael@0 | 104 | }; |
// Fields that live outside the union. Not every constructor below
// initializes frameLocation or inputPosition; see the notes on each one.
michael@0 | 105 | unsigned frameLocation; |
michael@0 | 106 | bool m_capture : 1; |
michael@0 | 107 | bool m_invert : 1; |
// Declared unsigned, but every constructor and factory in this struct assigns
// it from an `int` parameter.
michael@0 | 108 | unsigned inputPosition; |
michael@0 | 109 | |
// Term for a single pattern character. The term type is derived from
// quantityType: a fixed count of exactly 1 yields TypePatternCharacterOnce,
// any other fixed count TypePatternCharacterFixed. The switch has no default
// case, so `type` stays unset for any QuantifierType value other than the
// three handled here.
michael@0 | 110 | ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 111 | : frameLocation(frameLocation) |
michael@0 | 112 | , m_capture(false) |
michael@0 | 113 | , m_invert(false) |
michael@0 | 114 | { |
michael@0 | 115 | switch (quantityType) { |
michael@0 | 116 | case QuantifierFixedCount: |
michael@0 | 117 | type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; |
michael@0 | 118 | break; |
michael@0 | 119 | case QuantifierGreedy: |
michael@0 | 120 | type = ByteTerm::TypePatternCharacterGreedy; |
michael@0 | 121 | break; |
michael@0 | 122 | case QuantifierNonGreedy: |
michael@0 | 123 | type = ByteTerm::TypePatternCharacterNonGreedy; |
michael@0 | 124 | break; |
michael@0 | 125 | } |
michael@0 | 126 | |
michael@0 | 127 | atom.patternCharacter = ch; |
michael@0 | 128 | atom.quantityType = quantityType; |
// NOTE(review): unsafeGet() presumably returns the stored value without
// checking the overflow state of the Checked wrapper -- confirm.
michael@0 | 129 | atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 130 | inputPosition = inputPos; |
michael@0 | 131 | } |
michael@0 | 132 | |
// Same as the constructor above, but for a character stored as a lo/hi pair
// in atom.casedCharacter; selects the TypePatternCasedCharacter* types.
// NOTE(review): lo/hi presumably are the two case forms used for
// case-insensitive matching -- confirm against the caller.
michael@0 | 133 | ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 134 | : frameLocation(frameLocation) |
michael@0 | 135 | , m_capture(false) |
michael@0 | 136 | , m_invert(false) |
michael@0 | 137 | { |
michael@0 | 138 | switch (quantityType) { |
michael@0 | 139 | case QuantifierFixedCount: |
michael@0 | 140 | type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed; |
michael@0 | 141 | break; |
michael@0 | 142 | case QuantifierGreedy: |
michael@0 | 143 | type = ByteTerm::TypePatternCasedCharacterGreedy; |
michael@0 | 144 | break; |
michael@0 | 145 | case QuantifierNonGreedy: |
michael@0 | 146 | type = ByteTerm::TypePatternCasedCharacterNonGreedy; |
michael@0 | 147 | break; |
michael@0 | 148 | } |
michael@0 | 149 | |
michael@0 | 150 | atom.casedCharacter.lo = lo; |
michael@0 | 151 | atom.casedCharacter.hi = hi; |
michael@0 | 152 | atom.quantityType = quantityType; |
michael@0 | 153 | atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 154 | inputPosition = inputPos; |
michael@0 | 155 | } |
michael@0 | 156 | |
// Character-class term with a fixed quantity of 1. frameLocation is not
// initialized by this constructor.
michael@0 | 157 | ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) |
michael@0 | 158 | : type(ByteTerm::TypeCharacterClass) |
michael@0 | 159 | , m_capture(false) |
michael@0 | 160 | , m_invert(invert) |
michael@0 | 161 | { |
michael@0 | 162 | atom.characterClass = characterClass; |
michael@0 | 163 | atom.quantityType = QuantifierFixedCount; |
michael@0 | 164 | atom.quantityCount = 1; |
michael@0 | 165 | inputPosition = inputPos; |
michael@0 | 166 | } |
michael@0 | 167 | |
// Term carrying a subpattern id together with a ByteDisjunction pointer
// (stored in atom.parenthesesDisjunction). Quantity is fixed at 1 and
// frameLocation is not initialized here.
michael@0 | 168 | ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) |
michael@0 | 169 | : type(type) |
michael@0 | 170 | , m_capture(capture) |
michael@0 | 171 | , m_invert(false) |
michael@0 | 172 | { |
michael@0 | 173 | atom.subpatternId = subpatternId; |
michael@0 | 174 | atom.parenthesesDisjunction = parenthesesInfo; |
michael@0 | 175 | atom.quantityType = QuantifierFixedCount; |
michael@0 | 176 | atom.quantityCount = 1; |
michael@0 | 177 | inputPosition = inputPos; |
michael@0 | 178 | } |
michael@0 | 179 | |
// Bare term of the given type; used by the static factories below, which
// then fill in the union member they need. Leaves frameLocation and
// inputPosition uninitialized. Not declared `explicit`, so a Type value
// converts implicitly to a ByteTerm.
michael@0 | 180 | ByteTerm(Type type, bool invert = false) |
michael@0 | 181 | : type(type) |
michael@0 | 182 | , m_capture(false) |
michael@0 | 183 | , m_invert(invert) |
michael@0 | 184 | { |
michael@0 | 185 | atom.quantityType = QuantifierFixedCount; |
michael@0 | 186 | atom.quantityCount = 1; |
michael@0 | 187 | } |
michael@0 | 188 | |
// Term identified only by a subpattern id (used by BackReference() below).
// Quantity is fixed at 1; frameLocation is not initialized here.
michael@0 | 189 | ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) |
michael@0 | 190 | : type(type) |
michael@0 | 191 | , m_capture(capture) |
michael@0 | 192 | , m_invert(invert) |
michael@0 | 193 | { |
michael@0 | 194 | atom.subpatternId = subpatternId; |
michael@0 | 195 | atom.quantityType = QuantifierFixedCount; |
michael@0 | 196 | atom.quantityCount = 1; |
michael@0 | 197 | inputPosition = inputPos; |
michael@0 | 198 | } |
michael@0 | 199 | |
// Initializes nothing at all: every field, including `type`, is left unset.
michael@0 | 200 | // For js::Vector. Does not create a valid object. |
michael@0 | 201 | ByteTerm() |
michael@0 | 202 | { |
michael@0 | 203 | } |
michael@0 | 204 | |
// Beginning-of-line assertion at the given input position.
michael@0 | 205 | static ByteTerm BOL(int inputPos) |
michael@0 | 206 | { |
michael@0 | 207 | ByteTerm term(TypeAssertionBOL); |
michael@0 | 208 | term.inputPosition = inputPos; |
michael@0 | 209 | return term; |
michael@0 | 210 | } |
michael@0 | 211 | |
// TypeCheckInput term. checkInputCount is a member of the same union as
// `atom`, so it is written after the constructor has set the atom fields.
michael@0 | 212 | static ByteTerm CheckInput(Checked<unsigned> count) |
michael@0 | 213 | { |
michael@0 | 214 | ByteTerm term(TypeCheckInput); |
michael@0 | 215 | term.checkInputCount = count.unsafeGet(); |
michael@0 | 216 | return term; |
michael@0 | 217 | } |
michael@0 | 218 | |
// TypeUncheckInput counterpart of CheckInput(); identical apart from the type.
michael@0 | 219 | static ByteTerm UncheckInput(Checked<unsigned> count) |
michael@0 | 220 | { |
michael@0 | 221 | ByteTerm term(TypeUncheckInput); |
michael@0 | 222 | term.checkInputCount = count.unsafeGet(); |
michael@0 | 223 | return term; |
michael@0 | 224 | } |
michael@0 | 225 | |
// End-of-line assertion at the given input position.
michael@0 | 226 | static ByteTerm EOL(int inputPos) |
michael@0 | 227 | { |
michael@0 | 228 | ByteTerm term(TypeAssertionEOL); |
michael@0 | 229 | term.inputPosition = inputPos; |
michael@0 | 230 | return term; |
michael@0 | 231 | } |
michael@0 | 232 | |
// Word-boundary assertion; `invert` is stored in m_invert.
michael@0 | 233 | static ByteTerm WordBoundary(bool invert, int inputPos) |
michael@0 | 234 | { |
michael@0 | 235 | ByteTerm term(TypeAssertionWordBoundary, invert); |
michael@0 | 236 | term.inputPosition = inputPos; |
michael@0 | 237 | return term; |
michael@0 | 238 | } |
michael@0 | 239 | |
// Back-reference to the given subpattern id; capture and invert are both false.
michael@0 | 240 | static ByteTerm BackReference(unsigned subpatternId, int inputPos) |
michael@0 | 241 | { |
michael@0 | 242 | return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); |
michael@0 | 243 | } |
michael@0 | 244 | |
// The six alternative factories below all start with next == 0 and end == 0.
// NOTE(review): presumably the byte compiler patches these offsets once the
// alternative's extent is known -- confirm in the compiler source.
// Only the two BodyAlternative Begin/Disjunction factories take onceThrough
// from the caller; the others set it to false.
michael@0 | 245 | static ByteTerm BodyAlternativeBegin(bool onceThrough) |
michael@0 | 246 | { |
michael@0 | 247 | ByteTerm term(TypeBodyAlternativeBegin); |
michael@0 | 248 | term.alternative.next = 0; |
michael@0 | 249 | term.alternative.end = 0; |
michael@0 | 250 | term.alternative.onceThrough = onceThrough; |
michael@0 | 251 | return term; |
michael@0 | 252 | } |
michael@0 | 253 | |
michael@0 | 254 | static ByteTerm BodyAlternativeDisjunction(bool onceThrough) |
michael@0 | 255 | { |
michael@0 | 256 | ByteTerm term(TypeBodyAlternativeDisjunction); |
michael@0 | 257 | term.alternative.next = 0; |
michael@0 | 258 | term.alternative.end = 0; |
michael@0 | 259 | term.alternative.onceThrough = onceThrough; |
michael@0 | 260 | return term; |
michael@0 | 261 | } |
michael@0 | 262 | |
michael@0 | 263 | static ByteTerm BodyAlternativeEnd() |
michael@0 | 264 | { |
michael@0 | 265 | ByteTerm term(TypeBodyAlternativeEnd); |
michael@0 | 266 | term.alternative.next = 0; |
michael@0 | 267 | term.alternative.end = 0; |
michael@0 | 268 | term.alternative.onceThrough = false; |
michael@0 | 269 | return term; |
michael@0 | 270 | } |
michael@0 | 271 | |
michael@0 | 272 | static ByteTerm AlternativeBegin() |
michael@0 | 273 | { |
michael@0 | 274 | ByteTerm term(TypeAlternativeBegin); |
michael@0 | 275 | term.alternative.next = 0; |
michael@0 | 276 | term.alternative.end = 0; |
michael@0 | 277 | term.alternative.onceThrough = false; |
michael@0 | 278 | return term; |
michael@0 | 279 | } |
michael@0 | 280 | |
michael@0 | 281 | static ByteTerm AlternativeDisjunction() |
michael@0 | 282 | { |
michael@0 | 283 | ByteTerm term(TypeAlternativeDisjunction); |
michael@0 | 284 | term.alternative.next = 0; |
michael@0 | 285 | term.alternative.end = 0; |
michael@0 | 286 | term.alternative.onceThrough = false; |
michael@0 | 287 | return term; |
michael@0 | 288 | } |
michael@0 | 289 | |
michael@0 | 290 | static ByteTerm AlternativeEnd() |
michael@0 | 291 | { |
michael@0 | 292 | ByteTerm term(TypeAlternativeEnd); |
michael@0 | 293 | term.alternative.next = 0; |
michael@0 | 294 | term.alternative.end = 0; |
michael@0 | 295 | term.alternative.onceThrough = false; |
michael@0 | 296 | return term; |
michael@0 | 297 | } |
michael@0 | 298 | |
// Subpattern begin/end markers carry no payload beyond their type.
michael@0 | 299 | static ByteTerm SubpatternBegin() |
michael@0 | 300 | { |
michael@0 | 301 | return ByteTerm(TypeSubpatternBegin); |
michael@0 | 302 | } |
michael@0 | 303 | |
michael@0 | 304 | static ByteTerm SubpatternEnd() |
michael@0 | 305 | { |
michael@0 | 306 | return ByteTerm(TypeSubpatternEnd); |
michael@0 | 307 | } |
michael@0 | 308 | |
// TypeDotStarEnclosure term; records the two anchor flags in `anchors`.
michael@0 | 309 | static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor) |
michael@0 | 310 | { |
michael@0 | 311 | ByteTerm term(TypeDotStarEnclosure); |
michael@0 | 312 | term.anchors.m_bol = bolAnchor; |
michael@0 | 313 | term.anchors.m_eol = eolAnchor; |
michael@0 | 314 | return term; |
michael@0 | 315 | } |
michael@0 | 316 | |
// Accessors for the two bit-field flags. Neither is const-qualified, so they
// cannot be called through a const ByteTerm.
michael@0 | 317 | bool invert() |
michael@0 | 318 | { |
michael@0 | 319 | return m_invert; |
michael@0 | 320 | } |
michael@0 | 321 | |
michael@0 | 322 | bool capture() |
michael@0 | 323 | { |
michael@0 | 324 | return m_capture; |
michael@0 | 325 | } |
michael@0 | 326 | }; |
michael@0 | 327 | |
// ByteDisjunction: a sequence of ByteTerms plus two counters supplied at
// construction (number of subpatterns and frame size). All data members are
// declared after `public:` and are accessed directly.
// NOTE(review): WTF_MAKE_FAST_ALLOCATED is defined elsewhere; presumably it
// routes operator new/delete through the project's allocator -- confirm.
michael@0 | 328 | class ByteDisjunction { |
michael@0 | 329 | WTF_MAKE_FAST_ALLOCATED; |
michael@0 | 330 | public: |
// Stores the two counters; `terms` starts out default-constructed.
michael@0 | 331 | ByteDisjunction(unsigned numSubpatterns, unsigned frameSize) |
michael@0 | 332 | : m_numSubpatterns(numSubpatterns) |
michael@0 | 333 | , m_frameSize(frameSize) |
michael@0 | 334 | { |
michael@0 | 335 | } |
michael@0 | 336 | |
michael@0 | 337 | Vector<ByteTerm> terms; |
michael@0 | 338 | unsigned m_numSubpatterns; |
michael@0 | 339 | unsigned m_frameSize; |
michael@0 | 340 | }; |
michael@0 | 341 | |
// BytecodePattern bundles the compiled body disjunction with the flags and
// character classes taken from the YarrPattern it was built from. It also
// takes over the parentheses-info and user character-class pointer vectors
// and releases them in its destructor.
michael@0 | 342 | struct BytecodePattern { |
michael@0 | 343 | WTF_MAKE_FAST_ALLOCATED; |
michael@0 | 344 | public: |
// body: the top-level disjunction, stored in m_body.
// allParenthesesInfo: swapped into this object; since the member is asserted
//   empty beforehand, the caller's vector ends up with the (empty) contents.
// pattern: source of the ignoreCase/multiline flags and of the newline and
//   wordchar character classes; its m_userCharacterClasses vector is swapped
//   out in the same way.
// allocator: stored as-is in m_allocator.
michael@0 | 345 | BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator) |
michael@0 | 346 | : m_body(body) |
michael@0 | 347 | , m_ignoreCase(pattern.m_ignoreCase) |
michael@0 | 348 | , m_multiline(pattern.m_multiline) |
michael@0 | 349 | , m_allocator(allocator) |
michael@0 | 350 | { |
michael@0 | 351 | newlineCharacterClass = pattern.newlineCharacterClass(); |
michael@0 | 352 | wordcharCharacterClass = pattern.wordcharCharacterClass(); |
michael@0 | 353 | |
michael@0 | 354 | // Trick: 'Steal' the YarrPattern's ParenthesesInfo! |
michael@0 | 355 | // The input vector isn't used afterwards anymore, |
michael@0 | 356 | // that way we don't have to copy the input. |
michael@0 | 357 | JS_ASSERT(m_allParenthesesInfo.size() == 0); |
michael@0 | 358 | m_allParenthesesInfo.swap(allParenthesesInfo); |
michael@0 | 359 | |
michael@0 | 360 | // Trick: 'Steal' the YarrPattern's CharacterClasses! |
michael@0 | 361 | // The input vector isn't used afterwards anymore, |
michael@0 | 362 | // that way we don't have to copy the input. |
michael@0 | 363 | JS_ASSERT(m_userCharacterClasses.size() == 0); |
michael@0 | 364 | m_userCharacterClasses.swap(pattern.m_userCharacterClasses); |
michael@0 | 365 | } |
michael@0 | 366 | |
// NOTE(review): deleteAllValues is defined elsewhere; presumably it deletes
// every pointer held in the vector -- confirm. No copy constructor or
// assignment operator is declared here, so check that copying is prevented
// (e.g. by the OwnPtr member); otherwise two copies would both run this
// destructor over the same pointers.
michael@0 | 367 | ~BytecodePattern() |
michael@0 | 368 | { |
michael@0 | 369 | deleteAllValues(m_allParenthesesInfo); |
michael@0 | 370 | deleteAllValues(m_userCharacterClasses); |
michael@0 | 371 | } |
michael@0 | 372 | |
michael@0 | 373 | OwnPtr<ByteDisjunction> m_body; |
michael@0 | 374 | bool m_ignoreCase; |
michael@0 | 375 | bool m_multiline; |
michael@0 | 376 | // Each BytecodePattern is associated with a RegExp, each RegExp is associated |
michael@0 | 377 | // with a JSGlobalData. Cache a pointer to our JSGlobalData's m_regExpAllocator. |
michael@0 | 378 | BumpPointerAllocator* m_allocator; |
michael@0 | 379 | |
// Copied from the YarrPattern in the constructor; they are not part of the
// vectors released by the destructor.
michael@0 | 380 | CharacterClass* newlineCharacterClass; |
michael@0 | 381 | CharacterClass* wordcharCharacterClass; |
michael@0 | 382 | |
michael@0 | 383 | private: |
// Pointer vectors taken over in the constructor and passed to
// deleteAllValues() in the destructor.
michael@0 | 384 | Vector<ByteDisjunction*> m_allParenthesesInfo; |
michael@0 | 385 | Vector<CharacterClass*> m_userCharacterClasses; |
michael@0 | 386 | }; |
michael@0 | 387 | |
// Entry points of the interpreter; the definitions live outside this header.
// byteCompile: builds a BytecodePattern from a YarrPattern and an allocator.
// NOTE(review): the pattern is taken by non-const reference, and the
// BytecodePattern constructor swaps vectors out of it, so the YarrPattern is
// presumably not reusable afterwards -- confirm in the implementation.
michael@0 | 388 | JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*); |
// interpret: three overloads taking the input as a String, as LChar* plus
// length, or as UChar* plus length. Only the String overload is marked
// JS_EXPORT_PRIVATE.
// NOTE(review): the meaning of the unsigned return value and the required
// size of the `output` array are not visible here -- check the definition.
michael@0 | 389 | JS_EXPORT_PRIVATE unsigned interpret(JSContext *cx, BytecodePattern*, const String& input, unsigned start, unsigned* output); |
michael@0 | 390 | unsigned interpret(JSContext *cx, BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); |
michael@0 | 391 | unsigned interpret(JSContext *cx, BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); |
michael@0 | 392 | |
michael@0 | 393 | } } // namespace JSC::Yarr |
michael@0 | 394 | |
michael@0 | 395 | #endif /* yarr_YarrInterpreter_h */ |