1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/repattrn.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,814 @@ 1.4 +// 1.5 +// file: repattrn.cpp 1.6 +// 1.7 +/* 1.8 +*************************************************************************** 1.9 +* Copyright (C) 2002-2012 International Business Machines Corporation * 1.10 +* and others. All rights reserved. * 1.11 +*************************************************************************** 1.12 +*/ 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +#if !UCONFIG_NO_REGULAR_EXPRESSIONS 1.17 + 1.18 +#include "unicode/regex.h" 1.19 +#include "unicode/uclean.h" 1.20 +#include "uassert.h" 1.21 +#include "uvector.h" 1.22 +#include "uvectr32.h" 1.23 +#include "uvectr64.h" 1.24 +#include "regexcmp.h" 1.25 +#include "regeximp.h" 1.26 +#include "regexst.h" 1.27 + 1.28 +U_NAMESPACE_BEGIN 1.29 + 1.30 +//-------------------------------------------------------------------------- 1.31 +// 1.32 +// RegexPattern Default Constructor 1.33 +// 1.34 +//-------------------------------------------------------------------------- 1.35 +RegexPattern::RegexPattern() { 1.36 + // Init all of this instances data. 1.37 + init(); 1.38 +} 1.39 + 1.40 + 1.41 +//-------------------------------------------------------------------------- 1.42 +// 1.43 +// Copy Constructor Note: This is a rather inefficient implementation, 1.44 +// but it probably doesn't matter. 1.45 +// 1.46 +//-------------------------------------------------------------------------- 1.47 +RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 1.48 + init(); 1.49 + *this = other; 1.50 +} 1.51 + 1.52 + 1.53 + 1.54 +//-------------------------------------------------------------------------- 1.55 +// 1.56 +// Assignment Operator 1.57 +// 1.58 +//-------------------------------------------------------------------------- 1.59 +RegexPattern &RegexPattern::operator = (const RegexPattern &other) { 1.60 + if (this == &other) { 1.61 + // Source and destination are the same. Don't do anything. 1.62 + return *this; 1.63 + } 1.64 + 1.65 + // Clean out any previous contents of object being assigned to. 1.66 + zap(); 1.67 + 1.68 + // Give target object a default initialization 1.69 + init(); 1.70 + 1.71 + // Copy simple fields 1.72 + if ( other.fPatternString == NULL ) { 1.73 + fPatternString = NULL; 1.74 + fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 1.75 + } else { 1.76 + fPatternString = new UnicodeString(*(other.fPatternString)); 1.77 + UErrorCode status = U_ZERO_ERROR; 1.78 + fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status); 1.79 + if (U_FAILURE(status)) { 1.80 + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 1.81 + return *this; 1.82 + } 1.83 + } 1.84 + fFlags = other.fFlags; 1.85 + fLiteralText = other.fLiteralText; 1.86 + fDeferredStatus = other.fDeferredStatus; 1.87 + fMinMatchLen = other.fMinMatchLen; 1.88 + fFrameSize = other.fFrameSize; 1.89 + fDataSize = other.fDataSize; 1.90 + fMaxCaptureDigits = other.fMaxCaptureDigits; 1.91 + fStaticSets = other.fStaticSets; 1.92 + fStaticSets8 = other.fStaticSets8; 1.93 + 1.94 + fStartType = other.fStartType; 1.95 + fInitialStringIdx = other.fInitialStringIdx; 1.96 + fInitialStringLen = other.fInitialStringLen; 1.97 + *fInitialChars = *other.fInitialChars; 1.98 + fInitialChar = other.fInitialChar; 1.99 + *fInitialChars8 = *other.fInitialChars8; 1.100 + fNeedsAltInput = other.fNeedsAltInput; 1.101 + 1.102 + // Copy the pattern. It's just values, nothing deep to copy. 1.103 + fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 1.104 + fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 1.105 + 1.106 + // Copy the Unicode Sets. 1.107 + // Could be made more efficient if the sets were reference counted and shared, 1.108 + // but I doubt that pattern copying will be particularly common. 1.109 + // Note: init() already added an empty element zero to fSets 1.110 + int32_t i; 1.111 + int32_t numSets = other.fSets->size(); 1.112 + fSets8 = new Regex8BitSet[numSets]; 1.113 + if (fSets8 == NULL) { 1.114 + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 1.115 + return *this; 1.116 + } 1.117 + for (i=1; i<numSets; i++) { 1.118 + if (U_FAILURE(fDeferredStatus)) { 1.119 + return *this; 1.120 + } 1.121 + UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 1.122 + UnicodeSet *newSet = new UnicodeSet(*sourceSet); 1.123 + if (newSet == NULL) { 1.124 + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 1.125 + break; 1.126 + } 1.127 + fSets->addElement(newSet, fDeferredStatus); 1.128 + fSets8[i] = other.fSets8[i]; 1.129 + } 1.130 + 1.131 + return *this; 1.132 +} 1.133 + 1.134 + 1.135 +//-------------------------------------------------------------------------- 1.136 +// 1.137 +// init Shared initialization for use by constructors. 1.138 +// Bring an uninitialized RegexPattern up to a default state. 1.139 +// 1.140 +//-------------------------------------------------------------------------- 1.141 +void RegexPattern::init() { 1.142 + fFlags = 0; 1.143 + fCompiledPat = 0; 1.144 + fLiteralText.remove(); 1.145 + fSets = NULL; 1.146 + fSets8 = NULL; 1.147 + fDeferredStatus = U_ZERO_ERROR; 1.148 + fMinMatchLen = 0; 1.149 + fFrameSize = 0; 1.150 + fDataSize = 0; 1.151 + fGroupMap = NULL; 1.152 + fMaxCaptureDigits = 1; 1.153 + fStaticSets = NULL; 1.154 + fStaticSets8 = NULL; 1.155 + fStartType = START_NO_INFO; 1.156 + fInitialStringIdx = 0; 1.157 + fInitialStringLen = 0; 1.158 + fInitialChars = NULL; 1.159 + fInitialChar = 0; 1.160 + fInitialChars8 = NULL; 1.161 + fNeedsAltInput = FALSE; 1.162 + 1.163 + fPattern = NULL; // will be set later 1.164 + fPatternString = NULL; // may be set later 1.165 + fCompiledPat = new UVector64(fDeferredStatus); 1.166 + fGroupMap = new UVector32(fDeferredStatus); 1.167 + fSets = new UVector(fDeferredStatus); 1.168 + fInitialChars = new UnicodeSet; 1.169 + fInitialChars8 = new Regex8BitSet; 1.170 + if (U_FAILURE(fDeferredStatus)) { 1.171 + return; 1.172 + } 1.173 + if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 1.174 + fInitialChars == NULL || fInitialChars8 == NULL) { 1.175 + fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 1.176 + return; 1.177 + } 1.178 + 1.179 + // Slot zero of the vector of sets is reserved. Fill it here. 1.180 + fSets->addElement((int32_t)0, fDeferredStatus); 1.181 +} 1.182 + 1.183 + 1.184 +//-------------------------------------------------------------------------- 1.185 +// 1.186 +// zap Delete everything owned by this RegexPattern. 1.187 +// 1.188 +//-------------------------------------------------------------------------- 1.189 +void RegexPattern::zap() { 1.190 + delete fCompiledPat; 1.191 + fCompiledPat = NULL; 1.192 + int i; 1.193 + for (i=1; i<fSets->size(); i++) { 1.194 + UnicodeSet *s; 1.195 + s = (UnicodeSet *)fSets->elementAt(i); 1.196 + if (s != NULL) { 1.197 + delete s; 1.198 + } 1.199 + } 1.200 + delete fSets; 1.201 + fSets = NULL; 1.202 + delete[] fSets8; 1.203 + fSets8 = NULL; 1.204 + delete fGroupMap; 1.205 + fGroupMap = NULL; 1.206 + delete fInitialChars; 1.207 + fInitialChars = NULL; 1.208 + delete fInitialChars8; 1.209 + fInitialChars8 = NULL; 1.210 + if (fPattern != NULL) { 1.211 + utext_close(fPattern); 1.212 + fPattern = NULL; 1.213 + } 1.214 + if (fPatternString != NULL) { 1.215 + delete fPatternString; 1.216 + fPatternString = NULL; 1.217 + } 1.218 +} 1.219 + 1.220 + 1.221 +//-------------------------------------------------------------------------- 1.222 +// 1.223 +// Destructor 1.224 +// 1.225 +//-------------------------------------------------------------------------- 1.226 +RegexPattern::~RegexPattern() { 1.227 + zap(); 1.228 +} 1.229 + 1.230 + 1.231 +//-------------------------------------------------------------------------- 1.232 +// 1.233 +// Clone 1.234 +// 1.235 +//-------------------------------------------------------------------------- 1.236 +RegexPattern *RegexPattern::clone() const { 1.237 + RegexPattern *copy = new RegexPattern(*this); 1.238 + return copy; 1.239 +} 1.240 + 1.241 + 1.242 +//-------------------------------------------------------------------------- 1.243 +// 1.244 +// operator == (comparison) Consider to patterns to be == if the 1.245 +// pattern strings and the flags are the same. 1.246 +// Note that pattern strings with the same 1.247 +// characters can still be considered different. 1.248 +// 1.249 +//-------------------------------------------------------------------------- 1.250 +UBool RegexPattern::operator ==(const RegexPattern &other) const { 1.251 + if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 1.252 + if (this->fPatternString != NULL && other.fPatternString != NULL) { 1.253 + return *(this->fPatternString) == *(other.fPatternString); 1.254 + } else if (this->fPattern == NULL) { 1.255 + if (other.fPattern == NULL) { 1.256 + return TRUE; 1.257 + } 1.258 + } else if (other.fPattern != NULL) { 1.259 + UTEXT_SETNATIVEINDEX(this->fPattern, 0); 1.260 + UTEXT_SETNATIVEINDEX(other.fPattern, 0); 1.261 + return utext_equals(this->fPattern, other.fPattern); 1.262 + } 1.263 + } 1.264 + return FALSE; 1.265 +} 1.266 + 1.267 +//--------------------------------------------------------------------- 1.268 +// 1.269 +// compile 1.270 +// 1.271 +//--------------------------------------------------------------------- 1.272 +RegexPattern * U_EXPORT2 1.273 +RegexPattern::compile(const UnicodeString ®ex, 1.274 + uint32_t flags, 1.275 + UParseError &pe, 1.276 + UErrorCode &status) 1.277 +{ 1.278 + if (U_FAILURE(status)) { 1.279 + return NULL; 1.280 + } 1.281 + 1.282 + const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 1.283 + UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 1.284 + UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 1.285 + 1.286 + if ((flags & ~allFlags) != 0) { 1.287 + status = U_REGEX_INVALID_FLAG; 1.288 + return NULL; 1.289 + } 1.290 + 1.291 + if ((flags & UREGEX_CANON_EQ) != 0) { 1.292 + status = U_REGEX_UNIMPLEMENTED; 1.293 + return NULL; 1.294 + } 1.295 + 1.296 + RegexPattern *This = new RegexPattern; 1.297 + if (This == NULL) { 1.298 + status = U_MEMORY_ALLOCATION_ERROR; 1.299 + return NULL; 1.300 + } 1.301 + if (U_FAILURE(This->fDeferredStatus)) { 1.302 + status = This->fDeferredStatus; 1.303 + delete This; 1.304 + return NULL; 1.305 + } 1.306 + This->fFlags = flags; 1.307 + 1.308 + RegexCompile compiler(This, status); 1.309 + compiler.compile(regex, pe, status); 1.310 + 1.311 + if (U_FAILURE(status)) { 1.312 + delete This; 1.313 + This = NULL; 1.314 + } 1.315 + 1.316 + return This; 1.317 +} 1.318 + 1.319 + 1.320 +// 1.321 +// compile, UText mode 1.322 +// 1.323 +RegexPattern * U_EXPORT2 1.324 +RegexPattern::compile(UText *regex, 1.325 + uint32_t flags, 1.326 + UParseError &pe, 1.327 + UErrorCode &status) 1.328 +{ 1.329 + if (U_FAILURE(status)) { 1.330 + return NULL; 1.331 + } 1.332 + 1.333 + const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 1.334 + UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 1.335 + UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 1.336 + 1.337 + if ((flags & ~allFlags) != 0) { 1.338 + status = U_REGEX_INVALID_FLAG; 1.339 + return NULL; 1.340 + } 1.341 + 1.342 + if ((flags & UREGEX_CANON_EQ) != 0) { 1.343 + status = U_REGEX_UNIMPLEMENTED; 1.344 + return NULL; 1.345 + } 1.346 + 1.347 + RegexPattern *This = new RegexPattern; 1.348 + if (This == NULL) { 1.349 + status = U_MEMORY_ALLOCATION_ERROR; 1.350 + return NULL; 1.351 + } 1.352 + if (U_FAILURE(This->fDeferredStatus)) { 1.353 + status = This->fDeferredStatus; 1.354 + delete This; 1.355 + return NULL; 1.356 + } 1.357 + This->fFlags = flags; 1.358 + 1.359 + RegexCompile compiler(This, status); 1.360 + compiler.compile(regex, pe, status); 1.361 + 1.362 + if (U_FAILURE(status)) { 1.363 + delete This; 1.364 + This = NULL; 1.365 + } 1.366 + 1.367 + return This; 1.368 +} 1.369 + 1.370 +// 1.371 +// compile with default flags. 1.372 +// 1.373 +RegexPattern * U_EXPORT2 1.374 +RegexPattern::compile(const UnicodeString ®ex, 1.375 + UParseError &pe, 1.376 + UErrorCode &err) 1.377 +{ 1.378 + return compile(regex, 0, pe, err); 1.379 +} 1.380 + 1.381 + 1.382 +// 1.383 +// compile with default flags, UText mode 1.384 +// 1.385 +RegexPattern * U_EXPORT2 1.386 +RegexPattern::compile(UText *regex, 1.387 + UParseError &pe, 1.388 + UErrorCode &err) 1.389 +{ 1.390 + return compile(regex, 0, pe, err); 1.391 +} 1.392 + 1.393 + 1.394 +// 1.395 +// compile with no UParseErr parameter. 1.396 +// 1.397 +RegexPattern * U_EXPORT2 1.398 +RegexPattern::compile(const UnicodeString ®ex, 1.399 + uint32_t flags, 1.400 + UErrorCode &err) 1.401 +{ 1.402 + UParseError pe; 1.403 + return compile(regex, flags, pe, err); 1.404 +} 1.405 + 1.406 + 1.407 +// 1.408 +// compile with no UParseErr parameter, UText mode 1.409 +// 1.410 +RegexPattern * U_EXPORT2 1.411 +RegexPattern::compile(UText *regex, 1.412 + uint32_t flags, 1.413 + UErrorCode &err) 1.414 +{ 1.415 + UParseError pe; 1.416 + return compile(regex, flags, pe, err); 1.417 +} 1.418 + 1.419 + 1.420 +//--------------------------------------------------------------------- 1.421 +// 1.422 +// flags 1.423 +// 1.424 +//--------------------------------------------------------------------- 1.425 +uint32_t RegexPattern::flags() const { 1.426 + return fFlags; 1.427 +} 1.428 + 1.429 + 1.430 +//--------------------------------------------------------------------- 1.431 +// 1.432 +// matcher(UnicodeString, err) 1.433 +// 1.434 +//--------------------------------------------------------------------- 1.435 +RegexMatcher *RegexPattern::matcher(const UnicodeString &input, 1.436 + UErrorCode &status) const { 1.437 + RegexMatcher *retMatcher = matcher(status); 1.438 + if (retMatcher != NULL) { 1.439 + retMatcher->fDeferredStatus = status; 1.440 + retMatcher->reset(input); 1.441 + } 1.442 + return retMatcher; 1.443 +} 1.444 + 1.445 + 1.446 +//--------------------------------------------------------------------- 1.447 +// 1.448 +// matcher(status) 1.449 +// 1.450 +//--------------------------------------------------------------------- 1.451 +RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 1.452 + RegexMatcher *retMatcher = NULL; 1.453 + 1.454 + if (U_FAILURE(status)) { 1.455 + return NULL; 1.456 + } 1.457 + if (U_FAILURE(fDeferredStatus)) { 1.458 + status = fDeferredStatus; 1.459 + return NULL; 1.460 + } 1.461 + 1.462 + retMatcher = new RegexMatcher(this); 1.463 + if (retMatcher == NULL) { 1.464 + status = U_MEMORY_ALLOCATION_ERROR; 1.465 + return NULL; 1.466 + } 1.467 + return retMatcher; 1.468 +} 1.469 + 1.470 + 1.471 + 1.472 +//--------------------------------------------------------------------- 1.473 +// 1.474 +// matches Convenience function to test for a match, starting 1.475 +// with a pattern string and a data string. 1.476 +// 1.477 +//--------------------------------------------------------------------- 1.478 +UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 1.479 + const UnicodeString &input, 1.480 + UParseError &pe, 1.481 + UErrorCode &status) { 1.482 + 1.483 + if (U_FAILURE(status)) {return FALSE;} 1.484 + 1.485 + UBool retVal; 1.486 + RegexPattern *pat = NULL; 1.487 + RegexMatcher *matcher = NULL; 1.488 + 1.489 + pat = RegexPattern::compile(regex, 0, pe, status); 1.490 + matcher = pat->matcher(input, status); 1.491 + retVal = matcher->matches(status); 1.492 + 1.493 + delete matcher; 1.494 + delete pat; 1.495 + return retVal; 1.496 +} 1.497 + 1.498 + 1.499 +// 1.500 +// matches, UText mode 1.501 +// 1.502 +UBool U_EXPORT2 RegexPattern::matches(UText *regex, 1.503 + UText *input, 1.504 + UParseError &pe, 1.505 + UErrorCode &status) { 1.506 + 1.507 + if (U_FAILURE(status)) {return FALSE;} 1.508 + 1.509 + UBool retVal = FALSE; 1.510 + RegexPattern *pat = NULL; 1.511 + RegexMatcher *matcher = NULL; 1.512 + 1.513 + pat = RegexPattern::compile(regex, 0, pe, status); 1.514 + matcher = pat->matcher(status); 1.515 + if (U_SUCCESS(status)) { 1.516 + matcher->reset(input); 1.517 + retVal = matcher->matches(status); 1.518 + } 1.519 + 1.520 + delete matcher; 1.521 + delete pat; 1.522 + return retVal; 1.523 +} 1.524 + 1.525 + 1.526 + 1.527 + 1.528 + 1.529 +//--------------------------------------------------------------------- 1.530 +// 1.531 +// pattern 1.532 +// 1.533 +//--------------------------------------------------------------------- 1.534 +UnicodeString RegexPattern::pattern() const { 1.535 + if (fPatternString != NULL) { 1.536 + return *fPatternString; 1.537 + } else if (fPattern == NULL) { 1.538 + return UnicodeString(); 1.539 + } else { 1.540 + UErrorCode status = U_ZERO_ERROR; 1.541 + int64_t nativeLen = utext_nativeLength(fPattern); 1.542 + int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 1.543 + UnicodeString result; 1.544 + 1.545 + status = U_ZERO_ERROR; 1.546 + UChar *resultChars = result.getBuffer(len16); 1.547 + utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 1.548 + result.releaseBuffer(len16); 1.549 + 1.550 + return result; 1.551 + } 1.552 +} 1.553 + 1.554 + 1.555 + 1.556 + 1.557 +//--------------------------------------------------------------------- 1.558 +// 1.559 +// patternText 1.560 +// 1.561 +//--------------------------------------------------------------------- 1.562 +UText *RegexPattern::patternText(UErrorCode &status) const { 1.563 + if (U_FAILURE(status)) {return NULL;} 1.564 + status = U_ZERO_ERROR; 1.565 + 1.566 + if (fPattern != NULL) { 1.567 + return fPattern; 1.568 + } else { 1.569 + RegexStaticSets::initGlobals(&status); 1.570 + return RegexStaticSets::gStaticSets->fEmptyText; 1.571 + } 1.572 +} 1.573 + 1.574 + 1.575 + 1.576 +//--------------------------------------------------------------------- 1.577 +// 1.578 +// split 1.579 +// 1.580 +//--------------------------------------------------------------------- 1.581 +int32_t RegexPattern::split(const UnicodeString &input, 1.582 + UnicodeString dest[], 1.583 + int32_t destCapacity, 1.584 + UErrorCode &status) const 1.585 +{ 1.586 + if (U_FAILURE(status)) { 1.587 + return 0; 1.588 + }; 1.589 + 1.590 + RegexMatcher m(this); 1.591 + int32_t r = 0; 1.592 + // Check m's status to make sure all is ok. 1.593 + if (U_SUCCESS(m.fDeferredStatus)) { 1.594 + r = m.split(input, dest, destCapacity, status); 1.595 + } 1.596 + return r; 1.597 +} 1.598 + 1.599 +// 1.600 +// split, UText mode 1.601 +// 1.602 +int32_t RegexPattern::split(UText *input, 1.603 + UText *dest[], 1.604 + int32_t destCapacity, 1.605 + UErrorCode &status) const 1.606 +{ 1.607 + if (U_FAILURE(status)) { 1.608 + return 0; 1.609 + }; 1.610 + 1.611 + RegexMatcher m(this); 1.612 + int32_t r = 0; 1.613 + // Check m's status to make sure all is ok. 1.614 + if (U_SUCCESS(m.fDeferredStatus)) { 1.615 + r = m.split(input, dest, destCapacity, status); 1.616 + } 1.617 + return r; 1.618 +} 1.619 + 1.620 + 1.621 + 1.622 +//--------------------------------------------------------------------- 1.623 +// 1.624 +// dump Output the compiled form of the pattern. 1.625 +// Debugging function only. 1.626 +// 1.627 +//--------------------------------------------------------------------- 1.628 +#if defined(REGEX_DEBUG) 1.629 +void RegexPattern::dumpOp(int32_t index) const { 1.630 + static const char * const opNames[] = {URX_OPCODE_NAMES}; 1.631 + int32_t op = fCompiledPat->elementAti(index); 1.632 + int32_t val = URX_VAL(op); 1.633 + int32_t type = URX_TYPE(op); 1.634 + int32_t pinnedType = type; 1.635 + if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) { 1.636 + pinnedType = 0; 1.637 + } 1.638 + 1.639 + REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType])); 1.640 + switch (type) { 1.641 + case URX_NOP: 1.642 + case URX_DOTANY: 1.643 + case URX_DOTANY_ALL: 1.644 + case URX_FAIL: 1.645 + case URX_CARET: 1.646 + case URX_DOLLAR: 1.647 + case URX_BACKSLASH_G: 1.648 + case URX_BACKSLASH_X: 1.649 + case URX_END: 1.650 + case URX_DOLLAR_M: 1.651 + case URX_CARET_M: 1.652 + // Types with no operand field of interest. 1.653 + break; 1.654 + 1.655 + case URX_RESERVED_OP: 1.656 + case URX_START_CAPTURE: 1.657 + case URX_END_CAPTURE: 1.658 + case URX_STATE_SAVE: 1.659 + case URX_JMP: 1.660 + case URX_JMP_SAV: 1.661 + case URX_JMP_SAV_X: 1.662 + case URX_BACKSLASH_B: 1.663 + case URX_BACKSLASH_BU: 1.664 + case URX_BACKSLASH_D: 1.665 + case URX_BACKSLASH_Z: 1.666 + case URX_STRING_LEN: 1.667 + case URX_CTR_INIT: 1.668 + case URX_CTR_INIT_NG: 1.669 + case URX_CTR_LOOP: 1.670 + case URX_CTR_LOOP_NG: 1.671 + case URX_RELOC_OPRND: 1.672 + case URX_STO_SP: 1.673 + case URX_LD_SP: 1.674 + case URX_BACKREF: 1.675 + case URX_STO_INP_LOC: 1.676 + case URX_JMPX: 1.677 + case URX_LA_START: 1.678 + case URX_LA_END: 1.679 + case URX_BACKREF_I: 1.680 + case URX_LB_START: 1.681 + case URX_LB_CONT: 1.682 + case URX_LB_END: 1.683 + case URX_LBN_CONT: 1.684 + case URX_LBN_END: 1.685 + case URX_LOOP_C: 1.686 + case URX_LOOP_DOT_I: 1.687 + // types with an integer operand field. 1.688 + REGEX_DUMP_DEBUG_PRINTF(("%d", val)); 1.689 + break; 1.690 + 1.691 + case URX_ONECHAR: 1.692 + case URX_ONECHAR_I: 1.693 + REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?')); 1.694 + break; 1.695 + 1.696 + case URX_STRING: 1.697 + case URX_STRING_I: 1.698 + { 1.699 + int32_t lengthOp = fCompiledPat->elementAti(index+1); 1.700 + U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 1.701 + int32_t length = URX_VAL(lengthOp); 1.702 + int32_t i; 1.703 + for (i=val; i<val+length; i++) { 1.704 + UChar c = fLiteralText[i]; 1.705 + if (c < 32 || c >= 256) {c = '.';} 1.706 + REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 1.707 + } 1.708 + } 1.709 + break; 1.710 + 1.711 + case URX_SETREF: 1.712 + case URX_LOOP_SR_I: 1.713 + { 1.714 + UnicodeString s; 1.715 + UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 1.716 + set->toPattern(s, TRUE); 1.717 + for (int32_t i=0; i<s.length(); i++) { 1.718 + REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); 1.719 + } 1.720 + } 1.721 + break; 1.722 + 1.723 + case URX_STATIC_SETREF: 1.724 + case URX_STAT_SETREF_N: 1.725 + { 1.726 + UnicodeString s; 1.727 + if (val & URX_NEG_SET) { 1.728 + REGEX_DUMP_DEBUG_PRINTF(("NOT ")); 1.729 + val &= ~URX_NEG_SET; 1.730 + } 1.731 + UnicodeSet *set = fStaticSets[val]; 1.732 + set->toPattern(s, TRUE); 1.733 + for (int32_t i=0; i<s.length(); i++) { 1.734 + REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); 1.735 + } 1.736 + } 1.737 + break; 1.738 + 1.739 + 1.740 + default: 1.741 + REGEX_DUMP_DEBUG_PRINTF(("??????")); 1.742 + break; 1.743 + } 1.744 + REGEX_DUMP_DEBUG_PRINTF(("\n")); 1.745 +} 1.746 +#endif 1.747 + 1.748 + 1.749 +#if defined(REGEX_DEBUG) 1.750 +U_CAPI void U_EXPORT2 1.751 +RegexPatternDump(const RegexPattern *This) { 1.752 + int index; 1.753 + int i; 1.754 + 1.755 + REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: ")); 1.756 + UChar32 c = utext_next32From(This->fPattern, 0); 1.757 + while (c != U_SENTINEL) { 1.758 + if (c<32 || c>256) { 1.759 + c = '.'; 1.760 + } 1.761 + REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 1.762 + 1.763 + c = UTEXT_NEXT32(This->fPattern); 1.764 + } 1.765 + REGEX_DUMP_DEBUG_PRINTF(("\n")); 1.766 + REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen)); 1.767 + REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType))); 1.768 + if (This->fStartType == START_STRING) { 1.769 + REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \"")); 1.770 + for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) { 1.771 + REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates. 1.772 + } 1.773 + REGEX_DUMP_DEBUG_PRINTF(("\"\n")); 1.774 + 1.775 + } else if (This->fStartType == START_SET) { 1.776 + int32_t numSetChars = This->fInitialChars->size(); 1.777 + if (numSetChars > 20) { 1.778 + numSetChars = 20; 1.779 + } 1.780 + REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : ")); 1.781 + for (i=0; i<numSetChars; i++) { 1.782 + UChar32 c = This->fInitialChars->charAt(i); 1.783 + if (0x20<c && c <0x7e) { 1.784 + REGEX_DUMP_DEBUG_PRINTF(("%c ", c)); 1.785 + } else { 1.786 + REGEX_DUMP_DEBUG_PRINTF(("%#x ", c)); 1.787 + } 1.788 + } 1.789 + if (numSetChars < This->fInitialChars->size()) { 1.790 + REGEX_DUMP_DEBUG_PRINTF((" ...")); 1.791 + } 1.792 + REGEX_DUMP_DEBUG_PRINTF(("\n")); 1.793 + 1.794 + } else if (This->fStartType == START_CHAR) { 1.795 + REGEX_DUMP_DEBUG_PRINTF((" First char of Match : ")); 1.796 + if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) { 1.797 + REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar)); 1.798 + } else { 1.799 + REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar)); 1.800 + } 1.801 + } 1.802 + 1.803 + REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \ 1.804 + "-------------------------------------------\n")); 1.805 + for (index = 0; index<This->fCompiledPat->size(); index++) { 1.806 + This->dumpOp(index); 1.807 + } 1.808 + REGEX_DUMP_DEBUG_PRINTF(("\n\n")); 1.809 +} 1.810 +#endif 1.811 + 1.812 + 1.813 + 1.814 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 1.815 + 1.816 +U_NAMESPACE_END 1.817 +#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS