|
1 // |
|
2 // file: repattrn.cpp |
|
3 // |
|
4 /* |
|
5 *************************************************************************** |
|
6 * Copyright (C) 2002-2012 International Business Machines Corporation * |
|
7 * and others. All rights reserved. * |
|
8 *************************************************************************** |
|
9 */ |
|
10 |
|
11 #include "unicode/utypes.h" |
|
12 |
|
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
|
14 |
|
15 #include "unicode/regex.h" |
|
16 #include "unicode/uclean.h" |
|
17 #include "uassert.h" |
|
18 #include "uvector.h" |
|
19 #include "uvectr32.h" |
|
20 #include "uvectr64.h" |
|
21 #include "regexcmp.h" |
|
22 #include "regeximp.h" |
|
23 #include "regexst.h" |
|
24 |
|
25 U_NAMESPACE_BEGIN |
|
26 |
|
27 //-------------------------------------------------------------------------- |
|
28 // |
|
29 // RegexPattern Default Constructor |
|
30 // |
|
31 //-------------------------------------------------------------------------- |
|
32 RegexPattern::RegexPattern() { |
|
33 // Init all of this instances data. |
|
34 init(); |
|
35 } |
|
36 |
|
37 |
|
38 //-------------------------------------------------------------------------- |
|
39 // |
|
40 // Copy Constructor Note: This is a rather inefficient implementation, |
|
41 // but it probably doesn't matter. |
|
42 // |
|
43 //-------------------------------------------------------------------------- |
|
44 RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { |
|
45 init(); |
|
46 *this = other; |
|
47 } |
|
48 |
|
49 |
|
50 |
|
51 //-------------------------------------------------------------------------- |
|
52 // |
|
53 // Assignment Operator |
|
54 // |
|
55 //-------------------------------------------------------------------------- |
|
56 RegexPattern &RegexPattern::operator = (const RegexPattern &other) { |
|
57 if (this == &other) { |
|
58 // Source and destination are the same. Don't do anything. |
|
59 return *this; |
|
60 } |
|
61 |
|
62 // Clean out any previous contents of object being assigned to. |
|
63 zap(); |
|
64 |
|
65 // Give target object a default initialization |
|
66 init(); |
|
67 |
|
68 // Copy simple fields |
|
69 if ( other.fPatternString == NULL ) { |
|
70 fPatternString = NULL; |
|
71 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); |
|
72 } else { |
|
73 fPatternString = new UnicodeString(*(other.fPatternString)); |
|
74 UErrorCode status = U_ZERO_ERROR; |
|
75 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status); |
|
76 if (U_FAILURE(status)) { |
|
77 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
|
78 return *this; |
|
79 } |
|
80 } |
|
81 fFlags = other.fFlags; |
|
82 fLiteralText = other.fLiteralText; |
|
83 fDeferredStatus = other.fDeferredStatus; |
|
84 fMinMatchLen = other.fMinMatchLen; |
|
85 fFrameSize = other.fFrameSize; |
|
86 fDataSize = other.fDataSize; |
|
87 fMaxCaptureDigits = other.fMaxCaptureDigits; |
|
88 fStaticSets = other.fStaticSets; |
|
89 fStaticSets8 = other.fStaticSets8; |
|
90 |
|
91 fStartType = other.fStartType; |
|
92 fInitialStringIdx = other.fInitialStringIdx; |
|
93 fInitialStringLen = other.fInitialStringLen; |
|
94 *fInitialChars = *other.fInitialChars; |
|
95 fInitialChar = other.fInitialChar; |
|
96 *fInitialChars8 = *other.fInitialChars8; |
|
97 fNeedsAltInput = other.fNeedsAltInput; |
|
98 |
|
99 // Copy the pattern. It's just values, nothing deep to copy. |
|
100 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); |
|
101 fGroupMap->assign(*other.fGroupMap, fDeferredStatus); |
|
102 |
|
103 // Copy the Unicode Sets. |
|
104 // Could be made more efficient if the sets were reference counted and shared, |
|
105 // but I doubt that pattern copying will be particularly common. |
|
106 // Note: init() already added an empty element zero to fSets |
|
107 int32_t i; |
|
108 int32_t numSets = other.fSets->size(); |
|
109 fSets8 = new Regex8BitSet[numSets]; |
|
110 if (fSets8 == NULL) { |
|
111 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
|
112 return *this; |
|
113 } |
|
114 for (i=1; i<numSets; i++) { |
|
115 if (U_FAILURE(fDeferredStatus)) { |
|
116 return *this; |
|
117 } |
|
118 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); |
|
119 UnicodeSet *newSet = new UnicodeSet(*sourceSet); |
|
120 if (newSet == NULL) { |
|
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
|
122 break; |
|
123 } |
|
124 fSets->addElement(newSet, fDeferredStatus); |
|
125 fSets8[i] = other.fSets8[i]; |
|
126 } |
|
127 |
|
128 return *this; |
|
129 } |
|
130 |
|
131 |
|
132 //-------------------------------------------------------------------------- |
|
133 // |
|
134 // init Shared initialization for use by constructors. |
|
135 // Bring an uninitialized RegexPattern up to a default state. |
|
136 // |
|
137 //-------------------------------------------------------------------------- |
|
138 void RegexPattern::init() { |
|
139 fFlags = 0; |
|
140 fCompiledPat = 0; |
|
141 fLiteralText.remove(); |
|
142 fSets = NULL; |
|
143 fSets8 = NULL; |
|
144 fDeferredStatus = U_ZERO_ERROR; |
|
145 fMinMatchLen = 0; |
|
146 fFrameSize = 0; |
|
147 fDataSize = 0; |
|
148 fGroupMap = NULL; |
|
149 fMaxCaptureDigits = 1; |
|
150 fStaticSets = NULL; |
|
151 fStaticSets8 = NULL; |
|
152 fStartType = START_NO_INFO; |
|
153 fInitialStringIdx = 0; |
|
154 fInitialStringLen = 0; |
|
155 fInitialChars = NULL; |
|
156 fInitialChar = 0; |
|
157 fInitialChars8 = NULL; |
|
158 fNeedsAltInput = FALSE; |
|
159 |
|
160 fPattern = NULL; // will be set later |
|
161 fPatternString = NULL; // may be set later |
|
162 fCompiledPat = new UVector64(fDeferredStatus); |
|
163 fGroupMap = new UVector32(fDeferredStatus); |
|
164 fSets = new UVector(fDeferredStatus); |
|
165 fInitialChars = new UnicodeSet; |
|
166 fInitialChars8 = new Regex8BitSet; |
|
167 if (U_FAILURE(fDeferredStatus)) { |
|
168 return; |
|
169 } |
|
170 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || |
|
171 fInitialChars == NULL || fInitialChars8 == NULL) { |
|
172 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
|
173 return; |
|
174 } |
|
175 |
|
176 // Slot zero of the vector of sets is reserved. Fill it here. |
|
177 fSets->addElement((int32_t)0, fDeferredStatus); |
|
178 } |
|
179 |
|
180 |
|
181 //-------------------------------------------------------------------------- |
|
182 // |
|
183 // zap Delete everything owned by this RegexPattern. |
|
184 // |
|
185 //-------------------------------------------------------------------------- |
|
186 void RegexPattern::zap() { |
|
187 delete fCompiledPat; |
|
188 fCompiledPat = NULL; |
|
189 int i; |
|
190 for (i=1; i<fSets->size(); i++) { |
|
191 UnicodeSet *s; |
|
192 s = (UnicodeSet *)fSets->elementAt(i); |
|
193 if (s != NULL) { |
|
194 delete s; |
|
195 } |
|
196 } |
|
197 delete fSets; |
|
198 fSets = NULL; |
|
199 delete[] fSets8; |
|
200 fSets8 = NULL; |
|
201 delete fGroupMap; |
|
202 fGroupMap = NULL; |
|
203 delete fInitialChars; |
|
204 fInitialChars = NULL; |
|
205 delete fInitialChars8; |
|
206 fInitialChars8 = NULL; |
|
207 if (fPattern != NULL) { |
|
208 utext_close(fPattern); |
|
209 fPattern = NULL; |
|
210 } |
|
211 if (fPatternString != NULL) { |
|
212 delete fPatternString; |
|
213 fPatternString = NULL; |
|
214 } |
|
215 } |
|
216 |
|
217 |
|
218 //-------------------------------------------------------------------------- |
|
219 // |
|
220 // Destructor |
|
221 // |
|
222 //-------------------------------------------------------------------------- |
|
223 RegexPattern::~RegexPattern() { |
|
224 zap(); |
|
225 } |
|
226 |
|
227 |
|
228 //-------------------------------------------------------------------------- |
|
229 // |
|
230 // Clone |
|
231 // |
|
232 //-------------------------------------------------------------------------- |
|
233 RegexPattern *RegexPattern::clone() const { |
|
234 RegexPattern *copy = new RegexPattern(*this); |
|
235 return copy; |
|
236 } |
|
237 |
|
238 |
|
239 //-------------------------------------------------------------------------- |
|
240 // |
|
241 // operator == (comparison) Consider to patterns to be == if the |
|
242 // pattern strings and the flags are the same. |
|
243 // Note that pattern strings with the same |
|
244 // characters can still be considered different. |
|
245 // |
|
246 //-------------------------------------------------------------------------- |
|
247 UBool RegexPattern::operator ==(const RegexPattern &other) const { |
|
248 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { |
|
249 if (this->fPatternString != NULL && other.fPatternString != NULL) { |
|
250 return *(this->fPatternString) == *(other.fPatternString); |
|
251 } else if (this->fPattern == NULL) { |
|
252 if (other.fPattern == NULL) { |
|
253 return TRUE; |
|
254 } |
|
255 } else if (other.fPattern != NULL) { |
|
256 UTEXT_SETNATIVEINDEX(this->fPattern, 0); |
|
257 UTEXT_SETNATIVEINDEX(other.fPattern, 0); |
|
258 return utext_equals(this->fPattern, other.fPattern); |
|
259 } |
|
260 } |
|
261 return FALSE; |
|
262 } |
|
263 |
|
264 //--------------------------------------------------------------------- |
|
265 // |
|
266 // compile |
|
267 // |
|
268 //--------------------------------------------------------------------- |
|
269 RegexPattern * U_EXPORT2 |
|
270 RegexPattern::compile(const UnicodeString ®ex, |
|
271 uint32_t flags, |
|
272 UParseError &pe, |
|
273 UErrorCode &status) |
|
274 { |
|
275 if (U_FAILURE(status)) { |
|
276 return NULL; |
|
277 } |
|
278 |
|
279 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | |
|
280 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | |
|
281 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; |
|
282 |
|
283 if ((flags & ~allFlags) != 0) { |
|
284 status = U_REGEX_INVALID_FLAG; |
|
285 return NULL; |
|
286 } |
|
287 |
|
288 if ((flags & UREGEX_CANON_EQ) != 0) { |
|
289 status = U_REGEX_UNIMPLEMENTED; |
|
290 return NULL; |
|
291 } |
|
292 |
|
293 RegexPattern *This = new RegexPattern; |
|
294 if (This == NULL) { |
|
295 status = U_MEMORY_ALLOCATION_ERROR; |
|
296 return NULL; |
|
297 } |
|
298 if (U_FAILURE(This->fDeferredStatus)) { |
|
299 status = This->fDeferredStatus; |
|
300 delete This; |
|
301 return NULL; |
|
302 } |
|
303 This->fFlags = flags; |
|
304 |
|
305 RegexCompile compiler(This, status); |
|
306 compiler.compile(regex, pe, status); |
|
307 |
|
308 if (U_FAILURE(status)) { |
|
309 delete This; |
|
310 This = NULL; |
|
311 } |
|
312 |
|
313 return This; |
|
314 } |
|
315 |
|
316 |
|
317 // |
|
318 // compile, UText mode |
|
319 // |
|
320 RegexPattern * U_EXPORT2 |
|
321 RegexPattern::compile(UText *regex, |
|
322 uint32_t flags, |
|
323 UParseError &pe, |
|
324 UErrorCode &status) |
|
325 { |
|
326 if (U_FAILURE(status)) { |
|
327 return NULL; |
|
328 } |
|
329 |
|
330 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | |
|
331 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | |
|
332 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; |
|
333 |
|
334 if ((flags & ~allFlags) != 0) { |
|
335 status = U_REGEX_INVALID_FLAG; |
|
336 return NULL; |
|
337 } |
|
338 |
|
339 if ((flags & UREGEX_CANON_EQ) != 0) { |
|
340 status = U_REGEX_UNIMPLEMENTED; |
|
341 return NULL; |
|
342 } |
|
343 |
|
344 RegexPattern *This = new RegexPattern; |
|
345 if (This == NULL) { |
|
346 status = U_MEMORY_ALLOCATION_ERROR; |
|
347 return NULL; |
|
348 } |
|
349 if (U_FAILURE(This->fDeferredStatus)) { |
|
350 status = This->fDeferredStatus; |
|
351 delete This; |
|
352 return NULL; |
|
353 } |
|
354 This->fFlags = flags; |
|
355 |
|
356 RegexCompile compiler(This, status); |
|
357 compiler.compile(regex, pe, status); |
|
358 |
|
359 if (U_FAILURE(status)) { |
|
360 delete This; |
|
361 This = NULL; |
|
362 } |
|
363 |
|
364 return This; |
|
365 } |
|
366 |
|
367 // |
|
368 // compile with default flags. |
|
369 // |
|
370 RegexPattern * U_EXPORT2 |
|
371 RegexPattern::compile(const UnicodeString ®ex, |
|
372 UParseError &pe, |
|
373 UErrorCode &err) |
|
374 { |
|
375 return compile(regex, 0, pe, err); |
|
376 } |
|
377 |
|
378 |
|
379 // |
|
380 // compile with default flags, UText mode |
|
381 // |
|
382 RegexPattern * U_EXPORT2 |
|
383 RegexPattern::compile(UText *regex, |
|
384 UParseError &pe, |
|
385 UErrorCode &err) |
|
386 { |
|
387 return compile(regex, 0, pe, err); |
|
388 } |
|
389 |
|
390 |
|
391 // |
|
392 // compile with no UParseErr parameter. |
|
393 // |
|
394 RegexPattern * U_EXPORT2 |
|
395 RegexPattern::compile(const UnicodeString ®ex, |
|
396 uint32_t flags, |
|
397 UErrorCode &err) |
|
398 { |
|
399 UParseError pe; |
|
400 return compile(regex, flags, pe, err); |
|
401 } |
|
402 |
|
403 |
|
404 // |
|
405 // compile with no UParseErr parameter, UText mode |
|
406 // |
|
407 RegexPattern * U_EXPORT2 |
|
408 RegexPattern::compile(UText *regex, |
|
409 uint32_t flags, |
|
410 UErrorCode &err) |
|
411 { |
|
412 UParseError pe; |
|
413 return compile(regex, flags, pe, err); |
|
414 } |
|
415 |
|
416 |
|
417 //--------------------------------------------------------------------- |
|
418 // |
|
419 // flags |
|
420 // |
|
421 //--------------------------------------------------------------------- |
|
422 uint32_t RegexPattern::flags() const { |
|
423 return fFlags; |
|
424 } |
|
425 |
|
426 |
|
427 //--------------------------------------------------------------------- |
|
428 // |
|
429 // matcher(UnicodeString, err) |
|
430 // |
|
431 //--------------------------------------------------------------------- |
|
432 RegexMatcher *RegexPattern::matcher(const UnicodeString &input, |
|
433 UErrorCode &status) const { |
|
434 RegexMatcher *retMatcher = matcher(status); |
|
435 if (retMatcher != NULL) { |
|
436 retMatcher->fDeferredStatus = status; |
|
437 retMatcher->reset(input); |
|
438 } |
|
439 return retMatcher; |
|
440 } |
|
441 |
|
442 |
|
443 //--------------------------------------------------------------------- |
|
444 // |
|
445 // matcher(status) |
|
446 // |
|
447 //--------------------------------------------------------------------- |
|
448 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { |
|
449 RegexMatcher *retMatcher = NULL; |
|
450 |
|
451 if (U_FAILURE(status)) { |
|
452 return NULL; |
|
453 } |
|
454 if (U_FAILURE(fDeferredStatus)) { |
|
455 status = fDeferredStatus; |
|
456 return NULL; |
|
457 } |
|
458 |
|
459 retMatcher = new RegexMatcher(this); |
|
460 if (retMatcher == NULL) { |
|
461 status = U_MEMORY_ALLOCATION_ERROR; |
|
462 return NULL; |
|
463 } |
|
464 return retMatcher; |
|
465 } |
|
466 |
|
467 |
|
468 |
|
469 //--------------------------------------------------------------------- |
|
470 // |
|
471 // matches Convenience function to test for a match, starting |
|
472 // with a pattern string and a data string. |
|
473 // |
|
474 //--------------------------------------------------------------------- |
|
475 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, |
|
476 const UnicodeString &input, |
|
477 UParseError &pe, |
|
478 UErrorCode &status) { |
|
479 |
|
480 if (U_FAILURE(status)) {return FALSE;} |
|
481 |
|
482 UBool retVal; |
|
483 RegexPattern *pat = NULL; |
|
484 RegexMatcher *matcher = NULL; |
|
485 |
|
486 pat = RegexPattern::compile(regex, 0, pe, status); |
|
487 matcher = pat->matcher(input, status); |
|
488 retVal = matcher->matches(status); |
|
489 |
|
490 delete matcher; |
|
491 delete pat; |
|
492 return retVal; |
|
493 } |
|
494 |
|
495 |
|
496 // |
|
497 // matches, UText mode |
|
498 // |
|
499 UBool U_EXPORT2 RegexPattern::matches(UText *regex, |
|
500 UText *input, |
|
501 UParseError &pe, |
|
502 UErrorCode &status) { |
|
503 |
|
504 if (U_FAILURE(status)) {return FALSE;} |
|
505 |
|
506 UBool retVal = FALSE; |
|
507 RegexPattern *pat = NULL; |
|
508 RegexMatcher *matcher = NULL; |
|
509 |
|
510 pat = RegexPattern::compile(regex, 0, pe, status); |
|
511 matcher = pat->matcher(status); |
|
512 if (U_SUCCESS(status)) { |
|
513 matcher->reset(input); |
|
514 retVal = matcher->matches(status); |
|
515 } |
|
516 |
|
517 delete matcher; |
|
518 delete pat; |
|
519 return retVal; |
|
520 } |
|
521 |
|
522 |
|
523 |
|
524 |
|
525 |
|
526 //--------------------------------------------------------------------- |
|
527 // |
|
528 // pattern |
|
529 // |
|
530 //--------------------------------------------------------------------- |
|
531 UnicodeString RegexPattern::pattern() const { |
|
532 if (fPatternString != NULL) { |
|
533 return *fPatternString; |
|
534 } else if (fPattern == NULL) { |
|
535 return UnicodeString(); |
|
536 } else { |
|
537 UErrorCode status = U_ZERO_ERROR; |
|
538 int64_t nativeLen = utext_nativeLength(fPattern); |
|
539 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error |
|
540 UnicodeString result; |
|
541 |
|
542 status = U_ZERO_ERROR; |
|
543 UChar *resultChars = result.getBuffer(len16); |
|
544 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning |
|
545 result.releaseBuffer(len16); |
|
546 |
|
547 return result; |
|
548 } |
|
549 } |
|
550 |
|
551 |
|
552 |
|
553 |
|
554 //--------------------------------------------------------------------- |
|
555 // |
|
556 // patternText |
|
557 // |
|
558 //--------------------------------------------------------------------- |
|
559 UText *RegexPattern::patternText(UErrorCode &status) const { |
|
560 if (U_FAILURE(status)) {return NULL;} |
|
561 status = U_ZERO_ERROR; |
|
562 |
|
563 if (fPattern != NULL) { |
|
564 return fPattern; |
|
565 } else { |
|
566 RegexStaticSets::initGlobals(&status); |
|
567 return RegexStaticSets::gStaticSets->fEmptyText; |
|
568 } |
|
569 } |
|
570 |
|
571 |
|
572 |
|
573 //--------------------------------------------------------------------- |
|
574 // |
|
575 // split |
|
576 // |
|
577 //--------------------------------------------------------------------- |
|
578 int32_t RegexPattern::split(const UnicodeString &input, |
|
579 UnicodeString dest[], |
|
580 int32_t destCapacity, |
|
581 UErrorCode &status) const |
|
582 { |
|
583 if (U_FAILURE(status)) { |
|
584 return 0; |
|
585 }; |
|
586 |
|
587 RegexMatcher m(this); |
|
588 int32_t r = 0; |
|
589 // Check m's status to make sure all is ok. |
|
590 if (U_SUCCESS(m.fDeferredStatus)) { |
|
591 r = m.split(input, dest, destCapacity, status); |
|
592 } |
|
593 return r; |
|
594 } |
|
595 |
|
596 // |
|
597 // split, UText mode |
|
598 // |
|
599 int32_t RegexPattern::split(UText *input, |
|
600 UText *dest[], |
|
601 int32_t destCapacity, |
|
602 UErrorCode &status) const |
|
603 { |
|
604 if (U_FAILURE(status)) { |
|
605 return 0; |
|
606 }; |
|
607 |
|
608 RegexMatcher m(this); |
|
609 int32_t r = 0; |
|
610 // Check m's status to make sure all is ok. |
|
611 if (U_SUCCESS(m.fDeferredStatus)) { |
|
612 r = m.split(input, dest, destCapacity, status); |
|
613 } |
|
614 return r; |
|
615 } |
|
616 |
|
617 |
|
618 |
|
//---------------------------------------------------------------------
//
//   dumpOp         Print one instruction of the compiled pattern.
//                  Debugging function only; compiled only when
//                  REGEX_DEBUG is defined.
//
//        Prints the index, the raw instruction word and the opcode name,
//        followed by the operand in a form that depends on the opcode type.
//
//---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
void   RegexPattern::dumpOp(int32_t index) const {
    static const char * const opNames[] = {URX_OPCODE_NAMES};

    const int32_t instruction = fCompiledPat->elementAti(index);
    const int32_t opType      = URX_TYPE(instruction);
    int32_t       operand     = URX_VAL(instruction);

    // An opcode type without a name entry is shown with the name at slot 0.
    int32_t nameIndex = opType;
    if ((uint32_t)nameIndex >= sizeof(opNames)/sizeof(char *)) {
        nameIndex = 0;
    }

    REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, instruction, opNames[nameIndex]));

    switch (opType) {
    // Types with no operand field of interest.
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        break;

    // Types with an integer operand field.
    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        REGEX_DUMP_DEBUG_PRINTF(("%d", operand));
        break;

    // Single character operand; values of 256 and above print as '?'.
    case URX_ONECHAR:
    case URX_ONECHAR_I:
        REGEX_DUMP_DEBUG_PRINTF(("%c", operand<256?operand:'?'));
        break;

    // The operand is an index into fLiteralText; the length comes from
    // the following instruction.
    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            const int32_t textLength = URX_VAL(lengthOp);
            const int32_t textLimit  = operand + textLength;
            for (int32_t pos = operand; pos < textLimit; pos++) {
                UChar ch = fLiteralText[pos];
                if (ch < 32 || ch >= 256) {ch = '.';}
                REGEX_DUMP_DEBUG_PRINTF(("%c", ch));
            }
        }
        break;

    // The operand is an index into fSets.
    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString setPattern;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(operand);
            set->toPattern(setPattern, TRUE);
            for (int32_t pos = 0; pos < setPattern.length(); pos++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(pos)));
            }
        }
        break;

    // The operand is an index into fStaticSets, with the URX_NEG_SET bit
    // marking a negated set.
    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString setPattern;
            if (operand & URX_NEG_SET) {
                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
                operand &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[operand];
            set->toPattern(setPattern, TRUE);
            for (int32_t pos = 0; pos < setPattern.length(); pos++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(pos)));
            }
        }
        break;

    default:
        REGEX_DUMP_DEBUG_PRINTF(("??????"));
        break;
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
#endif
|
744 |
|
745 |
|
//---------------------------------------------------------------------
//
//   RegexPatternDump   Print the original pattern text, the match-start
//                      information and every compiled instruction of a
//                      pattern.  Debugging function only; compiled only
//                      when REGEX_DEBUG is defined.
//
//        Pattern characters below 32 or at/above 256 are printed as '.'.
//        A pattern with no text (NULL fPattern) prints an empty pattern
//        line.
//
//---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int32_t index;
    int32_t i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
    // fPattern stays NULL for a pattern that never had text set.
    if (This->fPattern != NULL) {
        UChar32 c = utext_next32From(This->fPattern, 0);
        while (c != U_SENTINEL) {
            // Same printable range as dumpOp(): 256 and above cannot be
            // shown with %c.
            if (c<32 || c>=256) {
                c = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", c));

            c = UTEXT_NEXT32(This->fPattern);
        }
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates.
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 characters of the initial set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
        for (i=0; i<numSetChars; i++) {
            UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20<setChar && setChar <0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n"
                             "-------------------------------------------\n"));
    for (index = 0; index<This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
#endif
|
808 |
|
809 |
|
810 |
|
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) |
|
812 |
|
813 U_NAMESPACE_END |
|
814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |