intl/icu/source/i18n/repattrn.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 //
     2 //  file:  repattrn.cpp
     3 //
     4 /*
     5 ***************************************************************************
     6 *   Copyright (C) 2002-2012 International Business Machines Corporation   *
     7 *   and others. All rights reserved.                                      *
     8 ***************************************************************************
     9 */
    11 #include "unicode/utypes.h"
    13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
    15 #include "unicode/regex.h"
    16 #include "unicode/uclean.h"
    17 #include "uassert.h"
    18 #include "uvector.h"
    19 #include "uvectr32.h"
    20 #include "uvectr64.h"
    21 #include "regexcmp.h"
    22 #include "regeximp.h"
    23 #include "regexst.h"
    25 U_NAMESPACE_BEGIN
    27 //--------------------------------------------------------------------------
    28 //
    29 //    RegexPattern    Default Constructor
    30 //
    31 //--------------------------------------------------------------------------
    32 RegexPattern::RegexPattern() {
    33     // Init all of this instances data.
    34     init();
    35 }
    38 //--------------------------------------------------------------------------
    39 //
    40 //   Copy Constructor        Note:  This is a rather inefficient implementation,
    41 //                                  but it probably doesn't matter.
    42 //
    43 //--------------------------------------------------------------------------
    44 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    45     init();
    46     *this = other;
    47 }
    51 //--------------------------------------------------------------------------
    52 //
    53 //    Assignment Operator
    54 //
    55 //--------------------------------------------------------------------------
    56 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    57     if (this == &other) {
    58         // Source and destination are the same.  Don't do anything.
    59         return *this;
    60     }
    62     // Clean out any previous contents of object being assigned to.
    63     zap();
    65     // Give target object a default initialization
    66     init();
    68     // Copy simple fields
    69     if ( other.fPatternString == NULL ) {
    70         fPatternString = NULL;
    71         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
    72     } else {
    73         fPatternString = new UnicodeString(*(other.fPatternString));
    74         UErrorCode status = U_ZERO_ERROR;
    75         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
    76         if (U_FAILURE(status)) {
    77             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
    78             return *this;
    79         }
    80     }
    81     fFlags            = other.fFlags;
    82     fLiteralText      = other.fLiteralText;
    83     fDeferredStatus   = other.fDeferredStatus;
    84     fMinMatchLen      = other.fMinMatchLen;
    85     fFrameSize        = other.fFrameSize;
    86     fDataSize         = other.fDataSize;
    87     fMaxCaptureDigits = other.fMaxCaptureDigits;
    88     fStaticSets       = other.fStaticSets;
    89     fStaticSets8      = other.fStaticSets8;
    91     fStartType        = other.fStartType;
    92     fInitialStringIdx = other.fInitialStringIdx;
    93     fInitialStringLen = other.fInitialStringLen;
    94     *fInitialChars    = *other.fInitialChars;
    95     fInitialChar      = other.fInitialChar;
    96     *fInitialChars8   = *other.fInitialChars8;
    97     fNeedsAltInput    = other.fNeedsAltInput;
    99     //  Copy the pattern.  It's just values, nothing deep to copy.
   100     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
   101     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
   103     //  Copy the Unicode Sets.
   104     //    Could be made more efficient if the sets were reference counted and shared,
   105     //    but I doubt that pattern copying will be particularly common.
   106     //    Note:  init() already added an empty element zero to fSets
   107     int32_t i;
   108     int32_t  numSets = other.fSets->size();
   109     fSets8 = new Regex8BitSet[numSets];
   110     if (fSets8 == NULL) {
   111     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
   112     	return *this;
   113     }
   114     for (i=1; i<numSets; i++) {
   115         if (U_FAILURE(fDeferredStatus)) {
   116             return *this;
   117         }
   118         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
   119         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
   120         if (newSet == NULL) {
   121             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
   122             break;
   123         }
   124         fSets->addElement(newSet, fDeferredStatus);
   125         fSets8[i] = other.fSets8[i];
   126     }
   128     return *this;
   129 }
   132 //--------------------------------------------------------------------------
   133 //
   134 //    init        Shared initialization for use by constructors.
   135 //                Bring an uninitialized RegexPattern up to a default state.
   136 //
   137 //--------------------------------------------------------------------------
   138 void RegexPattern::init() {
   139     fFlags            = 0;
   140     fCompiledPat      = 0;
   141     fLiteralText.remove();
   142     fSets             = NULL;
   143     fSets8            = NULL;
   144     fDeferredStatus   = U_ZERO_ERROR;
   145     fMinMatchLen      = 0;
   146     fFrameSize        = 0;
   147     fDataSize         = 0;
   148     fGroupMap         = NULL;
   149     fMaxCaptureDigits = 1;
   150     fStaticSets       = NULL;
   151     fStaticSets8      = NULL;
   152     fStartType        = START_NO_INFO;
   153     fInitialStringIdx = 0;
   154     fInitialStringLen = 0;
   155     fInitialChars     = NULL;
   156     fInitialChar      = 0;
   157     fInitialChars8    = NULL;
   158     fNeedsAltInput    = FALSE;
   160     fPattern          = NULL; // will be set later
   161     fPatternString    = NULL; // may be set later
   162     fCompiledPat      = new UVector64(fDeferredStatus);
   163     fGroupMap         = new UVector32(fDeferredStatus);
   164     fSets             = new UVector(fDeferredStatus);
   165     fInitialChars     = new UnicodeSet;
   166     fInitialChars8    = new Regex8BitSet;
   167     if (U_FAILURE(fDeferredStatus)) {
   168         return;
   169     }
   170     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
   171         fInitialChars == NULL || fInitialChars8 == NULL) {
   172         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
   173         return;
   174     }
   176     // Slot zero of the vector of sets is reserved.  Fill it here.
   177     fSets->addElement((int32_t)0, fDeferredStatus);
   178 }
   181 //--------------------------------------------------------------------------
   182 //
   183 //   zap            Delete everything owned by this RegexPattern.
   184 //
   185 //--------------------------------------------------------------------------
   186 void RegexPattern::zap() {
   187     delete fCompiledPat;
   188     fCompiledPat = NULL;
   189     int i;
   190     for (i=1; i<fSets->size(); i++) {
   191         UnicodeSet *s;
   192         s = (UnicodeSet *)fSets->elementAt(i);
   193         if (s != NULL) {
   194             delete s;
   195         }
   196     }
   197     delete fSets;
   198     fSets = NULL;
   199     delete[] fSets8;
   200     fSets8 = NULL;
   201     delete fGroupMap;
   202     fGroupMap = NULL;
   203     delete fInitialChars;
   204     fInitialChars = NULL;
   205     delete fInitialChars8;
   206     fInitialChars8 = NULL;
   207     if (fPattern != NULL) {
   208         utext_close(fPattern);
   209         fPattern = NULL;
   210     }
   211     if (fPatternString != NULL) {
   212         delete fPatternString;
   213         fPatternString = NULL;
   214     }
   215 }
   218 //--------------------------------------------------------------------------
   219 //
   220 //   Destructor
   221 //
   222 //--------------------------------------------------------------------------
   223 RegexPattern::~RegexPattern() {
   224     zap();
   225 }
   228 //--------------------------------------------------------------------------
   229 //
   230 //   Clone
   231 //
   232 //--------------------------------------------------------------------------
   233 RegexPattern  *RegexPattern::clone() const {
   234     RegexPattern  *copy = new RegexPattern(*this);
   235     return copy;
   236 }
   239 //--------------------------------------------------------------------------
   240 //
   241 //   operator ==   (comparison)    Consider to patterns to be == if the
   242 //                                 pattern strings and the flags are the same.
   243 //                                 Note that pattern strings with the same
   244 //                                 characters can still be considered different.
   245 //
   246 //--------------------------------------------------------------------------
   247 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
   248     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
   249         if (this->fPatternString != NULL && other.fPatternString != NULL) {
   250             return *(this->fPatternString) == *(other.fPatternString);
   251         } else if (this->fPattern == NULL) {
   252             if (other.fPattern == NULL) {
   253                 return TRUE;
   254             }
   255         } else if (other.fPattern != NULL) {
   256             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
   257             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
   258             return utext_equals(this->fPattern, other.fPattern);
   259         }
   260     }
   261     return FALSE;
   262 }
   264 //---------------------------------------------------------------------
   265 //
   266 //   compile
   267 //
   268 //---------------------------------------------------------------------
   269 RegexPattern * U_EXPORT2
   270 RegexPattern::compile(const UnicodeString &regex,
   271                       uint32_t             flags,
   272                       UParseError          &pe,
   273                       UErrorCode           &status)
   274 {
   275     if (U_FAILURE(status)) {
   276         return NULL;
   277     }
   279     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
   280     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
   281     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
   283     if ((flags & ~allFlags) != 0) {
   284         status = U_REGEX_INVALID_FLAG;
   285         return NULL;
   286     }
   288     if ((flags & UREGEX_CANON_EQ) != 0) {
   289         status = U_REGEX_UNIMPLEMENTED;
   290         return NULL;
   291     }
   293     RegexPattern *This = new RegexPattern;
   294     if (This == NULL) {
   295         status = U_MEMORY_ALLOCATION_ERROR;
   296         return NULL;
   297     }
   298     if (U_FAILURE(This->fDeferredStatus)) {
   299         status = This->fDeferredStatus;
   300         delete This;
   301         return NULL;
   302     }
   303     This->fFlags = flags;
   305     RegexCompile     compiler(This, status);
   306     compiler.compile(regex, pe, status);
   308     if (U_FAILURE(status)) {
   309         delete This;
   310         This = NULL;
   311     }
   313     return This;
   314 }
   317 //
   318 //   compile, UText mode
   319 //
   320 RegexPattern * U_EXPORT2
   321 RegexPattern::compile(UText                *regex,
   322                       uint32_t             flags,
   323                       UParseError          &pe,
   324                       UErrorCode           &status)
   325 {
   326     if (U_FAILURE(status)) {
   327         return NULL;
   328     }
   330     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
   331                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
   332                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
   334     if ((flags & ~allFlags) != 0) {
   335         status = U_REGEX_INVALID_FLAG;
   336         return NULL;
   337     }
   339     if ((flags & UREGEX_CANON_EQ) != 0) {
   340         status = U_REGEX_UNIMPLEMENTED;
   341         return NULL;
   342     }
   344     RegexPattern *This = new RegexPattern;
   345     if (This == NULL) {
   346         status = U_MEMORY_ALLOCATION_ERROR;
   347         return NULL;
   348     }
   349     if (U_FAILURE(This->fDeferredStatus)) {
   350         status = This->fDeferredStatus;
   351         delete This;
   352         return NULL;
   353     }
   354     This->fFlags = flags;
   356     RegexCompile     compiler(This, status);
   357     compiler.compile(regex, pe, status);
   359     if (U_FAILURE(status)) {
   360         delete This;
   361         This = NULL;
   362     }
   364     return This;
   365 }
   367 //
   368 //   compile with default flags.
   369 //
   370 RegexPattern * U_EXPORT2
   371 RegexPattern::compile(const UnicodeString &regex,
   372                       UParseError         &pe,
   373                       UErrorCode          &err)
   374 {
   375     return compile(regex, 0, pe, err);
   376 }
   379 //
   380 //   compile with default flags, UText mode
   381 //
   382 RegexPattern * U_EXPORT2
   383 RegexPattern::compile(UText               *regex,
   384                       UParseError         &pe,
   385                       UErrorCode          &err)
   386 {
   387     return compile(regex, 0, pe, err);
   388 }
   391 //
   392 //   compile with no UParseErr parameter.
   393 //
   394 RegexPattern * U_EXPORT2
   395 RegexPattern::compile(const UnicodeString &regex,
   396                       uint32_t             flags,
   397                       UErrorCode          &err)
   398 {
   399     UParseError pe;
   400     return compile(regex, flags, pe, err);
   401 }
   404 //
   405 //   compile with no UParseErr parameter, UText mode
   406 //
   407 RegexPattern * U_EXPORT2
   408 RegexPattern::compile(UText                *regex,
   409                       uint32_t             flags,
   410                       UErrorCode           &err)
   411 {
   412     UParseError pe;
   413     return compile(regex, flags, pe, err);
   414 }
   417 //---------------------------------------------------------------------
   418 //
   419 //   flags
   420 //
   421 //---------------------------------------------------------------------
   422 uint32_t RegexPattern::flags() const {
   423     return fFlags;
   424 }
   427 //---------------------------------------------------------------------
   428 //
   429 //   matcher(UnicodeString, err)
   430 //
   431 //---------------------------------------------------------------------
   432 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
   433                                     UErrorCode          &status)  const {
   434     RegexMatcher    *retMatcher = matcher(status);
   435     if (retMatcher != NULL) {
   436         retMatcher->fDeferredStatus = status;
   437         retMatcher->reset(input);
   438     }
   439     return retMatcher;
   440 }
   443 //---------------------------------------------------------------------
   444 //
   445 //   matcher(status)
   446 //
   447 //---------------------------------------------------------------------
   448 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
   449     RegexMatcher    *retMatcher = NULL;
   451     if (U_FAILURE(status)) {
   452         return NULL;
   453     }
   454     if (U_FAILURE(fDeferredStatus)) {
   455         status = fDeferredStatus;
   456         return NULL;
   457     }
   459     retMatcher = new RegexMatcher(this);
   460     if (retMatcher == NULL) {
   461         status = U_MEMORY_ALLOCATION_ERROR;
   462         return NULL;
   463     }
   464     return retMatcher;
   465 }
   469 //---------------------------------------------------------------------
   470 //
   471 //   matches        Convenience function to test for a match, starting
   472 //                  with a pattern string and a data string.
   473 //
   474 //---------------------------------------------------------------------
   475 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
   476               const UnicodeString   &input,
   477                     UParseError     &pe,
   478                     UErrorCode      &status) {
   480     if (U_FAILURE(status)) {return FALSE;}
   482     UBool         retVal;
   483     RegexPattern *pat     = NULL;
   484     RegexMatcher *matcher = NULL;
   486     pat     = RegexPattern::compile(regex, 0, pe, status);
   487     matcher = pat->matcher(input, status);
   488     retVal  = matcher->matches(status);
   490     delete matcher;
   491     delete pat;
   492     return retVal;
   493 }
   496 //
   497 //   matches, UText mode
   498 //
   499 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
   500                     UText           *input,
   501                     UParseError     &pe,
   502                     UErrorCode      &status) {
   504     if (U_FAILURE(status)) {return FALSE;}
   506     UBool         retVal  = FALSE;
   507     RegexPattern *pat     = NULL;
   508     RegexMatcher *matcher = NULL;
   510     pat     = RegexPattern::compile(regex, 0, pe, status);
   511     matcher = pat->matcher(status);
   512     if (U_SUCCESS(status)) {
   513         matcher->reset(input);
   514         retVal  = matcher->matches(status);
   515     }
   517     delete matcher;
   518     delete pat;
   519     return retVal;
   520 }
   526 //---------------------------------------------------------------------
   527 //
   528 //   pattern
   529 //
   530 //---------------------------------------------------------------------
   531 UnicodeString RegexPattern::pattern() const {
   532     if (fPatternString != NULL) {
   533         return *fPatternString;
   534     } else if (fPattern == NULL) {
   535         return UnicodeString();
   536     } else {
   537         UErrorCode status = U_ZERO_ERROR;
   538         int64_t nativeLen = utext_nativeLength(fPattern);
   539         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
   540         UnicodeString result;
   542         status = U_ZERO_ERROR;
   543         UChar *resultChars = result.getBuffer(len16);
   544         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
   545         result.releaseBuffer(len16);
   547         return result;
   548     }
   549 }
   554 //---------------------------------------------------------------------
   555 //
   556 //   patternText
   557 //
   558 //---------------------------------------------------------------------
   559 UText *RegexPattern::patternText(UErrorCode      &status) const {
   560     if (U_FAILURE(status)) {return NULL;}
   561     status = U_ZERO_ERROR;
   563     if (fPattern != NULL) {
   564         return fPattern;
   565     } else {
   566         RegexStaticSets::initGlobals(&status);
   567         return RegexStaticSets::gStaticSets->fEmptyText;
   568     }
   569 }
   573 //---------------------------------------------------------------------
   574 //
   575 //   split
   576 //
   577 //---------------------------------------------------------------------
   578 int32_t  RegexPattern::split(const UnicodeString &input,
   579         UnicodeString    dest[],
   580         int32_t          destCapacity,
   581         UErrorCode      &status) const
   582 {
   583     if (U_FAILURE(status)) {
   584         return 0;
   585     };
   587     RegexMatcher  m(this);
   588     int32_t r = 0;
   589     // Check m's status to make sure all is ok.
   590     if (U_SUCCESS(m.fDeferredStatus)) {
   591     	r = m.split(input, dest, destCapacity, status);
   592     }
   593     return r;
   594 }
   596 //
   597 //   split, UText mode
   598 //
   599 int32_t  RegexPattern::split(UText *input,
   600         UText           *dest[],
   601         int32_t          destCapacity,
   602         UErrorCode      &status) const
   603 {
   604     if (U_FAILURE(status)) {
   605         return 0;
   606     };
   608     RegexMatcher  m(this);
   609     int32_t r = 0;
   610     // Check m's status to make sure all is ok.
   611     if (U_SUCCESS(m.fDeferredStatus)) {
   612     	r = m.split(input, dest, destCapacity, status);
   613     }
   614     return r;
   615 }
   619 //---------------------------------------------------------------------
   620 //
   621 //   dump    Output the compiled form of the pattern.
   622 //           Debugging function only.
   623 //
   624 //---------------------------------------------------------------------
   625 #if defined(REGEX_DEBUG)
   626 void   RegexPattern::dumpOp(int32_t index) const {
   627     static const char * const opNames[] = {URX_OPCODE_NAMES};
   628     int32_t op          = fCompiledPat->elementAti(index);
   629     int32_t val         = URX_VAL(op);
   630     int32_t type        = URX_TYPE(op);
   631     int32_t pinnedType  = type;
   632     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
   633         pinnedType = 0;
   634     }
   636     REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
   637     switch (type) {
   638     case URX_NOP:
   639     case URX_DOTANY:
   640     case URX_DOTANY_ALL:
   641     case URX_FAIL:
   642     case URX_CARET:
   643     case URX_DOLLAR:
   644     case URX_BACKSLASH_G:
   645     case URX_BACKSLASH_X:
   646     case URX_END:
   647     case URX_DOLLAR_M:
   648     case URX_CARET_M:
   649         // Types with no operand field of interest.
   650         break;
   652     case URX_RESERVED_OP:
   653     case URX_START_CAPTURE:
   654     case URX_END_CAPTURE:
   655     case URX_STATE_SAVE:
   656     case URX_JMP:
   657     case URX_JMP_SAV:
   658     case URX_JMP_SAV_X:
   659     case URX_BACKSLASH_B:
   660     case URX_BACKSLASH_BU:
   661     case URX_BACKSLASH_D:
   662     case URX_BACKSLASH_Z:
   663     case URX_STRING_LEN:
   664     case URX_CTR_INIT:
   665     case URX_CTR_INIT_NG:
   666     case URX_CTR_LOOP:
   667     case URX_CTR_LOOP_NG:
   668     case URX_RELOC_OPRND:
   669     case URX_STO_SP:
   670     case URX_LD_SP:
   671     case URX_BACKREF:
   672     case URX_STO_INP_LOC:
   673     case URX_JMPX:
   674     case URX_LA_START:
   675     case URX_LA_END:
   676     case URX_BACKREF_I:
   677     case URX_LB_START:
   678     case URX_LB_CONT:
   679     case URX_LB_END:
   680     case URX_LBN_CONT:
   681     case URX_LBN_END:
   682     case URX_LOOP_C:
   683     case URX_LOOP_DOT_I:
   684         // types with an integer operand field.
   685         REGEX_DUMP_DEBUG_PRINTF(("%d", val));
   686         break;
   688     case URX_ONECHAR:
   689     case URX_ONECHAR_I:
   690         REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
   691         break;
   693     case URX_STRING:
   694     case URX_STRING_I:
   695         {
   696             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
   697             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
   698             int32_t length = URX_VAL(lengthOp);
   699             int32_t i;
   700             for (i=val; i<val+length; i++) {
   701                 UChar c = fLiteralText[i];
   702                 if (c < 32 || c >= 256) {c = '.';}
   703                 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
   704             }
   705         }
   706         break;
   708     case URX_SETREF:
   709     case URX_LOOP_SR_I:
   710         {
   711             UnicodeString s;
   712             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
   713             set->toPattern(s, TRUE);
   714             for (int32_t i=0; i<s.length(); i++) {
   715                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
   716             }
   717         }
   718         break;
   720     case URX_STATIC_SETREF:
   721     case URX_STAT_SETREF_N:
   722         {
   723             UnicodeString s;
   724             if (val & URX_NEG_SET) {
   725                 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
   726                 val &= ~URX_NEG_SET;
   727             }
   728             UnicodeSet *set = fStaticSets[val];
   729             set->toPattern(s, TRUE);
   730             for (int32_t i=0; i<s.length(); i++) {
   731                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
   732             }
   733         }
   734         break;
   737     default:
   738         REGEX_DUMP_DEBUG_PRINTF(("??????"));
   739         break;
   740     }
   741     REGEX_DUMP_DEBUG_PRINTF(("\n"));
   742 }
   743 #endif
   746 #if defined(REGEX_DEBUG)
   747 U_CAPI void  U_EXPORT2
   748 RegexPatternDump(const RegexPattern *This) {
   749     int      index;
   750     int      i;
   752     REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
   753     UChar32 c = utext_next32From(This->fPattern, 0);
   754     while (c != U_SENTINEL) {
   755         if (c<32 || c>256) {
   756             c = '.';
   757         }
   758         REGEX_DUMP_DEBUG_PRINTF(("%c", c));
   760         c = UTEXT_NEXT32(This->fPattern);
   761     }
   762     REGEX_DUMP_DEBUG_PRINTF(("\n"));
   763     REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
   764     REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
   765     if (This->fStartType == START_STRING) {
   766         REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
   767         for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
   768             REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
   769         }
   770         REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
   772     } else if (This->fStartType == START_SET) {
   773         int32_t numSetChars = This->fInitialChars->size();
   774         if (numSetChars > 20) {
   775             numSetChars = 20;
   776         }
   777         REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
   778         for (i=0; i<numSetChars; i++) {
   779             UChar32 c = This->fInitialChars->charAt(i);
   780             if (0x20<c && c <0x7e) {
   781                 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
   782             } else {
   783                 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
   784             }
   785         }
   786         if (numSetChars < This->fInitialChars->size()) {
   787             REGEX_DUMP_DEBUG_PRINTF((" ..."));
   788         }
   789         REGEX_DUMP_DEBUG_PRINTF(("\n"));
   791     } else if (This->fStartType == START_CHAR) {
   792         REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
   793         if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
   794                 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
   795             } else {
   796                 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
   797             }
   798     }
   800     REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
   801            "-------------------------------------------\n"));
   802     for (index = 0; index<This->fCompiledPat->size(); index++) {
   803         This->dumpOp(index);
   804     }
   805     REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
   806 }
   807 #endif
   811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
   813 U_NAMESPACE_END
   814 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS

mercurial