intl/icu/source/i18n/uregex.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/uregex.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1948 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2004-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  uregex.cpp
    1.10 +*/
    1.11 +
    1.12 +#include "unicode/utypes.h"
    1.13 +
    1.14 +#if !UCONFIG_NO_REGULAR_EXPRESSIONS
    1.15 +
    1.16 +#include "unicode/regex.h"
    1.17 +#include "unicode/uregex.h"
    1.18 +#include "unicode/unistr.h"
    1.19 +#include "unicode/ustring.h"
    1.20 +#include "unicode/uchar.h"
    1.21 +#include "unicode/uobject.h"
    1.22 +#include "unicode/utf16.h"
    1.23 +#include "umutex.h"
    1.24 +#include "uassert.h"
    1.25 +#include "cmemory.h"
    1.26 +
    1.27 +#include "regextxt.h"
    1.28 +
    1.29 +#include <stdio.h>
    1.30 +
    1.31 +U_NAMESPACE_BEGIN
    1.32 +
    1.33 +#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
    1.34 +
    1.35 +struct RegularExpression: public UMemory {
    1.36 +public:
    1.37 +    RegularExpression();
    1.38 +    ~RegularExpression();
    1.39 +    int32_t           fMagic;
    1.40 +    RegexPattern     *fPat;
    1.41 +    u_atomic_int32_t *fPatRefCount;
    1.42 +    UChar            *fPatString;
    1.43 +    int32_t           fPatStringLen;
    1.44 +    RegexMatcher     *fMatcher;
    1.45 +    const UChar      *fText;         // Text from setText()
    1.46 +    int32_t           fTextLength;   // Length provided by user with setText(), which
    1.47 +                                     //  may be -1.
    1.48 +    UBool             fOwnsText;
    1.49 +};
    1.50 +
    1.51 +static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
    1.52 +
    1.53 +RegularExpression::RegularExpression() {
    1.54 +    fMagic        = REXP_MAGIC;
    1.55 +    fPat          = NULL;
    1.56 +    fPatRefCount  = NULL;
    1.57 +    fPatString    = NULL;
    1.58 +    fPatStringLen = 0;
    1.59 +    fMatcher      = NULL;
    1.60 +    fText         = NULL;
    1.61 +    fTextLength   = 0;
    1.62 +    fOwnsText     = FALSE;
    1.63 +}
    1.64 +
    1.65 +RegularExpression::~RegularExpression() {
    1.66 +    delete fMatcher;
    1.67 +    fMatcher = NULL;
    1.68 +    if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
    1.69 +        delete fPat;
    1.70 +        uprv_free(fPatString);
    1.71 +        uprv_free((void *)fPatRefCount);
    1.72 +    }
    1.73 +    if (fOwnsText && fText!=NULL) {
    1.74 +        uprv_free((void *)fText);
    1.75 +    }
    1.76 +    fMagic = 0;
    1.77 +}
    1.78 +
    1.79 +U_NAMESPACE_END
    1.80 +
    1.81 +U_NAMESPACE_USE
    1.82 +
    1.83 +//----------------------------------------------------------------------------------------
    1.84 +//
    1.85 +//   validateRE    Do boilerplate style checks on API function parameters.
    1.86 +//                 Return TRUE if they look OK.
    1.87 +//----------------------------------------------------------------------------------------
    1.88 +static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
    1.89 +    if (U_FAILURE(*status)) {
    1.90 +        return FALSE;
    1.91 +    }
    1.92 +    if (re == NULL || re->fMagic != REXP_MAGIC) {
    1.93 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
    1.94 +        return FALSE;
    1.95 +    }
    1.96 +    // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
    1.97 +    if (requiresText && re->fText == NULL && !re->fOwnsText) {
    1.98 +        *status = U_REGEX_INVALID_STATE;
    1.99 +        return FALSE;
   1.100 +    }
   1.101 +    return TRUE;
   1.102 +}
   1.103 +
   1.104 +//----------------------------------------------------------------------------------------
   1.105 +//
   1.106 +//    uregex_open
   1.107 +//
   1.108 +//----------------------------------------------------------------------------------------
   1.109 +U_CAPI URegularExpression *  U_EXPORT2
   1.110 +uregex_open( const  UChar          *pattern,
   1.111 +                    int32_t         patternLength,
   1.112 +                    uint32_t        flags,
   1.113 +                    UParseError    *pe,
   1.114 +                    UErrorCode     *status) {
   1.115 +
   1.116 +    if (U_FAILURE(*status)) {
   1.117 +        return NULL;
   1.118 +    }
   1.119 +    if (pattern == NULL || patternLength < -1 || patternLength == 0) {
   1.120 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.121 +        return NULL;
   1.122 +    }
   1.123 +    int32_t actualPatLen = patternLength;
   1.124 +    if (actualPatLen == -1) {
   1.125 +        actualPatLen = u_strlen(pattern);
   1.126 +    }
   1.127 +
   1.128 +    RegularExpression  *re     = new RegularExpression;
   1.129 +    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
   1.130 +    UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
   1.131 +    if (re == NULL || refC == NULL || patBuf == NULL) {
   1.132 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.133 +        delete re;
   1.134 +        uprv_free((void *)refC);
   1.135 +        uprv_free(patBuf);
   1.136 +        return NULL;
   1.137 +    }
   1.138 +    re->fPatRefCount = refC;
   1.139 +    *re->fPatRefCount = 1;
   1.140 +
   1.141 +    //
   1.142 +    // Make a copy of the pattern string, so we can return it later if asked.
   1.143 +    //    For compiling the pattern, we will use a UText wrapper around
   1.144 +    //    this local copy, to avoid making even more copies.
   1.145 +    //
   1.146 +    re->fPatString    = patBuf;
   1.147 +    re->fPatStringLen = patternLength;
   1.148 +    u_memcpy(patBuf, pattern, actualPatLen);
   1.149 +    patBuf[actualPatLen] = 0;
   1.150 +    
   1.151 +    UText patText = UTEXT_INITIALIZER;
   1.152 +    utext_openUChars(&patText, patBuf, patternLength, status);
   1.153 +
   1.154 +    //
   1.155 +    // Compile the pattern
   1.156 +    //
   1.157 +    if (pe != NULL) {
   1.158 +        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
   1.159 +    } else {
   1.160 +        re->fPat = RegexPattern::compile(&patText, flags, *status);
   1.161 +    }
   1.162 +    utext_close(&patText);
   1.163 +    
   1.164 +    if (U_FAILURE(*status)) {
   1.165 +        goto ErrorExit;
   1.166 +    }
   1.167 +
   1.168 +    //
   1.169 +    // Create the matcher object
   1.170 +    //
   1.171 +    re->fMatcher = re->fPat->matcher(*status);
   1.172 +    if (U_SUCCESS(*status)) {
   1.173 +        return (URegularExpression*)re;
   1.174 +    }
   1.175 +
   1.176 +ErrorExit:
   1.177 +    delete re;
   1.178 +    return NULL;
   1.179 +
   1.180 +}
   1.181 +
   1.182 +//----------------------------------------------------------------------------------------
   1.183 +//
   1.184 +//    uregex_openUText
   1.185 +//
   1.186 +//----------------------------------------------------------------------------------------
   1.187 +U_CAPI URegularExpression *  U_EXPORT2
   1.188 +uregex_openUText(UText          *pattern,
   1.189 +                 uint32_t        flags,
   1.190 +                 UParseError    *pe,
   1.191 +                 UErrorCode     *status) {
   1.192 +    
   1.193 +    if (U_FAILURE(*status)) {
   1.194 +        return NULL;
   1.195 +    }
   1.196 +    if (pattern == NULL) {
   1.197 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.198 +        return NULL;
   1.199 +    }
   1.200 +    
   1.201 +    int64_t patternNativeLength = utext_nativeLength(pattern);
   1.202 +    
   1.203 +    if (patternNativeLength == 0) {
   1.204 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.205 +        return NULL;
   1.206 +    }
   1.207 +    
   1.208 +    RegularExpression *re     = new RegularExpression;
   1.209 +    
   1.210 +    UErrorCode lengthStatus = U_ZERO_ERROR;
   1.211 +    int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
   1.212 +    
   1.213 +    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
   1.214 +    UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
   1.215 +    if (re == NULL || refC == NULL || patBuf == NULL) {
   1.216 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.217 +        delete re;
   1.218 +        uprv_free((void *)refC);
   1.219 +        uprv_free(patBuf);
   1.220 +        return NULL;
   1.221 +    }
   1.222 +    re->fPatRefCount = refC;
   1.223 +    *re->fPatRefCount = 1;
   1.224 +    
   1.225 +    //
   1.226 +    // Make a copy of the pattern string, so we can return it later if asked.
   1.227 +    //    For compiling the pattern, we will use a read-only UText wrapper
   1.228 +    //    around this local copy, to avoid making even more copies.
   1.229 +    //
   1.230 +    re->fPatString    = patBuf;
   1.231 +    re->fPatStringLen = pattern16Length;
   1.232 +    utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
   1.233 +    
   1.234 +    UText patText = UTEXT_INITIALIZER;
   1.235 +    utext_openUChars(&patText, patBuf, pattern16Length, status);
   1.236 +    
   1.237 +    //
   1.238 +    // Compile the pattern
   1.239 +    //
   1.240 +    if (pe != NULL) {
   1.241 +        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
   1.242 +    } else {
   1.243 +        re->fPat = RegexPattern::compile(&patText, flags, *status);
   1.244 +    }
   1.245 +    utext_close(&patText);
   1.246 +    
   1.247 +    if (U_FAILURE(*status)) {
   1.248 +        goto ErrorExit;
   1.249 +    }
   1.250 +    
   1.251 +    //
   1.252 +    // Create the matcher object
   1.253 +    //
   1.254 +    re->fMatcher = re->fPat->matcher(*status);
   1.255 +    if (U_SUCCESS(*status)) {
   1.256 +        return (URegularExpression*)re;
   1.257 +    }
   1.258 +    
   1.259 +ErrorExit:
   1.260 +    delete re;
   1.261 +    return NULL;
   1.262 +    
   1.263 +}
   1.264 +
   1.265 +//----------------------------------------------------------------------------------------
   1.266 +//
   1.267 +//    uregex_close
   1.268 +//
   1.269 +//----------------------------------------------------------------------------------------
   1.270 +U_CAPI void  U_EXPORT2
   1.271 +uregex_close(URegularExpression  *re2) {
   1.272 +    RegularExpression *re = (RegularExpression*)re2;
   1.273 +    UErrorCode  status = U_ZERO_ERROR;
   1.274 +    if (validateRE(re, FALSE, &status) == FALSE) {
   1.275 +        return;
   1.276 +    }
   1.277 +    delete re;
   1.278 +}
   1.279 +
   1.280 +
   1.281 +//----------------------------------------------------------------------------------------
   1.282 +//
   1.283 +//    uregex_clone
   1.284 +//
   1.285 +//----------------------------------------------------------------------------------------
   1.286 +U_CAPI URegularExpression * U_EXPORT2 
   1.287 +uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
   1.288 +    RegularExpression *source = (RegularExpression*)source2;
   1.289 +    if (validateRE(source, FALSE, status) == FALSE) {
   1.290 +        return NULL;
   1.291 +    }
   1.292 +
   1.293 +    RegularExpression *clone = new RegularExpression;
   1.294 +    if (clone == NULL) {
   1.295 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.296 +        return NULL;
   1.297 +    }
   1.298 +
   1.299 +    clone->fMatcher = source->fPat->matcher(*status);
   1.300 +    if (U_FAILURE(*status)) {
   1.301 +        delete clone;
   1.302 +        return NULL;
   1.303 +    }
   1.304 +
   1.305 +    clone->fPat          = source->fPat;
   1.306 +    clone->fPatRefCount  = source->fPatRefCount; 
   1.307 +    clone->fPatString    = source->fPatString;
   1.308 +    clone->fPatStringLen = source->fPatStringLen;
   1.309 +    umtx_atomic_inc(source->fPatRefCount);
   1.310 +    // Note:  fText is not cloned.
   1.311 +
   1.312 +    return (URegularExpression*)clone;
   1.313 +}
   1.314 +
   1.315 +
   1.316 +
   1.317 +
   1.318 +//------------------------------------------------------------------------------
   1.319 +//
   1.320 +//    uregex_pattern
   1.321 +//
   1.322 +//------------------------------------------------------------------------------
   1.323 +U_CAPI const UChar * U_EXPORT2 
   1.324 +uregex_pattern(const  URegularExpression *regexp2,
   1.325 +                      int32_t            *patLength,
   1.326 +                      UErrorCode         *status)  {
   1.327 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.328 +    
   1.329 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.330 +        return NULL;
   1.331 +    }
   1.332 +    if (patLength != NULL) {
   1.333 +        *patLength = regexp->fPatStringLen;
   1.334 +    }
   1.335 +    return regexp->fPatString;
   1.336 +}
   1.337 +
   1.338 +
   1.339 +//------------------------------------------------------------------------------
   1.340 +//
   1.341 +//    uregex_patternUText
   1.342 +//
   1.343 +//------------------------------------------------------------------------------
   1.344 +U_CAPI UText * U_EXPORT2
   1.345 +uregex_patternUText(const URegularExpression *regexp2,
   1.346 +                          UErrorCode         *status)  {
   1.347 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.348 +    return regexp->fPat->patternText(*status);
   1.349 +}
   1.350 +
   1.351 +
   1.352 +//------------------------------------------------------------------------------
   1.353 +//
   1.354 +//    uregex_flags
   1.355 +//
   1.356 +//------------------------------------------------------------------------------
   1.357 +U_CAPI int32_t U_EXPORT2 
   1.358 +uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
   1.359 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.360 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.361 +        return 0;
   1.362 +    }
   1.363 +    int32_t flags = regexp->fPat->flags();
   1.364 +    return flags;
   1.365 +}
   1.366 +
   1.367 +
   1.368 +//------------------------------------------------------------------------------
   1.369 +//
   1.370 +//    uregex_setText
   1.371 +//
   1.372 +//------------------------------------------------------------------------------
   1.373 +U_CAPI void U_EXPORT2 
   1.374 +uregex_setText(URegularExpression *regexp2,
   1.375 +               const UChar        *text,
   1.376 +               int32_t             textLength,
   1.377 +               UErrorCode         *status)  {
   1.378 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.379 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.380 +        return;
   1.381 +    }
   1.382 +    if (text == NULL || textLength < -1) {
   1.383 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.384 +        return;
   1.385 +    }
   1.386 +    
   1.387 +    if (regexp->fOwnsText && regexp->fText != NULL) {
   1.388 +        uprv_free((void *)regexp->fText);
   1.389 +    }
   1.390 +    
   1.391 +    regexp->fText       = text;
   1.392 +    regexp->fTextLength = textLength;
   1.393 +    regexp->fOwnsText   = FALSE;
   1.394 +    
   1.395 +    UText input = UTEXT_INITIALIZER;
   1.396 +    utext_openUChars(&input, text, textLength, status);
   1.397 +    regexp->fMatcher->reset(&input);
   1.398 +    utext_close(&input); // reset() made a shallow clone, so we don't need this copy
   1.399 +}
   1.400 +
   1.401 +
   1.402 +//------------------------------------------------------------------------------
   1.403 +//
   1.404 +//    uregex_setUText
   1.405 +//
   1.406 +//------------------------------------------------------------------------------
   1.407 +U_CAPI void U_EXPORT2 
   1.408 +uregex_setUText(URegularExpression *regexp2,
   1.409 +                UText              *text,
   1.410 +                UErrorCode         *status) {
   1.411 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.412 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.413 +        return;
   1.414 +    }
   1.415 +    if (text == NULL) {
   1.416 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.417 +        return;
   1.418 +    }
   1.419 +    
   1.420 +    if (regexp->fOwnsText && regexp->fText != NULL) {
   1.421 +        uprv_free((void *)regexp->fText);
   1.422 +    }
   1.423 +    
   1.424 +    regexp->fText       = NULL; // only fill it in on request
   1.425 +    regexp->fTextLength = -1;
   1.426 +    regexp->fOwnsText   = TRUE;
   1.427 +    regexp->fMatcher->reset(text);
   1.428 +}
   1.429 +
   1.430 +
   1.431 +
   1.432 +//------------------------------------------------------------------------------
   1.433 +//
   1.434 +//    uregex_getText
   1.435 +//
   1.436 +//------------------------------------------------------------------------------
   1.437 +U_CAPI const UChar * U_EXPORT2 
   1.438 +uregex_getText(URegularExpression *regexp2,
   1.439 +               int32_t            *textLength,
   1.440 +               UErrorCode         *status)  {
   1.441 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.442 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.443 +        return NULL;
   1.444 +    }
   1.445 +    
   1.446 +    if (regexp->fText == NULL) {
   1.447 +        // need to fill in the text
   1.448 +        UText *inputText = regexp->fMatcher->inputText();
   1.449 +        int64_t inputNativeLength = utext_nativeLength(inputText);
   1.450 +        if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
   1.451 +            regexp->fText = inputText->chunkContents;
   1.452 +            regexp->fTextLength = (int32_t)inputNativeLength;
   1.453 +            regexp->fOwnsText = FALSE; // because the UText owns it
   1.454 +        } else {
   1.455 +            UErrorCode lengthStatus = U_ZERO_ERROR;
   1.456 +            regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
   1.457 +            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
   1.458 +            
   1.459 +            utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
   1.460 +            regexp->fText = inputChars;
   1.461 +            regexp->fOwnsText = TRUE; // should already be set but just in case
   1.462 +        }
   1.463 +    }
   1.464 +    
   1.465 +    if (textLength != NULL) {
   1.466 +        *textLength = regexp->fTextLength;
   1.467 +    }
   1.468 +    return regexp->fText;
   1.469 +}
   1.470 +
   1.471 +
   1.472 +//------------------------------------------------------------------------------
   1.473 +//
   1.474 +//    uregex_getUText
   1.475 +//
   1.476 +//------------------------------------------------------------------------------
   1.477 +U_CAPI UText * U_EXPORT2 
   1.478 +uregex_getUText(URegularExpression *regexp2,
   1.479 +                UText              *dest,
   1.480 +                UErrorCode         *status)  {
   1.481 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.482 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.483 +        return dest;
   1.484 +    }
   1.485 +    return regexp->fMatcher->getInput(dest, *status);
   1.486 +}
   1.487 +
   1.488 +
   1.489 +//------------------------------------------------------------------------------
   1.490 +//
   1.491 +//    uregex_refreshUText
   1.492 +//
   1.493 +//------------------------------------------------------------------------------
   1.494 +U_CAPI void U_EXPORT2 
   1.495 +uregex_refreshUText(URegularExpression *regexp2,
   1.496 +                    UText              *text,
   1.497 +                    UErrorCode         *status) {
   1.498 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.499 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.500 +        return;
   1.501 +    }
   1.502 +    regexp->fMatcher->refreshInputText(text, *status);
   1.503 +}
   1.504 +
   1.505 +
   1.506 +//------------------------------------------------------------------------------
   1.507 +//
   1.508 +//    uregex_matches
   1.509 +//
   1.510 +//------------------------------------------------------------------------------
   1.511 +U_CAPI UBool U_EXPORT2 
   1.512 +uregex_matches(URegularExpression *regexp2,
   1.513 +               int32_t            startIndex,
   1.514 +               UErrorCode        *status)  {
   1.515 +    return uregex_matches64( regexp2, (int64_t)startIndex, status);
   1.516 +}
   1.517 +
   1.518 +U_CAPI UBool U_EXPORT2 
   1.519 +uregex_matches64(URegularExpression *regexp2,
   1.520 +                 int64_t            startIndex,
   1.521 +                 UErrorCode        *status)  {
   1.522 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.523 +    UBool result = FALSE;
   1.524 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.525 +        return result;
   1.526 +    }
   1.527 +    if (startIndex == -1) {
   1.528 +        result = regexp->fMatcher->matches(*status);
   1.529 +    } else {
   1.530 +        result = regexp->fMatcher->matches(startIndex, *status);
   1.531 +    }
   1.532 +    return result;
   1.533 +}
   1.534 +
   1.535 +
   1.536 +//------------------------------------------------------------------------------
   1.537 +//
   1.538 +//    uregex_lookingAt
   1.539 +//
   1.540 +//------------------------------------------------------------------------------
   1.541 +U_CAPI UBool U_EXPORT2 
   1.542 +uregex_lookingAt(URegularExpression *regexp2,
   1.543 +                 int32_t             startIndex,
   1.544 +                 UErrorCode         *status)  {
   1.545 +    return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
   1.546 +}
   1.547 +
   1.548 +U_CAPI UBool U_EXPORT2 
   1.549 +uregex_lookingAt64(URegularExpression *regexp2,
   1.550 +                   int64_t             startIndex,
   1.551 +                   UErrorCode         *status)  {
   1.552 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.553 +    UBool result = FALSE;
   1.554 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.555 +        return result;
   1.556 +    }
   1.557 +    if (startIndex == -1) {
   1.558 +        result = regexp->fMatcher->lookingAt(*status);
   1.559 +    } else {
   1.560 +        result = regexp->fMatcher->lookingAt(startIndex, *status);
   1.561 +    }
   1.562 +    return result;
   1.563 +}
   1.564 +
   1.565 +
   1.566 +
   1.567 +//------------------------------------------------------------------------------
   1.568 +//
   1.569 +//    uregex_find
   1.570 +//
   1.571 +//------------------------------------------------------------------------------
   1.572 +U_CAPI UBool U_EXPORT2 
   1.573 +uregex_find(URegularExpression *regexp2,
   1.574 +            int32_t             startIndex, 
   1.575 +            UErrorCode         *status)  {
   1.576 +    return uregex_find64( regexp2, (int64_t)startIndex, status);
   1.577 +}
   1.578 +
   1.579 +U_CAPI UBool U_EXPORT2 
   1.580 +uregex_find64(URegularExpression *regexp2,
   1.581 +              int64_t             startIndex, 
   1.582 +              UErrorCode         *status)  {
   1.583 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.584 +    UBool result = FALSE;
   1.585 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.586 +        return result;
   1.587 +    }
   1.588 +    if (startIndex == -1) {
   1.589 +        regexp->fMatcher->resetPreserveRegion();
   1.590 +        result = regexp->fMatcher->find();
   1.591 +    } else {
   1.592 +        result = regexp->fMatcher->find(startIndex, *status);
   1.593 +    }
   1.594 +    return result;
   1.595 +}
   1.596 +
   1.597 +
   1.598 +//------------------------------------------------------------------------------
   1.599 +//
   1.600 +//    uregex_findNext
   1.601 +//
   1.602 +//------------------------------------------------------------------------------
   1.603 +U_CAPI UBool U_EXPORT2 
   1.604 +uregex_findNext(URegularExpression *regexp2,
   1.605 +                UErrorCode         *status)  {
   1.606 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.607 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.608 +        return FALSE;
   1.609 +    }
   1.610 +    UBool result = regexp->fMatcher->find();
   1.611 +    return result;
   1.612 +}
   1.613 +
   1.614 +//------------------------------------------------------------------------------
   1.615 +//
   1.616 +//    uregex_groupCount
   1.617 +//
   1.618 +//------------------------------------------------------------------------------
   1.619 +U_CAPI int32_t U_EXPORT2 
   1.620 +uregex_groupCount(URegularExpression *regexp2,
   1.621 +                  UErrorCode         *status)  {
   1.622 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.623 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.624 +        return 0;
   1.625 +    }
   1.626 +    int32_t  result = regexp->fMatcher->groupCount();
   1.627 +    return result;
   1.628 +}
   1.629 +
   1.630 +
   1.631 +//------------------------------------------------------------------------------
   1.632 +//
   1.633 +//    uregex_group
   1.634 +//
   1.635 +//------------------------------------------------------------------------------
   1.636 +U_CAPI int32_t U_EXPORT2 
   1.637 +uregex_group(URegularExpression *regexp2,
   1.638 +             int32_t             groupNum,
   1.639 +             UChar              *dest,
   1.640 +             int32_t             destCapacity,
   1.641 +             UErrorCode          *status)  {
   1.642 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.643 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.644 +        return 0;
   1.645 +    }
   1.646 +    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
   1.647 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.648 +        return 0;
   1.649 +    }
   1.650 +    
   1.651 +    if (destCapacity == 0 || regexp->fText != NULL) {
   1.652 +        // If preflighting or if we already have the text as UChars,
   1.653 +        // this is a little cheaper than going through uregex_groupUTextDeep()
   1.654 +        
   1.655 +        //
   1.656 +        // Pick up the range of characters from the matcher
   1.657 +        //
   1.658 +        int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
   1.659 +        int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
   1.660 +        if (U_FAILURE(*status)) {
   1.661 +            return 0;
   1.662 +        }
   1.663 +
   1.664 +        //
   1.665 +        // Trim length based on buffer capacity
   1.666 +        // 
   1.667 +        int32_t fullLength = endIx - startIx;
   1.668 +        int32_t copyLength = fullLength;
   1.669 +        if (copyLength < destCapacity) {
   1.670 +            dest[copyLength] = 0;
   1.671 +        } else if (copyLength == destCapacity) {
   1.672 +            *status = U_STRING_NOT_TERMINATED_WARNING;
   1.673 +        } else {
   1.674 +            copyLength = destCapacity;
   1.675 +            *status = U_BUFFER_OVERFLOW_ERROR;
   1.676 +        }
   1.677 +        
   1.678 +        //
   1.679 +        // Copy capture group to user's buffer
   1.680 +        //
   1.681 +        if (copyLength > 0) {
   1.682 +            u_memcpy(dest, &regexp->fText[startIx], copyLength);
   1.683 +        }
   1.684 +        return fullLength;
   1.685 +    } else {
   1.686 +        UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
   1.687 +        int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
   1.688 +        utext_close(groupText);
   1.689 +        return result;
   1.690 +    }
   1.691 +}
   1.692 +
   1.693 +
   1.694 +//------------------------------------------------------------------------------
   1.695 +//
   1.696 +//    uregex_groupUText
   1.697 +//
   1.698 +//------------------------------------------------------------------------------
   1.699 +U_CAPI UText * U_EXPORT2 
   1.700 +uregex_groupUText(URegularExpression *regexp2,
   1.701 +                  int32_t             groupNum,
   1.702 +                  UText              *dest,
   1.703 +                  int64_t            *groupLength,
   1.704 +                  UErrorCode         *status)  {
   1.705 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.706 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.707 +        UErrorCode emptyTextStatus = U_ZERO_ERROR;
   1.708 +        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
   1.709 +    }
   1.710 +
   1.711 +    return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
   1.712 +}
   1.713 +
   1.714 +//------------------------------------------------------------------------------
   1.715 +//
   1.716 +//    uregex_groupUTextDeep
   1.717 +//
   1.718 +//------------------------------------------------------------------------------
   1.719 +U_CAPI UText * U_EXPORT2 
   1.720 +uregex_groupUTextDeep(URegularExpression *regexp2,
   1.721 +                  int32_t             groupNum,
   1.722 +                  UText              *dest,
   1.723 +                  UErrorCode         *status)  {
   1.724 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.725 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.726 +        UErrorCode emptyTextStatus = U_ZERO_ERROR;
   1.727 +        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
   1.728 +    }
   1.729 +
   1.730 +    if (regexp->fText != NULL) {
   1.731 +        //
   1.732 +        // Pick up the range of characters from the matcher
   1.733 +        // and use our already-extracted characters
   1.734 +        //
   1.735 +        int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
   1.736 +        int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
   1.737 +        if (U_FAILURE(*status)) {
   1.738 +            UErrorCode emptyTextStatus = U_ZERO_ERROR;
   1.739 +            return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
   1.740 +        }
   1.741 +        
   1.742 +        if (dest) {
   1.743 +            utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
   1.744 +        } else {
   1.745 +            UText groupText = UTEXT_INITIALIZER;
   1.746 +            utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
   1.747 +            dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
   1.748 +            utext_close(&groupText);
   1.749 +        }
   1.750 +        
   1.751 +        return dest;
   1.752 +    } else {
   1.753 +        return regexp->fMatcher->group(groupNum, dest, *status);
   1.754 +    }
   1.755 +}
   1.756 +
   1.757 +//------------------------------------------------------------------------------
   1.758 +//
   1.759 +//    uregex_start
   1.760 +//
   1.761 +//------------------------------------------------------------------------------
   1.762 +U_CAPI int32_t U_EXPORT2 
   1.763 +uregex_start(URegularExpression *regexp2,
   1.764 +             int32_t             groupNum,
   1.765 +             UErrorCode          *status)  {
   1.766 +    return (int32_t)uregex_start64( regexp2, groupNum, status);
   1.767 +}
   1.768 +
   1.769 +U_CAPI int64_t U_EXPORT2 
   1.770 +uregex_start64(URegularExpression *regexp2,
   1.771 +               int32_t             groupNum,
   1.772 +               UErrorCode          *status)  {
   1.773 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.774 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.775 +        return 0;
   1.776 +    }
   1.777 +    int32_t result = regexp->fMatcher->start(groupNum, *status);
   1.778 +    return result;
   1.779 +}
   1.780 +
   1.781 +//------------------------------------------------------------------------------
   1.782 +//
   1.783 +//    uregex_end
   1.784 +//
   1.785 +//------------------------------------------------------------------------------
   1.786 +U_CAPI int32_t U_EXPORT2 
   1.787 +uregex_end(URegularExpression   *regexp2,
   1.788 +           int32_t               groupNum,
   1.789 +           UErrorCode           *status)  {
   1.790 +    return (int32_t)uregex_end64( regexp2, groupNum, status);
   1.791 +}
   1.792 +
   1.793 +U_CAPI int64_t U_EXPORT2 
   1.794 +uregex_end64(URegularExpression   *regexp2,
   1.795 +             int32_t               groupNum,
   1.796 +             UErrorCode           *status)  {
   1.797 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.798 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.799 +        return 0;
   1.800 +    }
   1.801 +    int32_t result = regexp->fMatcher->end(groupNum, *status);
   1.802 +    return result;
   1.803 +}
   1.804 +
   1.805 +//------------------------------------------------------------------------------
   1.806 +//
   1.807 +//    uregex_reset
   1.808 +//
   1.809 +//------------------------------------------------------------------------------
   1.810 +U_CAPI void U_EXPORT2 
   1.811 +uregex_reset(URegularExpression    *regexp2,
   1.812 +             int32_t               index,
   1.813 +             UErrorCode            *status)  {
   1.814 +    uregex_reset64( regexp2, (int64_t)index, status);
   1.815 +}
   1.816 +
   1.817 +U_CAPI void U_EXPORT2 
   1.818 +uregex_reset64(URegularExpression    *regexp2,
   1.819 +               int64_t               index,
   1.820 +               UErrorCode            *status)  {
   1.821 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.822 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.823 +        return;
   1.824 +    }
   1.825 +    regexp->fMatcher->reset(index, *status);
   1.826 +}
   1.827 +
   1.828 +
   1.829 +//------------------------------------------------------------------------------
   1.830 +//
   1.831 +//    uregex_setRegion
   1.832 +//
   1.833 +//------------------------------------------------------------------------------
   1.834 +U_CAPI void U_EXPORT2 
   1.835 +uregex_setRegion(URegularExpression   *regexp2,
   1.836 +                 int32_t               regionStart,
   1.837 +                 int32_t               regionLimit,
   1.838 +                 UErrorCode           *status)  {
   1.839 +    uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
   1.840 +}
   1.841 +
   1.842 +U_CAPI void U_EXPORT2 
   1.843 +uregex_setRegion64(URegularExpression   *regexp2,
   1.844 +                   int64_t               regionStart,
   1.845 +                   int64_t               regionLimit,
   1.846 +                   UErrorCode           *status)  {
   1.847 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.848 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.849 +        return;
   1.850 +    }
   1.851 +    regexp->fMatcher->region(regionStart, regionLimit, *status);
   1.852 +}
   1.853 +
   1.854 +
   1.855 +//------------------------------------------------------------------------------
   1.856 +//
   1.857 +//    uregex_setRegionAndStart
   1.858 +//
   1.859 +//------------------------------------------------------------------------------
   1.860 +U_CAPI void U_EXPORT2 
   1.861 +uregex_setRegionAndStart(URegularExpression   *regexp2,
   1.862 +                 int64_t               regionStart,
   1.863 +                 int64_t               regionLimit,
   1.864 +                 int64_t               startIndex,
   1.865 +                 UErrorCode           *status)  {
   1.866 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.867 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.868 +        return;
   1.869 +    }
   1.870 +    regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
   1.871 +}
   1.872 +
   1.873 +//------------------------------------------------------------------------------
   1.874 +//
   1.875 +//    uregex_regionStart
   1.876 +//
   1.877 +//------------------------------------------------------------------------------
   1.878 +U_CAPI int32_t U_EXPORT2 
   1.879 +uregex_regionStart(const  URegularExpression   *regexp2,
   1.880 +                          UErrorCode           *status)  {
   1.881 +    return (int32_t)uregex_regionStart64(regexp2, status);
   1.882 +}
   1.883 +
   1.884 +U_CAPI int64_t U_EXPORT2 
   1.885 +uregex_regionStart64(const  URegularExpression   *regexp2,
   1.886 +                            UErrorCode           *status)  {
   1.887 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.888 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.889 +        return 0;
   1.890 +    }
   1.891 +    return regexp->fMatcher->regionStart();
   1.892 +}
   1.893 +
   1.894 +
   1.895 +//------------------------------------------------------------------------------
   1.896 +//
   1.897 +//    uregex_regionEnd
   1.898 +//
   1.899 +//------------------------------------------------------------------------------
   1.900 +U_CAPI int32_t U_EXPORT2 
   1.901 +uregex_regionEnd(const  URegularExpression   *regexp2,
   1.902 +                        UErrorCode           *status)  {
   1.903 +    return (int32_t)uregex_regionEnd64(regexp2, status);
   1.904 +}
   1.905 +
   1.906 +U_CAPI int64_t U_EXPORT2 
   1.907 +uregex_regionEnd64(const  URegularExpression   *regexp2,
   1.908 +                          UErrorCode           *status)  {
   1.909 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.910 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.911 +        return 0;
   1.912 +    }
   1.913 +    return regexp->fMatcher->regionEnd();
   1.914 +}
   1.915 +
   1.916 +
   1.917 +//------------------------------------------------------------------------------
   1.918 +//
   1.919 +//    uregex_hasTransparentBounds
   1.920 +//
   1.921 +//------------------------------------------------------------------------------
   1.922 +U_CAPI UBool U_EXPORT2 
   1.923 +uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
   1.924 +                                   UErrorCode           *status)  {
   1.925 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.926 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.927 +        return FALSE;
   1.928 +    }
   1.929 +    return regexp->fMatcher->hasTransparentBounds();
   1.930 +}
   1.931 +
   1.932 +
   1.933 +//------------------------------------------------------------------------------
   1.934 +//
   1.935 +//    uregex_useTransparentBounds
   1.936 +//
   1.937 +//------------------------------------------------------------------------------
   1.938 +U_CAPI void U_EXPORT2 
   1.939 +uregex_useTransparentBounds(URegularExpression    *regexp2,
   1.940 +                            UBool                  b,
   1.941 +                            UErrorCode            *status)  {
   1.942 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.943 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.944 +        return;
   1.945 +    }
   1.946 +    regexp->fMatcher->useTransparentBounds(b);
   1.947 +}
   1.948 +
   1.949 +
   1.950 +//------------------------------------------------------------------------------
   1.951 +//
   1.952 +//    uregex_hasAnchoringBounds
   1.953 +//
   1.954 +//------------------------------------------------------------------------------
   1.955 +U_CAPI UBool U_EXPORT2 
   1.956 +uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
   1.957 +                                 UErrorCode           *status)  {
   1.958 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.959 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.960 +        return FALSE;
   1.961 +    }
   1.962 +    return regexp->fMatcher->hasAnchoringBounds();
   1.963 +}
   1.964 +
   1.965 +
   1.966 +//------------------------------------------------------------------------------
   1.967 +//
   1.968 +//    uregex_useAnchoringBounds
   1.969 +//
   1.970 +//------------------------------------------------------------------------------
   1.971 +U_CAPI void U_EXPORT2 
   1.972 +uregex_useAnchoringBounds(URegularExpression    *regexp2,
   1.973 +                          UBool                  b,
   1.974 +                          UErrorCode            *status)  {
   1.975 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.976 +    if (validateRE(regexp, FALSE, status) == FALSE) {
   1.977 +        return;
   1.978 +    }
   1.979 +    regexp->fMatcher->useAnchoringBounds(b);
   1.980 +}
   1.981 +
   1.982 +
   1.983 +//------------------------------------------------------------------------------
   1.984 +//
   1.985 +//    uregex_hitEnd
   1.986 +//
   1.987 +//------------------------------------------------------------------------------
   1.988 +U_CAPI UBool U_EXPORT2 
   1.989 +uregex_hitEnd(const  URegularExpression   *regexp2,
   1.990 +                     UErrorCode           *status)  {
   1.991 +    RegularExpression *regexp = (RegularExpression*)regexp2;
   1.992 +    if (validateRE(regexp, TRUE, status) == FALSE) {
   1.993 +        return FALSE;
   1.994 +    }
   1.995 +    return regexp->fMatcher->hitEnd();
   1.996 +}
   1.997 +
   1.998 +
   1.999 +//------------------------------------------------------------------------------
  1.1000 +//
  1.1001 +//    uregex_requireEnd
  1.1002 +//
  1.1003 +//------------------------------------------------------------------------------
  1.1004 +U_CAPI UBool U_EXPORT2 
  1.1005 +uregex_requireEnd(const  URegularExpression   *regexp2,
  1.1006 +                         UErrorCode           *status)  {
  1.1007 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1008 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1009 +        return FALSE;
  1.1010 +    }
  1.1011 +    return regexp->fMatcher->requireEnd();
  1.1012 +}
  1.1013 +
  1.1014 +
  1.1015 +//------------------------------------------------------------------------------
  1.1016 +//
  1.1017 +//    uregex_setTimeLimit
  1.1018 +//
  1.1019 +//------------------------------------------------------------------------------
  1.1020 +U_CAPI void U_EXPORT2 
  1.1021 +uregex_setTimeLimit(URegularExpression   *regexp2,
  1.1022 +                    int32_t               limit,
  1.1023 +                    UErrorCode           *status) {
  1.1024 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1025 +    if (validateRE(regexp, FALSE, status)) {
  1.1026 +        regexp->fMatcher->setTimeLimit(limit, *status);
  1.1027 +    }
  1.1028 +}
  1.1029 +
  1.1030 +
  1.1031 +
  1.1032 +//------------------------------------------------------------------------------
  1.1033 +//
  1.1034 +//    uregex_getTimeLimit
  1.1035 +//
  1.1036 +//------------------------------------------------------------------------------
  1.1037 +U_CAPI int32_t U_EXPORT2 
  1.1038 +uregex_getTimeLimit(const  URegularExpression   *regexp2,
  1.1039 +                           UErrorCode           *status) {
  1.1040 +    int32_t retVal = 0;
  1.1041 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1042 +    if (validateRE(regexp, FALSE, status)) {
  1.1043 +        retVal = regexp->fMatcher->getTimeLimit();
  1.1044 +    }
  1.1045 +    return retVal;
  1.1046 +}
  1.1047 +
  1.1048 +
  1.1049 +
  1.1050 +//------------------------------------------------------------------------------
  1.1051 +//
  1.1052 +//    uregex_setStackLimit
  1.1053 +//
  1.1054 +//------------------------------------------------------------------------------
  1.1055 +U_CAPI void U_EXPORT2 
  1.1056 +uregex_setStackLimit(URegularExpression   *regexp2,
  1.1057 +                     int32_t               limit,
  1.1058 +                     UErrorCode           *status) {
  1.1059 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1060 +    if (validateRE(regexp, FALSE, status)) {
  1.1061 +        regexp->fMatcher->setStackLimit(limit, *status);
  1.1062 +    }
  1.1063 +}
  1.1064 +
  1.1065 +
  1.1066 +
  1.1067 +//------------------------------------------------------------------------------
  1.1068 +//
  1.1069 +//    uregex_getStackLimit
  1.1070 +//
  1.1071 +//------------------------------------------------------------------------------
  1.1072 +U_CAPI int32_t U_EXPORT2 
  1.1073 +uregex_getStackLimit(const  URegularExpression   *regexp2,
  1.1074 +                            UErrorCode           *status) {
  1.1075 +    int32_t retVal = 0;
  1.1076 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1077 +    if (validateRE(regexp, FALSE, status)) {
  1.1078 +        retVal = regexp->fMatcher->getStackLimit();
  1.1079 +    }
  1.1080 +    return retVal;
  1.1081 +}
  1.1082 +
  1.1083 +
  1.1084 +//------------------------------------------------------------------------------
  1.1085 +//
  1.1086 +//    uregex_setMatchCallback
  1.1087 +//
  1.1088 +//------------------------------------------------------------------------------
  1.1089 +U_CAPI void U_EXPORT2
  1.1090 +uregex_setMatchCallback(URegularExpression      *regexp2,
  1.1091 +                        URegexMatchCallback     *callback,
  1.1092 +                        const void              *context,
  1.1093 +                        UErrorCode              *status) {
  1.1094 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1095 +    if (validateRE(regexp, FALSE, status)) {
  1.1096 +        regexp->fMatcher->setMatchCallback(callback, context, *status);
  1.1097 +    }
  1.1098 +}
  1.1099 +
  1.1100 +
  1.1101 +//------------------------------------------------------------------------------
  1.1102 +//
  1.1103 +//    uregex_getMatchCallback
  1.1104 +//
  1.1105 +//------------------------------------------------------------------------------
  1.1106 +U_CAPI void U_EXPORT2 
  1.1107 +uregex_getMatchCallback(const URegularExpression    *regexp2,
  1.1108 +                        URegexMatchCallback        **callback,
  1.1109 +                        const void                 **context,
  1.1110 +                        UErrorCode                  *status) {
  1.1111 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1112 +     if (validateRE(regexp, FALSE, status)) {
  1.1113 +         regexp->fMatcher->getMatchCallback(*callback, *context, *status);
  1.1114 +     }
  1.1115 +}
  1.1116 +
  1.1117 +
  1.1118 +//------------------------------------------------------------------------------
  1.1119 +//
  1.1120 +//    uregex_setFindProgressCallback
  1.1121 +//
  1.1122 +//------------------------------------------------------------------------------
  1.1123 +U_CAPI void U_EXPORT2
  1.1124 +uregex_setFindProgressCallback(URegularExpression              *regexp2,
  1.1125 +                                URegexFindProgressCallback      *callback,
  1.1126 +                                const void                      *context,
  1.1127 +                                UErrorCode                      *status) {
  1.1128 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1129 +    if (validateRE(regexp, FALSE, status)) {
  1.1130 +        regexp->fMatcher->setFindProgressCallback(callback, context, *status);
  1.1131 +    }
  1.1132 +}
  1.1133 +
  1.1134 +
  1.1135 +//------------------------------------------------------------------------------
  1.1136 +//
  1.1137 +//    uregex_getFindProgressCallback
  1.1138 +//
  1.1139 +//------------------------------------------------------------------------------
  1.1140 +U_CAPI void U_EXPORT2 
  1.1141 +uregex_getFindProgressCallback(const URegularExpression          *regexp2,
  1.1142 +                                URegexFindProgressCallback        **callback,
  1.1143 +                                const void                        **context,
  1.1144 +                                UErrorCode                        *status) {
  1.1145 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1146 +     if (validateRE(regexp, FALSE, status)) {
  1.1147 +         regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
  1.1148 +     }
  1.1149 +}
  1.1150 +
  1.1151 +
  1.1152 +//------------------------------------------------------------------------------
  1.1153 +//
  1.1154 +//    uregex_replaceAll
  1.1155 +//
  1.1156 +//------------------------------------------------------------------------------
  1.1157 +U_CAPI int32_t U_EXPORT2 
  1.1158 +uregex_replaceAll(URegularExpression    *regexp2,
  1.1159 +                  const UChar           *replacementText,
  1.1160 +                  int32_t                replacementLength,
  1.1161 +                  UChar                 *destBuf,
  1.1162 +                  int32_t                destCapacity,
  1.1163 +                  UErrorCode            *status)  {
  1.1164 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1165 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1166 +        return 0;
  1.1167 +    }
  1.1168 +    if (replacementText == NULL || replacementLength < -1 ||
  1.1169 +        (destBuf == NULL && destCapacity > 0) ||
  1.1170 +        destCapacity < 0) {
  1.1171 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1172 +        return 0;
  1.1173 +    }
  1.1174 +
  1.1175 +    int32_t   len = 0;
  1.1176 +
  1.1177 +    uregex_reset(regexp2, 0, status);
  1.1178 +
  1.1179 +    // Note: Seperate error code variables for findNext() and appendReplacement()
  1.1180 +    //       are used so that destination buffer overflow errors
  1.1181 +    //       in appendReplacement won't stop findNext() from working.
  1.1182 +    //       appendReplacement() and appendTail() special case incoming buffer
  1.1183 +    //       overflow errors, continuing to return the correct length.
  1.1184 +    UErrorCode  findStatus = *status;
  1.1185 +    while (uregex_findNext(regexp2, &findStatus)) {
  1.1186 +        len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
  1.1187 +                                        &destBuf, &destCapacity, status);
  1.1188 +    }
  1.1189 +    len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
  1.1190 +    
  1.1191 +    if (U_FAILURE(findStatus)) {
  1.1192 +        // If anything went wrong with the findNext(), make that error trump
  1.1193 +        //   whatever may have happened with the append() operations.
  1.1194 +        //   Errors in findNext() are not expected.
  1.1195 +        *status = findStatus;
  1.1196 +    }
  1.1197 +
  1.1198 +    return len;
  1.1199 +}
  1.1200 +
  1.1201 +
  1.1202 +//------------------------------------------------------------------------------
  1.1203 +//
  1.1204 +//    uregex_replaceAllUText
  1.1205 +//
  1.1206 +//------------------------------------------------------------------------------
  1.1207 +U_CAPI UText * U_EXPORT2 
  1.1208 +uregex_replaceAllUText(URegularExpression    *regexp2,
  1.1209 +                       UText                 *replacementText,
  1.1210 +                       UText                 *dest,
  1.1211 +                       UErrorCode            *status)  {
  1.1212 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1213 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1214 +        return 0;
  1.1215 +    }
  1.1216 +    if (replacementText == NULL) {
  1.1217 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1218 +        return 0;
  1.1219 +    }
  1.1220 +    
  1.1221 +    dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
  1.1222 +    return dest;
  1.1223 +}
  1.1224 +    
  1.1225 +
  1.1226 +//------------------------------------------------------------------------------
  1.1227 +//
  1.1228 +//    uregex_replaceFirst
  1.1229 +//
  1.1230 +//------------------------------------------------------------------------------
  1.1231 +U_CAPI int32_t U_EXPORT2 
  1.1232 +uregex_replaceFirst(URegularExpression  *regexp2,
  1.1233 +                    const UChar         *replacementText,
  1.1234 +                    int32_t              replacementLength,
  1.1235 +                    UChar               *destBuf,
  1.1236 +                    int32_t              destCapacity,
  1.1237 +                    UErrorCode          *status)  {
  1.1238 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1239 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1240 +        return 0;
  1.1241 +    }
  1.1242 +    if (replacementText == NULL || replacementLength < -1 ||
  1.1243 +        (destBuf == NULL && destCapacity > 0) ||
  1.1244 +        destCapacity < 0) {
  1.1245 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1246 +        return 0;
  1.1247 +    }
  1.1248 +
  1.1249 +    int32_t   len = 0;
  1.1250 +    UBool     findSucceeded;
  1.1251 +    uregex_reset(regexp2, 0, status);
  1.1252 +    findSucceeded = uregex_find(regexp2, 0, status);
  1.1253 +    if (findSucceeded) {
  1.1254 +        len = uregex_appendReplacement(regexp2, replacementText, replacementLength, 
  1.1255 +                                       &destBuf, &destCapacity, status);
  1.1256 +    }
  1.1257 +    len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
  1.1258 +
  1.1259 +    return len;
  1.1260 +}
  1.1261 +
  1.1262 +
  1.1263 +//------------------------------------------------------------------------------
  1.1264 +//
  1.1265 +//    uregex_replaceFirstUText
  1.1266 +//
  1.1267 +//------------------------------------------------------------------------------
  1.1268 +U_CAPI UText * U_EXPORT2 
  1.1269 +uregex_replaceFirstUText(URegularExpression  *regexp2,
  1.1270 +                         UText                 *replacementText,
  1.1271 +                         UText                 *dest,
  1.1272 +                         UErrorCode            *status)  {
  1.1273 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1274 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1275 +        return 0;
  1.1276 +    }
  1.1277 +    if (replacementText == NULL) {
  1.1278 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1279 +        return 0;
  1.1280 +    }
  1.1281 +    
  1.1282 +    dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
  1.1283 +    return dest;
  1.1284 +}
  1.1285 +
  1.1286 +
  1.1287 +//------------------------------------------------------------------------------
  1.1288 +//
  1.1289 +//    uregex_appendReplacement
  1.1290 +//
  1.1291 +//------------------------------------------------------------------------------
  1.1292 +
  1.1293 +U_NAMESPACE_BEGIN
  1.1294 +//
  1.1295 +//  Dummy class, because these functions need to be friends of class RegexMatcher,
  1.1296 +//               and stand-alone C functions don't work as friends
  1.1297 +//
  1.1298 +class RegexCImpl {
  1.1299 + public:
  1.1300 +   inline static  int32_t appendReplacement(RegularExpression    *regexp,
  1.1301 +                      const UChar           *replacementText,
  1.1302 +                      int32_t                replacementLength,
  1.1303 +                      UChar                **destBuf,
  1.1304 +                      int32_t               *destCapacity,
  1.1305 +                      UErrorCode            *status);
  1.1306 +
  1.1307 +   inline static int32_t appendTail(RegularExpression    *regexp,
  1.1308 +        UChar                **destBuf,
  1.1309 +        int32_t               *destCapacity,
  1.1310 +        UErrorCode            *status);
  1.1311 +                  
  1.1312 +    inline static int32_t split(RegularExpression    *regexp,
  1.1313 +        UChar                 *destBuf,
  1.1314 +        int32_t                destCapacity,
  1.1315 +        int32_t               *requiredCapacity,
  1.1316 +        UChar                 *destFields[],
  1.1317 +        int32_t                destFieldsCapacity,
  1.1318 +        UErrorCode            *status);
  1.1319 +};
  1.1320 +
  1.1321 +U_NAMESPACE_END
  1.1322 +
  1.1323 +
  1.1324 +
  1.1325 +static const UChar BACKSLASH  = 0x5c;
  1.1326 +static const UChar DOLLARSIGN = 0x24;
  1.1327 +
  1.1328 +//
  1.1329 +//  Move a character to an output buffer, with bounds checking on the index.
  1.1330 +//      Index advances even if capacity is exceeded, for preflight size computations.
  1.1331 +//      This little sequence is used a LOT.
  1.1332 +//
  1.1333 +static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
  1.1334 +    if (*idx < bufCapacity) {
  1.1335 +        buf[*idx] = c;
  1.1336 +    }
  1.1337 +    (*idx)++;
  1.1338 +}
  1.1339 +
  1.1340 +
  1.1341 +//
  1.1342 +//  appendReplacement, the actual implementation.
  1.1343 +//
  1.1344 +int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
  1.1345 +                                      const UChar           *replacementText,
  1.1346 +                                      int32_t                replacementLength,
  1.1347 +                                      UChar                **destBuf,
  1.1348 +                                      int32_t               *destCapacity,
  1.1349 +                                      UErrorCode            *status)  {
  1.1350 +
  1.1351 +    // If we come in with a buffer overflow error, don't suppress the operation.
  1.1352 +    //  A series of appendReplacements, appendTail need to correctly preflight
  1.1353 +    //  the buffer size when an overflow happens somewhere in the middle.
  1.1354 +    UBool pendingBufferOverflow = FALSE;
  1.1355 +    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
  1.1356 +        pendingBufferOverflow = TRUE;
  1.1357 +        *status = U_ZERO_ERROR;
  1.1358 +    }
  1.1359 +
  1.1360 +    //
  1.1361 +    // Validate all paramters
  1.1362 +    //
  1.1363 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1364 +        return 0;
  1.1365 +    }
  1.1366 +    if (replacementText == NULL || replacementLength < -1 ||
  1.1367 +        destCapacity == NULL || destBuf == NULL || 
  1.1368 +        (*destBuf == NULL && *destCapacity > 0) ||
  1.1369 +        *destCapacity < 0) {
  1.1370 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1371 +        return 0;
  1.1372 +    }
  1.1373 +
  1.1374 +    RegexMatcher *m = regexp->fMatcher;
  1.1375 +    if (m->fMatch == FALSE) {
  1.1376 +        *status = U_REGEX_INVALID_STATE;
  1.1377 +        return 0;
  1.1378 +    }
  1.1379 +
  1.1380 +    UChar    *dest             = *destBuf;
  1.1381 +    int32_t   capacity         = *destCapacity;
  1.1382 +    int32_t   destIdx          =  0;
  1.1383 +    int32_t   i;
  1.1384 +    
  1.1385 +    // If it wasn't supplied by the caller,  get the length of the replacement text.
  1.1386 +    //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
  1.1387 +    //          the fly and avoid this step.
  1.1388 +    if (replacementLength == -1) {
  1.1389 +        replacementLength = u_strlen(replacementText);
  1.1390 +    }
  1.1391 +
  1.1392 +    // Copy input string from the end of previous match to start of current match
  1.1393 +    if (regexp->fText != NULL) {
  1.1394 +        int32_t matchStart;
  1.1395 +        int32_t lastMatchEnd;
  1.1396 +        if (UTEXT_USES_U16(m->fInputText)) {
  1.1397 +            lastMatchEnd = (int32_t)m->fLastMatchEnd;
  1.1398 +            matchStart = (int32_t)m->fMatchStart;
  1.1399 +        } else {
  1.1400 +            // !!!: Would like a better way to do this!
  1.1401 +            UErrorCode status = U_ZERO_ERROR;
  1.1402 +            lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
  1.1403 +            status = U_ZERO_ERROR;
  1.1404 +            matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
  1.1405 +        }
  1.1406 +        for (i=lastMatchEnd; i<matchStart; i++) {
  1.1407 +            appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
  1.1408 +        }        
  1.1409 +    } else {
  1.1410 +        UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
  1.1411 +        destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
  1.1412 +                                 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
  1.1413 +                                 &possibleOverflowError);
  1.1414 +    }
  1.1415 +    U_ASSERT(destIdx >= 0);
  1.1416 +
  1.1417 +    // scan the replacement text, looking for substitutions ($n) and \escapes.
  1.1418 +    int32_t  replIdx = 0;
  1.1419 +    while (replIdx < replacementLength) {
  1.1420 +        UChar  c = replacementText[replIdx];
  1.1421 +        replIdx++;
  1.1422 +        if (c != DOLLARSIGN && c != BACKSLASH) {
  1.1423 +            // Common case, no substitution, no escaping, 
  1.1424 +            //  just copy the char to the dest buf.
  1.1425 +            appendToBuf(c, &destIdx, dest, capacity);
  1.1426 +            continue;
  1.1427 +        }
  1.1428 +
  1.1429 +        if (c == BACKSLASH) {
  1.1430 +            // Backslash Escape.  Copy the following char out without further checks.
  1.1431 +            //                    Note:  Surrogate pairs don't need any special handling
  1.1432 +            //                           The second half wont be a '$' or a '\', and
  1.1433 +            //                           will move to the dest normally on the next
  1.1434 +            //                           loop iteration.
  1.1435 +            if (replIdx >= replacementLength) {
  1.1436 +                break;
  1.1437 +            }
  1.1438 +            c = replacementText[replIdx];
  1.1439 +
  1.1440 +            if (c==0x55/*U*/ || c==0x75/*u*/) {
  1.1441 +                // We have a \udddd or \Udddddddd escape sequence.
  1.1442 +                UChar32 escapedChar = 
  1.1443 +                    u_unescapeAt(uregex_ucstr_unescape_charAt,
  1.1444 +                       &replIdx,                   // Index is updated by unescapeAt 
  1.1445 +                       replacementLength,          // Length of replacement text
  1.1446 +                       (void *)replacementText);
  1.1447 +
  1.1448 +                if (escapedChar != (UChar32)0xFFFFFFFF) {
  1.1449 +                    if (escapedChar <= 0xffff) {
  1.1450 +                        appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
  1.1451 +                    } else {
  1.1452 +                        appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
  1.1453 +                        appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
  1.1454 +                    }
  1.1455 +                    continue;
  1.1456 +                }
  1.1457 +                // Note:  if the \u escape was invalid, just fall through and
  1.1458 +                //        treat it as a plain \<anything> escape.
  1.1459 +            }
  1.1460 +
  1.1461 +            // Plain backslash escape.  Just put out the escaped character.
  1.1462 +            appendToBuf(c, &destIdx, dest, capacity);
  1.1463 +
  1.1464 +            replIdx++;
  1.1465 +            continue;
  1.1466 +        }
  1.1467 +
  1.1468 +
  1.1469 +
  1.1470 +        // We've got a $.  Pick up a capture group number if one follows.
  1.1471 +        // Consume at most the number of digits necessary for the largest capture
  1.1472 +        // number that is valid for this pattern.
  1.1473 +
  1.1474 +        int32_t numDigits = 0;
  1.1475 +        int32_t groupNum  = 0;
  1.1476 +        UChar32 digitC;
  1.1477 +        for (;;) {
  1.1478 +            if (replIdx >= replacementLength) {
  1.1479 +                break;
  1.1480 +            }
  1.1481 +            U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
  1.1482 +            if (u_isdigit(digitC) == FALSE) {
  1.1483 +                break;
  1.1484 +            }
  1.1485 +
  1.1486 +            U16_FWD_1(replacementText, replIdx, replacementLength);
  1.1487 +            groupNum=groupNum*10 + u_charDigitValue(digitC);
  1.1488 +            numDigits++;
  1.1489 +            if (numDigits >= m->fPattern->fMaxCaptureDigits) {
  1.1490 +                break;
  1.1491 +            }
  1.1492 +        }
  1.1493 +
  1.1494 +
  1.1495 +        if (numDigits == 0) {
  1.1496 +            // The $ didn't introduce a group number at all.
  1.1497 +            // Treat it as just part of the substitution text.
  1.1498 +            appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
  1.1499 +            continue;
  1.1500 +        }
  1.1501 +
  1.1502 +        // Finally, append the capture group data to the destination.
  1.1503 +        destIdx += uregex_group((URegularExpression*)regexp, groupNum,
  1.1504 +                                dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
  1.1505 +        if (*status == U_BUFFER_OVERFLOW_ERROR) {
  1.1506 +            // Ignore buffer overflow when extracting the group.  We need to
  1.1507 +            //   continue on to get full size of the untruncated result.  We will
  1.1508 +            //   raise our own buffer overflow error at the end.
  1.1509 +            *status = U_ZERO_ERROR;
  1.1510 +        }
  1.1511 +
  1.1512 +        if (U_FAILURE(*status)) {
  1.1513 +            // Can fail if group number is out of range.
  1.1514 +            break;
  1.1515 +        }
  1.1516 +
  1.1517 +    }
  1.1518 +
  1.1519 +    //
  1.1520 +    //  Nul Terminate the dest buffer if possible.
  1.1521 +    //  Set the appropriate buffer overflow or not terminated error, if needed.
  1.1522 +    //
  1.1523 +    if (destIdx < capacity) {
  1.1524 +        dest[destIdx] = 0;
  1.1525 +    } else if (destIdx == *destCapacity) {
  1.1526 +        *status = U_STRING_NOT_TERMINATED_WARNING;
  1.1527 +    } else {
  1.1528 +        *status = U_BUFFER_OVERFLOW_ERROR;
  1.1529 +    }
  1.1530 +    
  1.1531 +    //
  1.1532 +    // Return an updated dest buffer and capacity to the caller.
  1.1533 +    //
  1.1534 +    if (destIdx > 0 &&  *destCapacity > 0) {
  1.1535 +        if (destIdx < capacity) {
  1.1536 +            *destBuf      += destIdx;
  1.1537 +            *destCapacity -= destIdx;
  1.1538 +        } else {
  1.1539 +            *destBuf      += capacity;
  1.1540 +            *destCapacity =  0;
  1.1541 +        }
  1.1542 +    }
  1.1543 +
  1.1544 +    // If we came in with a buffer overflow, make sure we go out with one also.
  1.1545 +    //   (A zero length match right at the end of the previous match could
  1.1546 +    //    make this function succeed even though a previous call had overflowed the buf)
  1.1547 +    if (pendingBufferOverflow && U_SUCCESS(*status)) {
  1.1548 +        *status = U_BUFFER_OVERFLOW_ERROR;
  1.1549 +    }
  1.1550 +
  1.1551 +    return destIdx;
  1.1552 +}
  1.1553 +
  1.1554 +//
  1.1555 +//   appendReplacement   the actual API function,
  1.1556 +//
  1.1557 +U_CAPI int32_t U_EXPORT2 
  1.1558 +uregex_appendReplacement(URegularExpression    *regexp2,
  1.1559 +                         const UChar           *replacementText,
  1.1560 +                         int32_t                replacementLength,
  1.1561 +                         UChar                **destBuf,
  1.1562 +                         int32_t               *destCapacity,
  1.1563 +                         UErrorCode            *status) {
  1.1564 +    
  1.1565 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1566 +    return RegexCImpl::appendReplacement(
  1.1567 +        regexp, replacementText, replacementLength,destBuf, destCapacity, status);
  1.1568 +}
  1.1569 +
  1.1570 +//
  1.1571 +//   uregex_appendReplacementUText...can just use the normal C++ method
  1.1572 +//
  1.1573 +U_CAPI void U_EXPORT2 
  1.1574 +uregex_appendReplacementUText(URegularExpression    *regexp2,
  1.1575 +                              UText                 *replText,
  1.1576 +                              UText                 *dest,
  1.1577 +                              UErrorCode            *status)  {
  1.1578 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1579 +    regexp->fMatcher->appendReplacement(dest, replText, *status);
  1.1580 +}
  1.1581 +
  1.1582 +
  1.1583 +//------------------------------------------------------------------------------
  1.1584 +//
  1.1585 +//    uregex_appendTail
  1.1586 +//
  1.1587 +//------------------------------------------------------------------------------
  1.1588 +int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
  1.1589 +                               UChar                **destBuf,
  1.1590 +                               int32_t               *destCapacity,
  1.1591 +                               UErrorCode            *status)
  1.1592 +{
  1.1593 +
  1.1594 +    // If we come in with a buffer overflow error, don't suppress the operation.
  1.1595 +    //  A series of appendReplacements, appendTail need to correctly preflight
  1.1596 +    //  the buffer size when an overflow happens somewhere in the middle.
  1.1597 +    UBool pendingBufferOverflow = FALSE;
  1.1598 +    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
  1.1599 +        pendingBufferOverflow = TRUE;
  1.1600 +        *status = U_ZERO_ERROR;
  1.1601 +    }
  1.1602 +
  1.1603 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1604 +        return 0;
  1.1605 +    }
  1.1606 +    
  1.1607 +    if (destCapacity == NULL || destBuf == NULL || 
  1.1608 +        (*destBuf == NULL && *destCapacity > 0) ||
  1.1609 +        *destCapacity < 0)
  1.1610 +    {
  1.1611 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1612 +        return 0;
  1.1613 +    }
  1.1614 +
  1.1615 +    RegexMatcher *m = regexp->fMatcher;
  1.1616 +
  1.1617 +    int32_t  destIdx     = 0;
  1.1618 +    int32_t  destCap     = *destCapacity;
  1.1619 +    UChar    *dest       = *destBuf;
  1.1620 +    
  1.1621 +    if (regexp->fText != NULL) {
  1.1622 +        int32_t srcIdx;
  1.1623 +        int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
  1.1624 +        if (nativeIdx == -1) {
  1.1625 +            srcIdx = 0;
  1.1626 +        } else if (UTEXT_USES_U16(m->fInputText)) {
  1.1627 +            srcIdx = (int32_t)nativeIdx;
  1.1628 +        } else {
  1.1629 +            UErrorCode status = U_ZERO_ERROR;
  1.1630 +            srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
  1.1631 +        }
  1.1632 +            
  1.1633 +        for (;;) {
  1.1634 +            U_ASSERT(destIdx >= 0);
  1.1635 +
  1.1636 +            if (srcIdx == regexp->fTextLength) {
  1.1637 +                break;
  1.1638 +            }
  1.1639 +            UChar c = regexp->fText[srcIdx];
  1.1640 +            if (c == 0 && regexp->fTextLength == -1) {
  1.1641 +                regexp->fTextLength = srcIdx;
  1.1642 +                break;
  1.1643 +            }
  1.1644 +
  1.1645 +            if (destIdx < destCap) {
  1.1646 +                dest[destIdx] = c;
  1.1647 +            } else {
  1.1648 +                // We've overflowed the dest buffer.
  1.1649 +                //  If the total input string length is known, we can
  1.1650 +                //    compute the total buffer size needed without scanning through the string.
  1.1651 +                if (regexp->fTextLength > 0) {
  1.1652 +                    destIdx += (regexp->fTextLength - srcIdx);
  1.1653 +                    break;
  1.1654 +                }
  1.1655 +            }
  1.1656 +            srcIdx++;
  1.1657 +            destIdx++;
  1.1658 +        }            
  1.1659 +    } else {
  1.1660 +        int64_t  srcIdx;
  1.1661 +        if (m->fMatch) {
  1.1662 +            // The most recent call to find() succeeded.  
  1.1663 +            srcIdx = m->fMatchEnd;
  1.1664 +        } else {
  1.1665 +            // The last call to find() on this matcher failed().
  1.1666 +            //   Look back to the end of the last find() that succeeded for src index.
  1.1667 +            srcIdx = m->fLastMatchEnd;
  1.1668 +            if (srcIdx == -1)  {
  1.1669 +                // There has been no successful match with this matcher.
  1.1670 +                //   We want to copy the whole string.
  1.1671 +                srcIdx = 0;
  1.1672 +            }
  1.1673 +        }
  1.1674 +
  1.1675 +        destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
  1.1676 +    }
  1.1677 +
  1.1678 +    //
  1.1679 +    //  NUL terminate the output string, if possible, otherwise issue the
  1.1680 +    //   appropriate error or warning.
  1.1681 +    //
  1.1682 +    if (destIdx < destCap) {
  1.1683 +        dest[destIdx] = 0;
  1.1684 +    } else  if (destIdx == destCap) {
  1.1685 +        *status = U_STRING_NOT_TERMINATED_WARNING;
  1.1686 +    } else {
  1.1687 +        *status = U_BUFFER_OVERFLOW_ERROR;
  1.1688 +    }
  1.1689 +
  1.1690 +    //
  1.1691 +    // Update the user's buffer ptr and capacity vars to reflect the
  1.1692 +    //   amount used.
  1.1693 +    //
  1.1694 +    if (destIdx < destCap) {
  1.1695 +        *destBuf      += destIdx;
  1.1696 +        *destCapacity -= destIdx;
  1.1697 +    } else if (*destBuf != NULL) {
  1.1698 +        *destBuf      += destCap;
  1.1699 +        *destCapacity  = 0;
  1.1700 +    }
  1.1701 +
  1.1702 +    if (pendingBufferOverflow && U_SUCCESS(*status)) {
  1.1703 +        *status = U_BUFFER_OVERFLOW_ERROR;
  1.1704 +    }
  1.1705 +
  1.1706 +    return destIdx;
  1.1707 +}
  1.1708 +
  1.1709 +
  1.1710 +//
  1.1711 +//   appendTail   the actual API function
  1.1712 +//
  1.1713 +U_CAPI int32_t U_EXPORT2 
  1.1714 +uregex_appendTail(URegularExpression    *regexp2,
  1.1715 +                  UChar                **destBuf,
  1.1716 +                  int32_t               *destCapacity,
  1.1717 +                  UErrorCode            *status)  {
  1.1718 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1719 +    return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
  1.1720 +}
  1.1721 +
  1.1722 +
  1.1723 +//
  1.1724 +//   uregex_appendTailUText...can just use the normal C++ method
  1.1725 +//
  1.1726 +U_CAPI UText * U_EXPORT2 
  1.1727 +uregex_appendTailUText(URegularExpression    *regexp2,
  1.1728 +                       UText                 *dest,
  1.1729 +                       UErrorCode            *status)  {
  1.1730 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1731 +    return regexp->fMatcher->appendTail(dest, *status);
  1.1732 +}
  1.1733 +
  1.1734 +
  1.1735 +//------------------------------------------------------------------------------
  1.1736 +//
  1.1737 +//    copyString     Internal utility to copy a string to an output buffer,
  1.1738 +//                   while managing buffer overflow and preflight size
  1.1739 +//                   computation.  NUL termination is added to destination,
  1.1740 +//                   and the NUL is counted in the output size.
  1.1741 +//
  1.1742 +//------------------------------------------------------------------------------
  1.1743 +#if 0
  1.1744 +static void copyString(UChar        *destBuffer,    //  Destination buffer.
  1.1745 +                       int32_t       destCapacity,  //  Total capacity of dest buffer
  1.1746 +                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
  1.1747 +                                                    //    Update not clipped to destCapacity.
  1.1748 +                       const UChar  *srcPtr,        //  Pointer to source string
  1.1749 +                       int32_t       srcLen)        //  Source string len.
  1.1750 +{
  1.1751 +    int32_t  si;
  1.1752 +    int32_t  di = *destIndex;
  1.1753 +    UChar    c;
  1.1754 +
  1.1755 +    for (si=0; si<srcLen;  si++) {
  1.1756 +        c = srcPtr[si];
  1.1757 +        if (di < destCapacity) {
  1.1758 +            destBuffer[di] = c;
  1.1759 +            di++;
  1.1760 +        } else {
  1.1761 +            di += srcLen - si;
  1.1762 +            break;
  1.1763 +        }
  1.1764 +    }
  1.1765 +    if (di<destCapacity) {
  1.1766 +        destBuffer[di] = 0;
  1.1767 +    }
  1.1768 +    di++;
  1.1769 +    *destIndex = di;
  1.1770 +}
  1.1771 +#endif
  1.1772 +
  1.1773 +//------------------------------------------------------------------------------
  1.1774 +//
  1.1775 +//    uregex_split
  1.1776 +//
  1.1777 +//------------------------------------------------------------------------------
  1.1778 +int32_t RegexCImpl::split(RegularExpression     *regexp,
  1.1779 +                          UChar                 *destBuf,
  1.1780 +                          int32_t                destCapacity,
  1.1781 +                          int32_t               *requiredCapacity,
  1.1782 +                          UChar                 *destFields[],
  1.1783 +                          int32_t                destFieldsCapacity,
  1.1784 +                          UErrorCode            *status) {
  1.1785 +    //
  1.1786 +    // Reset for the input text
  1.1787 +    //
  1.1788 +    regexp->fMatcher->reset();
  1.1789 +    UText *inputText = regexp->fMatcher->fInputText;
  1.1790 +    int64_t   nextOutputStringStart = 0;
  1.1791 +    int64_t   inputLen = regexp->fMatcher->fInputLength;
  1.1792 +    if (inputLen == 0) {
  1.1793 +        return 0;
  1.1794 +    }
  1.1795 +
  1.1796 +    //
  1.1797 +    // Loop through the input text, searching for the delimiter pattern
  1.1798 +    //
  1.1799 +    int32_t   i;             // Index of the field being processed.
  1.1800 +    int32_t   destIdx = 0;   // Next available position in destBuf;
  1.1801 +    int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
  1.1802 +    UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
  1.1803 +    for (i=0; ; i++) {
  1.1804 +        if (i>=destFieldsCapacity-1) {
  1.1805 +            // There are one or zero output strings left.
  1.1806 +            // Fill the last output string with whatever is left from the input, then exit the loop.
  1.1807 +            //  ( i will be == destFieldsCapacity if we filled the output array while processing
  1.1808 +            //    capture groups of the delimiter expression, in which case we will discard the
  1.1809 +            //    last capture group saved in favor of the unprocessed remainder of the
  1.1810 +            //    input string.)
  1.1811 +            if (inputLen > nextOutputStringStart) {
  1.1812 +                if (i != destFieldsCapacity-1) {
  1.1813 +                    // No fields are left.  Recycle the last one for holding the trailing part of
  1.1814 +                    //   the input string.
  1.1815 +                    i = destFieldsCapacity-1;
  1.1816 +                    destIdx = (int32_t)(destFields[i] - destFields[0]);
  1.1817 +                }
  1.1818 +                
  1.1819 +                destFields[i] = &destBuf[destIdx];
  1.1820 +                destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
  1.1821 +                                             &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
  1.1822 +            }
  1.1823 +            break;
  1.1824 +        }
  1.1825 +        
  1.1826 +        if (regexp->fMatcher->find()) {
  1.1827 +            // We found another delimiter.  Move everything from where we started looking
  1.1828 +            //  up until the start of the delimiter into the next output string.
  1.1829 +            destFields[i] = &destBuf[destIdx];
  1.1830 +            
  1.1831 +            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
  1.1832 +                                         &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
  1.1833 +            if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
  1.1834 +                tStatus = U_ZERO_ERROR;
  1.1835 +            } else {
  1.1836 +                *status = tStatus;
  1.1837 +            }
  1.1838 +            nextOutputStringStart = regexp->fMatcher->fMatchEnd;
  1.1839 +            
  1.1840 +            // If the delimiter pattern has capturing parentheses, the captured
  1.1841 +            //  text goes out into the next n destination strings.
  1.1842 +            int32_t groupNum;
  1.1843 +            for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
  1.1844 +                // If we've run out of output string slots, bail out.
  1.1845 +                if (i==destFieldsCapacity-1) {
  1.1846 +                    break;
  1.1847 +                }
  1.1848 +                i++;
  1.1849 +                
  1.1850 +                // Set up to extract the capture group contents into the dest buffer.
  1.1851 +                destFields[i] = &destBuf[destIdx];
  1.1852 +                tStatus = U_ZERO_ERROR;
  1.1853 +                int32_t t = uregex_group((URegularExpression*)regexp, 
  1.1854 +                                         groupNum, 
  1.1855 +                                         destFields[i], 
  1.1856 +                                         REMAINING_CAPACITY(destIdx, destCapacity), 
  1.1857 +                                         &tStatus);
  1.1858 +                destIdx += t + 1;    // Record the space used in the output string buffer.
  1.1859 +                                     //  +1 for the NUL that terminates the string.
  1.1860 +                if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
  1.1861 +                    tStatus = U_ZERO_ERROR;
  1.1862 +                } else {
  1.1863 +                    *status = tStatus;
  1.1864 +                }
  1.1865 +            }
  1.1866 +
  1.1867 +            if (nextOutputStringStart == inputLen) {
  1.1868 +                // The delimiter was at the end of the string. 
  1.1869 +                // Output an empty string, and then we are done.
  1.1870 +                if (destIdx < destCapacity) {
  1.1871 +                    destBuf[destIdx] = 0;
  1.1872 +                }
  1.1873 +                if (i < destFieldsCapacity-1) {
  1.1874 +                   ++i;
  1.1875 +                }
  1.1876 +                if (destIdx < destCapacity) {
  1.1877 +                    destFields[i] = destBuf + destIdx;
  1.1878 +                }
  1.1879 +                ++destIdx;
  1.1880 +                break;
  1.1881 +            }
  1.1882 +
  1.1883 +        }
  1.1884 +        else
  1.1885 +        {
  1.1886 +            // We ran off the end of the input while looking for the next delimiter.
  1.1887 +            // All the remaining text goes into the current output string.
  1.1888 +            destFields[i] = &destBuf[destIdx];
  1.1889 +            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
  1.1890 +                                         &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
  1.1891 +            break;
  1.1892 +        }
  1.1893 +    }
  1.1894 +
  1.1895 +    // Zero out any unused portion of the destFields array
  1.1896 +    int j;
  1.1897 +    for (j=i+1; j<destFieldsCapacity; j++) {
  1.1898 +        destFields[j] = NULL;
  1.1899 +    }
  1.1900 +
  1.1901 +    if (requiredCapacity != NULL) {
  1.1902 +        *requiredCapacity = destIdx;
  1.1903 +    }
  1.1904 +    if (destIdx > destCapacity) {
  1.1905 +        *status = U_BUFFER_OVERFLOW_ERROR;
  1.1906 +    }
  1.1907 +    return i+1;
  1.1908 +}
  1.1909 +
  1.1910 +//
  1.1911 +//   uregex_split   The actual API function
  1.1912 +//
  1.1913 +U_CAPI int32_t U_EXPORT2 
  1.1914 +uregex_split(URegularExpression      *regexp2,
  1.1915 +             UChar                   *destBuf,
  1.1916 +             int32_t                  destCapacity,
  1.1917 +             int32_t                 *requiredCapacity,
  1.1918 +             UChar                   *destFields[],
  1.1919 +             int32_t                  destFieldsCapacity,
  1.1920 +             UErrorCode              *status) {
  1.1921 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1922 +    if (validateRE(regexp, TRUE, status) == FALSE) {
  1.1923 +        return 0;
  1.1924 +    }
  1.1925 +    if ((destBuf == NULL && destCapacity > 0) ||
  1.1926 +        destCapacity < 0 ||
  1.1927 +        destFields == NULL ||
  1.1928 +        destFieldsCapacity < 1 ) {
  1.1929 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1930 +        return 0;
  1.1931 +    }
  1.1932 +    
  1.1933 +    return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
  1.1934 +}
  1.1935 +    
  1.1936 +
  1.1937 +//
  1.1938 +//   uregex_splitUText...can just use the normal C++ method
  1.1939 +//
  1.1940 +U_CAPI int32_t U_EXPORT2 
  1.1941 +uregex_splitUText(URegularExpression    *regexp2,
  1.1942 +                  UText                 *destFields[],
  1.1943 +                  int32_t                destFieldsCapacity,
  1.1944 +                  UErrorCode            *status) {
  1.1945 +    RegularExpression *regexp = (RegularExpression*)regexp2;
  1.1946 +    return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
  1.1947 +}
  1.1948 +
  1.1949 +
  1.1950 +#endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
  1.1951 +

mercurial