1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/uregex.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1948 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2004-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: uregex.cpp 1.10 +*/ 1.11 + 1.12 +#include "unicode/utypes.h" 1.13 + 1.14 +#if !UCONFIG_NO_REGULAR_EXPRESSIONS 1.15 + 1.16 +#include "unicode/regex.h" 1.17 +#include "unicode/uregex.h" 1.18 +#include "unicode/unistr.h" 1.19 +#include "unicode/ustring.h" 1.20 +#include "unicode/uchar.h" 1.21 +#include "unicode/uobject.h" 1.22 +#include "unicode/utf16.h" 1.23 +#include "umutex.h" 1.24 +#include "uassert.h" 1.25 +#include "cmemory.h" 1.26 + 1.27 +#include "regextxt.h" 1.28 + 1.29 +#include <stdio.h> 1.30 + 1.31 +U_NAMESPACE_BEGIN 1.32 + 1.33 +#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0) 1.34 + 1.35 +struct RegularExpression: public UMemory { 1.36 +public: 1.37 + RegularExpression(); 1.38 + ~RegularExpression(); 1.39 + int32_t fMagic; 1.40 + RegexPattern *fPat; 1.41 + u_atomic_int32_t *fPatRefCount; 1.42 + UChar *fPatString; 1.43 + int32_t fPatStringLen; 1.44 + RegexMatcher *fMatcher; 1.45 + const UChar *fText; // Text from setText() 1.46 + int32_t fTextLength; // Length provided by user with setText(), which 1.47 + // may be -1. 1.48 + UBool fOwnsText; 1.49 +}; 1.50 + 1.51 +static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII 1.52 + 1.53 +RegularExpression::RegularExpression() { 1.54 + fMagic = REXP_MAGIC; 1.55 + fPat = NULL; 1.56 + fPatRefCount = NULL; 1.57 + fPatString = NULL; 1.58 + fPatStringLen = 0; 1.59 + fMatcher = NULL; 1.60 + fText = NULL; 1.61 + fTextLength = 0; 1.62 + fOwnsText = FALSE; 1.63 +} 1.64 + 1.65 +RegularExpression::~RegularExpression() { 1.66 + delete fMatcher; 1.67 + fMatcher = NULL; 1.68 + if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) { 1.69 + delete fPat; 1.70 + uprv_free(fPatString); 1.71 + uprv_free((void *)fPatRefCount); 1.72 + } 1.73 + if (fOwnsText && fText!=NULL) { 1.74 + uprv_free((void *)fText); 1.75 + } 1.76 + fMagic = 0; 1.77 +} 1.78 + 1.79 +U_NAMESPACE_END 1.80 + 1.81 +U_NAMESPACE_USE 1.82 + 1.83 +//---------------------------------------------------------------------------------------- 1.84 +// 1.85 +// validateRE Do boilerplate style checks on API function parameters. 1.86 +// Return TRUE if they look OK. 1.87 +//---------------------------------------------------------------------------------------- 1.88 +static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) { 1.89 + if (U_FAILURE(*status)) { 1.90 + return FALSE; 1.91 + } 1.92 + if (re == NULL || re->fMagic != REXP_MAGIC) { 1.93 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.94 + return FALSE; 1.95 + } 1.96 + // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway 1.97 + if (requiresText && re->fText == NULL && !re->fOwnsText) { 1.98 + *status = U_REGEX_INVALID_STATE; 1.99 + return FALSE; 1.100 + } 1.101 + return TRUE; 1.102 +} 1.103 + 1.104 +//---------------------------------------------------------------------------------------- 1.105 +// 1.106 +// uregex_open 1.107 +// 1.108 +//---------------------------------------------------------------------------------------- 1.109 +U_CAPI URegularExpression * U_EXPORT2 1.110 +uregex_open( const UChar *pattern, 1.111 + int32_t patternLength, 1.112 + uint32_t flags, 1.113 + UParseError *pe, 1.114 + UErrorCode *status) { 1.115 + 1.116 + if (U_FAILURE(*status)) { 1.117 + return NULL; 1.118 + } 1.119 + if (pattern == NULL || patternLength < -1 || patternLength == 0) { 1.120 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.121 + return NULL; 1.122 + } 1.123 + int32_t actualPatLen = patternLength; 1.124 + if (actualPatLen == -1) { 1.125 + actualPatLen = u_strlen(pattern); 1.126 + } 1.127 + 1.128 + RegularExpression *re = new RegularExpression; 1.129 + u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t)); 1.130 + UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1)); 1.131 + if (re == NULL || refC == NULL || patBuf == NULL) { 1.132 + *status = U_MEMORY_ALLOCATION_ERROR; 1.133 + delete re; 1.134 + uprv_free((void *)refC); 1.135 + uprv_free(patBuf); 1.136 + return NULL; 1.137 + } 1.138 + re->fPatRefCount = refC; 1.139 + *re->fPatRefCount = 1; 1.140 + 1.141 + // 1.142 + // Make a copy of the pattern string, so we can return it later if asked. 1.143 + // For compiling the pattern, we will use a UText wrapper around 1.144 + // this local copy, to avoid making even more copies. 1.145 + // 1.146 + re->fPatString = patBuf; 1.147 + re->fPatStringLen = patternLength; 1.148 + u_memcpy(patBuf, pattern, actualPatLen); 1.149 + patBuf[actualPatLen] = 0; 1.150 + 1.151 + UText patText = UTEXT_INITIALIZER; 1.152 + utext_openUChars(&patText, patBuf, patternLength, status); 1.153 + 1.154 + // 1.155 + // Compile the pattern 1.156 + // 1.157 + if (pe != NULL) { 1.158 + re->fPat = RegexPattern::compile(&patText, flags, *pe, *status); 1.159 + } else { 1.160 + re->fPat = RegexPattern::compile(&patText, flags, *status); 1.161 + } 1.162 + utext_close(&patText); 1.163 + 1.164 + if (U_FAILURE(*status)) { 1.165 + goto ErrorExit; 1.166 + } 1.167 + 1.168 + // 1.169 + // Create the matcher object 1.170 + // 1.171 + re->fMatcher = re->fPat->matcher(*status); 1.172 + if (U_SUCCESS(*status)) { 1.173 + return (URegularExpression*)re; 1.174 + } 1.175 + 1.176 +ErrorExit: 1.177 + delete re; 1.178 + return NULL; 1.179 + 1.180 +} 1.181 + 1.182 +//---------------------------------------------------------------------------------------- 1.183 +// 1.184 +// uregex_openUText 1.185 +// 1.186 +//---------------------------------------------------------------------------------------- 1.187 +U_CAPI URegularExpression * U_EXPORT2 1.188 +uregex_openUText(UText *pattern, 1.189 + uint32_t flags, 1.190 + UParseError *pe, 1.191 + UErrorCode *status) { 1.192 + 1.193 + if (U_FAILURE(*status)) { 1.194 + return NULL; 1.195 + } 1.196 + if (pattern == NULL) { 1.197 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.198 + return NULL; 1.199 + } 1.200 + 1.201 + int64_t patternNativeLength = utext_nativeLength(pattern); 1.202 + 1.203 + if (patternNativeLength == 0) { 1.204 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.205 + return NULL; 1.206 + } 1.207 + 1.208 + RegularExpression *re = new RegularExpression; 1.209 + 1.210 + UErrorCode lengthStatus = U_ZERO_ERROR; 1.211 + int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus); 1.212 + 1.213 + u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t)); 1.214 + UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1)); 1.215 + if (re == NULL || refC == NULL || patBuf == NULL) { 1.216 + *status = U_MEMORY_ALLOCATION_ERROR; 1.217 + delete re; 1.218 + uprv_free((void *)refC); 1.219 + uprv_free(patBuf); 1.220 + return NULL; 1.221 + } 1.222 + re->fPatRefCount = refC; 1.223 + *re->fPatRefCount = 1; 1.224 + 1.225 + // 1.226 + // Make a copy of the pattern string, so we can return it later if asked. 1.227 + // For compiling the pattern, we will use a read-only UText wrapper 1.228 + // around this local copy, to avoid making even more copies. 1.229 + // 1.230 + re->fPatString = patBuf; 1.231 + re->fPatStringLen = pattern16Length; 1.232 + utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status); 1.233 + 1.234 + UText patText = UTEXT_INITIALIZER; 1.235 + utext_openUChars(&patText, patBuf, pattern16Length, status); 1.236 + 1.237 + // 1.238 + // Compile the pattern 1.239 + // 1.240 + if (pe != NULL) { 1.241 + re->fPat = RegexPattern::compile(&patText, flags, *pe, *status); 1.242 + } else { 1.243 + re->fPat = RegexPattern::compile(&patText, flags, *status); 1.244 + } 1.245 + utext_close(&patText); 1.246 + 1.247 + if (U_FAILURE(*status)) { 1.248 + goto ErrorExit; 1.249 + } 1.250 + 1.251 + // 1.252 + // Create the matcher object 1.253 + // 1.254 + re->fMatcher = re->fPat->matcher(*status); 1.255 + if (U_SUCCESS(*status)) { 1.256 + return (URegularExpression*)re; 1.257 + } 1.258 + 1.259 +ErrorExit: 1.260 + delete re; 1.261 + return NULL; 1.262 + 1.263 +} 1.264 + 1.265 +//---------------------------------------------------------------------------------------- 1.266 +// 1.267 +// uregex_close 1.268 +// 1.269 +//---------------------------------------------------------------------------------------- 1.270 +U_CAPI void U_EXPORT2 1.271 +uregex_close(URegularExpression *re2) { 1.272 + RegularExpression *re = (RegularExpression*)re2; 1.273 + UErrorCode status = U_ZERO_ERROR; 1.274 + if (validateRE(re, FALSE, &status) == FALSE) { 1.275 + return; 1.276 + } 1.277 + delete re; 1.278 +} 1.279 + 1.280 + 1.281 +//---------------------------------------------------------------------------------------- 1.282 +// 1.283 +// uregex_clone 1.284 +// 1.285 +//---------------------------------------------------------------------------------------- 1.286 +U_CAPI URegularExpression * U_EXPORT2 1.287 +uregex_clone(const URegularExpression *source2, UErrorCode *status) { 1.288 + RegularExpression *source = (RegularExpression*)source2; 1.289 + if (validateRE(source, FALSE, status) == FALSE) { 1.290 + return NULL; 1.291 + } 1.292 + 1.293 + RegularExpression *clone = new RegularExpression; 1.294 + if (clone == NULL) { 1.295 + *status = U_MEMORY_ALLOCATION_ERROR; 1.296 + return NULL; 1.297 + } 1.298 + 1.299 + clone->fMatcher = source->fPat->matcher(*status); 1.300 + if (U_FAILURE(*status)) { 1.301 + delete clone; 1.302 + return NULL; 1.303 + } 1.304 + 1.305 + clone->fPat = source->fPat; 1.306 + clone->fPatRefCount = source->fPatRefCount; 1.307 + clone->fPatString = source->fPatString; 1.308 + clone->fPatStringLen = source->fPatStringLen; 1.309 + umtx_atomic_inc(source->fPatRefCount); 1.310 + // Note: fText is not cloned. 1.311 + 1.312 + return (URegularExpression*)clone; 1.313 +} 1.314 + 1.315 + 1.316 + 1.317 + 1.318 +//------------------------------------------------------------------------------ 1.319 +// 1.320 +// uregex_pattern 1.321 +// 1.322 +//------------------------------------------------------------------------------ 1.323 +U_CAPI const UChar * U_EXPORT2 1.324 +uregex_pattern(const URegularExpression *regexp2, 1.325 + int32_t *patLength, 1.326 + UErrorCode *status) { 1.327 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.328 + 1.329 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.330 + return NULL; 1.331 + } 1.332 + if (patLength != NULL) { 1.333 + *patLength = regexp->fPatStringLen; 1.334 + } 1.335 + return regexp->fPatString; 1.336 +} 1.337 + 1.338 + 1.339 +//------------------------------------------------------------------------------ 1.340 +// 1.341 +// uregex_patternUText 1.342 +// 1.343 +//------------------------------------------------------------------------------ 1.344 +U_CAPI UText * U_EXPORT2 1.345 +uregex_patternUText(const URegularExpression *regexp2, 1.346 + UErrorCode *status) { 1.347 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.348 + return regexp->fPat->patternText(*status); 1.349 +} 1.350 + 1.351 + 1.352 +//------------------------------------------------------------------------------ 1.353 +// 1.354 +// uregex_flags 1.355 +// 1.356 +//------------------------------------------------------------------------------ 1.357 +U_CAPI int32_t U_EXPORT2 1.358 +uregex_flags(const URegularExpression *regexp2, UErrorCode *status) { 1.359 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.360 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.361 + return 0; 1.362 + } 1.363 + int32_t flags = regexp->fPat->flags(); 1.364 + return flags; 1.365 +} 1.366 + 1.367 + 1.368 +//------------------------------------------------------------------------------ 1.369 +// 1.370 +// uregex_setText 1.371 +// 1.372 +//------------------------------------------------------------------------------ 1.373 +U_CAPI void U_EXPORT2 1.374 +uregex_setText(URegularExpression *regexp2, 1.375 + const UChar *text, 1.376 + int32_t textLength, 1.377 + UErrorCode *status) { 1.378 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.379 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.380 + return; 1.381 + } 1.382 + if (text == NULL || textLength < -1) { 1.383 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.384 + return; 1.385 + } 1.386 + 1.387 + if (regexp->fOwnsText && regexp->fText != NULL) { 1.388 + uprv_free((void *)regexp->fText); 1.389 + } 1.390 + 1.391 + regexp->fText = text; 1.392 + regexp->fTextLength = textLength; 1.393 + regexp->fOwnsText = FALSE; 1.394 + 1.395 + UText input = UTEXT_INITIALIZER; 1.396 + utext_openUChars(&input, text, textLength, status); 1.397 + regexp->fMatcher->reset(&input); 1.398 + utext_close(&input); // reset() made a shallow clone, so we don't need this copy 1.399 +} 1.400 + 1.401 + 1.402 +//------------------------------------------------------------------------------ 1.403 +// 1.404 +// uregex_setUText 1.405 +// 1.406 +//------------------------------------------------------------------------------ 1.407 +U_CAPI void U_EXPORT2 1.408 +uregex_setUText(URegularExpression *regexp2, 1.409 + UText *text, 1.410 + UErrorCode *status) { 1.411 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.412 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.413 + return; 1.414 + } 1.415 + if (text == NULL) { 1.416 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.417 + return; 1.418 + } 1.419 + 1.420 + if (regexp->fOwnsText && regexp->fText != NULL) { 1.421 + uprv_free((void *)regexp->fText); 1.422 + } 1.423 + 1.424 + regexp->fText = NULL; // only fill it in on request 1.425 + regexp->fTextLength = -1; 1.426 + regexp->fOwnsText = TRUE; 1.427 + regexp->fMatcher->reset(text); 1.428 +} 1.429 + 1.430 + 1.431 + 1.432 +//------------------------------------------------------------------------------ 1.433 +// 1.434 +// uregex_getText 1.435 +// 1.436 +//------------------------------------------------------------------------------ 1.437 +U_CAPI const UChar * U_EXPORT2 1.438 +uregex_getText(URegularExpression *regexp2, 1.439 + int32_t *textLength, 1.440 + UErrorCode *status) { 1.441 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.442 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.443 + return NULL; 1.444 + } 1.445 + 1.446 + if (regexp->fText == NULL) { 1.447 + // need to fill in the text 1.448 + UText *inputText = regexp->fMatcher->inputText(); 1.449 + int64_t inputNativeLength = utext_nativeLength(inputText); 1.450 + if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) { 1.451 + regexp->fText = inputText->chunkContents; 1.452 + regexp->fTextLength = (int32_t)inputNativeLength; 1.453 + regexp->fOwnsText = FALSE; // because the UText owns it 1.454 + } else { 1.455 + UErrorCode lengthStatus = U_ZERO_ERROR; 1.456 + regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error 1.457 + UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1)); 1.458 + 1.459 + utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status); 1.460 + regexp->fText = inputChars; 1.461 + regexp->fOwnsText = TRUE; // should already be set but just in case 1.462 + } 1.463 + } 1.464 + 1.465 + if (textLength != NULL) { 1.466 + *textLength = regexp->fTextLength; 1.467 + } 1.468 + return regexp->fText; 1.469 +} 1.470 + 1.471 + 1.472 +//------------------------------------------------------------------------------ 1.473 +// 1.474 +// uregex_getUText 1.475 +// 1.476 +//------------------------------------------------------------------------------ 1.477 +U_CAPI UText * U_EXPORT2 1.478 +uregex_getUText(URegularExpression *regexp2, 1.479 + UText *dest, 1.480 + UErrorCode *status) { 1.481 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.482 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.483 + return dest; 1.484 + } 1.485 + return regexp->fMatcher->getInput(dest, *status); 1.486 +} 1.487 + 1.488 + 1.489 +//------------------------------------------------------------------------------ 1.490 +// 1.491 +// uregex_refreshUText 1.492 +// 1.493 +//------------------------------------------------------------------------------ 1.494 +U_CAPI void U_EXPORT2 1.495 +uregex_refreshUText(URegularExpression *regexp2, 1.496 + UText *text, 1.497 + UErrorCode *status) { 1.498 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.499 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.500 + return; 1.501 + } 1.502 + regexp->fMatcher->refreshInputText(text, *status); 1.503 +} 1.504 + 1.505 + 1.506 +//------------------------------------------------------------------------------ 1.507 +// 1.508 +// uregex_matches 1.509 +// 1.510 +//------------------------------------------------------------------------------ 1.511 +U_CAPI UBool U_EXPORT2 1.512 +uregex_matches(URegularExpression *regexp2, 1.513 + int32_t startIndex, 1.514 + UErrorCode *status) { 1.515 + return uregex_matches64( regexp2, (int64_t)startIndex, status); 1.516 +} 1.517 + 1.518 +U_CAPI UBool U_EXPORT2 1.519 +uregex_matches64(URegularExpression *regexp2, 1.520 + int64_t startIndex, 1.521 + UErrorCode *status) { 1.522 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.523 + UBool result = FALSE; 1.524 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.525 + return result; 1.526 + } 1.527 + if (startIndex == -1) { 1.528 + result = regexp->fMatcher->matches(*status); 1.529 + } else { 1.530 + result = regexp->fMatcher->matches(startIndex, *status); 1.531 + } 1.532 + return result; 1.533 +} 1.534 + 1.535 + 1.536 +//------------------------------------------------------------------------------ 1.537 +// 1.538 +// uregex_lookingAt 1.539 +// 1.540 +//------------------------------------------------------------------------------ 1.541 +U_CAPI UBool U_EXPORT2 1.542 +uregex_lookingAt(URegularExpression *regexp2, 1.543 + int32_t startIndex, 1.544 + UErrorCode *status) { 1.545 + return uregex_lookingAt64( regexp2, (int64_t)startIndex, status); 1.546 +} 1.547 + 1.548 +U_CAPI UBool U_EXPORT2 1.549 +uregex_lookingAt64(URegularExpression *regexp2, 1.550 + int64_t startIndex, 1.551 + UErrorCode *status) { 1.552 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.553 + UBool result = FALSE; 1.554 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.555 + return result; 1.556 + } 1.557 + if (startIndex == -1) { 1.558 + result = regexp->fMatcher->lookingAt(*status); 1.559 + } else { 1.560 + result = regexp->fMatcher->lookingAt(startIndex, *status); 1.561 + } 1.562 + return result; 1.563 +} 1.564 + 1.565 + 1.566 + 1.567 +//------------------------------------------------------------------------------ 1.568 +// 1.569 +// uregex_find 1.570 +// 1.571 +//------------------------------------------------------------------------------ 1.572 +U_CAPI UBool U_EXPORT2 1.573 +uregex_find(URegularExpression *regexp2, 1.574 + int32_t startIndex, 1.575 + UErrorCode *status) { 1.576 + return uregex_find64( regexp2, (int64_t)startIndex, status); 1.577 +} 1.578 + 1.579 +U_CAPI UBool U_EXPORT2 1.580 +uregex_find64(URegularExpression *regexp2, 1.581 + int64_t startIndex, 1.582 + UErrorCode *status) { 1.583 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.584 + UBool result = FALSE; 1.585 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.586 + return result; 1.587 + } 1.588 + if (startIndex == -1) { 1.589 + regexp->fMatcher->resetPreserveRegion(); 1.590 + result = regexp->fMatcher->find(); 1.591 + } else { 1.592 + result = regexp->fMatcher->find(startIndex, *status); 1.593 + } 1.594 + return result; 1.595 +} 1.596 + 1.597 + 1.598 +//------------------------------------------------------------------------------ 1.599 +// 1.600 +// uregex_findNext 1.601 +// 1.602 +//------------------------------------------------------------------------------ 1.603 +U_CAPI UBool U_EXPORT2 1.604 +uregex_findNext(URegularExpression *regexp2, 1.605 + UErrorCode *status) { 1.606 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.607 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.608 + return FALSE; 1.609 + } 1.610 + UBool result = regexp->fMatcher->find(); 1.611 + return result; 1.612 +} 1.613 + 1.614 +//------------------------------------------------------------------------------ 1.615 +// 1.616 +// uregex_groupCount 1.617 +// 1.618 +//------------------------------------------------------------------------------ 1.619 +U_CAPI int32_t U_EXPORT2 1.620 +uregex_groupCount(URegularExpression *regexp2, 1.621 + UErrorCode *status) { 1.622 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.623 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.624 + return 0; 1.625 + } 1.626 + int32_t result = regexp->fMatcher->groupCount(); 1.627 + return result; 1.628 +} 1.629 + 1.630 + 1.631 +//------------------------------------------------------------------------------ 1.632 +// 1.633 +// uregex_group 1.634 +// 1.635 +//------------------------------------------------------------------------------ 1.636 +U_CAPI int32_t U_EXPORT2 1.637 +uregex_group(URegularExpression *regexp2, 1.638 + int32_t groupNum, 1.639 + UChar *dest, 1.640 + int32_t destCapacity, 1.641 + UErrorCode *status) { 1.642 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.643 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.644 + return 0; 1.645 + } 1.646 + if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) { 1.647 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.648 + return 0; 1.649 + } 1.650 + 1.651 + if (destCapacity == 0 || regexp->fText != NULL) { 1.652 + // If preflighting or if we already have the text as UChars, 1.653 + // this is a little cheaper than going through uregex_groupUTextDeep() 1.654 + 1.655 + // 1.656 + // Pick up the range of characters from the matcher 1.657 + // 1.658 + int32_t startIx = regexp->fMatcher->start(groupNum, *status); 1.659 + int32_t endIx = regexp->fMatcher->end (groupNum, *status); 1.660 + if (U_FAILURE(*status)) { 1.661 + return 0; 1.662 + } 1.663 + 1.664 + // 1.665 + // Trim length based on buffer capacity 1.666 + // 1.667 + int32_t fullLength = endIx - startIx; 1.668 + int32_t copyLength = fullLength; 1.669 + if (copyLength < destCapacity) { 1.670 + dest[copyLength] = 0; 1.671 + } else if (copyLength == destCapacity) { 1.672 + *status = U_STRING_NOT_TERMINATED_WARNING; 1.673 + } else { 1.674 + copyLength = destCapacity; 1.675 + *status = U_BUFFER_OVERFLOW_ERROR; 1.676 + } 1.677 + 1.678 + // 1.679 + // Copy capture group to user's buffer 1.680 + // 1.681 + if (copyLength > 0) { 1.682 + u_memcpy(dest, ®exp->fText[startIx], copyLength); 1.683 + } 1.684 + return fullLength; 1.685 + } else { 1.686 + UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status); 1.687 + int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status); 1.688 + utext_close(groupText); 1.689 + return result; 1.690 + } 1.691 +} 1.692 + 1.693 + 1.694 +//------------------------------------------------------------------------------ 1.695 +// 1.696 +// uregex_groupUText 1.697 +// 1.698 +//------------------------------------------------------------------------------ 1.699 +U_CAPI UText * U_EXPORT2 1.700 +uregex_groupUText(URegularExpression *regexp2, 1.701 + int32_t groupNum, 1.702 + UText *dest, 1.703 + int64_t *groupLength, 1.704 + UErrorCode *status) { 1.705 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.706 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.707 + UErrorCode emptyTextStatus = U_ZERO_ERROR; 1.708 + return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); 1.709 + } 1.710 + 1.711 + return regexp->fMatcher->group(groupNum, dest, *groupLength, *status); 1.712 +} 1.713 + 1.714 +//------------------------------------------------------------------------------ 1.715 +// 1.716 +// uregex_groupUTextDeep 1.717 +// 1.718 +//------------------------------------------------------------------------------ 1.719 +U_CAPI UText * U_EXPORT2 1.720 +uregex_groupUTextDeep(URegularExpression *regexp2, 1.721 + int32_t groupNum, 1.722 + UText *dest, 1.723 + UErrorCode *status) { 1.724 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.725 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.726 + UErrorCode emptyTextStatus = U_ZERO_ERROR; 1.727 + return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); 1.728 + } 1.729 + 1.730 + if (regexp->fText != NULL) { 1.731 + // 1.732 + // Pick up the range of characters from the matcher 1.733 + // and use our already-extracted characters 1.734 + // 1.735 + int32_t startIx = regexp->fMatcher->start(groupNum, *status); 1.736 + int32_t endIx = regexp->fMatcher->end (groupNum, *status); 1.737 + if (U_FAILURE(*status)) { 1.738 + UErrorCode emptyTextStatus = U_ZERO_ERROR; 1.739 + return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus)); 1.740 + } 1.741 + 1.742 + if (dest) { 1.743 + utext_replace(dest, 0, utext_nativeLength(dest), ®exp->fText[startIx], endIx - startIx, status); 1.744 + } else { 1.745 + UText groupText = UTEXT_INITIALIZER; 1.746 + utext_openUChars(&groupText, ®exp->fText[startIx], endIx - startIx, status); 1.747 + dest = utext_clone(NULL, &groupText, TRUE, FALSE, status); 1.748 + utext_close(&groupText); 1.749 + } 1.750 + 1.751 + return dest; 1.752 + } else { 1.753 + return regexp->fMatcher->group(groupNum, dest, *status); 1.754 + } 1.755 +} 1.756 + 1.757 +//------------------------------------------------------------------------------ 1.758 +// 1.759 +// uregex_start 1.760 +// 1.761 +//------------------------------------------------------------------------------ 1.762 +U_CAPI int32_t U_EXPORT2 1.763 +uregex_start(URegularExpression *regexp2, 1.764 + int32_t groupNum, 1.765 + UErrorCode *status) { 1.766 + return (int32_t)uregex_start64( regexp2, groupNum, status); 1.767 +} 1.768 + 1.769 +U_CAPI int64_t U_EXPORT2 1.770 +uregex_start64(URegularExpression *regexp2, 1.771 + int32_t groupNum, 1.772 + UErrorCode *status) { 1.773 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.774 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.775 + return 0; 1.776 + } 1.777 + int32_t result = regexp->fMatcher->start(groupNum, *status); 1.778 + return result; 1.779 +} 1.780 + 1.781 +//------------------------------------------------------------------------------ 1.782 +// 1.783 +// uregex_end 1.784 +// 1.785 +//------------------------------------------------------------------------------ 1.786 +U_CAPI int32_t U_EXPORT2 1.787 +uregex_end(URegularExpression *regexp2, 1.788 + int32_t groupNum, 1.789 + UErrorCode *status) { 1.790 + return (int32_t)uregex_end64( regexp2, groupNum, status); 1.791 +} 1.792 + 1.793 +U_CAPI int64_t U_EXPORT2 1.794 +uregex_end64(URegularExpression *regexp2, 1.795 + int32_t groupNum, 1.796 + UErrorCode *status) { 1.797 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.798 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.799 + return 0; 1.800 + } 1.801 + int32_t result = regexp->fMatcher->end(groupNum, *status); 1.802 + return result; 1.803 +} 1.804 + 1.805 +//------------------------------------------------------------------------------ 1.806 +// 1.807 +// uregex_reset 1.808 +// 1.809 +//------------------------------------------------------------------------------ 1.810 +U_CAPI void U_EXPORT2 1.811 +uregex_reset(URegularExpression *regexp2, 1.812 + int32_t index, 1.813 + UErrorCode *status) { 1.814 + uregex_reset64( regexp2, (int64_t)index, status); 1.815 +} 1.816 + 1.817 +U_CAPI void U_EXPORT2 1.818 +uregex_reset64(URegularExpression *regexp2, 1.819 + int64_t index, 1.820 + UErrorCode *status) { 1.821 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.822 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.823 + return; 1.824 + } 1.825 + regexp->fMatcher->reset(index, *status); 1.826 +} 1.827 + 1.828 + 1.829 +//------------------------------------------------------------------------------ 1.830 +// 1.831 +// uregex_setRegion 1.832 +// 1.833 +//------------------------------------------------------------------------------ 1.834 +U_CAPI void U_EXPORT2 1.835 +uregex_setRegion(URegularExpression *regexp2, 1.836 + int32_t regionStart, 1.837 + int32_t regionLimit, 1.838 + UErrorCode *status) { 1.839 + uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status); 1.840 +} 1.841 + 1.842 +U_CAPI void U_EXPORT2 1.843 +uregex_setRegion64(URegularExpression *regexp2, 1.844 + int64_t regionStart, 1.845 + int64_t regionLimit, 1.846 + UErrorCode *status) { 1.847 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.848 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.849 + return; 1.850 + } 1.851 + regexp->fMatcher->region(regionStart, regionLimit, *status); 1.852 +} 1.853 + 1.854 + 1.855 +//------------------------------------------------------------------------------ 1.856 +// 1.857 +// uregex_setRegionAndStart 1.858 +// 1.859 +//------------------------------------------------------------------------------ 1.860 +U_CAPI void U_EXPORT2 1.861 +uregex_setRegionAndStart(URegularExpression *regexp2, 1.862 + int64_t regionStart, 1.863 + int64_t regionLimit, 1.864 + int64_t startIndex, 1.865 + UErrorCode *status) { 1.866 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.867 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.868 + return; 1.869 + } 1.870 + regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status); 1.871 +} 1.872 + 1.873 +//------------------------------------------------------------------------------ 1.874 +// 1.875 +// uregex_regionStart 1.876 +// 1.877 +//------------------------------------------------------------------------------ 1.878 +U_CAPI int32_t U_EXPORT2 1.879 +uregex_regionStart(const URegularExpression *regexp2, 1.880 + UErrorCode *status) { 1.881 + return (int32_t)uregex_regionStart64(regexp2, status); 1.882 +} 1.883 + 1.884 +U_CAPI int64_t U_EXPORT2 1.885 +uregex_regionStart64(const URegularExpression *regexp2, 1.886 + UErrorCode *status) { 1.887 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.888 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.889 + return 0; 1.890 + } 1.891 + return regexp->fMatcher->regionStart(); 1.892 +} 1.893 + 1.894 + 1.895 +//------------------------------------------------------------------------------ 1.896 +// 1.897 +// uregex_regionEnd 1.898 +// 1.899 +//------------------------------------------------------------------------------ 1.900 +U_CAPI int32_t U_EXPORT2 1.901 +uregex_regionEnd(const URegularExpression *regexp2, 1.902 + UErrorCode *status) { 1.903 + return (int32_t)uregex_regionEnd64(regexp2, status); 1.904 +} 1.905 + 1.906 +U_CAPI int64_t U_EXPORT2 1.907 +uregex_regionEnd64(const URegularExpression *regexp2, 1.908 + UErrorCode *status) { 1.909 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.910 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.911 + return 0; 1.912 + } 1.913 + return regexp->fMatcher->regionEnd(); 1.914 +} 1.915 + 1.916 + 1.917 +//------------------------------------------------------------------------------ 1.918 +// 1.919 +// uregex_hasTransparentBounds 1.920 +// 1.921 +//------------------------------------------------------------------------------ 1.922 +U_CAPI UBool U_EXPORT2 1.923 +uregex_hasTransparentBounds(const URegularExpression *regexp2, 1.924 + UErrorCode *status) { 1.925 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.926 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.927 + return FALSE; 1.928 + } 1.929 + return regexp->fMatcher->hasTransparentBounds(); 1.930 +} 1.931 + 1.932 + 1.933 +//------------------------------------------------------------------------------ 1.934 +// 1.935 +// uregex_useTransparentBounds 1.936 +// 1.937 +//------------------------------------------------------------------------------ 1.938 +U_CAPI void U_EXPORT2 1.939 +uregex_useTransparentBounds(URegularExpression *regexp2, 1.940 + UBool b, 1.941 + UErrorCode *status) { 1.942 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.943 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.944 + return; 1.945 + } 1.946 + regexp->fMatcher->useTransparentBounds(b); 1.947 +} 1.948 + 1.949 + 1.950 +//------------------------------------------------------------------------------ 1.951 +// 1.952 +// uregex_hasAnchoringBounds 1.953 +// 1.954 +//------------------------------------------------------------------------------ 1.955 +U_CAPI UBool U_EXPORT2 1.956 +uregex_hasAnchoringBounds(const URegularExpression *regexp2, 1.957 + UErrorCode *status) { 1.958 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.959 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.960 + return FALSE; 1.961 + } 1.962 + return regexp->fMatcher->hasAnchoringBounds(); 1.963 +} 1.964 + 1.965 + 1.966 +//------------------------------------------------------------------------------ 1.967 +// 1.968 +// uregex_useAnchoringBounds 1.969 +// 1.970 +//------------------------------------------------------------------------------ 1.971 +U_CAPI void U_EXPORT2 1.972 +uregex_useAnchoringBounds(URegularExpression *regexp2, 1.973 + UBool b, 1.974 + UErrorCode *status) { 1.975 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.976 + if (validateRE(regexp, FALSE, status) == FALSE) { 1.977 + return; 1.978 + } 1.979 + regexp->fMatcher->useAnchoringBounds(b); 1.980 +} 1.981 + 1.982 + 1.983 +//------------------------------------------------------------------------------ 1.984 +// 1.985 +// uregex_hitEnd 1.986 +// 1.987 +//------------------------------------------------------------------------------ 1.988 +U_CAPI UBool U_EXPORT2 1.989 +uregex_hitEnd(const URegularExpression *regexp2, 1.990 + UErrorCode *status) { 1.991 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.992 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.993 + return FALSE; 1.994 + } 1.995 + return regexp->fMatcher->hitEnd(); 1.996 +} 1.997 + 1.998 + 1.999 +//------------------------------------------------------------------------------ 1.1000 +// 1.1001 +// uregex_requireEnd 1.1002 +// 1.1003 +//------------------------------------------------------------------------------ 1.1004 +U_CAPI UBool U_EXPORT2 1.1005 +uregex_requireEnd(const URegularExpression *regexp2, 1.1006 + UErrorCode *status) { 1.1007 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1008 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1009 + return FALSE; 1.1010 + } 1.1011 + return regexp->fMatcher->requireEnd(); 1.1012 +} 1.1013 + 1.1014 + 1.1015 +//------------------------------------------------------------------------------ 1.1016 +// 1.1017 +// uregex_setTimeLimit 1.1018 +// 1.1019 +//------------------------------------------------------------------------------ 1.1020 +U_CAPI void U_EXPORT2 1.1021 +uregex_setTimeLimit(URegularExpression *regexp2, 1.1022 + int32_t limit, 1.1023 + UErrorCode *status) { 1.1024 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1025 + if (validateRE(regexp, FALSE, status)) { 1.1026 + regexp->fMatcher->setTimeLimit(limit, *status); 1.1027 + } 1.1028 +} 1.1029 + 1.1030 + 1.1031 + 1.1032 +//------------------------------------------------------------------------------ 1.1033 +// 1.1034 +// uregex_getTimeLimit 1.1035 +// 1.1036 +//------------------------------------------------------------------------------ 1.1037 +U_CAPI int32_t U_EXPORT2 1.1038 +uregex_getTimeLimit(const URegularExpression *regexp2, 1.1039 + UErrorCode *status) { 1.1040 + int32_t retVal = 0; 1.1041 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1042 + if (validateRE(regexp, FALSE, status)) { 1.1043 + retVal = regexp->fMatcher->getTimeLimit(); 1.1044 + } 1.1045 + return retVal; 1.1046 +} 1.1047 + 1.1048 + 1.1049 + 1.1050 +//------------------------------------------------------------------------------ 1.1051 +// 1.1052 +// uregex_setStackLimit 1.1053 +// 1.1054 +//------------------------------------------------------------------------------ 1.1055 +U_CAPI void U_EXPORT2 1.1056 +uregex_setStackLimit(URegularExpression *regexp2, 1.1057 + int32_t limit, 1.1058 + UErrorCode *status) { 1.1059 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1060 + if (validateRE(regexp, FALSE, status)) { 1.1061 + regexp->fMatcher->setStackLimit(limit, *status); 1.1062 + } 1.1063 +} 1.1064 + 1.1065 + 1.1066 + 1.1067 +//------------------------------------------------------------------------------ 1.1068 +// 1.1069 +// uregex_getStackLimit 1.1070 +// 1.1071 +//------------------------------------------------------------------------------ 1.1072 +U_CAPI int32_t U_EXPORT2 1.1073 +uregex_getStackLimit(const URegularExpression *regexp2, 1.1074 + UErrorCode *status) { 1.1075 + int32_t retVal = 0; 1.1076 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1077 + if (validateRE(regexp, FALSE, status)) { 1.1078 + retVal = regexp->fMatcher->getStackLimit(); 1.1079 + } 1.1080 + return retVal; 1.1081 +} 1.1082 + 1.1083 + 1.1084 +//------------------------------------------------------------------------------ 1.1085 +// 1.1086 +// uregex_setMatchCallback 1.1087 +// 1.1088 +//------------------------------------------------------------------------------ 1.1089 +U_CAPI void U_EXPORT2 1.1090 +uregex_setMatchCallback(URegularExpression *regexp2, 1.1091 + URegexMatchCallback *callback, 1.1092 + const void *context, 1.1093 + UErrorCode *status) { 1.1094 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1095 + if (validateRE(regexp, FALSE, status)) { 1.1096 + regexp->fMatcher->setMatchCallback(callback, context, *status); 1.1097 + } 1.1098 +} 1.1099 + 1.1100 + 1.1101 +//------------------------------------------------------------------------------ 1.1102 +// 1.1103 +// uregex_getMatchCallback 1.1104 +// 1.1105 +//------------------------------------------------------------------------------ 1.1106 +U_CAPI void U_EXPORT2 1.1107 +uregex_getMatchCallback(const URegularExpression *regexp2, 1.1108 + URegexMatchCallback **callback, 1.1109 + const void **context, 1.1110 + UErrorCode *status) { 1.1111 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1112 + if (validateRE(regexp, FALSE, status)) { 1.1113 + regexp->fMatcher->getMatchCallback(*callback, *context, *status); 1.1114 + } 1.1115 +} 1.1116 + 1.1117 + 1.1118 +//------------------------------------------------------------------------------ 1.1119 +// 1.1120 +// uregex_setMatchProgressCallback 1.1121 +// 1.1122 +//------------------------------------------------------------------------------ 1.1123 +U_CAPI void U_EXPORT2 1.1124 +uregex_setFindProgressCallback(URegularExpression *regexp2, 1.1125 + URegexFindProgressCallback *callback, 1.1126 + const void *context, 1.1127 + UErrorCode *status) { 1.1128 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1129 + if (validateRE(regexp, FALSE, status)) { 1.1130 + regexp->fMatcher->setFindProgressCallback(callback, context, *status); 1.1131 + } 1.1132 +} 1.1133 + 1.1134 + 1.1135 +//------------------------------------------------------------------------------ 1.1136 +// 1.1137 +// uregex_getMatchCallback 1.1138 +// 1.1139 +//------------------------------------------------------------------------------ 1.1140 +U_CAPI void U_EXPORT2 1.1141 +uregex_getFindProgressCallback(const URegularExpression *regexp2, 1.1142 + URegexFindProgressCallback **callback, 1.1143 + const void **context, 1.1144 + UErrorCode *status) { 1.1145 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1146 + if (validateRE(regexp, FALSE, status)) { 1.1147 + regexp->fMatcher->getFindProgressCallback(*callback, *context, *status); 1.1148 + } 1.1149 +} 1.1150 + 1.1151 + 1.1152 +//------------------------------------------------------------------------------ 1.1153 +// 1.1154 +// uregex_replaceAll 1.1155 +// 1.1156 +//------------------------------------------------------------------------------ 1.1157 +U_CAPI int32_t U_EXPORT2 1.1158 +uregex_replaceAll(URegularExpression *regexp2, 1.1159 + const UChar *replacementText, 1.1160 + int32_t replacementLength, 1.1161 + UChar *destBuf, 1.1162 + int32_t destCapacity, 1.1163 + UErrorCode *status) { 1.1164 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1165 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1166 + return 0; 1.1167 + } 1.1168 + if (replacementText == NULL || replacementLength < -1 || 1.1169 + (destBuf == NULL && destCapacity > 0) || 1.1170 + destCapacity < 0) { 1.1171 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1172 + return 0; 1.1173 + } 1.1174 + 1.1175 + int32_t len = 0; 1.1176 + 1.1177 + uregex_reset(regexp2, 0, status); 1.1178 + 1.1179 + // Note: Seperate error code variables for findNext() and appendReplacement() 1.1180 + // are used so that destination buffer overflow errors 1.1181 + // in appendReplacement won't stop findNext() from working. 1.1182 + // appendReplacement() and appendTail() special case incoming buffer 1.1183 + // overflow errors, continuing to return the correct length. 1.1184 + UErrorCode findStatus = *status; 1.1185 + while (uregex_findNext(regexp2, &findStatus)) { 1.1186 + len += uregex_appendReplacement(regexp2, replacementText, replacementLength, 1.1187 + &destBuf, &destCapacity, status); 1.1188 + } 1.1189 + len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); 1.1190 + 1.1191 + if (U_FAILURE(findStatus)) { 1.1192 + // If anything went wrong with the findNext(), make that error trump 1.1193 + // whatever may have happened with the append() operations. 1.1194 + // Errors in findNext() are not expected. 1.1195 + *status = findStatus; 1.1196 + } 1.1197 + 1.1198 + return len; 1.1199 +} 1.1200 + 1.1201 + 1.1202 +//------------------------------------------------------------------------------ 1.1203 +// 1.1204 +// uregex_replaceAllUText 1.1205 +// 1.1206 +//------------------------------------------------------------------------------ 1.1207 +U_CAPI UText * U_EXPORT2 1.1208 +uregex_replaceAllUText(URegularExpression *regexp2, 1.1209 + UText *replacementText, 1.1210 + UText *dest, 1.1211 + UErrorCode *status) { 1.1212 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1213 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1214 + return 0; 1.1215 + } 1.1216 + if (replacementText == NULL) { 1.1217 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1218 + return 0; 1.1219 + } 1.1220 + 1.1221 + dest = regexp->fMatcher->replaceAll(replacementText, dest, *status); 1.1222 + return dest; 1.1223 +} 1.1224 + 1.1225 + 1.1226 +//------------------------------------------------------------------------------ 1.1227 +// 1.1228 +// uregex_replaceFirst 1.1229 +// 1.1230 +//------------------------------------------------------------------------------ 1.1231 +U_CAPI int32_t U_EXPORT2 1.1232 +uregex_replaceFirst(URegularExpression *regexp2, 1.1233 + const UChar *replacementText, 1.1234 + int32_t replacementLength, 1.1235 + UChar *destBuf, 1.1236 + int32_t destCapacity, 1.1237 + UErrorCode *status) { 1.1238 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1239 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1240 + return 0; 1.1241 + } 1.1242 + if (replacementText == NULL || replacementLength < -1 || 1.1243 + (destBuf == NULL && destCapacity > 0) || 1.1244 + destCapacity < 0) { 1.1245 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1246 + return 0; 1.1247 + } 1.1248 + 1.1249 + int32_t len = 0; 1.1250 + UBool findSucceeded; 1.1251 + uregex_reset(regexp2, 0, status); 1.1252 + findSucceeded = uregex_find(regexp2, 0, status); 1.1253 + if (findSucceeded) { 1.1254 + len = uregex_appendReplacement(regexp2, replacementText, replacementLength, 1.1255 + &destBuf, &destCapacity, status); 1.1256 + } 1.1257 + len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status); 1.1258 + 1.1259 + return len; 1.1260 +} 1.1261 + 1.1262 + 1.1263 +//------------------------------------------------------------------------------ 1.1264 +// 1.1265 +// uregex_replaceFirstUText 1.1266 +// 1.1267 +//------------------------------------------------------------------------------ 1.1268 +U_CAPI UText * U_EXPORT2 1.1269 +uregex_replaceFirstUText(URegularExpression *regexp2, 1.1270 + UText *replacementText, 1.1271 + UText *dest, 1.1272 + UErrorCode *status) { 1.1273 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1274 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1275 + return 0; 1.1276 + } 1.1277 + if (replacementText == NULL) { 1.1278 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1279 + return 0; 1.1280 + } 1.1281 + 1.1282 + dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status); 1.1283 + return dest; 1.1284 +} 1.1285 + 1.1286 + 1.1287 +//------------------------------------------------------------------------------ 1.1288 +// 1.1289 +// uregex_appendReplacement 1.1290 +// 1.1291 +//------------------------------------------------------------------------------ 1.1292 + 1.1293 +U_NAMESPACE_BEGIN 1.1294 +// 1.1295 +// Dummy class, because these functions need to be friends of class RegexMatcher, 1.1296 +// and stand-alone C functions don't work as friends 1.1297 +// 1.1298 +class RegexCImpl { 1.1299 + public: 1.1300 + inline static int32_t appendReplacement(RegularExpression *regexp, 1.1301 + const UChar *replacementText, 1.1302 + int32_t replacementLength, 1.1303 + UChar **destBuf, 1.1304 + int32_t *destCapacity, 1.1305 + UErrorCode *status); 1.1306 + 1.1307 + inline static int32_t appendTail(RegularExpression *regexp, 1.1308 + UChar **destBuf, 1.1309 + int32_t *destCapacity, 1.1310 + UErrorCode *status); 1.1311 + 1.1312 + inline static int32_t split(RegularExpression *regexp, 1.1313 + UChar *destBuf, 1.1314 + int32_t destCapacity, 1.1315 + int32_t *requiredCapacity, 1.1316 + UChar *destFields[], 1.1317 + int32_t destFieldsCapacity, 1.1318 + UErrorCode *status); 1.1319 +}; 1.1320 + 1.1321 +U_NAMESPACE_END 1.1322 + 1.1323 + 1.1324 + 1.1325 +static const UChar BACKSLASH = 0x5c; 1.1326 +static const UChar DOLLARSIGN = 0x24; 1.1327 + 1.1328 +// 1.1329 +// Move a character to an output buffer, with bounds checking on the index. 1.1330 +// Index advances even if capacity is exceeded, for preflight size computations. 1.1331 +// This little sequence is used a LOT. 1.1332 +// 1.1333 +static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) { 1.1334 + if (*idx < bufCapacity) { 1.1335 + buf[*idx] = c; 1.1336 + } 1.1337 + (*idx)++; 1.1338 +} 1.1339 + 1.1340 + 1.1341 +// 1.1342 +// appendReplacement, the actual implementation. 1.1343 +// 1.1344 +int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, 1.1345 + const UChar *replacementText, 1.1346 + int32_t replacementLength, 1.1347 + UChar **destBuf, 1.1348 + int32_t *destCapacity, 1.1349 + UErrorCode *status) { 1.1350 + 1.1351 + // If we come in with a buffer overflow error, don't suppress the operation. 1.1352 + // A series of appendReplacements, appendTail need to correctly preflight 1.1353 + // the buffer size when an overflow happens somewhere in the middle. 1.1354 + UBool pendingBufferOverflow = FALSE; 1.1355 + if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) { 1.1356 + pendingBufferOverflow = TRUE; 1.1357 + *status = U_ZERO_ERROR; 1.1358 + } 1.1359 + 1.1360 + // 1.1361 + // Validate all paramters 1.1362 + // 1.1363 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1364 + return 0; 1.1365 + } 1.1366 + if (replacementText == NULL || replacementLength < -1 || 1.1367 + destCapacity == NULL || destBuf == NULL || 1.1368 + (*destBuf == NULL && *destCapacity > 0) || 1.1369 + *destCapacity < 0) { 1.1370 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1371 + return 0; 1.1372 + } 1.1373 + 1.1374 + RegexMatcher *m = regexp->fMatcher; 1.1375 + if (m->fMatch == FALSE) { 1.1376 + *status = U_REGEX_INVALID_STATE; 1.1377 + return 0; 1.1378 + } 1.1379 + 1.1380 + UChar *dest = *destBuf; 1.1381 + int32_t capacity = *destCapacity; 1.1382 + int32_t destIdx = 0; 1.1383 + int32_t i; 1.1384 + 1.1385 + // If it wasn't supplied by the caller, get the length of the replacement text. 1.1386 + // TODO: slightly smarter logic in the copy loop could watch for the NUL on 1.1387 + // the fly and avoid this step. 1.1388 + if (replacementLength == -1) { 1.1389 + replacementLength = u_strlen(replacementText); 1.1390 + } 1.1391 + 1.1392 + // Copy input string from the end of previous match to start of current match 1.1393 + if (regexp->fText != NULL) { 1.1394 + int32_t matchStart; 1.1395 + int32_t lastMatchEnd; 1.1396 + if (UTEXT_USES_U16(m->fInputText)) { 1.1397 + lastMatchEnd = (int32_t)m->fLastMatchEnd; 1.1398 + matchStart = (int32_t)m->fMatchStart; 1.1399 + } else { 1.1400 + // !!!: Would like a better way to do this! 1.1401 + UErrorCode status = U_ZERO_ERROR; 1.1402 + lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status); 1.1403 + status = U_ZERO_ERROR; 1.1404 + matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status); 1.1405 + } 1.1406 + for (i=lastMatchEnd; i<matchStart; i++) { 1.1407 + appendToBuf(regexp->fText[i], &destIdx, dest, capacity); 1.1408 + } 1.1409 + } else { 1.1410 + UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore 1.1411 + destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, 1.1412 + dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), 1.1413 + &possibleOverflowError); 1.1414 + } 1.1415 + U_ASSERT(destIdx >= 0); 1.1416 + 1.1417 + // scan the replacement text, looking for substitutions ($n) and \escapes. 1.1418 + int32_t replIdx = 0; 1.1419 + while (replIdx < replacementLength) { 1.1420 + UChar c = replacementText[replIdx]; 1.1421 + replIdx++; 1.1422 + if (c != DOLLARSIGN && c != BACKSLASH) { 1.1423 + // Common case, no substitution, no escaping, 1.1424 + // just copy the char to the dest buf. 1.1425 + appendToBuf(c, &destIdx, dest, capacity); 1.1426 + continue; 1.1427 + } 1.1428 + 1.1429 + if (c == BACKSLASH) { 1.1430 + // Backslash Escape. Copy the following char out without further checks. 1.1431 + // Note: Surrogate pairs don't need any special handling 1.1432 + // The second half wont be a '$' or a '\', and 1.1433 + // will move to the dest normally on the next 1.1434 + // loop iteration. 1.1435 + if (replIdx >= replacementLength) { 1.1436 + break; 1.1437 + } 1.1438 + c = replacementText[replIdx]; 1.1439 + 1.1440 + if (c==0x55/*U*/ || c==0x75/*u*/) { 1.1441 + // We have a \udddd or \Udddddddd escape sequence. 1.1442 + UChar32 escapedChar = 1.1443 + u_unescapeAt(uregex_ucstr_unescape_charAt, 1.1444 + &replIdx, // Index is updated by unescapeAt 1.1445 + replacementLength, // Length of replacement text 1.1446 + (void *)replacementText); 1.1447 + 1.1448 + if (escapedChar != (UChar32)0xFFFFFFFF) { 1.1449 + if (escapedChar <= 0xffff) { 1.1450 + appendToBuf((UChar)escapedChar, &destIdx, dest, capacity); 1.1451 + } else { 1.1452 + appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity); 1.1453 + appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity); 1.1454 + } 1.1455 + continue; 1.1456 + } 1.1457 + // Note: if the \u escape was invalid, just fall through and 1.1458 + // treat it as a plain \<anything> escape. 1.1459 + } 1.1460 + 1.1461 + // Plain backslash escape. Just put out the escaped character. 1.1462 + appendToBuf(c, &destIdx, dest, capacity); 1.1463 + 1.1464 + replIdx++; 1.1465 + continue; 1.1466 + } 1.1467 + 1.1468 + 1.1469 + 1.1470 + // We've got a $. Pick up a capture group number if one follows. 1.1471 + // Consume at most the number of digits necessary for the largest capture 1.1472 + // number that is valid for this pattern. 1.1473 + 1.1474 + int32_t numDigits = 0; 1.1475 + int32_t groupNum = 0; 1.1476 + UChar32 digitC; 1.1477 + for (;;) { 1.1478 + if (replIdx >= replacementLength) { 1.1479 + break; 1.1480 + } 1.1481 + U16_GET(replacementText, 0, replIdx, replacementLength, digitC); 1.1482 + if (u_isdigit(digitC) == FALSE) { 1.1483 + break; 1.1484 + } 1.1485 + 1.1486 + U16_FWD_1(replacementText, replIdx, replacementLength); 1.1487 + groupNum=groupNum*10 + u_charDigitValue(digitC); 1.1488 + numDigits++; 1.1489 + if (numDigits >= m->fPattern->fMaxCaptureDigits) { 1.1490 + break; 1.1491 + } 1.1492 + } 1.1493 + 1.1494 + 1.1495 + if (numDigits == 0) { 1.1496 + // The $ didn't introduce a group number at all. 1.1497 + // Treat it as just part of the substitution text. 1.1498 + appendToBuf(DOLLARSIGN, &destIdx, dest, capacity); 1.1499 + continue; 1.1500 + } 1.1501 + 1.1502 + // Finally, append the capture group data to the destination. 1.1503 + destIdx += uregex_group((URegularExpression*)regexp, groupNum, 1.1504 + dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status); 1.1505 + if (*status == U_BUFFER_OVERFLOW_ERROR) { 1.1506 + // Ignore buffer overflow when extracting the group. We need to 1.1507 + // continue on to get full size of the untruncated result. We will 1.1508 + // raise our own buffer overflow error at the end. 1.1509 + *status = U_ZERO_ERROR; 1.1510 + } 1.1511 + 1.1512 + if (U_FAILURE(*status)) { 1.1513 + // Can fail if group number is out of range. 1.1514 + break; 1.1515 + } 1.1516 + 1.1517 + } 1.1518 + 1.1519 + // 1.1520 + // Nul Terminate the dest buffer if possible. 1.1521 + // Set the appropriate buffer overflow or not terminated error, if needed. 1.1522 + // 1.1523 + if (destIdx < capacity) { 1.1524 + dest[destIdx] = 0; 1.1525 + } else if (destIdx == *destCapacity) { 1.1526 + *status = U_STRING_NOT_TERMINATED_WARNING; 1.1527 + } else { 1.1528 + *status = U_BUFFER_OVERFLOW_ERROR; 1.1529 + } 1.1530 + 1.1531 + // 1.1532 + // Return an updated dest buffer and capacity to the caller. 1.1533 + // 1.1534 + if (destIdx > 0 && *destCapacity > 0) { 1.1535 + if (destIdx < capacity) { 1.1536 + *destBuf += destIdx; 1.1537 + *destCapacity -= destIdx; 1.1538 + } else { 1.1539 + *destBuf += capacity; 1.1540 + *destCapacity = 0; 1.1541 + } 1.1542 + } 1.1543 + 1.1544 + // If we came in with a buffer overflow, make sure we go out with one also. 1.1545 + // (A zero length match right at the end of the previous match could 1.1546 + // make this function succeed even though a previous call had overflowed the buf) 1.1547 + if (pendingBufferOverflow && U_SUCCESS(*status)) { 1.1548 + *status = U_BUFFER_OVERFLOW_ERROR; 1.1549 + } 1.1550 + 1.1551 + return destIdx; 1.1552 +} 1.1553 + 1.1554 +// 1.1555 +// appendReplacement the actual API function, 1.1556 +// 1.1557 +U_CAPI int32_t U_EXPORT2 1.1558 +uregex_appendReplacement(URegularExpression *regexp2, 1.1559 + const UChar *replacementText, 1.1560 + int32_t replacementLength, 1.1561 + UChar **destBuf, 1.1562 + int32_t *destCapacity, 1.1563 + UErrorCode *status) { 1.1564 + 1.1565 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1566 + return RegexCImpl::appendReplacement( 1.1567 + regexp, replacementText, replacementLength,destBuf, destCapacity, status); 1.1568 +} 1.1569 + 1.1570 +// 1.1571 +// uregex_appendReplacementUText...can just use the normal C++ method 1.1572 +// 1.1573 +U_CAPI void U_EXPORT2 1.1574 +uregex_appendReplacementUText(URegularExpression *regexp2, 1.1575 + UText *replText, 1.1576 + UText *dest, 1.1577 + UErrorCode *status) { 1.1578 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1579 + regexp->fMatcher->appendReplacement(dest, replText, *status); 1.1580 +} 1.1581 + 1.1582 + 1.1583 +//------------------------------------------------------------------------------ 1.1584 +// 1.1585 +// uregex_appendTail 1.1586 +// 1.1587 +//------------------------------------------------------------------------------ 1.1588 +int32_t RegexCImpl::appendTail(RegularExpression *regexp, 1.1589 + UChar **destBuf, 1.1590 + int32_t *destCapacity, 1.1591 + UErrorCode *status) 1.1592 +{ 1.1593 + 1.1594 + // If we come in with a buffer overflow error, don't suppress the operation. 1.1595 + // A series of appendReplacements, appendTail need to correctly preflight 1.1596 + // the buffer size when an overflow happens somewhere in the middle. 1.1597 + UBool pendingBufferOverflow = FALSE; 1.1598 + if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) { 1.1599 + pendingBufferOverflow = TRUE; 1.1600 + *status = U_ZERO_ERROR; 1.1601 + } 1.1602 + 1.1603 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1604 + return 0; 1.1605 + } 1.1606 + 1.1607 + if (destCapacity == NULL || destBuf == NULL || 1.1608 + (*destBuf == NULL && *destCapacity > 0) || 1.1609 + *destCapacity < 0) 1.1610 + { 1.1611 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1612 + return 0; 1.1613 + } 1.1614 + 1.1615 + RegexMatcher *m = regexp->fMatcher; 1.1616 + 1.1617 + int32_t destIdx = 0; 1.1618 + int32_t destCap = *destCapacity; 1.1619 + UChar *dest = *destBuf; 1.1620 + 1.1621 + if (regexp->fText != NULL) { 1.1622 + int32_t srcIdx; 1.1623 + int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd); 1.1624 + if (nativeIdx == -1) { 1.1625 + srcIdx = 0; 1.1626 + } else if (UTEXT_USES_U16(m->fInputText)) { 1.1627 + srcIdx = (int32_t)nativeIdx; 1.1628 + } else { 1.1629 + UErrorCode status = U_ZERO_ERROR; 1.1630 + srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status); 1.1631 + } 1.1632 + 1.1633 + for (;;) { 1.1634 + U_ASSERT(destIdx >= 0); 1.1635 + 1.1636 + if (srcIdx == regexp->fTextLength) { 1.1637 + break; 1.1638 + } 1.1639 + UChar c = regexp->fText[srcIdx]; 1.1640 + if (c == 0 && regexp->fTextLength == -1) { 1.1641 + regexp->fTextLength = srcIdx; 1.1642 + break; 1.1643 + } 1.1644 + 1.1645 + if (destIdx < destCap) { 1.1646 + dest[destIdx] = c; 1.1647 + } else { 1.1648 + // We've overflowed the dest buffer. 1.1649 + // If the total input string length is known, we can 1.1650 + // compute the total buffer size needed without scanning through the string. 1.1651 + if (regexp->fTextLength > 0) { 1.1652 + destIdx += (regexp->fTextLength - srcIdx); 1.1653 + break; 1.1654 + } 1.1655 + } 1.1656 + srcIdx++; 1.1657 + destIdx++; 1.1658 + } 1.1659 + } else { 1.1660 + int64_t srcIdx; 1.1661 + if (m->fMatch) { 1.1662 + // The most recent call to find() succeeded. 1.1663 + srcIdx = m->fMatchEnd; 1.1664 + } else { 1.1665 + // The last call to find() on this matcher failed(). 1.1666 + // Look back to the end of the last find() that succeeded for src index. 1.1667 + srcIdx = m->fLastMatchEnd; 1.1668 + if (srcIdx == -1) { 1.1669 + // There has been no successful match with this matcher. 1.1670 + // We want to copy the whole string. 1.1671 + srcIdx = 0; 1.1672 + } 1.1673 + } 1.1674 + 1.1675 + destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status); 1.1676 + } 1.1677 + 1.1678 + // 1.1679 + // NUL terminate the output string, if possible, otherwise issue the 1.1680 + // appropriate error or warning. 1.1681 + // 1.1682 + if (destIdx < destCap) { 1.1683 + dest[destIdx] = 0; 1.1684 + } else if (destIdx == destCap) { 1.1685 + *status = U_STRING_NOT_TERMINATED_WARNING; 1.1686 + } else { 1.1687 + *status = U_BUFFER_OVERFLOW_ERROR; 1.1688 + } 1.1689 + 1.1690 + // 1.1691 + // Update the user's buffer ptr and capacity vars to reflect the 1.1692 + // amount used. 1.1693 + // 1.1694 + if (destIdx < destCap) { 1.1695 + *destBuf += destIdx; 1.1696 + *destCapacity -= destIdx; 1.1697 + } else if (*destBuf != NULL) { 1.1698 + *destBuf += destCap; 1.1699 + *destCapacity = 0; 1.1700 + } 1.1701 + 1.1702 + if (pendingBufferOverflow && U_SUCCESS(*status)) { 1.1703 + *status = U_BUFFER_OVERFLOW_ERROR; 1.1704 + } 1.1705 + 1.1706 + return destIdx; 1.1707 +} 1.1708 + 1.1709 + 1.1710 +// 1.1711 +// appendTail the actual API function 1.1712 +// 1.1713 +U_CAPI int32_t U_EXPORT2 1.1714 +uregex_appendTail(URegularExpression *regexp2, 1.1715 + UChar **destBuf, 1.1716 + int32_t *destCapacity, 1.1717 + UErrorCode *status) { 1.1718 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1719 + return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status); 1.1720 +} 1.1721 + 1.1722 + 1.1723 +// 1.1724 +// uregex_appendTailUText...can just use the normal C++ method 1.1725 +// 1.1726 +U_CAPI UText * U_EXPORT2 1.1727 +uregex_appendTailUText(URegularExpression *regexp2, 1.1728 + UText *dest, 1.1729 + UErrorCode *status) { 1.1730 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1731 + return regexp->fMatcher->appendTail(dest, *status); 1.1732 +} 1.1733 + 1.1734 + 1.1735 +//------------------------------------------------------------------------------ 1.1736 +// 1.1737 +// copyString Internal utility to copy a string to an output buffer, 1.1738 +// while managing buffer overflow and preflight size 1.1739 +// computation. NUL termination is added to destination, 1.1740 +// and the NUL is counted in the output size. 1.1741 +// 1.1742 +//------------------------------------------------------------------------------ 1.1743 +#if 0 1.1744 +static void copyString(UChar *destBuffer, // Destination buffer. 1.1745 + int32_t destCapacity, // Total capacity of dest buffer 1.1746 + int32_t *destIndex, // Index into dest buffer. Updated on return. 1.1747 + // Update not clipped to destCapacity. 1.1748 + const UChar *srcPtr, // Pointer to source string 1.1749 + int32_t srcLen) // Source string len. 1.1750 +{ 1.1751 + int32_t si; 1.1752 + int32_t di = *destIndex; 1.1753 + UChar c; 1.1754 + 1.1755 + for (si=0; si<srcLen; si++) { 1.1756 + c = srcPtr[si]; 1.1757 + if (di < destCapacity) { 1.1758 + destBuffer[di] = c; 1.1759 + di++; 1.1760 + } else { 1.1761 + di += srcLen - si; 1.1762 + break; 1.1763 + } 1.1764 + } 1.1765 + if (di<destCapacity) { 1.1766 + destBuffer[di] = 0; 1.1767 + } 1.1768 + di++; 1.1769 + *destIndex = di; 1.1770 +} 1.1771 +#endif 1.1772 + 1.1773 +//------------------------------------------------------------------------------ 1.1774 +// 1.1775 +// uregex_split 1.1776 +// 1.1777 +//------------------------------------------------------------------------------ 1.1778 +int32_t RegexCImpl::split(RegularExpression *regexp, 1.1779 + UChar *destBuf, 1.1780 + int32_t destCapacity, 1.1781 + int32_t *requiredCapacity, 1.1782 + UChar *destFields[], 1.1783 + int32_t destFieldsCapacity, 1.1784 + UErrorCode *status) { 1.1785 + // 1.1786 + // Reset for the input text 1.1787 + // 1.1788 + regexp->fMatcher->reset(); 1.1789 + UText *inputText = regexp->fMatcher->fInputText; 1.1790 + int64_t nextOutputStringStart = 0; 1.1791 + int64_t inputLen = regexp->fMatcher->fInputLength; 1.1792 + if (inputLen == 0) { 1.1793 + return 0; 1.1794 + } 1.1795 + 1.1796 + // 1.1797 + // Loop through the input text, searching for the delimiter pattern 1.1798 + // 1.1799 + int32_t i; // Index of the field being processed. 1.1800 + int32_t destIdx = 0; // Next available position in destBuf; 1.1801 + int32_t numCaptureGroups = regexp->fMatcher->groupCount(); 1.1802 + UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted 1.1803 + for (i=0; ; i++) { 1.1804 + if (i>=destFieldsCapacity-1) { 1.1805 + // There are one or zero output strings left. 1.1806 + // Fill the last output string with whatever is left from the input, then exit the loop. 1.1807 + // ( i will be == destFieldsCapacity if we filled the output array while processing 1.1808 + // capture groups of the delimiter expression, in which case we will discard the 1.1809 + // last capture group saved in favor of the unprocessed remainder of the 1.1810 + // input string.) 1.1811 + if (inputLen > nextOutputStringStart) { 1.1812 + if (i != destFieldsCapacity-1) { 1.1813 + // No fields are left. Recycle the last one for holding the trailing part of 1.1814 + // the input string. 1.1815 + i = destFieldsCapacity-1; 1.1816 + destIdx = (int32_t)(destFields[i] - destFields[0]); 1.1817 + } 1.1818 + 1.1819 + destFields[i] = &destBuf[destIdx]; 1.1820 + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen, 1.1821 + &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); 1.1822 + } 1.1823 + break; 1.1824 + } 1.1825 + 1.1826 + if (regexp->fMatcher->find()) { 1.1827 + // We found another delimiter. Move everything from where we started looking 1.1828 + // up until the start of the delimiter into the next output string. 1.1829 + destFields[i] = &destBuf[destIdx]; 1.1830 + 1.1831 + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart, 1.1832 + &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus); 1.1833 + if (tStatus == U_BUFFER_OVERFLOW_ERROR) { 1.1834 + tStatus = U_ZERO_ERROR; 1.1835 + } else { 1.1836 + *status = tStatus; 1.1837 + } 1.1838 + nextOutputStringStart = regexp->fMatcher->fMatchEnd; 1.1839 + 1.1840 + // If the delimiter pattern has capturing parentheses, the captured 1.1841 + // text goes out into the next n destination strings. 1.1842 + int32_t groupNum; 1.1843 + for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { 1.1844 + // If we've run out of output string slots, bail out. 1.1845 + if (i==destFieldsCapacity-1) { 1.1846 + break; 1.1847 + } 1.1848 + i++; 1.1849 + 1.1850 + // Set up to extract the capture group contents into the dest buffer. 1.1851 + destFields[i] = &destBuf[destIdx]; 1.1852 + tStatus = U_ZERO_ERROR; 1.1853 + int32_t t = uregex_group((URegularExpression*)regexp, 1.1854 + groupNum, 1.1855 + destFields[i], 1.1856 + REMAINING_CAPACITY(destIdx, destCapacity), 1.1857 + &tStatus); 1.1858 + destIdx += t + 1; // Record the space used in the output string buffer. 1.1859 + // +1 for the NUL that terminates the string. 1.1860 + if (tStatus == U_BUFFER_OVERFLOW_ERROR) { 1.1861 + tStatus = U_ZERO_ERROR; 1.1862 + } else { 1.1863 + *status = tStatus; 1.1864 + } 1.1865 + } 1.1866 + 1.1867 + if (nextOutputStringStart == inputLen) { 1.1868 + // The delimiter was at the end of the string. 1.1869 + // Output an empty string, and then we are done. 1.1870 + if (destIdx < destCapacity) { 1.1871 + destBuf[destIdx] = 0; 1.1872 + } 1.1873 + if (i < destFieldsCapacity-1) { 1.1874 + ++i; 1.1875 + } 1.1876 + if (destIdx < destCapacity) { 1.1877 + destFields[i] = destBuf + destIdx; 1.1878 + } 1.1879 + ++destIdx; 1.1880 + break; 1.1881 + } 1.1882 + 1.1883 + } 1.1884 + else 1.1885 + { 1.1886 + // We ran off the end of the input while looking for the next delimiter. 1.1887 + // All the remaining text goes into the current output string. 1.1888 + destFields[i] = &destBuf[destIdx]; 1.1889 + destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen, 1.1890 + &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status); 1.1891 + break; 1.1892 + } 1.1893 + } 1.1894 + 1.1895 + // Zero out any unused portion of the destFields array 1.1896 + int j; 1.1897 + for (j=i+1; j<destFieldsCapacity; j++) { 1.1898 + destFields[j] = NULL; 1.1899 + } 1.1900 + 1.1901 + if (requiredCapacity != NULL) { 1.1902 + *requiredCapacity = destIdx; 1.1903 + } 1.1904 + if (destIdx > destCapacity) { 1.1905 + *status = U_BUFFER_OVERFLOW_ERROR; 1.1906 + } 1.1907 + return i+1; 1.1908 +} 1.1909 + 1.1910 +// 1.1911 +// uregex_split The actual API function 1.1912 +// 1.1913 +U_CAPI int32_t U_EXPORT2 1.1914 +uregex_split(URegularExpression *regexp2, 1.1915 + UChar *destBuf, 1.1916 + int32_t destCapacity, 1.1917 + int32_t *requiredCapacity, 1.1918 + UChar *destFields[], 1.1919 + int32_t destFieldsCapacity, 1.1920 + UErrorCode *status) { 1.1921 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1922 + if (validateRE(regexp, TRUE, status) == FALSE) { 1.1923 + return 0; 1.1924 + } 1.1925 + if ((destBuf == NULL && destCapacity > 0) || 1.1926 + destCapacity < 0 || 1.1927 + destFields == NULL || 1.1928 + destFieldsCapacity < 1 ) { 1.1929 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.1930 + return 0; 1.1931 + } 1.1932 + 1.1933 + return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status); 1.1934 +} 1.1935 + 1.1936 + 1.1937 +// 1.1938 +// uregex_splitUText...can just use the normal C++ method 1.1939 +// 1.1940 +U_CAPI int32_t U_EXPORT2 1.1941 +uregex_splitUText(URegularExpression *regexp2, 1.1942 + UText *destFields[], 1.1943 + int32_t destFieldsCapacity, 1.1944 + UErrorCode *status) { 1.1945 + RegularExpression *regexp = (RegularExpression*)regexp2; 1.1946 + return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status); 1.1947 +} 1.1948 + 1.1949 + 1.1950 +#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 1.1951 +