intl/icu/source/i18n/uregex.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2004-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: uregex.cpp
michael@0 7 */
michael@0 8
michael@0 9 #include "unicode/utypes.h"
michael@0 10
michael@0 11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
michael@0 12
michael@0 13 #include "unicode/regex.h"
michael@0 14 #include "unicode/uregex.h"
michael@0 15 #include "unicode/unistr.h"
michael@0 16 #include "unicode/ustring.h"
michael@0 17 #include "unicode/uchar.h"
michael@0 18 #include "unicode/uobject.h"
michael@0 19 #include "unicode/utf16.h"
michael@0 20 #include "umutex.h"
michael@0 21 #include "uassert.h"
michael@0 22 #include "cmemory.h"
michael@0 23
michael@0 24 #include "regextxt.h"
michael@0 25
michael@0 26 #include <stdio.h>
michael@0 27
michael@0 28 U_NAMESPACE_BEGIN
michael@0 29
// REMAINING_CAPACITY(idx, len) yields (len - idx) when that difference is
// positive and 0 otherwise.  Each argument is expanded more than once, so
// callers must pass expressions without side effects.
#define REMAINING_CAPACITY(idx,len) \
    ((((len) - (idx)) > 0) ? ((len) - (idx)) : 0)
michael@0 31
michael@0 32 struct RegularExpression: public UMemory {
michael@0 33 public:
michael@0 34 RegularExpression();
michael@0 35 ~RegularExpression();
michael@0 36 int32_t fMagic;
michael@0 37 RegexPattern *fPat;
michael@0 38 u_atomic_int32_t *fPatRefCount;
michael@0 39 UChar *fPatString;
michael@0 40 int32_t fPatStringLen;
michael@0 41 RegexMatcher *fMatcher;
michael@0 42 const UChar *fText; // Text from setText()
michael@0 43 int32_t fTextLength; // Length provided by user with setText(), which
michael@0 44 // may be -1.
michael@0 45 UBool fOwnsText;
michael@0 46 };
michael@0 47
michael@0 48 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
michael@0 49
michael@0 50 RegularExpression::RegularExpression() {
michael@0 51 fMagic = REXP_MAGIC;
michael@0 52 fPat = NULL;
michael@0 53 fPatRefCount = NULL;
michael@0 54 fPatString = NULL;
michael@0 55 fPatStringLen = 0;
michael@0 56 fMatcher = NULL;
michael@0 57 fText = NULL;
michael@0 58 fTextLength = 0;
michael@0 59 fOwnsText = FALSE;
michael@0 60 }
michael@0 61
michael@0 62 RegularExpression::~RegularExpression() {
michael@0 63 delete fMatcher;
michael@0 64 fMatcher = NULL;
michael@0 65 if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
michael@0 66 delete fPat;
michael@0 67 uprv_free(fPatString);
michael@0 68 uprv_free((void *)fPatRefCount);
michael@0 69 }
michael@0 70 if (fOwnsText && fText!=NULL) {
michael@0 71 uprv_free((void *)fText);
michael@0 72 }
michael@0 73 fMagic = 0;
michael@0 74 }
michael@0 75
michael@0 76 U_NAMESPACE_END
michael@0 77
michael@0 78 U_NAMESPACE_USE
michael@0 79
michael@0 80 //----------------------------------------------------------------------------------------
michael@0 81 //
michael@0 82 // validateRE Do boilerplate style checks on API function parameters.
michael@0 83 // Return TRUE if they look OK.
michael@0 84 //----------------------------------------------------------------------------------------
michael@0 85 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
michael@0 86 if (U_FAILURE(*status)) {
michael@0 87 return FALSE;
michael@0 88 }
michael@0 89 if (re == NULL || re->fMagic != REXP_MAGIC) {
michael@0 90 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 91 return FALSE;
michael@0 92 }
michael@0 93 // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
michael@0 94 if (requiresText && re->fText == NULL && !re->fOwnsText) {
michael@0 95 *status = U_REGEX_INVALID_STATE;
michael@0 96 return FALSE;
michael@0 97 }
michael@0 98 return TRUE;
michael@0 99 }
michael@0 100
michael@0 101 //----------------------------------------------------------------------------------------
michael@0 102 //
michael@0 103 // uregex_open
michael@0 104 //
michael@0 105 //----------------------------------------------------------------------------------------
michael@0 106 U_CAPI URegularExpression * U_EXPORT2
michael@0 107 uregex_open( const UChar *pattern,
michael@0 108 int32_t patternLength,
michael@0 109 uint32_t flags,
michael@0 110 UParseError *pe,
michael@0 111 UErrorCode *status) {
michael@0 112
michael@0 113 if (U_FAILURE(*status)) {
michael@0 114 return NULL;
michael@0 115 }
michael@0 116 if (pattern == NULL || patternLength < -1 || patternLength == 0) {
michael@0 117 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 118 return NULL;
michael@0 119 }
michael@0 120 int32_t actualPatLen = patternLength;
michael@0 121 if (actualPatLen == -1) {
michael@0 122 actualPatLen = u_strlen(pattern);
michael@0 123 }
michael@0 124
michael@0 125 RegularExpression *re = new RegularExpression;
michael@0 126 u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
michael@0 127 UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
michael@0 128 if (re == NULL || refC == NULL || patBuf == NULL) {
michael@0 129 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 130 delete re;
michael@0 131 uprv_free((void *)refC);
michael@0 132 uprv_free(patBuf);
michael@0 133 return NULL;
michael@0 134 }
michael@0 135 re->fPatRefCount = refC;
michael@0 136 *re->fPatRefCount = 1;
michael@0 137
michael@0 138 //
michael@0 139 // Make a copy of the pattern string, so we can return it later if asked.
michael@0 140 // For compiling the pattern, we will use a UText wrapper around
michael@0 141 // this local copy, to avoid making even more copies.
michael@0 142 //
michael@0 143 re->fPatString = patBuf;
michael@0 144 re->fPatStringLen = patternLength;
michael@0 145 u_memcpy(patBuf, pattern, actualPatLen);
michael@0 146 patBuf[actualPatLen] = 0;
michael@0 147
michael@0 148 UText patText = UTEXT_INITIALIZER;
michael@0 149 utext_openUChars(&patText, patBuf, patternLength, status);
michael@0 150
michael@0 151 //
michael@0 152 // Compile the pattern
michael@0 153 //
michael@0 154 if (pe != NULL) {
michael@0 155 re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
michael@0 156 } else {
michael@0 157 re->fPat = RegexPattern::compile(&patText, flags, *status);
michael@0 158 }
michael@0 159 utext_close(&patText);
michael@0 160
michael@0 161 if (U_FAILURE(*status)) {
michael@0 162 goto ErrorExit;
michael@0 163 }
michael@0 164
michael@0 165 //
michael@0 166 // Create the matcher object
michael@0 167 //
michael@0 168 re->fMatcher = re->fPat->matcher(*status);
michael@0 169 if (U_SUCCESS(*status)) {
michael@0 170 return (URegularExpression*)re;
michael@0 171 }
michael@0 172
michael@0 173 ErrorExit:
michael@0 174 delete re;
michael@0 175 return NULL;
michael@0 176
michael@0 177 }
michael@0 178
michael@0 179 //----------------------------------------------------------------------------------------
michael@0 180 //
michael@0 181 // uregex_openUText
michael@0 182 //
michael@0 183 //----------------------------------------------------------------------------------------
michael@0 184 U_CAPI URegularExpression * U_EXPORT2
michael@0 185 uregex_openUText(UText *pattern,
michael@0 186 uint32_t flags,
michael@0 187 UParseError *pe,
michael@0 188 UErrorCode *status) {
michael@0 189
michael@0 190 if (U_FAILURE(*status)) {
michael@0 191 return NULL;
michael@0 192 }
michael@0 193 if (pattern == NULL) {
michael@0 194 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 195 return NULL;
michael@0 196 }
michael@0 197
michael@0 198 int64_t patternNativeLength = utext_nativeLength(pattern);
michael@0 199
michael@0 200 if (patternNativeLength == 0) {
michael@0 201 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 202 return NULL;
michael@0 203 }
michael@0 204
michael@0 205 RegularExpression *re = new RegularExpression;
michael@0 206
michael@0 207 UErrorCode lengthStatus = U_ZERO_ERROR;
michael@0 208 int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
michael@0 209
michael@0 210 u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
michael@0 211 UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
michael@0 212 if (re == NULL || refC == NULL || patBuf == NULL) {
michael@0 213 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 214 delete re;
michael@0 215 uprv_free((void *)refC);
michael@0 216 uprv_free(patBuf);
michael@0 217 return NULL;
michael@0 218 }
michael@0 219 re->fPatRefCount = refC;
michael@0 220 *re->fPatRefCount = 1;
michael@0 221
michael@0 222 //
michael@0 223 // Make a copy of the pattern string, so we can return it later if asked.
michael@0 224 // For compiling the pattern, we will use a read-only UText wrapper
michael@0 225 // around this local copy, to avoid making even more copies.
michael@0 226 //
michael@0 227 re->fPatString = patBuf;
michael@0 228 re->fPatStringLen = pattern16Length;
michael@0 229 utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
michael@0 230
michael@0 231 UText patText = UTEXT_INITIALIZER;
michael@0 232 utext_openUChars(&patText, patBuf, pattern16Length, status);
michael@0 233
michael@0 234 //
michael@0 235 // Compile the pattern
michael@0 236 //
michael@0 237 if (pe != NULL) {
michael@0 238 re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
michael@0 239 } else {
michael@0 240 re->fPat = RegexPattern::compile(&patText, flags, *status);
michael@0 241 }
michael@0 242 utext_close(&patText);
michael@0 243
michael@0 244 if (U_FAILURE(*status)) {
michael@0 245 goto ErrorExit;
michael@0 246 }
michael@0 247
michael@0 248 //
michael@0 249 // Create the matcher object
michael@0 250 //
michael@0 251 re->fMatcher = re->fPat->matcher(*status);
michael@0 252 if (U_SUCCESS(*status)) {
michael@0 253 return (URegularExpression*)re;
michael@0 254 }
michael@0 255
michael@0 256 ErrorExit:
michael@0 257 delete re;
michael@0 258 return NULL;
michael@0 259
michael@0 260 }
michael@0 261
michael@0 262 //----------------------------------------------------------------------------------------
michael@0 263 //
michael@0 264 // uregex_close
michael@0 265 //
michael@0 266 //----------------------------------------------------------------------------------------
michael@0 267 U_CAPI void U_EXPORT2
michael@0 268 uregex_close(URegularExpression *re2) {
michael@0 269 RegularExpression *re = (RegularExpression*)re2;
michael@0 270 UErrorCode status = U_ZERO_ERROR;
michael@0 271 if (validateRE(re, FALSE, &status) == FALSE) {
michael@0 272 return;
michael@0 273 }
michael@0 274 delete re;
michael@0 275 }
michael@0 276
michael@0 277
michael@0 278 //----------------------------------------------------------------------------------------
michael@0 279 //
michael@0 280 // uregex_clone
michael@0 281 //
michael@0 282 //----------------------------------------------------------------------------------------
michael@0 283 U_CAPI URegularExpression * U_EXPORT2
michael@0 284 uregex_clone(const URegularExpression *source2, UErrorCode *status) {
michael@0 285 RegularExpression *source = (RegularExpression*)source2;
michael@0 286 if (validateRE(source, FALSE, status) == FALSE) {
michael@0 287 return NULL;
michael@0 288 }
michael@0 289
michael@0 290 RegularExpression *clone = new RegularExpression;
michael@0 291 if (clone == NULL) {
michael@0 292 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 293 return NULL;
michael@0 294 }
michael@0 295
michael@0 296 clone->fMatcher = source->fPat->matcher(*status);
michael@0 297 if (U_FAILURE(*status)) {
michael@0 298 delete clone;
michael@0 299 return NULL;
michael@0 300 }
michael@0 301
michael@0 302 clone->fPat = source->fPat;
michael@0 303 clone->fPatRefCount = source->fPatRefCount;
michael@0 304 clone->fPatString = source->fPatString;
michael@0 305 clone->fPatStringLen = source->fPatStringLen;
michael@0 306 umtx_atomic_inc(source->fPatRefCount);
michael@0 307 // Note: fText is not cloned.
michael@0 308
michael@0 309 return (URegularExpression*)clone;
michael@0 310 }
michael@0 311
michael@0 312
michael@0 313
michael@0 314
michael@0 315 //------------------------------------------------------------------------------
michael@0 316 //
michael@0 317 // uregex_pattern
michael@0 318 //
michael@0 319 //------------------------------------------------------------------------------
michael@0 320 U_CAPI const UChar * U_EXPORT2
michael@0 321 uregex_pattern(const URegularExpression *regexp2,
michael@0 322 int32_t *patLength,
michael@0 323 UErrorCode *status) {
michael@0 324 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 325
michael@0 326 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 327 return NULL;
michael@0 328 }
michael@0 329 if (patLength != NULL) {
michael@0 330 *patLength = regexp->fPatStringLen;
michael@0 331 }
michael@0 332 return regexp->fPatString;
michael@0 333 }
michael@0 334
michael@0 335
michael@0 336 //------------------------------------------------------------------------------
michael@0 337 //
michael@0 338 // uregex_patternUText
michael@0 339 //
michael@0 340 //------------------------------------------------------------------------------
michael@0 341 U_CAPI UText * U_EXPORT2
michael@0 342 uregex_patternUText(const URegularExpression *regexp2,
michael@0 343 UErrorCode *status) {
michael@0 344 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 345 return regexp->fPat->patternText(*status);
michael@0 346 }
michael@0 347
michael@0 348
michael@0 349 //------------------------------------------------------------------------------
michael@0 350 //
michael@0 351 // uregex_flags
michael@0 352 //
michael@0 353 //------------------------------------------------------------------------------
michael@0 354 U_CAPI int32_t U_EXPORT2
michael@0 355 uregex_flags(const URegularExpression *regexp2, UErrorCode *status) {
michael@0 356 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 357 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 358 return 0;
michael@0 359 }
michael@0 360 int32_t flags = regexp->fPat->flags();
michael@0 361 return flags;
michael@0 362 }
michael@0 363
michael@0 364
michael@0 365 //------------------------------------------------------------------------------
michael@0 366 //
michael@0 367 // uregex_setText
michael@0 368 //
michael@0 369 //------------------------------------------------------------------------------
michael@0 370 U_CAPI void U_EXPORT2
michael@0 371 uregex_setText(URegularExpression *regexp2,
michael@0 372 const UChar *text,
michael@0 373 int32_t textLength,
michael@0 374 UErrorCode *status) {
michael@0 375 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 376 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 377 return;
michael@0 378 }
michael@0 379 if (text == NULL || textLength < -1) {
michael@0 380 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 381 return;
michael@0 382 }
michael@0 383
michael@0 384 if (regexp->fOwnsText && regexp->fText != NULL) {
michael@0 385 uprv_free((void *)regexp->fText);
michael@0 386 }
michael@0 387
michael@0 388 regexp->fText = text;
michael@0 389 regexp->fTextLength = textLength;
michael@0 390 regexp->fOwnsText = FALSE;
michael@0 391
michael@0 392 UText input = UTEXT_INITIALIZER;
michael@0 393 utext_openUChars(&input, text, textLength, status);
michael@0 394 regexp->fMatcher->reset(&input);
michael@0 395 utext_close(&input); // reset() made a shallow clone, so we don't need this copy
michael@0 396 }
michael@0 397
michael@0 398
michael@0 399 //------------------------------------------------------------------------------
michael@0 400 //
michael@0 401 // uregex_setUText
michael@0 402 //
michael@0 403 //------------------------------------------------------------------------------
michael@0 404 U_CAPI void U_EXPORT2
michael@0 405 uregex_setUText(URegularExpression *regexp2,
michael@0 406 UText *text,
michael@0 407 UErrorCode *status) {
michael@0 408 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 409 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 410 return;
michael@0 411 }
michael@0 412 if (text == NULL) {
michael@0 413 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 414 return;
michael@0 415 }
michael@0 416
michael@0 417 if (regexp->fOwnsText && regexp->fText != NULL) {
michael@0 418 uprv_free((void *)regexp->fText);
michael@0 419 }
michael@0 420
michael@0 421 regexp->fText = NULL; // only fill it in on request
michael@0 422 regexp->fTextLength = -1;
michael@0 423 regexp->fOwnsText = TRUE;
michael@0 424 regexp->fMatcher->reset(text);
michael@0 425 }
michael@0 426
michael@0 427
michael@0 428
michael@0 429 //------------------------------------------------------------------------------
michael@0 430 //
michael@0 431 // uregex_getText
michael@0 432 //
michael@0 433 //------------------------------------------------------------------------------
michael@0 434 U_CAPI const UChar * U_EXPORT2
michael@0 435 uregex_getText(URegularExpression *regexp2,
michael@0 436 int32_t *textLength,
michael@0 437 UErrorCode *status) {
michael@0 438 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 439 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 440 return NULL;
michael@0 441 }
michael@0 442
michael@0 443 if (regexp->fText == NULL) {
michael@0 444 // need to fill in the text
michael@0 445 UText *inputText = regexp->fMatcher->inputText();
michael@0 446 int64_t inputNativeLength = utext_nativeLength(inputText);
michael@0 447 if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
michael@0 448 regexp->fText = inputText->chunkContents;
michael@0 449 regexp->fTextLength = (int32_t)inputNativeLength;
michael@0 450 regexp->fOwnsText = FALSE; // because the UText owns it
michael@0 451 } else {
michael@0 452 UErrorCode lengthStatus = U_ZERO_ERROR;
michael@0 453 regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
michael@0 454 UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
michael@0 455
michael@0 456 utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
michael@0 457 regexp->fText = inputChars;
michael@0 458 regexp->fOwnsText = TRUE; // should already be set but just in case
michael@0 459 }
michael@0 460 }
michael@0 461
michael@0 462 if (textLength != NULL) {
michael@0 463 *textLength = regexp->fTextLength;
michael@0 464 }
michael@0 465 return regexp->fText;
michael@0 466 }
michael@0 467
michael@0 468
michael@0 469 //------------------------------------------------------------------------------
michael@0 470 //
michael@0 471 // uregex_getUText
michael@0 472 //
michael@0 473 //------------------------------------------------------------------------------
michael@0 474 U_CAPI UText * U_EXPORT2
michael@0 475 uregex_getUText(URegularExpression *regexp2,
michael@0 476 UText *dest,
michael@0 477 UErrorCode *status) {
michael@0 478 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 479 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 480 return dest;
michael@0 481 }
michael@0 482 return regexp->fMatcher->getInput(dest, *status);
michael@0 483 }
michael@0 484
michael@0 485
michael@0 486 //------------------------------------------------------------------------------
michael@0 487 //
michael@0 488 // uregex_refreshUText
michael@0 489 //
michael@0 490 //------------------------------------------------------------------------------
michael@0 491 U_CAPI void U_EXPORT2
michael@0 492 uregex_refreshUText(URegularExpression *regexp2,
michael@0 493 UText *text,
michael@0 494 UErrorCode *status) {
michael@0 495 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 496 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 497 return;
michael@0 498 }
michael@0 499 regexp->fMatcher->refreshInputText(text, *status);
michael@0 500 }
michael@0 501
michael@0 502
michael@0 503 //------------------------------------------------------------------------------
michael@0 504 //
michael@0 505 // uregex_matches
michael@0 506 //
michael@0 507 //------------------------------------------------------------------------------
michael@0 508 U_CAPI UBool U_EXPORT2
michael@0 509 uregex_matches(URegularExpression *regexp2,
michael@0 510 int32_t startIndex,
michael@0 511 UErrorCode *status) {
michael@0 512 return uregex_matches64( regexp2, (int64_t)startIndex, status);
michael@0 513 }
michael@0 514
michael@0 515 U_CAPI UBool U_EXPORT2
michael@0 516 uregex_matches64(URegularExpression *regexp2,
michael@0 517 int64_t startIndex,
michael@0 518 UErrorCode *status) {
michael@0 519 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 520 UBool result = FALSE;
michael@0 521 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 522 return result;
michael@0 523 }
michael@0 524 if (startIndex == -1) {
michael@0 525 result = regexp->fMatcher->matches(*status);
michael@0 526 } else {
michael@0 527 result = regexp->fMatcher->matches(startIndex, *status);
michael@0 528 }
michael@0 529 return result;
michael@0 530 }
michael@0 531
michael@0 532
michael@0 533 //------------------------------------------------------------------------------
michael@0 534 //
michael@0 535 // uregex_lookingAt
michael@0 536 //
michael@0 537 //------------------------------------------------------------------------------
michael@0 538 U_CAPI UBool U_EXPORT2
michael@0 539 uregex_lookingAt(URegularExpression *regexp2,
michael@0 540 int32_t startIndex,
michael@0 541 UErrorCode *status) {
michael@0 542 return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
michael@0 543 }
michael@0 544
michael@0 545 U_CAPI UBool U_EXPORT2
michael@0 546 uregex_lookingAt64(URegularExpression *regexp2,
michael@0 547 int64_t startIndex,
michael@0 548 UErrorCode *status) {
michael@0 549 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 550 UBool result = FALSE;
michael@0 551 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 552 return result;
michael@0 553 }
michael@0 554 if (startIndex == -1) {
michael@0 555 result = regexp->fMatcher->lookingAt(*status);
michael@0 556 } else {
michael@0 557 result = regexp->fMatcher->lookingAt(startIndex, *status);
michael@0 558 }
michael@0 559 return result;
michael@0 560 }
michael@0 561
michael@0 562
michael@0 563
michael@0 564 //------------------------------------------------------------------------------
michael@0 565 //
michael@0 566 // uregex_find
michael@0 567 //
michael@0 568 //------------------------------------------------------------------------------
michael@0 569 U_CAPI UBool U_EXPORT2
michael@0 570 uregex_find(URegularExpression *regexp2,
michael@0 571 int32_t startIndex,
michael@0 572 UErrorCode *status) {
michael@0 573 return uregex_find64( regexp2, (int64_t)startIndex, status);
michael@0 574 }
michael@0 575
michael@0 576 U_CAPI UBool U_EXPORT2
michael@0 577 uregex_find64(URegularExpression *regexp2,
michael@0 578 int64_t startIndex,
michael@0 579 UErrorCode *status) {
michael@0 580 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 581 UBool result = FALSE;
michael@0 582 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 583 return result;
michael@0 584 }
michael@0 585 if (startIndex == -1) {
michael@0 586 regexp->fMatcher->resetPreserveRegion();
michael@0 587 result = regexp->fMatcher->find();
michael@0 588 } else {
michael@0 589 result = regexp->fMatcher->find(startIndex, *status);
michael@0 590 }
michael@0 591 return result;
michael@0 592 }
michael@0 593
michael@0 594
michael@0 595 //------------------------------------------------------------------------------
michael@0 596 //
michael@0 597 // uregex_findNext
michael@0 598 //
michael@0 599 //------------------------------------------------------------------------------
michael@0 600 U_CAPI UBool U_EXPORT2
michael@0 601 uregex_findNext(URegularExpression *regexp2,
michael@0 602 UErrorCode *status) {
michael@0 603 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 604 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 605 return FALSE;
michael@0 606 }
michael@0 607 UBool result = regexp->fMatcher->find();
michael@0 608 return result;
michael@0 609 }
michael@0 610
michael@0 611 //------------------------------------------------------------------------------
michael@0 612 //
michael@0 613 // uregex_groupCount
michael@0 614 //
michael@0 615 //------------------------------------------------------------------------------
michael@0 616 U_CAPI int32_t U_EXPORT2
michael@0 617 uregex_groupCount(URegularExpression *regexp2,
michael@0 618 UErrorCode *status) {
michael@0 619 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 620 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 621 return 0;
michael@0 622 }
michael@0 623 int32_t result = regexp->fMatcher->groupCount();
michael@0 624 return result;
michael@0 625 }
michael@0 626
michael@0 627
michael@0 628 //------------------------------------------------------------------------------
michael@0 629 //
michael@0 630 // uregex_group
michael@0 631 //
michael@0 632 //------------------------------------------------------------------------------
michael@0 633 U_CAPI int32_t U_EXPORT2
michael@0 634 uregex_group(URegularExpression *regexp2,
michael@0 635 int32_t groupNum,
michael@0 636 UChar *dest,
michael@0 637 int32_t destCapacity,
michael@0 638 UErrorCode *status) {
michael@0 639 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 640 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 641 return 0;
michael@0 642 }
michael@0 643 if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
michael@0 644 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 645 return 0;
michael@0 646 }
michael@0 647
michael@0 648 if (destCapacity == 0 || regexp->fText != NULL) {
michael@0 649 // If preflighting or if we already have the text as UChars,
michael@0 650 // this is a little cheaper than going through uregex_groupUTextDeep()
michael@0 651
michael@0 652 //
michael@0 653 // Pick up the range of characters from the matcher
michael@0 654 //
michael@0 655 int32_t startIx = regexp->fMatcher->start(groupNum, *status);
michael@0 656 int32_t endIx = regexp->fMatcher->end (groupNum, *status);
michael@0 657 if (U_FAILURE(*status)) {
michael@0 658 return 0;
michael@0 659 }
michael@0 660
michael@0 661 //
michael@0 662 // Trim length based on buffer capacity
michael@0 663 //
michael@0 664 int32_t fullLength = endIx - startIx;
michael@0 665 int32_t copyLength = fullLength;
michael@0 666 if (copyLength < destCapacity) {
michael@0 667 dest[copyLength] = 0;
michael@0 668 } else if (copyLength == destCapacity) {
michael@0 669 *status = U_STRING_NOT_TERMINATED_WARNING;
michael@0 670 } else {
michael@0 671 copyLength = destCapacity;
michael@0 672 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 673 }
michael@0 674
michael@0 675 //
michael@0 676 // Copy capture group to user's buffer
michael@0 677 //
michael@0 678 if (copyLength > 0) {
michael@0 679 u_memcpy(dest, &regexp->fText[startIx], copyLength);
michael@0 680 }
michael@0 681 return fullLength;
michael@0 682 } else {
michael@0 683 UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
michael@0 684 int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
michael@0 685 utext_close(groupText);
michael@0 686 return result;
michael@0 687 }
michael@0 688 }
michael@0 689
michael@0 690
michael@0 691 //------------------------------------------------------------------------------
michael@0 692 //
michael@0 693 // uregex_groupUText
michael@0 694 //
michael@0 695 //------------------------------------------------------------------------------
michael@0 696 U_CAPI UText * U_EXPORT2
michael@0 697 uregex_groupUText(URegularExpression *regexp2,
michael@0 698 int32_t groupNum,
michael@0 699 UText *dest,
michael@0 700 int64_t *groupLength,
michael@0 701 UErrorCode *status) {
michael@0 702 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 703 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 704 UErrorCode emptyTextStatus = U_ZERO_ERROR;
michael@0 705 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
michael@0 706 }
michael@0 707
michael@0 708 return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
michael@0 709 }
michael@0 710
michael@0 711 //------------------------------------------------------------------------------
michael@0 712 //
michael@0 713 // uregex_groupUTextDeep
michael@0 714 //
michael@0 715 //------------------------------------------------------------------------------
michael@0 716 U_CAPI UText * U_EXPORT2
michael@0 717 uregex_groupUTextDeep(URegularExpression *regexp2,
michael@0 718 int32_t groupNum,
michael@0 719 UText *dest,
michael@0 720 UErrorCode *status) {
michael@0 721 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 722 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 723 UErrorCode emptyTextStatus = U_ZERO_ERROR;
michael@0 724 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
michael@0 725 }
michael@0 726
michael@0 727 if (regexp->fText != NULL) {
michael@0 728 //
michael@0 729 // Pick up the range of characters from the matcher
michael@0 730 // and use our already-extracted characters
michael@0 731 //
michael@0 732 int32_t startIx = regexp->fMatcher->start(groupNum, *status);
michael@0 733 int32_t endIx = regexp->fMatcher->end (groupNum, *status);
michael@0 734 if (U_FAILURE(*status)) {
michael@0 735 UErrorCode emptyTextStatus = U_ZERO_ERROR;
michael@0 736 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
michael@0 737 }
michael@0 738
michael@0 739 if (dest) {
michael@0 740 utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
michael@0 741 } else {
michael@0 742 UText groupText = UTEXT_INITIALIZER;
michael@0 743 utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
michael@0 744 dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
michael@0 745 utext_close(&groupText);
michael@0 746 }
michael@0 747
michael@0 748 return dest;
michael@0 749 } else {
michael@0 750 return regexp->fMatcher->group(groupNum, dest, *status);
michael@0 751 }
michael@0 752 }
michael@0 753
michael@0 754 //------------------------------------------------------------------------------
michael@0 755 //
michael@0 756 // uregex_start
michael@0 757 //
michael@0 758 //------------------------------------------------------------------------------
michael@0 759 U_CAPI int32_t U_EXPORT2
michael@0 760 uregex_start(URegularExpression *regexp2,
michael@0 761 int32_t groupNum,
michael@0 762 UErrorCode *status) {
michael@0 763 return (int32_t)uregex_start64( regexp2, groupNum, status);
michael@0 764 }
michael@0 765
michael@0 766 U_CAPI int64_t U_EXPORT2
michael@0 767 uregex_start64(URegularExpression *regexp2,
michael@0 768 int32_t groupNum,
michael@0 769 UErrorCode *status) {
michael@0 770 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 771 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 772 return 0;
michael@0 773 }
michael@0 774 int32_t result = regexp->fMatcher->start(groupNum, *status);
michael@0 775 return result;
michael@0 776 }
michael@0 777
michael@0 778 //------------------------------------------------------------------------------
michael@0 779 //
michael@0 780 // uregex_end
michael@0 781 //
michael@0 782 //------------------------------------------------------------------------------
michael@0 783 U_CAPI int32_t U_EXPORT2
michael@0 784 uregex_end(URegularExpression *regexp2,
michael@0 785 int32_t groupNum,
michael@0 786 UErrorCode *status) {
michael@0 787 return (int32_t)uregex_end64( regexp2, groupNum, status);
michael@0 788 }
michael@0 789
michael@0 790 U_CAPI int64_t U_EXPORT2
michael@0 791 uregex_end64(URegularExpression *regexp2,
michael@0 792 int32_t groupNum,
michael@0 793 UErrorCode *status) {
michael@0 794 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 795 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 796 return 0;
michael@0 797 }
michael@0 798 int32_t result = regexp->fMatcher->end(groupNum, *status);
michael@0 799 return result;
michael@0 800 }
michael@0 801
michael@0 802 //------------------------------------------------------------------------------
michael@0 803 //
michael@0 804 // uregex_reset
michael@0 805 //
michael@0 806 //------------------------------------------------------------------------------
michael@0 807 U_CAPI void U_EXPORT2
michael@0 808 uregex_reset(URegularExpression *regexp2,
michael@0 809 int32_t index,
michael@0 810 UErrorCode *status) {
michael@0 811 uregex_reset64( regexp2, (int64_t)index, status);
michael@0 812 }
michael@0 813
michael@0 814 U_CAPI void U_EXPORT2
michael@0 815 uregex_reset64(URegularExpression *regexp2,
michael@0 816 int64_t index,
michael@0 817 UErrorCode *status) {
michael@0 818 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 819 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 820 return;
michael@0 821 }
michael@0 822 regexp->fMatcher->reset(index, *status);
michael@0 823 }
michael@0 824
michael@0 825
michael@0 826 //------------------------------------------------------------------------------
michael@0 827 //
michael@0 828 // uregex_setRegion
michael@0 829 //
michael@0 830 //------------------------------------------------------------------------------
michael@0 831 U_CAPI void U_EXPORT2
michael@0 832 uregex_setRegion(URegularExpression *regexp2,
michael@0 833 int32_t regionStart,
michael@0 834 int32_t regionLimit,
michael@0 835 UErrorCode *status) {
michael@0 836 uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
michael@0 837 }
michael@0 838
michael@0 839 U_CAPI void U_EXPORT2
michael@0 840 uregex_setRegion64(URegularExpression *regexp2,
michael@0 841 int64_t regionStart,
michael@0 842 int64_t regionLimit,
michael@0 843 UErrorCode *status) {
michael@0 844 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 845 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 846 return;
michael@0 847 }
michael@0 848 regexp->fMatcher->region(regionStart, regionLimit, *status);
michael@0 849 }
michael@0 850
michael@0 851
michael@0 852 //------------------------------------------------------------------------------
michael@0 853 //
michael@0 854 // uregex_setRegionAndStart
michael@0 855 //
michael@0 856 //------------------------------------------------------------------------------
michael@0 857 U_CAPI void U_EXPORT2
michael@0 858 uregex_setRegionAndStart(URegularExpression *regexp2,
michael@0 859 int64_t regionStart,
michael@0 860 int64_t regionLimit,
michael@0 861 int64_t startIndex,
michael@0 862 UErrorCode *status) {
michael@0 863 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 864 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 865 return;
michael@0 866 }
michael@0 867 regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
michael@0 868 }
michael@0 869
michael@0 870 //------------------------------------------------------------------------------
michael@0 871 //
michael@0 872 // uregex_regionStart
michael@0 873 //
michael@0 874 //------------------------------------------------------------------------------
michael@0 875 U_CAPI int32_t U_EXPORT2
michael@0 876 uregex_regionStart(const URegularExpression *regexp2,
michael@0 877 UErrorCode *status) {
michael@0 878 return (int32_t)uregex_regionStart64(regexp2, status);
michael@0 879 }
michael@0 880
michael@0 881 U_CAPI int64_t U_EXPORT2
michael@0 882 uregex_regionStart64(const URegularExpression *regexp2,
michael@0 883 UErrorCode *status) {
michael@0 884 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 885 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 886 return 0;
michael@0 887 }
michael@0 888 return regexp->fMatcher->regionStart();
michael@0 889 }
michael@0 890
michael@0 891
michael@0 892 //------------------------------------------------------------------------------
michael@0 893 //
michael@0 894 // uregex_regionEnd
michael@0 895 //
michael@0 896 //------------------------------------------------------------------------------
michael@0 897 U_CAPI int32_t U_EXPORT2
michael@0 898 uregex_regionEnd(const URegularExpression *regexp2,
michael@0 899 UErrorCode *status) {
michael@0 900 return (int32_t)uregex_regionEnd64(regexp2, status);
michael@0 901 }
michael@0 902
michael@0 903 U_CAPI int64_t U_EXPORT2
michael@0 904 uregex_regionEnd64(const URegularExpression *regexp2,
michael@0 905 UErrorCode *status) {
michael@0 906 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 907 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 908 return 0;
michael@0 909 }
michael@0 910 return regexp->fMatcher->regionEnd();
michael@0 911 }
michael@0 912
michael@0 913
michael@0 914 //------------------------------------------------------------------------------
michael@0 915 //
michael@0 916 // uregex_hasTransparentBounds
michael@0 917 //
michael@0 918 //------------------------------------------------------------------------------
michael@0 919 U_CAPI UBool U_EXPORT2
michael@0 920 uregex_hasTransparentBounds(const URegularExpression *regexp2,
michael@0 921 UErrorCode *status) {
michael@0 922 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 923 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 924 return FALSE;
michael@0 925 }
michael@0 926 return regexp->fMatcher->hasTransparentBounds();
michael@0 927 }
michael@0 928
michael@0 929
michael@0 930 //------------------------------------------------------------------------------
michael@0 931 //
michael@0 932 // uregex_useTransparentBounds
michael@0 933 //
michael@0 934 //------------------------------------------------------------------------------
michael@0 935 U_CAPI void U_EXPORT2
michael@0 936 uregex_useTransparentBounds(URegularExpression *regexp2,
michael@0 937 UBool b,
michael@0 938 UErrorCode *status) {
michael@0 939 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 940 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 941 return;
michael@0 942 }
michael@0 943 regexp->fMatcher->useTransparentBounds(b);
michael@0 944 }
michael@0 945
michael@0 946
michael@0 947 //------------------------------------------------------------------------------
michael@0 948 //
michael@0 949 // uregex_hasAnchoringBounds
michael@0 950 //
michael@0 951 //------------------------------------------------------------------------------
michael@0 952 U_CAPI UBool U_EXPORT2
michael@0 953 uregex_hasAnchoringBounds(const URegularExpression *regexp2,
michael@0 954 UErrorCode *status) {
michael@0 955 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 956 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 957 return FALSE;
michael@0 958 }
michael@0 959 return regexp->fMatcher->hasAnchoringBounds();
michael@0 960 }
michael@0 961
michael@0 962
michael@0 963 //------------------------------------------------------------------------------
michael@0 964 //
michael@0 965 // uregex_useAnchoringBounds
michael@0 966 //
michael@0 967 //------------------------------------------------------------------------------
michael@0 968 U_CAPI void U_EXPORT2
michael@0 969 uregex_useAnchoringBounds(URegularExpression *regexp2,
michael@0 970 UBool b,
michael@0 971 UErrorCode *status) {
michael@0 972 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 973 if (validateRE(regexp, FALSE, status) == FALSE) {
michael@0 974 return;
michael@0 975 }
michael@0 976 regexp->fMatcher->useAnchoringBounds(b);
michael@0 977 }
michael@0 978
michael@0 979
michael@0 980 //------------------------------------------------------------------------------
michael@0 981 //
michael@0 982 // uregex_hitEnd
michael@0 983 //
michael@0 984 //------------------------------------------------------------------------------
michael@0 985 U_CAPI UBool U_EXPORT2
michael@0 986 uregex_hitEnd(const URegularExpression *regexp2,
michael@0 987 UErrorCode *status) {
michael@0 988 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 989 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 990 return FALSE;
michael@0 991 }
michael@0 992 return regexp->fMatcher->hitEnd();
michael@0 993 }
michael@0 994
michael@0 995
michael@0 996 //------------------------------------------------------------------------------
michael@0 997 //
michael@0 998 // uregex_requireEnd
michael@0 999 //
michael@0 1000 //------------------------------------------------------------------------------
michael@0 1001 U_CAPI UBool U_EXPORT2
michael@0 1002 uregex_requireEnd(const URegularExpression *regexp2,
michael@0 1003 UErrorCode *status) {
michael@0 1004 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1005 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1006 return FALSE;
michael@0 1007 }
michael@0 1008 return regexp->fMatcher->requireEnd();
michael@0 1009 }
michael@0 1010
michael@0 1011
michael@0 1012 //------------------------------------------------------------------------------
michael@0 1013 //
michael@0 1014 // uregex_setTimeLimit
michael@0 1015 //
michael@0 1016 //------------------------------------------------------------------------------
michael@0 1017 U_CAPI void U_EXPORT2
michael@0 1018 uregex_setTimeLimit(URegularExpression *regexp2,
michael@0 1019 int32_t limit,
michael@0 1020 UErrorCode *status) {
michael@0 1021 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1022 if (validateRE(regexp, FALSE, status)) {
michael@0 1023 regexp->fMatcher->setTimeLimit(limit, *status);
michael@0 1024 }
michael@0 1025 }
michael@0 1026
michael@0 1027
michael@0 1028
michael@0 1029 //------------------------------------------------------------------------------
michael@0 1030 //
michael@0 1031 // uregex_getTimeLimit
michael@0 1032 //
michael@0 1033 //------------------------------------------------------------------------------
michael@0 1034 U_CAPI int32_t U_EXPORT2
michael@0 1035 uregex_getTimeLimit(const URegularExpression *regexp2,
michael@0 1036 UErrorCode *status) {
michael@0 1037 int32_t retVal = 0;
michael@0 1038 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1039 if (validateRE(regexp, FALSE, status)) {
michael@0 1040 retVal = regexp->fMatcher->getTimeLimit();
michael@0 1041 }
michael@0 1042 return retVal;
michael@0 1043 }
michael@0 1044
michael@0 1045
michael@0 1046
michael@0 1047 //------------------------------------------------------------------------------
michael@0 1048 //
michael@0 1049 // uregex_setStackLimit
michael@0 1050 //
michael@0 1051 //------------------------------------------------------------------------------
michael@0 1052 U_CAPI void U_EXPORT2
michael@0 1053 uregex_setStackLimit(URegularExpression *regexp2,
michael@0 1054 int32_t limit,
michael@0 1055 UErrorCode *status) {
michael@0 1056 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1057 if (validateRE(regexp, FALSE, status)) {
michael@0 1058 regexp->fMatcher->setStackLimit(limit, *status);
michael@0 1059 }
michael@0 1060 }
michael@0 1061
michael@0 1062
michael@0 1063
michael@0 1064 //------------------------------------------------------------------------------
michael@0 1065 //
michael@0 1066 // uregex_getStackLimit
michael@0 1067 //
michael@0 1068 //------------------------------------------------------------------------------
michael@0 1069 U_CAPI int32_t U_EXPORT2
michael@0 1070 uregex_getStackLimit(const URegularExpression *regexp2,
michael@0 1071 UErrorCode *status) {
michael@0 1072 int32_t retVal = 0;
michael@0 1073 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1074 if (validateRE(regexp, FALSE, status)) {
michael@0 1075 retVal = regexp->fMatcher->getStackLimit();
michael@0 1076 }
michael@0 1077 return retVal;
michael@0 1078 }
michael@0 1079
michael@0 1080
michael@0 1081 //------------------------------------------------------------------------------
michael@0 1082 //
michael@0 1083 // uregex_setMatchCallback
michael@0 1084 //
michael@0 1085 //------------------------------------------------------------------------------
michael@0 1086 U_CAPI void U_EXPORT2
michael@0 1087 uregex_setMatchCallback(URegularExpression *regexp2,
michael@0 1088 URegexMatchCallback *callback,
michael@0 1089 const void *context,
michael@0 1090 UErrorCode *status) {
michael@0 1091 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1092 if (validateRE(regexp, FALSE, status)) {
michael@0 1093 regexp->fMatcher->setMatchCallback(callback, context, *status);
michael@0 1094 }
michael@0 1095 }
michael@0 1096
michael@0 1097
michael@0 1098 //------------------------------------------------------------------------------
michael@0 1099 //
michael@0 1100 // uregex_getMatchCallback
michael@0 1101 //
michael@0 1102 //------------------------------------------------------------------------------
michael@0 1103 U_CAPI void U_EXPORT2
michael@0 1104 uregex_getMatchCallback(const URegularExpression *regexp2,
michael@0 1105 URegexMatchCallback **callback,
michael@0 1106 const void **context,
michael@0 1107 UErrorCode *status) {
michael@0 1108 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1109 if (validateRE(regexp, FALSE, status)) {
michael@0 1110 regexp->fMatcher->getMatchCallback(*callback, *context, *status);
michael@0 1111 }
michael@0 1112 }
michael@0 1113
michael@0 1114
michael@0 1115 //------------------------------------------------------------------------------
michael@0 1116 //
michael@0 1117 // uregex_setFindProgressCallback
michael@0 1118 //
michael@0 1119 //------------------------------------------------------------------------------
michael@0 1120 U_CAPI void U_EXPORT2
michael@0 1121 uregex_setFindProgressCallback(URegularExpression *regexp2,
michael@0 1122 URegexFindProgressCallback *callback,
michael@0 1123 const void *context,
michael@0 1124 UErrorCode *status) {
michael@0 1125 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1126 if (validateRE(regexp, FALSE, status)) {
michael@0 1127 regexp->fMatcher->setFindProgressCallback(callback, context, *status);
michael@0 1128 }
michael@0 1129 }
michael@0 1130
michael@0 1131
michael@0 1132 //------------------------------------------------------------------------------
michael@0 1133 //
michael@0 1134 // uregex_getFindProgressCallback
michael@0 1135 //
michael@0 1136 //------------------------------------------------------------------------------
michael@0 1137 U_CAPI void U_EXPORT2
michael@0 1138 uregex_getFindProgressCallback(const URegularExpression *regexp2,
michael@0 1139 URegexFindProgressCallback **callback,
michael@0 1140 const void **context,
michael@0 1141 UErrorCode *status) {
michael@0 1142 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1143 if (validateRE(regexp, FALSE, status)) {
michael@0 1144 regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
michael@0 1145 }
michael@0 1146 }
michael@0 1147
michael@0 1148
michael@0 1149 //------------------------------------------------------------------------------
michael@0 1150 //
michael@0 1151 // uregex_replaceAll
michael@0 1152 //
michael@0 1153 //------------------------------------------------------------------------------
michael@0 1154 U_CAPI int32_t U_EXPORT2
michael@0 1155 uregex_replaceAll(URegularExpression *regexp2,
michael@0 1156 const UChar *replacementText,
michael@0 1157 int32_t replacementLength,
michael@0 1158 UChar *destBuf,
michael@0 1159 int32_t destCapacity,
michael@0 1160 UErrorCode *status) {
michael@0 1161 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1162 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1163 return 0;
michael@0 1164 }
michael@0 1165 if (replacementText == NULL || replacementLength < -1 ||
michael@0 1166 (destBuf == NULL && destCapacity > 0) ||
michael@0 1167 destCapacity < 0) {
michael@0 1168 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1169 return 0;
michael@0 1170 }
michael@0 1171
michael@0 1172 int32_t len = 0;
michael@0 1173
michael@0 1174 uregex_reset(regexp2, 0, status);
michael@0 1175
michael@0 1176 // Note: Seperate error code variables for findNext() and appendReplacement()
michael@0 1177 // are used so that destination buffer overflow errors
michael@0 1178 // in appendReplacement won't stop findNext() from working.
michael@0 1179 // appendReplacement() and appendTail() special case incoming buffer
michael@0 1180 // overflow errors, continuing to return the correct length.
michael@0 1181 UErrorCode findStatus = *status;
michael@0 1182 while (uregex_findNext(regexp2, &findStatus)) {
michael@0 1183 len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
michael@0 1184 &destBuf, &destCapacity, status);
michael@0 1185 }
michael@0 1186 len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
michael@0 1187
michael@0 1188 if (U_FAILURE(findStatus)) {
michael@0 1189 // If anything went wrong with the findNext(), make that error trump
michael@0 1190 // whatever may have happened with the append() operations.
michael@0 1191 // Errors in findNext() are not expected.
michael@0 1192 *status = findStatus;
michael@0 1193 }
michael@0 1194
michael@0 1195 return len;
michael@0 1196 }
michael@0 1197
michael@0 1198
michael@0 1199 //------------------------------------------------------------------------------
michael@0 1200 //
michael@0 1201 // uregex_replaceAllUText
michael@0 1202 //
michael@0 1203 //------------------------------------------------------------------------------
michael@0 1204 U_CAPI UText * U_EXPORT2
michael@0 1205 uregex_replaceAllUText(URegularExpression *regexp2,
michael@0 1206 UText *replacementText,
michael@0 1207 UText *dest,
michael@0 1208 UErrorCode *status) {
michael@0 1209 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1210 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1211 return 0;
michael@0 1212 }
michael@0 1213 if (replacementText == NULL) {
michael@0 1214 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1215 return 0;
michael@0 1216 }
michael@0 1217
michael@0 1218 dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
michael@0 1219 return dest;
michael@0 1220 }
michael@0 1221
michael@0 1222
michael@0 1223 //------------------------------------------------------------------------------
michael@0 1224 //
michael@0 1225 // uregex_replaceFirst
michael@0 1226 //
michael@0 1227 //------------------------------------------------------------------------------
michael@0 1228 U_CAPI int32_t U_EXPORT2
michael@0 1229 uregex_replaceFirst(URegularExpression *regexp2,
michael@0 1230 const UChar *replacementText,
michael@0 1231 int32_t replacementLength,
michael@0 1232 UChar *destBuf,
michael@0 1233 int32_t destCapacity,
michael@0 1234 UErrorCode *status) {
michael@0 1235 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1236 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1237 return 0;
michael@0 1238 }
michael@0 1239 if (replacementText == NULL || replacementLength < -1 ||
michael@0 1240 (destBuf == NULL && destCapacity > 0) ||
michael@0 1241 destCapacity < 0) {
michael@0 1242 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1243 return 0;
michael@0 1244 }
michael@0 1245
michael@0 1246 int32_t len = 0;
michael@0 1247 UBool findSucceeded;
michael@0 1248 uregex_reset(regexp2, 0, status);
michael@0 1249 findSucceeded = uregex_find(regexp2, 0, status);
michael@0 1250 if (findSucceeded) {
michael@0 1251 len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
michael@0 1252 &destBuf, &destCapacity, status);
michael@0 1253 }
michael@0 1254 len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
michael@0 1255
michael@0 1256 return len;
michael@0 1257 }
michael@0 1258
michael@0 1259
michael@0 1260 //------------------------------------------------------------------------------
michael@0 1261 //
michael@0 1262 // uregex_replaceFirstUText
michael@0 1263 //
michael@0 1264 //------------------------------------------------------------------------------
michael@0 1265 U_CAPI UText * U_EXPORT2
michael@0 1266 uregex_replaceFirstUText(URegularExpression *regexp2,
michael@0 1267 UText *replacementText,
michael@0 1268 UText *dest,
michael@0 1269 UErrorCode *status) {
michael@0 1270 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1271 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1272 return 0;
michael@0 1273 }
michael@0 1274 if (replacementText == NULL) {
michael@0 1275 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1276 return 0;
michael@0 1277 }
michael@0 1278
michael@0 1279 dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
michael@0 1280 return dest;
michael@0 1281 }
michael@0 1282
michael@0 1283
michael@0 1284 //------------------------------------------------------------------------------
michael@0 1285 //
michael@0 1286 // uregex_appendReplacement
michael@0 1287 //
michael@0 1288 //------------------------------------------------------------------------------
michael@0 1289
michael@0 1290 U_NAMESPACE_BEGIN
michael@0 1291 //
michael@0 1292 // Dummy class, because these functions need to be friends of class RegexMatcher,
michael@0 1293 // and stand-alone C functions don't work as friends
michael@0 1294 //
michael@0 1295 class RegexCImpl {
michael@0 1296 public:
michael@0 1297 inline static int32_t appendReplacement(RegularExpression *regexp,
michael@0 1298 const UChar *replacementText,
michael@0 1299 int32_t replacementLength,
michael@0 1300 UChar **destBuf,
michael@0 1301 int32_t *destCapacity,
michael@0 1302 UErrorCode *status);
michael@0 1303
michael@0 1304 inline static int32_t appendTail(RegularExpression *regexp,
michael@0 1305 UChar **destBuf,
michael@0 1306 int32_t *destCapacity,
michael@0 1307 UErrorCode *status);
michael@0 1308
michael@0 1309 inline static int32_t split(RegularExpression *regexp,
michael@0 1310 UChar *destBuf,
michael@0 1311 int32_t destCapacity,
michael@0 1312 int32_t *requiredCapacity,
michael@0 1313 UChar *destFields[],
michael@0 1314 int32_t destFieldsCapacity,
michael@0 1315 UErrorCode *status);
michael@0 1316 };
michael@0 1317
michael@0 1318 U_NAMESPACE_END
michael@0 1319
michael@0 1320
michael@0 1321
michael@0 1322 static const UChar BACKSLASH = 0x5c;
michael@0 1323 static const UChar DOLLARSIGN = 0x24;
michael@0 1324
michael@0 1325 //
michael@0 1326 // Move a character to an output buffer, with bounds checking on the index.
michael@0 1327 // Index advances even if capacity is exceeded, for preflight size computations.
michael@0 1328 // This little sequence is used a LOT.
michael@0 1329 //
michael@0 1330 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
michael@0 1331 if (*idx < bufCapacity) {
michael@0 1332 buf[*idx] = c;
michael@0 1333 }
michael@0 1334 (*idx)++;
michael@0 1335 }
michael@0 1336
michael@0 1337
michael@0 1338 //
michael@0 1339 // appendReplacement, the actual implementation.
michael@0 1340 //
michael@0 1341 int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
michael@0 1342 const UChar *replacementText,
michael@0 1343 int32_t replacementLength,
michael@0 1344 UChar **destBuf,
michael@0 1345 int32_t *destCapacity,
michael@0 1346 UErrorCode *status) {
michael@0 1347
michael@0 1348 // If we come in with a buffer overflow error, don't suppress the operation.
michael@0 1349 // A series of appendReplacements, appendTail need to correctly preflight
michael@0 1350 // the buffer size when an overflow happens somewhere in the middle.
michael@0 1351 UBool pendingBufferOverflow = FALSE;
michael@0 1352 if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
michael@0 1353 pendingBufferOverflow = TRUE;
michael@0 1354 *status = U_ZERO_ERROR;
michael@0 1355 }
michael@0 1356
michael@0 1357 //
michael@0 1358 // Validate all paramters
michael@0 1359 //
michael@0 1360 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1361 return 0;
michael@0 1362 }
michael@0 1363 if (replacementText == NULL || replacementLength < -1 ||
michael@0 1364 destCapacity == NULL || destBuf == NULL ||
michael@0 1365 (*destBuf == NULL && *destCapacity > 0) ||
michael@0 1366 *destCapacity < 0) {
michael@0 1367 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1368 return 0;
michael@0 1369 }
michael@0 1370
michael@0 1371 RegexMatcher *m = regexp->fMatcher;
michael@0 1372 if (m->fMatch == FALSE) {
michael@0 1373 *status = U_REGEX_INVALID_STATE;
michael@0 1374 return 0;
michael@0 1375 }
michael@0 1376
michael@0 1377 UChar *dest = *destBuf;
michael@0 1378 int32_t capacity = *destCapacity;
michael@0 1379 int32_t destIdx = 0;
michael@0 1380 int32_t i;
michael@0 1381
michael@0 1382 // If it wasn't supplied by the caller, get the length of the replacement text.
michael@0 1383 // TODO: slightly smarter logic in the copy loop could watch for the NUL on
michael@0 1384 // the fly and avoid this step.
michael@0 1385 if (replacementLength == -1) {
michael@0 1386 replacementLength = u_strlen(replacementText);
michael@0 1387 }
michael@0 1388
michael@0 1389 // Copy input string from the end of previous match to start of current match
michael@0 1390 if (regexp->fText != NULL) {
michael@0 1391 int32_t matchStart;
michael@0 1392 int32_t lastMatchEnd;
michael@0 1393 if (UTEXT_USES_U16(m->fInputText)) {
michael@0 1394 lastMatchEnd = (int32_t)m->fLastMatchEnd;
michael@0 1395 matchStart = (int32_t)m->fMatchStart;
michael@0 1396 } else {
michael@0 1397 // !!!: Would like a better way to do this!
michael@0 1398 UErrorCode status = U_ZERO_ERROR;
michael@0 1399 lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
michael@0 1400 status = U_ZERO_ERROR;
michael@0 1401 matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
michael@0 1402 }
michael@0 1403 for (i=lastMatchEnd; i<matchStart; i++) {
michael@0 1404 appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
michael@0 1405 }
michael@0 1406 } else {
michael@0 1407 UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
michael@0 1408 destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
michael@0 1409 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
michael@0 1410 &possibleOverflowError);
michael@0 1411 }
michael@0 1412 U_ASSERT(destIdx >= 0);
michael@0 1413
michael@0 1414 // scan the replacement text, looking for substitutions ($n) and \escapes.
michael@0 1415 int32_t replIdx = 0;
michael@0 1416 while (replIdx < replacementLength) {
michael@0 1417 UChar c = replacementText[replIdx];
michael@0 1418 replIdx++;
michael@0 1419 if (c != DOLLARSIGN && c != BACKSLASH) {
michael@0 1420 // Common case, no substitution, no escaping,
michael@0 1421 // just copy the char to the dest buf.
michael@0 1422 appendToBuf(c, &destIdx, dest, capacity);
michael@0 1423 continue;
michael@0 1424 }
michael@0 1425
michael@0 1426 if (c == BACKSLASH) {
michael@0 1427 // Backslash Escape. Copy the following char out without further checks.
michael@0 1428 // Note: Surrogate pairs don't need any special handling
michael@0 1429 // The second half wont be a '$' or a '\', and
michael@0 1430 // will move to the dest normally on the next
michael@0 1431 // loop iteration.
michael@0 1432 if (replIdx >= replacementLength) {
michael@0 1433 break;
michael@0 1434 }
michael@0 1435 c = replacementText[replIdx];
michael@0 1436
michael@0 1437 if (c==0x55/*U*/ || c==0x75/*u*/) {
michael@0 1438 // We have a \udddd or \Udddddddd escape sequence.
michael@0 1439 UChar32 escapedChar =
michael@0 1440 u_unescapeAt(uregex_ucstr_unescape_charAt,
michael@0 1441 &replIdx, // Index is updated by unescapeAt
michael@0 1442 replacementLength, // Length of replacement text
michael@0 1443 (void *)replacementText);
michael@0 1444
michael@0 1445 if (escapedChar != (UChar32)0xFFFFFFFF) {
michael@0 1446 if (escapedChar <= 0xffff) {
michael@0 1447 appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
michael@0 1448 } else {
michael@0 1449 appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
michael@0 1450 appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
michael@0 1451 }
michael@0 1452 continue;
michael@0 1453 }
michael@0 1454 // Note: if the \u escape was invalid, just fall through and
michael@0 1455 // treat it as a plain \<anything> escape.
michael@0 1456 }
michael@0 1457
michael@0 1458 // Plain backslash escape. Just put out the escaped character.
michael@0 1459 appendToBuf(c, &destIdx, dest, capacity);
michael@0 1460
michael@0 1461 replIdx++;
michael@0 1462 continue;
michael@0 1463 }
michael@0 1464
michael@0 1465
michael@0 1466
michael@0 1467 // We've got a $. Pick up a capture group number if one follows.
michael@0 1468 // Consume at most the number of digits necessary for the largest capture
michael@0 1469 // number that is valid for this pattern.
michael@0 1470
michael@0 1471 int32_t numDigits = 0;
michael@0 1472 int32_t groupNum = 0;
michael@0 1473 UChar32 digitC;
michael@0 1474 for (;;) {
michael@0 1475 if (replIdx >= replacementLength) {
michael@0 1476 break;
michael@0 1477 }
michael@0 1478 U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
michael@0 1479 if (u_isdigit(digitC) == FALSE) {
michael@0 1480 break;
michael@0 1481 }
michael@0 1482
michael@0 1483 U16_FWD_1(replacementText, replIdx, replacementLength);
michael@0 1484 groupNum=groupNum*10 + u_charDigitValue(digitC);
michael@0 1485 numDigits++;
michael@0 1486 if (numDigits >= m->fPattern->fMaxCaptureDigits) {
michael@0 1487 break;
michael@0 1488 }
michael@0 1489 }
michael@0 1490
michael@0 1491
michael@0 1492 if (numDigits == 0) {
michael@0 1493 // The $ didn't introduce a group number at all.
michael@0 1494 // Treat it as just part of the substitution text.
michael@0 1495 appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
michael@0 1496 continue;
michael@0 1497 }
michael@0 1498
michael@0 1499 // Finally, append the capture group data to the destination.
michael@0 1500 destIdx += uregex_group((URegularExpression*)regexp, groupNum,
michael@0 1501 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
michael@0 1502 if (*status == U_BUFFER_OVERFLOW_ERROR) {
michael@0 1503 // Ignore buffer overflow when extracting the group. We need to
michael@0 1504 // continue on to get full size of the untruncated result. We will
michael@0 1505 // raise our own buffer overflow error at the end.
michael@0 1506 *status = U_ZERO_ERROR;
michael@0 1507 }
michael@0 1508
michael@0 1509 if (U_FAILURE(*status)) {
michael@0 1510 // Can fail if group number is out of range.
michael@0 1511 break;
michael@0 1512 }
michael@0 1513
michael@0 1514 }
michael@0 1515
michael@0 1516 //
michael@0 1517 // Nul Terminate the dest buffer if possible.
michael@0 1518 // Set the appropriate buffer overflow or not terminated error, if needed.
michael@0 1519 //
michael@0 1520 if (destIdx < capacity) {
michael@0 1521 dest[destIdx] = 0;
michael@0 1522 } else if (destIdx == *destCapacity) {
michael@0 1523 *status = U_STRING_NOT_TERMINATED_WARNING;
michael@0 1524 } else {
michael@0 1525 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 1526 }
michael@0 1527
michael@0 1528 //
michael@0 1529 // Return an updated dest buffer and capacity to the caller.
michael@0 1530 //
michael@0 1531 if (destIdx > 0 && *destCapacity > 0) {
michael@0 1532 if (destIdx < capacity) {
michael@0 1533 *destBuf += destIdx;
michael@0 1534 *destCapacity -= destIdx;
michael@0 1535 } else {
michael@0 1536 *destBuf += capacity;
michael@0 1537 *destCapacity = 0;
michael@0 1538 }
michael@0 1539 }
michael@0 1540
michael@0 1541 // If we came in with a buffer overflow, make sure we go out with one also.
michael@0 1542 // (A zero length match right at the end of the previous match could
michael@0 1543 // make this function succeed even though a previous call had overflowed the buf)
michael@0 1544 if (pendingBufferOverflow && U_SUCCESS(*status)) {
michael@0 1545 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 1546 }
michael@0 1547
michael@0 1548 return destIdx;
michael@0 1549 }
michael@0 1550
michael@0 1551 //
michael@0 1552 // appendReplacement the actual API function,
michael@0 1553 //
michael@0 1554 U_CAPI int32_t U_EXPORT2
michael@0 1555 uregex_appendReplacement(URegularExpression *regexp2,
michael@0 1556 const UChar *replacementText,
michael@0 1557 int32_t replacementLength,
michael@0 1558 UChar **destBuf,
michael@0 1559 int32_t *destCapacity,
michael@0 1560 UErrorCode *status) {
michael@0 1561
michael@0 1562 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1563 return RegexCImpl::appendReplacement(
michael@0 1564 regexp, replacementText, replacementLength,destBuf, destCapacity, status);
michael@0 1565 }
michael@0 1566
michael@0 1567 //
michael@0 1568 // uregex_appendReplacementUText...can just use the normal C++ method
michael@0 1569 //
michael@0 1570 U_CAPI void U_EXPORT2
michael@0 1571 uregex_appendReplacementUText(URegularExpression *regexp2,
michael@0 1572 UText *replText,
michael@0 1573 UText *dest,
michael@0 1574 UErrorCode *status) {
michael@0 1575 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1576 regexp->fMatcher->appendReplacement(dest, replText, *status);
michael@0 1577 }
michael@0 1578
michael@0 1579
michael@0 1580 //------------------------------------------------------------------------------
michael@0 1581 //
michael@0 1582 // uregex_appendTail
michael@0 1583 //
michael@0 1584 //------------------------------------------------------------------------------
michael@0 1585 int32_t RegexCImpl::appendTail(RegularExpression *regexp,
michael@0 1586 UChar **destBuf,
michael@0 1587 int32_t *destCapacity,
michael@0 1588 UErrorCode *status)
michael@0 1589 {
michael@0 1590
michael@0 1591 // If we come in with a buffer overflow error, don't suppress the operation.
michael@0 1592 // A series of appendReplacements, appendTail need to correctly preflight
michael@0 1593 // the buffer size when an overflow happens somewhere in the middle.
michael@0 1594 UBool pendingBufferOverflow = FALSE;
michael@0 1595 if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
michael@0 1596 pendingBufferOverflow = TRUE;
michael@0 1597 *status = U_ZERO_ERROR;
michael@0 1598 }
michael@0 1599
michael@0 1600 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1601 return 0;
michael@0 1602 }
michael@0 1603
michael@0 1604 if (destCapacity == NULL || destBuf == NULL ||
michael@0 1605 (*destBuf == NULL && *destCapacity > 0) ||
michael@0 1606 *destCapacity < 0)
michael@0 1607 {
michael@0 1608 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1609 return 0;
michael@0 1610 }
michael@0 1611
michael@0 1612 RegexMatcher *m = regexp->fMatcher;
michael@0 1613
michael@0 1614 int32_t destIdx = 0;
michael@0 1615 int32_t destCap = *destCapacity;
michael@0 1616 UChar *dest = *destBuf;
michael@0 1617
michael@0 1618 if (regexp->fText != NULL) {
michael@0 1619 int32_t srcIdx;
michael@0 1620 int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
michael@0 1621 if (nativeIdx == -1) {
michael@0 1622 srcIdx = 0;
michael@0 1623 } else if (UTEXT_USES_U16(m->fInputText)) {
michael@0 1624 srcIdx = (int32_t)nativeIdx;
michael@0 1625 } else {
michael@0 1626 UErrorCode status = U_ZERO_ERROR;
michael@0 1627 srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
michael@0 1628 }
michael@0 1629
michael@0 1630 for (;;) {
michael@0 1631 U_ASSERT(destIdx >= 0);
michael@0 1632
michael@0 1633 if (srcIdx == regexp->fTextLength) {
michael@0 1634 break;
michael@0 1635 }
michael@0 1636 UChar c = regexp->fText[srcIdx];
michael@0 1637 if (c == 0 && regexp->fTextLength == -1) {
michael@0 1638 regexp->fTextLength = srcIdx;
michael@0 1639 break;
michael@0 1640 }
michael@0 1641
michael@0 1642 if (destIdx < destCap) {
michael@0 1643 dest[destIdx] = c;
michael@0 1644 } else {
michael@0 1645 // We've overflowed the dest buffer.
michael@0 1646 // If the total input string length is known, we can
michael@0 1647 // compute the total buffer size needed without scanning through the string.
michael@0 1648 if (regexp->fTextLength > 0) {
michael@0 1649 destIdx += (regexp->fTextLength - srcIdx);
michael@0 1650 break;
michael@0 1651 }
michael@0 1652 }
michael@0 1653 srcIdx++;
michael@0 1654 destIdx++;
michael@0 1655 }
michael@0 1656 } else {
michael@0 1657 int64_t srcIdx;
michael@0 1658 if (m->fMatch) {
michael@0 1659 // The most recent call to find() succeeded.
michael@0 1660 srcIdx = m->fMatchEnd;
michael@0 1661 } else {
michael@0 1662 // The last call to find() on this matcher failed().
michael@0 1663 // Look back to the end of the last find() that succeeded for src index.
michael@0 1664 srcIdx = m->fLastMatchEnd;
michael@0 1665 if (srcIdx == -1) {
michael@0 1666 // There has been no successful match with this matcher.
michael@0 1667 // We want to copy the whole string.
michael@0 1668 srcIdx = 0;
michael@0 1669 }
michael@0 1670 }
michael@0 1671
michael@0 1672 destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
michael@0 1673 }
michael@0 1674
michael@0 1675 //
michael@0 1676 // NUL terminate the output string, if possible, otherwise issue the
michael@0 1677 // appropriate error or warning.
michael@0 1678 //
michael@0 1679 if (destIdx < destCap) {
michael@0 1680 dest[destIdx] = 0;
michael@0 1681 } else if (destIdx == destCap) {
michael@0 1682 *status = U_STRING_NOT_TERMINATED_WARNING;
michael@0 1683 } else {
michael@0 1684 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 1685 }
michael@0 1686
michael@0 1687 //
michael@0 1688 // Update the user's buffer ptr and capacity vars to reflect the
michael@0 1689 // amount used.
michael@0 1690 //
michael@0 1691 if (destIdx < destCap) {
michael@0 1692 *destBuf += destIdx;
michael@0 1693 *destCapacity -= destIdx;
michael@0 1694 } else if (*destBuf != NULL) {
michael@0 1695 *destBuf += destCap;
michael@0 1696 *destCapacity = 0;
michael@0 1697 }
michael@0 1698
michael@0 1699 if (pendingBufferOverflow && U_SUCCESS(*status)) {
michael@0 1700 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 1701 }
michael@0 1702
michael@0 1703 return destIdx;
michael@0 1704 }
michael@0 1705
michael@0 1706
michael@0 1707 //
michael@0 1708 // appendTail the actual API function
michael@0 1709 //
michael@0 1710 U_CAPI int32_t U_EXPORT2
michael@0 1711 uregex_appendTail(URegularExpression *regexp2,
michael@0 1712 UChar **destBuf,
michael@0 1713 int32_t *destCapacity,
michael@0 1714 UErrorCode *status) {
michael@0 1715 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1716 return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
michael@0 1717 }
michael@0 1718
michael@0 1719
michael@0 1720 //
michael@0 1721 // uregex_appendTailUText...can just use the normal C++ method
michael@0 1722 //
michael@0 1723 U_CAPI UText * U_EXPORT2
michael@0 1724 uregex_appendTailUText(URegularExpression *regexp2,
michael@0 1725 UText *dest,
michael@0 1726 UErrorCode *status) {
michael@0 1727 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1728 return regexp->fMatcher->appendTail(dest, *status);
michael@0 1729 }
michael@0 1730
michael@0 1731
//------------------------------------------------------------------------------
//
//    copyString     Internal utility (currently compiled out) that appends a
//                   string to an output buffer while tracking the size that
//                   would be needed if the buffer is too small.
//
//                   A terminating NUL is stored when there is room for it,
//                   and the NUL is always counted in the updated index.
//
//------------------------------------------------------------------------------
#if 0
static void copyString(UChar        *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const UChar  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t outPos = *destIndex;
    int32_t srcPos = 0;

    // Copy for as long as there is both input left and room to store it.
    while (srcPos < srcLen && outPos < destCapacity) {
        destBuffer[outPos++] = srcPtr[srcPos++];
    }

    // Whatever did not fit is still counted toward the required size.
    if (srcPos < srcLen) {
        outPos += srcLen - srcPos;
    }

    if (outPos < destCapacity) {
        destBuffer[outPos] = 0;
    }
    *destIndex = outPos + 1;    // +1 accounts for the terminating NUL.
}
#endif
michael@0 1769
michael@0 1770 //------------------------------------------------------------------------------
michael@0 1771 //
michael@0 1772 // uregex_split
michael@0 1773 //
michael@0 1774 //------------------------------------------------------------------------------
michael@0 1775 int32_t RegexCImpl::split(RegularExpression *regexp,
michael@0 1776 UChar *destBuf,
michael@0 1777 int32_t destCapacity,
michael@0 1778 int32_t *requiredCapacity,
michael@0 1779 UChar *destFields[],
michael@0 1780 int32_t destFieldsCapacity,
michael@0 1781 UErrorCode *status) {
michael@0 1782 //
michael@0 1783 // Reset for the input text
michael@0 1784 //
michael@0 1785 regexp->fMatcher->reset();
michael@0 1786 UText *inputText = regexp->fMatcher->fInputText;
michael@0 1787 int64_t nextOutputStringStart = 0;
michael@0 1788 int64_t inputLen = regexp->fMatcher->fInputLength;
michael@0 1789 if (inputLen == 0) {
michael@0 1790 return 0;
michael@0 1791 }
michael@0 1792
michael@0 1793 //
michael@0 1794 // Loop through the input text, searching for the delimiter pattern
michael@0 1795 //
michael@0 1796 int32_t i; // Index of the field being processed.
michael@0 1797 int32_t destIdx = 0; // Next available position in destBuf;
michael@0 1798 int32_t numCaptureGroups = regexp->fMatcher->groupCount();
michael@0 1799 UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted
michael@0 1800 for (i=0; ; i++) {
michael@0 1801 if (i>=destFieldsCapacity-1) {
michael@0 1802 // There are one or zero output strings left.
michael@0 1803 // Fill the last output string with whatever is left from the input, then exit the loop.
michael@0 1804 // ( i will be == destFieldsCapacity if we filled the output array while processing
michael@0 1805 // capture groups of the delimiter expression, in which case we will discard the
michael@0 1806 // last capture group saved in favor of the unprocessed remainder of the
michael@0 1807 // input string.)
michael@0 1808 if (inputLen > nextOutputStringStart) {
michael@0 1809 if (i != destFieldsCapacity-1) {
michael@0 1810 // No fields are left. Recycle the last one for holding the trailing part of
michael@0 1811 // the input string.
michael@0 1812 i = destFieldsCapacity-1;
michael@0 1813 destIdx = (int32_t)(destFields[i] - destFields[0]);
michael@0 1814 }
michael@0 1815
michael@0 1816 destFields[i] = &destBuf[destIdx];
michael@0 1817 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
michael@0 1818 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
michael@0 1819 }
michael@0 1820 break;
michael@0 1821 }
michael@0 1822
michael@0 1823 if (regexp->fMatcher->find()) {
michael@0 1824 // We found another delimiter. Move everything from where we started looking
michael@0 1825 // up until the start of the delimiter into the next output string.
michael@0 1826 destFields[i] = &destBuf[destIdx];
michael@0 1827
michael@0 1828 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
michael@0 1829 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
michael@0 1830 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
michael@0 1831 tStatus = U_ZERO_ERROR;
michael@0 1832 } else {
michael@0 1833 *status = tStatus;
michael@0 1834 }
michael@0 1835 nextOutputStringStart = regexp->fMatcher->fMatchEnd;
michael@0 1836
michael@0 1837 // If the delimiter pattern has capturing parentheses, the captured
michael@0 1838 // text goes out into the next n destination strings.
michael@0 1839 int32_t groupNum;
michael@0 1840 for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
michael@0 1841 // If we've run out of output string slots, bail out.
michael@0 1842 if (i==destFieldsCapacity-1) {
michael@0 1843 break;
michael@0 1844 }
michael@0 1845 i++;
michael@0 1846
michael@0 1847 // Set up to extract the capture group contents into the dest buffer.
michael@0 1848 destFields[i] = &destBuf[destIdx];
michael@0 1849 tStatus = U_ZERO_ERROR;
michael@0 1850 int32_t t = uregex_group((URegularExpression*)regexp,
michael@0 1851 groupNum,
michael@0 1852 destFields[i],
michael@0 1853 REMAINING_CAPACITY(destIdx, destCapacity),
michael@0 1854 &tStatus);
michael@0 1855 destIdx += t + 1; // Record the space used in the output string buffer.
michael@0 1856 // +1 for the NUL that terminates the string.
michael@0 1857 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
michael@0 1858 tStatus = U_ZERO_ERROR;
michael@0 1859 } else {
michael@0 1860 *status = tStatus;
michael@0 1861 }
michael@0 1862 }
michael@0 1863
michael@0 1864 if (nextOutputStringStart == inputLen) {
michael@0 1865 // The delimiter was at the end of the string.
michael@0 1866 // Output an empty string, and then we are done.
michael@0 1867 if (destIdx < destCapacity) {
michael@0 1868 destBuf[destIdx] = 0;
michael@0 1869 }
michael@0 1870 if (i < destFieldsCapacity-1) {
michael@0 1871 ++i;
michael@0 1872 }
michael@0 1873 if (destIdx < destCapacity) {
michael@0 1874 destFields[i] = destBuf + destIdx;
michael@0 1875 }
michael@0 1876 ++destIdx;
michael@0 1877 break;
michael@0 1878 }
michael@0 1879
michael@0 1880 }
michael@0 1881 else
michael@0 1882 {
michael@0 1883 // We ran off the end of the input while looking for the next delimiter.
michael@0 1884 // All the remaining text goes into the current output string.
michael@0 1885 destFields[i] = &destBuf[destIdx];
michael@0 1886 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
michael@0 1887 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
michael@0 1888 break;
michael@0 1889 }
michael@0 1890 }
michael@0 1891
michael@0 1892 // Zero out any unused portion of the destFields array
michael@0 1893 int j;
michael@0 1894 for (j=i+1; j<destFieldsCapacity; j++) {
michael@0 1895 destFields[j] = NULL;
michael@0 1896 }
michael@0 1897
michael@0 1898 if (requiredCapacity != NULL) {
michael@0 1899 *requiredCapacity = destIdx;
michael@0 1900 }
michael@0 1901 if (destIdx > destCapacity) {
michael@0 1902 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 1903 }
michael@0 1904 return i+1;
michael@0 1905 }
michael@0 1906
michael@0 1907 //
michael@0 1908 // uregex_split The actual API function
michael@0 1909 //
michael@0 1910 U_CAPI int32_t U_EXPORT2
michael@0 1911 uregex_split(URegularExpression *regexp2,
michael@0 1912 UChar *destBuf,
michael@0 1913 int32_t destCapacity,
michael@0 1914 int32_t *requiredCapacity,
michael@0 1915 UChar *destFields[],
michael@0 1916 int32_t destFieldsCapacity,
michael@0 1917 UErrorCode *status) {
michael@0 1918 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1919 if (validateRE(regexp, TRUE, status) == FALSE) {
michael@0 1920 return 0;
michael@0 1921 }
michael@0 1922 if ((destBuf == NULL && destCapacity > 0) ||
michael@0 1923 destCapacity < 0 ||
michael@0 1924 destFields == NULL ||
michael@0 1925 destFieldsCapacity < 1 ) {
michael@0 1926 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1927 return 0;
michael@0 1928 }
michael@0 1929
michael@0 1930 return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
michael@0 1931 }
michael@0 1932
michael@0 1933
michael@0 1934 //
michael@0 1935 // uregex_splitUText...can just use the normal C++ method
michael@0 1936 //
michael@0 1937 U_CAPI int32_t U_EXPORT2
michael@0 1938 uregex_splitUText(URegularExpression *regexp2,
michael@0 1939 UText *destFields[],
michael@0 1940 int32_t destFieldsCapacity,
michael@0 1941 UErrorCode *status) {
michael@0 1942 RegularExpression *regexp = (RegularExpression*)regexp2;
michael@0 1943 return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
michael@0 1944 }
michael@0 1945
michael@0 1946
michael@0 1947 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
michael@0 1948

mercurial