// Provenance: intl/icu/source/common/ustrcase.cpp, added as a new 808-line file
// (diff against /dev/null, dated Wed Dec 31 06:09:35 2014 +0100).
/*
*******************************************************************************
*
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ustrcase.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb20
*   created by: Markus W. Scherer
*
*   Implementation file for string casing C API functions.
*   Uses functions from uchar.c for basic functionality that requires access
*   to the Unicode Character Database (uprops.dat).
*/

#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#include "unicode/ubrk.h"
#include "unicode/utf.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "ucase.h"
#include "ustr_imp.h"

#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

U_NAMESPACE_USE

/* string casing ------------------------------------------------------------ */

/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH.
*/ 1.42 +static inline int32_t 1.43 +appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, 1.44 + int32_t result, const UChar *s) { 1.45 + UChar32 c; 1.46 + int32_t length; 1.47 + 1.48 + /* decode the result */ 1.49 + if(result<0) { 1.50 + /* (not) original code point */ 1.51 + c=~result; 1.52 + length=-1; 1.53 + } else if(result<=UCASE_MAX_STRING_LENGTH) { 1.54 + c=U_SENTINEL; 1.55 + length=result; 1.56 + } else { 1.57 + c=result; 1.58 + length=-1; 1.59 + } 1.60 + 1.61 + if(destIndex<destCapacity) { 1.62 + /* append the result */ 1.63 + if(length<0) { 1.64 + /* code point */ 1.65 + UBool isError=FALSE; 1.66 + U16_APPEND(dest, destIndex, destCapacity, c, isError); 1.67 + if(isError) { 1.68 + /* overflow, nothing written */ 1.69 + destIndex+=U16_LENGTH(c); 1.70 + } 1.71 + } else { 1.72 + /* string */ 1.73 + if((destIndex+length)<=destCapacity) { 1.74 + while(length>0) { 1.75 + dest[destIndex++]=*s++; 1.76 + --length; 1.77 + } 1.78 + } else { 1.79 + /* overflow */ 1.80 + destIndex+=length; 1.81 + } 1.82 + } 1.83 + } else { 1.84 + /* preflight */ 1.85 + if(length<0) { 1.86 + destIndex+=U16_LENGTH(c); 1.87 + } else { 1.88 + destIndex+=length; 1.89 + } 1.90 + } 1.91 + return destIndex; 1.92 +} 1.93 + 1.94 +static UChar32 U_CALLCONV 1.95 +utf16_caseContextIterator(void *context, int8_t dir) { 1.96 + UCaseContext *csc=(UCaseContext *)context; 1.97 + UChar32 c; 1.98 + 1.99 + if(dir<0) { 1.100 + /* reset for backward iteration */ 1.101 + csc->index=csc->cpStart; 1.102 + csc->dir=dir; 1.103 + } else if(dir>0) { 1.104 + /* reset for forward iteration */ 1.105 + csc->index=csc->cpLimit; 1.106 + csc->dir=dir; 1.107 + } else { 1.108 + /* continue current iteration direction */ 1.109 + dir=csc->dir; 1.110 + } 1.111 + 1.112 + if(dir<0) { 1.113 + if(csc->start<csc->index) { 1.114 + U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); 1.115 + return c; 1.116 + } 1.117 + } else { 1.118 + if(csc->index<csc->limit) { 1.119 + U16_NEXT((const UChar *)csc->p, 
csc->index, csc->limit, c); 1.120 + return c; 1.121 + } 1.122 + } 1.123 + return U_SENTINEL; 1.124 +} 1.125 + 1.126 +/* 1.127 + * Case-maps [srcStart..srcLimit[ but takes 1.128 + * context [0..srcLength[ into account. 1.129 + */ 1.130 +static int32_t 1.131 +_caseMap(const UCaseMap *csm, UCaseMapFull *map, 1.132 + UChar *dest, int32_t destCapacity, 1.133 + const UChar *src, UCaseContext *csc, 1.134 + int32_t srcStart, int32_t srcLimit, 1.135 + UErrorCode *pErrorCode) { 1.136 + const UChar *s; 1.137 + UChar32 c, c2 = 0; 1.138 + int32_t srcIndex, destIndex; 1.139 + int32_t locCache; 1.140 + 1.141 + locCache=csm->locCache; 1.142 + 1.143 + /* case mapping loop */ 1.144 + srcIndex=srcStart; 1.145 + destIndex=0; 1.146 + while(srcIndex<srcLimit) { 1.147 + csc->cpStart=srcIndex; 1.148 + U16_NEXT(src, srcIndex, srcLimit, c); 1.149 + csc->cpLimit=srcIndex; 1.150 + c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); 1.151 + if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 1.152 + /* fast path version of appendResult() for BMP results */ 1.153 + dest[destIndex++]=(UChar)c2; 1.154 + } else { 1.155 + destIndex=appendResult(dest, destIndex, destCapacity, c, s); 1.156 + } 1.157 + } 1.158 + 1.159 + if(destIndex>destCapacity) { 1.160 + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1.161 + } 1.162 + return destIndex; 1.163 +} 1.164 + 1.165 +#if !UCONFIG_NO_BREAK_ITERATION 1.166 + 1.167 +U_CFUNC int32_t U_CALLCONV 1.168 +ustrcase_internalToTitle(const UCaseMap *csm, 1.169 + UChar *dest, int32_t destCapacity, 1.170 + const UChar *src, int32_t srcLength, 1.171 + UErrorCode *pErrorCode) { 1.172 + const UChar *s; 1.173 + UChar32 c; 1.174 + int32_t prev, titleStart, titleLimit, idx, destIndex, length; 1.175 + UBool isFirstIndex; 1.176 + 1.177 + if(U_FAILURE(*pErrorCode)) { 1.178 + return 0; 1.179 + } 1.180 + 1.181 + // Use the C++ abstract base class to minimize dependencies. 
1.182 + // TODO: Change UCaseMap.iter to store a BreakIterator directly. 1.183 + BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter); 1.184 + 1.185 + /* set up local variables */ 1.186 + int32_t locCache=csm->locCache; 1.187 + UCaseContext csc=UCASECONTEXT_INITIALIZER; 1.188 + csc.p=(void *)src; 1.189 + csc.limit=srcLength; 1.190 + destIndex=0; 1.191 + prev=0; 1.192 + isFirstIndex=TRUE; 1.193 + 1.194 + /* titlecasing loop */ 1.195 + while(prev<srcLength) { 1.196 + /* find next index where to titlecase */ 1.197 + if(isFirstIndex) { 1.198 + isFirstIndex=FALSE; 1.199 + idx=bi->first(); 1.200 + } else { 1.201 + idx=bi->next(); 1.202 + } 1.203 + if(idx==UBRK_DONE || idx>srcLength) { 1.204 + idx=srcLength; 1.205 + } 1.206 + 1.207 + /* 1.208 + * Unicode 4 & 5 section 3.13 Default Case Operations: 1.209 + * 1.210 + * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 1.211 + * #29, "Text Boundaries." Between each pair of word boundaries, find the first 1.212 + * cased character F. If F exists, map F to default_title(F); then map each 1.213 + * subsequent character C to default_lower(C). 1.214 + * 1.215 + * In this implementation, segment [prev..index[ into 3 parts: 1.216 + * a) uncased characters (copy as-is) [prev..titleStart[ 1.217 + * b) first case letter (titlecase) [titleStart..titleLimit[ 1.218 + * c) subsequent characters (lowercase) [titleLimit..index[ 1.219 + */ 1.220 + if(prev<idx) { 1.221 + /* find and copy uncased characters [prev..titleStart[ */ 1.222 + titleStart=titleLimit=prev; 1.223 + U16_NEXT(src, titleLimit, idx, c); 1.224 + if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 1.225 + /* Adjust the titlecasing index (titleStart) to the next cased character. 
*/ 1.226 + for(;;) { 1.227 + titleStart=titleLimit; 1.228 + if(titleLimit==idx) { 1.229 + /* 1.230 + * only uncased characters in [prev..index[ 1.231 + * stop with titleStart==titleLimit==index 1.232 + */ 1.233 + break; 1.234 + } 1.235 + U16_NEXT(src, titleLimit, idx, c); 1.236 + if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 1.237 + break; /* cased letter at [titleStart..titleLimit[ */ 1.238 + } 1.239 + } 1.240 + length=titleStart-prev; 1.241 + if(length>0) { 1.242 + if((destIndex+length)<=destCapacity) { 1.243 + uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); 1.244 + } 1.245 + destIndex+=length; 1.246 + } 1.247 + } 1.248 + 1.249 + if(titleStart<titleLimit) { 1.250 + /* titlecase c which is from [titleStart..titleLimit[ */ 1.251 + csc.cpStart=titleStart; 1.252 + csc.cpLimit=titleLimit; 1.253 + c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); 1.254 + destIndex=appendResult(dest, destIndex, destCapacity, c, s); 1.255 + 1.256 + /* Special case Dutch IJ titlecasing */ 1.257 + if ( titleStart+1 < idx && 1.258 + ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && 1.259 + ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && 1.260 + ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 1.261 + c=(UChar32) 0x004A; 1.262 + destIndex=appendResult(dest, destIndex, destCapacity, c, s); 1.263 + titleLimit++; 1.264 + } 1.265 + 1.266 + /* lowercase [titleLimit..index[ */ 1.267 + if(titleLimit<idx) { 1.268 + if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 1.269 + /* Normal operation: Lowercase the rest of the word. */ 1.270 + destIndex+= 1.271 + _caseMap( 1.272 + csm, ucase_toFullLower, 1.273 + dest+destIndex, destCapacity-destIndex, 1.274 + src, &csc, 1.275 + titleLimit, idx, 1.276 + pErrorCode); 1.277 + } else { 1.278 + /* Optionally just copy the rest of the word unchanged. 
*/ 1.279 + length=idx-titleLimit; 1.280 + if((destIndex+length)<=destCapacity) { 1.281 + uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); 1.282 + } 1.283 + destIndex+=length; 1.284 + } 1.285 + } 1.286 + } 1.287 + } 1.288 + 1.289 + prev=idx; 1.290 + } 1.291 + 1.292 + if(destIndex>destCapacity) { 1.293 + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1.294 + } 1.295 + return destIndex; 1.296 +} 1.297 + 1.298 +#endif // !UCONFIG_NO_BREAK_ITERATION 1.299 + 1.300 +/* functions available in the common library (for unistr_case.cpp) */ 1.301 + 1.302 +U_CFUNC int32_t U_CALLCONV 1.303 +ustrcase_internalToLower(const UCaseMap *csm, 1.304 + UChar *dest, int32_t destCapacity, 1.305 + const UChar *src, int32_t srcLength, 1.306 + UErrorCode *pErrorCode) { 1.307 + UCaseContext csc=UCASECONTEXT_INITIALIZER; 1.308 + csc.p=(void *)src; 1.309 + csc.limit=srcLength; 1.310 + return _caseMap( 1.311 + csm, ucase_toFullLower, 1.312 + dest, destCapacity, 1.313 + src, &csc, 0, srcLength, 1.314 + pErrorCode); 1.315 +} 1.316 + 1.317 +U_CFUNC int32_t U_CALLCONV 1.318 +ustrcase_internalToUpper(const UCaseMap *csm, 1.319 + UChar *dest, int32_t destCapacity, 1.320 + const UChar *src, int32_t srcLength, 1.321 + UErrorCode *pErrorCode) { 1.322 + UCaseContext csc=UCASECONTEXT_INITIALIZER; 1.323 + csc.p=(void *)src; 1.324 + csc.limit=srcLength; 1.325 + return _caseMap( 1.326 + csm, ucase_toFullUpper, 1.327 + dest, destCapacity, 1.328 + src, &csc, 0, srcLength, 1.329 + pErrorCode); 1.330 +} 1.331 + 1.332 +static int32_t 1.333 +ustr_foldCase(const UCaseProps *csp, 1.334 + UChar *dest, int32_t destCapacity, 1.335 + const UChar *src, int32_t srcLength, 1.336 + uint32_t options, 1.337 + UErrorCode *pErrorCode) { 1.338 + int32_t srcIndex, destIndex; 1.339 + 1.340 + const UChar *s; 1.341 + UChar32 c, c2 = 0; 1.342 + 1.343 + /* case mapping loop */ 1.344 + srcIndex=destIndex=0; 1.345 + while(srcIndex<srcLength) { 1.346 + U16_NEXT(src, srcIndex, srcLength, c); 1.347 + c=ucase_toFullFolding(csp, c, &s, 
options); 1.348 + if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 1.349 + /* fast path version of appendResult() for BMP results */ 1.350 + dest[destIndex++]=(UChar)c2; 1.351 + } else { 1.352 + destIndex=appendResult(dest, destIndex, destCapacity, c, s); 1.353 + } 1.354 + } 1.355 + 1.356 + if(destIndex>destCapacity) { 1.357 + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1.358 + } 1.359 + return destIndex; 1.360 +} 1.361 + 1.362 +U_CFUNC int32_t U_CALLCONV 1.363 +ustrcase_internalFold(const UCaseMap *csm, 1.364 + UChar *dest, int32_t destCapacity, 1.365 + const UChar *src, int32_t srcLength, 1.366 + UErrorCode *pErrorCode) { 1.367 + return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); 1.368 +} 1.369 + 1.370 +U_CFUNC int32_t 1.371 +ustrcase_map(const UCaseMap *csm, 1.372 + UChar *dest, int32_t destCapacity, 1.373 + const UChar *src, int32_t srcLength, 1.374 + UStringCaseMapper *stringCaseMapper, 1.375 + UErrorCode *pErrorCode) { 1.376 + UChar buffer[300]; 1.377 + UChar *temp; 1.378 + 1.379 + int32_t destLength; 1.380 + 1.381 + /* check argument values */ 1.382 + if(U_FAILURE(*pErrorCode)) { 1.383 + return 0; 1.384 + } 1.385 + if( destCapacity<0 || 1.386 + (dest==NULL && destCapacity>0) || 1.387 + src==NULL || 1.388 + srcLength<-1 1.389 + ) { 1.390 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.391 + return 0; 1.392 + } 1.393 + 1.394 + /* get the string length */ 1.395 + if(srcLength==-1) { 1.396 + srcLength=u_strlen(src); 1.397 + } 1.398 + 1.399 + /* check for overlapping source and destination */ 1.400 + if( dest!=NULL && 1.401 + ((src>=dest && src<(dest+destCapacity)) || 1.402 + (dest>=src && dest<(src+srcLength))) 1.403 + ) { 1.404 + /* overlap: provide a temporary destination buffer and later copy the result */ 1.405 + if(destCapacity<=LENGTHOF(buffer)) { 1.406 + /* the stack buffer is large enough */ 1.407 + temp=buffer; 1.408 + } else { 1.409 + /* allocate a buffer */ 1.410 + 
temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); 1.411 + if(temp==NULL) { 1.412 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.413 + return 0; 1.414 + } 1.415 + } 1.416 + } else { 1.417 + temp=dest; 1.418 + } 1.419 + 1.420 + destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode); 1.421 + if(temp!=dest) { 1.422 + /* copy the result string to the destination buffer */ 1.423 + if(destLength>0) { 1.424 + int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; 1.425 + if(copyLength>0) { 1.426 + uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); 1.427 + } 1.428 + } 1.429 + if(temp!=buffer) { 1.430 + uprv_free(temp); 1.431 + } 1.432 + } 1.433 + 1.434 + return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 1.435 +} 1.436 + 1.437 +/* public API functions */ 1.438 + 1.439 +U_CAPI int32_t U_EXPORT2 1.440 +u_strFoldCase(UChar *dest, int32_t destCapacity, 1.441 + const UChar *src, int32_t srcLength, 1.442 + uint32_t options, 1.443 + UErrorCode *pErrorCode) { 1.444 + UCaseMap csm=UCASEMAP_INITIALIZER; 1.445 + csm.csp=ucase_getSingleton(); 1.446 + csm.options=options; 1.447 + return ustrcase_map( 1.448 + &csm, 1.449 + dest, destCapacity, 1.450 + src, srcLength, 1.451 + ustrcase_internalFold, pErrorCode); 1.452 +} 1.453 + 1.454 +/* case-insensitive string comparisons -------------------------------------- */ 1.455 + 1.456 +/* 1.457 + * This function is a copy of unorm_cmpEquivFold() minus the parts for 1.458 + * canonical equivalence. 1.459 + * Keep the functions in sync, and see there for how this works. 1.460 + * The duplication is for modularization: 1.461 + * It makes caseless (but not canonical caseless) matches independent of 1.462 + * the normalization code. 
1.463 + */ 1.464 + 1.465 +/* stack element for previous-level source/decomposition pointers */ 1.466 +struct CmpEquivLevel { 1.467 + const UChar *start, *s, *limit; 1.468 +}; 1.469 +typedef struct CmpEquivLevel CmpEquivLevel; 1.470 + 1.471 +/* internal function */ 1.472 +U_CFUNC int32_t 1.473 +u_strcmpFold(const UChar *s1, int32_t length1, 1.474 + const UChar *s2, int32_t length2, 1.475 + uint32_t options, 1.476 + UErrorCode *pErrorCode) { 1.477 + const UCaseProps *csp; 1.478 + 1.479 + /* current-level start/limit - s1/s2 as current */ 1.480 + const UChar *start1, *start2, *limit1, *limit2; 1.481 + 1.482 + /* case folding variables */ 1.483 + const UChar *p; 1.484 + int32_t length; 1.485 + 1.486 + /* stacks of previous-level start/current/limit */ 1.487 + CmpEquivLevel stack1[2], stack2[2]; 1.488 + 1.489 + /* case folding buffers, only use current-level start/limit */ 1.490 + UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; 1.491 + 1.492 + /* track which is the current level per string */ 1.493 + int32_t level1, level2; 1.494 + 1.495 + /* current code units, and code points for lookups */ 1.496 + UChar32 c1, c2, cp1, cp2; 1.497 + 1.498 + /* no argument error checking because this itself is not an API */ 1.499 + 1.500 + /* 1.501 + * assume that at least the option U_COMPARE_IGNORE_CASE is set 1.502 + * otherwise this function would have to behave exactly as uprv_strCompare() 1.503 + */ 1.504 + csp=ucase_getSingleton(); 1.505 + if(U_FAILURE(*pErrorCode)) { 1.506 + return 0; 1.507 + } 1.508 + 1.509 + /* initialize */ 1.510 + start1=s1; 1.511 + if(length1==-1) { 1.512 + limit1=NULL; 1.513 + } else { 1.514 + limit1=s1+length1; 1.515 + } 1.516 + 1.517 + start2=s2; 1.518 + if(length2==-1) { 1.519 + limit2=NULL; 1.520 + } else { 1.521 + limit2=s2+length2; 1.522 + } 1.523 + 1.524 + level1=level2=0; 1.525 + c1=c2=-1; 1.526 + 1.527 + /* comparison loop */ 1.528 + for(;;) { 1.529 + /* 1.530 + * here a code unit value of -1 means "get another code unit" 
1.531 + * below it will mean "this source is finished" 1.532 + */ 1.533 + 1.534 + if(c1<0) { 1.535 + /* get next code unit from string 1, post-increment */ 1.536 + for(;;) { 1.537 + if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { 1.538 + if(level1==0) { 1.539 + c1=-1; 1.540 + break; 1.541 + } 1.542 + } else { 1.543 + ++s1; 1.544 + break; 1.545 + } 1.546 + 1.547 + /* reached end of level buffer, pop one level */ 1.548 + do { 1.549 + --level1; 1.550 + start1=stack1[level1].start; /*Not uninitialized*/ 1.551 + } while(start1==NULL); 1.552 + s1=stack1[level1].s; /*Not uninitialized*/ 1.553 + limit1=stack1[level1].limit; /*Not uninitialized*/ 1.554 + } 1.555 + } 1.556 + 1.557 + if(c2<0) { 1.558 + /* get next code unit from string 2, post-increment */ 1.559 + for(;;) { 1.560 + if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { 1.561 + if(level2==0) { 1.562 + c2=-1; 1.563 + break; 1.564 + } 1.565 + } else { 1.566 + ++s2; 1.567 + break; 1.568 + } 1.569 + 1.570 + /* reached end of level buffer, pop one level */ 1.571 + do { 1.572 + --level2; 1.573 + start2=stack2[level2].start; /*Not uninitialized*/ 1.574 + } while(start2==NULL); 1.575 + s2=stack2[level2].s; /*Not uninitialized*/ 1.576 + limit2=stack2[level2].limit; /*Not uninitialized*/ 1.577 + } 1.578 + } 1.579 + 1.580 + /* 1.581 + * compare c1 and c2 1.582 + * either variable c1, c2 is -1 only if the corresponding string is finished 1.583 + */ 1.584 + if(c1==c2) { 1.585 + if(c1<0) { 1.586 + return 0; /* c1==c2==-1 indicating end of strings */ 1.587 + } 1.588 + c1=c2=-1; /* make us fetch new code units */ 1.589 + continue; 1.590 + } else if(c1<0) { 1.591 + return -1; /* string 1 ends before string 2 */ 1.592 + } else if(c2<0) { 1.593 + return 1; /* string 2 ends before string 1 */ 1.594 + } 1.595 + /* c1!=c2 && c1>=0 && c2>=0 */ 1.596 + 1.597 + /* get complete code points for c1, c2 for lookups if either is a surrogate */ 1.598 + cp1=c1; 1.599 + 
if(U_IS_SURROGATE(c1)) { 1.600 + UChar c; 1.601 + 1.602 + if(U_IS_SURROGATE_LEAD(c1)) { 1.603 + if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { 1.604 + /* advance ++s1; only below if cp1 decomposes/case-folds */ 1.605 + cp1=U16_GET_SUPPLEMENTARY(c1, c); 1.606 + } 1.607 + } else /* isTrail(c1) */ { 1.608 + if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { 1.609 + cp1=U16_GET_SUPPLEMENTARY(c, c1); 1.610 + } 1.611 + } 1.612 + } 1.613 + 1.614 + cp2=c2; 1.615 + if(U_IS_SURROGATE(c2)) { 1.616 + UChar c; 1.617 + 1.618 + if(U_IS_SURROGATE_LEAD(c2)) { 1.619 + if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { 1.620 + /* advance ++s2; only below if cp2 decomposes/case-folds */ 1.621 + cp2=U16_GET_SUPPLEMENTARY(c2, c); 1.622 + } 1.623 + } else /* isTrail(c2) */ { 1.624 + if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { 1.625 + cp2=U16_GET_SUPPLEMENTARY(c, c2); 1.626 + } 1.627 + } 1.628 + } 1.629 + 1.630 + /* 1.631 + * go down one level for each string 1.632 + * continue with the main loop as soon as there is a real change 1.633 + */ 1.634 + 1.635 + if( level1==0 && 1.636 + (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 1.637 + ) { 1.638 + /* cp1 case-folds to the code point "length" or to p[length] */ 1.639 + if(U_IS_SURROGATE(c1)) { 1.640 + if(U_IS_SURROGATE_LEAD(c1)) { 1.641 + /* advance beyond source surrogate pair if it case-folds */ 1.642 + ++s1; 1.643 + } else /* isTrail(c1) */ { 1.644 + /* 1.645 + * we got a supplementary code point when hitting its trail surrogate, 1.646 + * therefore the lead surrogate must have been the same as in the other string; 1.647 + * compare this decomposition with the lead surrogate in the other string 1.648 + * remember that this simulates bulk text replacement: 1.649 + * the decomposition would replace the entire code point 1.650 + */ 1.651 + --s2; 1.652 + c2=*(s2-1); 1.653 + } 1.654 + } 1.655 + 1.656 + /* push current level pointers */ 1.657 + stack1[0].start=start1; 1.658 + stack1[0].s=s1; 1.659 + stack1[0].limit=limit1; 1.660 + ++level1; 
1.661 + 1.662 + /* copy the folding result to fold1[] */ 1.663 + if(length<=UCASE_MAX_STRING_LENGTH) { 1.664 + u_memcpy(fold1, p, length); 1.665 + } else { 1.666 + int32_t i=0; 1.667 + U16_APPEND_UNSAFE(fold1, i, length); 1.668 + length=i; 1.669 + } 1.670 + 1.671 + /* set next level pointers to case folding */ 1.672 + start1=s1=fold1; 1.673 + limit1=fold1+length; 1.674 + 1.675 + /* get ready to read from decomposition, continue with loop */ 1.676 + c1=-1; 1.677 + continue; 1.678 + } 1.679 + 1.680 + if( level2==0 && 1.681 + (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 1.682 + ) { 1.683 + /* cp2 case-folds to the code point "length" or to p[length] */ 1.684 + if(U_IS_SURROGATE(c2)) { 1.685 + if(U_IS_SURROGATE_LEAD(c2)) { 1.686 + /* advance beyond source surrogate pair if it case-folds */ 1.687 + ++s2; 1.688 + } else /* isTrail(c2) */ { 1.689 + /* 1.690 + * we got a supplementary code point when hitting its trail surrogate, 1.691 + * therefore the lead surrogate must have been the same as in the other string; 1.692 + * compare this decomposition with the lead surrogate in the other string 1.693 + * remember that this simulates bulk text replacement: 1.694 + * the decomposition would replace the entire code point 1.695 + */ 1.696 + --s1; 1.697 + c1=*(s1-1); 1.698 + } 1.699 + } 1.700 + 1.701 + /* push current level pointers */ 1.702 + stack2[0].start=start2; 1.703 + stack2[0].s=s2; 1.704 + stack2[0].limit=limit2; 1.705 + ++level2; 1.706 + 1.707 + /* copy the folding result to fold2[] */ 1.708 + if(length<=UCASE_MAX_STRING_LENGTH) { 1.709 + u_memcpy(fold2, p, length); 1.710 + } else { 1.711 + int32_t i=0; 1.712 + U16_APPEND_UNSAFE(fold2, i, length); 1.713 + length=i; 1.714 + } 1.715 + 1.716 + /* set next level pointers to case folding */ 1.717 + start2=s2=fold2; 1.718 + limit2=fold2+length; 1.719 + 1.720 + /* get ready to read from decomposition, continue with loop */ 1.721 + c2=-1; 1.722 + continue; 1.723 + } 1.724 + 1.725 + /* 1.726 + * no 
decomposition/case folding, max level for both sides: 1.727 + * return difference result 1.728 + * 1.729 + * code point order comparison must not just return cp1-cp2 1.730 + * because when single surrogates are present then the surrogate pairs 1.731 + * that formed cp1 and cp2 may be from different string indexes 1.732 + * 1.733 + * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units 1.734 + * c1=d800 cp1=10001 c2=dc00 cp2=10000 1.735 + * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } 1.736 + * 1.737 + * therefore, use same fix-up as in ustring.c/uprv_strCompare() 1.738 + * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ 1.739 + * so we have slightly different pointer/start/limit comparisons here 1.740 + */ 1.741 + 1.742 + if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { 1.743 + /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 1.744 + if( 1.745 + (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || 1.746 + (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) 1.747 + ) { 1.748 + /* part of a surrogate pair, leave >=d800 */ 1.749 + } else { 1.750 + /* BMP code point - may be surrogate code point - make <d800 */ 1.751 + c1-=0x2800; 1.752 + } 1.753 + 1.754 + if( 1.755 + (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || 1.756 + (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) 1.757 + ) { 1.758 + /* part of a surrogate pair, leave >=d800 */ 1.759 + } else { 1.760 + /* BMP code point - may be surrogate code point - make <d800 */ 1.761 + c2-=0x2800; 1.762 + } 1.763 + } 1.764 + 1.765 + return c1-c2; 1.766 + } 1.767 +} 1.768 + 1.769 +/* public API functions */ 1.770 + 1.771 +U_CAPI int32_t U_EXPORT2 1.772 +u_strCaseCompare(const UChar *s1, int32_t length1, 1.773 + const UChar *s2, int32_t length2, 1.774 + uint32_t options, 1.775 + UErrorCode *pErrorCode) { 1.776 + /* argument checking */ 1.777 + if(pErrorCode==0 || 
U_FAILURE(*pErrorCode)) { 1.778 + return 0; 1.779 + } 1.780 + if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 1.781 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.782 + return 0; 1.783 + } 1.784 + return u_strcmpFold(s1, length1, s2, length2, 1.785 + options|U_COMPARE_IGNORE_CASE, 1.786 + pErrorCode); 1.787 +} 1.788 + 1.789 +U_CAPI int32_t U_EXPORT2 1.790 +u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { 1.791 + UErrorCode errorCode=U_ZERO_ERROR; 1.792 + return u_strcmpFold(s1, -1, s2, -1, 1.793 + options|U_COMPARE_IGNORE_CASE, 1.794 + &errorCode); 1.795 +} 1.796 + 1.797 +U_CAPI int32_t U_EXPORT2 1.798 +u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { 1.799 + UErrorCode errorCode=U_ZERO_ERROR; 1.800 + return u_strcmpFold(s1, length, s2, length, 1.801 + options|U_COMPARE_IGNORE_CASE, 1.802 + &errorCode); 1.803 +} 1.804 + 1.805 +U_CAPI int32_t U_EXPORT2 1.806 +u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { 1.807 + UErrorCode errorCode=U_ZERO_ERROR; 1.808 + return u_strcmpFold(s1, n, s2, n, 1.809 + options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), 1.810 + &errorCode); 1.811 +}