1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/usprep.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,912 @@ 1.4 +/* 1.5 + ******************************************************************************* 1.6 + * 1.7 + * Copyright (C) 2003-2013, International Business Machines 1.8 + * Corporation and others. All Rights Reserved. 1.9 + * 1.10 + ******************************************************************************* 1.11 + * file name: usprep.cpp 1.12 + * encoding: US-ASCII 1.13 + * tab size: 8 (not used) 1.14 + * indentation:4 1.15 + * 1.16 + * created on: 2003jul2 1.17 + * created by: Ram Viswanadha 1.18 + */ 1.19 + 1.20 +#include "unicode/utypes.h" 1.21 + 1.22 +#if !UCONFIG_NO_IDNA 1.23 + 1.24 +#include "unicode/usprep.h" 1.25 + 1.26 +#include "unicode/unorm.h" 1.27 +#include "unicode/ustring.h" 1.28 +#include "unicode/uchar.h" 1.29 +#include "unicode/uversion.h" 1.30 +#include "umutex.h" 1.31 +#include "cmemory.h" 1.32 +#include "sprpimpl.h" 1.33 +#include "ustr_imp.h" 1.34 +#include "uhash.h" 1.35 +#include "cstring.h" 1.36 +#include "udataswp.h" 1.37 +#include "ucln_cmn.h" 1.38 +#include "ubidi_props.h" 1.39 + 1.40 +U_NAMESPACE_USE 1.41 + 1.42 +U_CDECL_BEGIN 1.43 + 1.44 +/* 1.45 +Static cache for already opened StringPrep profiles 1.46 +*/ 1.47 +static UHashtable *SHARED_DATA_HASHTABLE = NULL; 1.48 +static icu::UInitOnce gSharedDataInitOnce; 1.49 + 1.50 +static UMutex usprepMutex = U_MUTEX_INITIALIZER; 1.51 + 1.52 +/* format version of spp file */ 1.53 +//static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 1.54 + 1.55 +/* the Unicode version of the sprep data */ 1.56 +static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 1.57 + 1.58 +/* Profile names must be aligned to UStringPrepProfileType */ 1.59 +static const char * const PROFILE_NAMES[] = { 1.60 + "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 1.61 + "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 1.62 + "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 1.63 + "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 1.64 + "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 1.65 + "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 1.66 + "rfc3722", /* USPREP_RFC3722_ISCSI */ 1.67 + "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 1.68 + "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 1.69 + "rfc4011", /* USPREP_RFC4011_MIB */ 1.70 + "rfc4013", /* USPREP_RFC4013_SASLPREP */ 1.71 + "rfc4505", /* USPREP_RFC4505_TRACE */ 1.72 + "rfc4518", /* USPREP_RFC4518_LDAP */ 1.73 + "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 1.74 +}; 1.75 + 1.76 +static UBool U_CALLCONV 1.77 +isSPrepAcceptable(void * /* context */, 1.78 + const char * /* type */, 1.79 + const char * /* name */, 1.80 + const UDataInfo *pInfo) { 1.81 + if( 1.82 + pInfo->size>=20 && 1.83 + pInfo->isBigEndian==U_IS_BIG_ENDIAN && 1.84 + pInfo->charsetFamily==U_CHARSET_FAMILY && 1.85 + pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 1.86 + pInfo->dataFormat[1]==0x50 && 1.87 + pInfo->dataFormat[2]==0x52 && 1.88 + pInfo->dataFormat[3]==0x50 && 1.89 + pInfo->formatVersion[0]==3 && 1.90 + pInfo->formatVersion[2]==UTRIE_SHIFT && 1.91 + pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 1.92 + ) { 1.93 + //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 1.94 + uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 1.95 + return TRUE; 1.96 + } else { 1.97 + return FALSE; 1.98 + } 1.99 +} 1.100 + 1.101 +static int32_t U_CALLCONV 1.102 +getSPrepFoldingOffset(uint32_t data) { 1.103 + 1.104 + return (int32_t)data; 1.105 + 1.106 +} 1.107 + 1.108 +/* hashes an entry */ 1.109 +static int32_t U_CALLCONV 1.110 +hashEntry(const UHashTok parm) { 1.111 + UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 1.112 + UHashTok namekey, pathkey; 1.113 + namekey.pointer = b->name; 1.114 + pathkey.pointer = b->path; 1.115 + return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); 1.116 +} 1.117 + 1.118 +/* compares two entries */ 1.119 +static UBool U_CALLCONV 1.120 +compareEntries(const UHashTok p1, const UHashTok p2) { 1.121 + UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 1.122 + UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 1.123 + UHashTok name1, name2, path1, path2; 1.124 + name1.pointer = b1->name; 1.125 + name2.pointer = b2->name; 1.126 + path1.pointer = b1->path; 1.127 + path2.pointer = b2->path; 1.128 + return ((UBool)(uhash_compareChars(name1, name2) & 1.129 + uhash_compareChars(path1, path2))); 1.130 +} 1.131 + 1.132 +static void 1.133 +usprep_unload(UStringPrepProfile* data){ 1.134 + udata_close(data->sprepData); 1.135 +} 1.136 + 1.137 +static int32_t 1.138 +usprep_internal_flushCache(UBool noRefCount){ 1.139 + UStringPrepProfile *profile = NULL; 1.140 + UStringPrepKey *key = NULL; 1.141 + int32_t pos = -1; 1.142 + int32_t deletedNum = 0; 1.143 + const UHashElement *e; 1.144 + 1.145 + /* 1.146 + * if shared data hasn't even been lazy evaluated yet 1.147 + * return 0 1.148 + */ 1.149 + umtx_lock(&usprepMutex); 1.150 + if (SHARED_DATA_HASHTABLE == NULL) { 1.151 + umtx_unlock(&usprepMutex); 1.152 + return 0; 1.153 + } 1.154 + 1.155 + /*creates an enumeration to iterate through every element in the table */ 1.156 + while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) 1.157 + { 1.158 + profile = (UStringPrepProfile *) e->value.pointer; 1.159 + key = (UStringPrepKey *) e->key.pointer; 1.160 + 1.161 + if ((noRefCount== FALSE && profile->refCount == 0) || 1.162 + noRefCount== TRUE) { 1.163 + deletedNum++; 1.164 + uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1.165 + 1.166 + /* unload the data */ 1.167 + usprep_unload(profile); 1.168 + 1.169 + if(key->name != NULL) { 1.170 + uprv_free(key->name); 1.171 + key->name=NULL; 1.172 + } 1.173 + if(key->path != NULL) { 1.174 + uprv_free(key->path); 1.175 + key->path=NULL; 1.176 + } 1.177 + uprv_free(profile); 1.178 + uprv_free(key); 1.179 + } 1.180 + 1.181 + } 1.182 + umtx_unlock(&usprepMutex); 1.183 + 1.184 + return deletedNum; 1.185 +} 1.186 + 1.187 +/* Works just like ucnv_flushCache() 1.188 +static int32_t 1.189 +usprep_flushCache(){ 1.190 + return usprep_internal_flushCache(FALSE); 1.191 +} 1.192 +*/ 1.193 + 1.194 +static UBool U_CALLCONV usprep_cleanup(void){ 1.195 + if (SHARED_DATA_HASHTABLE != NULL) { 1.196 + usprep_internal_flushCache(TRUE); 1.197 + if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 1.198 + uhash_close(SHARED_DATA_HASHTABLE); 1.199 + SHARED_DATA_HASHTABLE = NULL; 1.200 + } 1.201 + } 1.202 + gSharedDataInitOnce.reset(); 1.203 + return (SHARED_DATA_HASHTABLE == NULL); 1.204 +} 1.205 +U_CDECL_END 1.206 + 1.207 + 1.208 +/** Initializes the cache for resources */ 1.209 +static void U_CALLCONV 1.210 +createCache(UErrorCode &status) { 1.211 + SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); 1.212 + if (U_FAILURE(status)) { 1.213 + SHARED_DATA_HASHTABLE = NULL; 1.214 + } 1.215 + ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 1.216 +} 1.217 + 1.218 +static void 1.219 +initCache(UErrorCode *status) { 1.220 + umtx_initOnce(gSharedDataInitOnce, &createCache, *status); 1.221 +} 1.222 + 1.223 +static UBool U_CALLCONV 1.224 +loadData(UStringPrepProfile* profile, 1.225 + const char* path, 1.226 + const char* name, 1.227 + const char* type, 1.228 + UErrorCode* errorCode) { 1.229 + /* load Unicode SPREP data from file */ 1.230 + UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; 1.231 + UDataMemory *dataMemory; 1.232 + const int32_t *p=NULL; 1.233 + const uint8_t *pb; 1.234 + UVersionInfo normUnicodeVersion; 1.235 + int32_t normUniVer, sprepUniVer, normCorrVer; 1.236 + 1.237 + if(errorCode==NULL || U_FAILURE(*errorCode)) { 1.238 + return 0; 1.239 + } 1.240 + 1.241 + /* open the data outside the mutex block */ 1.242 + //TODO: change the path 1.243 + dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); 1.244 + if(U_FAILURE(*errorCode)) { 1.245 + return FALSE; 1.246 + } 1.247 + 1.248 + p=(const int32_t *)udata_getMemory(dataMemory); 1.249 + pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); 1.250 + utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 1.251 + _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 1.252 + 1.253 + 1.254 + if(U_FAILURE(*errorCode)) { 1.255 + udata_close(dataMemory); 1.256 + return FALSE; 1.257 + } 1.258 + 1.259 + /* in the mutex block, set the data for this process */ 1.260 + umtx_lock(&usprepMutex); 1.261 + if(profile->sprepData==NULL) { 1.262 + profile->sprepData=dataMemory; 1.263 + dataMemory=NULL; 1.264 + uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 1.265 + uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 1.266 + } else { 1.267 + p=(const int32_t *)udata_getMemory(profile->sprepData); 1.268 + } 1.269 + umtx_unlock(&usprepMutex); 1.270 + /* initialize some variables */ 1.271 + profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 1.272 + 1.273 + u_getUnicodeVersion(normUnicodeVersion); 1.274 + normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 1.275 + (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 1.276 + sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 1.277 + (dataVersion[2] << 8 ) + (dataVersion[3]); 1.278 + normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 1.279 + 1.280 + if(U_FAILURE(*errorCode)){ 1.281 + udata_close(dataMemory); 1.282 + return FALSE; 1.283 + } 1.284 + if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ 1.285 + normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ 1.286 + ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 1.287 + ){ 1.288 + *errorCode = U_INVALID_FORMAT_ERROR; 1.289 + udata_close(dataMemory); 1.290 + return FALSE; 1.291 + } 1.292 + profile->isDataLoaded = TRUE; 1.293 + 1.294 + /* if a different thread set it first, then close the extra data */ 1.295 + if(dataMemory!=NULL) { 1.296 + udata_close(dataMemory); /* NULL if it was set correctly */ 1.297 + } 1.298 + 1.299 + 1.300 + return profile->isDataLoaded; 1.301 +} 1.302 + 1.303 +static UStringPrepProfile* 1.304 +usprep_getProfile(const char* path, 1.305 + const char* name, 1.306 + UErrorCode *status){ 1.307 + 1.308 + UStringPrepProfile* profile = NULL; 1.309 + 1.310 + initCache(status); 1.311 + 1.312 + if(U_FAILURE(*status)){ 1.313 + return NULL; 1.314 + } 1.315 + 1.316 + UStringPrepKey stackKey; 1.317 + /* 1.318 + * const is cast way to save malloc, strcpy and free calls 1.319 + * we use the passed in pointers for fetching the data from the 1.320 + * hash table which is safe 1.321 + */ 1.322 + stackKey.name = (char*) name; 1.323 + stackKey.path = (char*) path; 1.324 + 1.325 + /* fetch the data from the cache */ 1.326 + umtx_lock(&usprepMutex); 1.327 + profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 1.328 + if(profile != NULL) { 1.329 + profile->refCount++; 1.330 + } 1.331 + umtx_unlock(&usprepMutex); 1.332 + 1.333 + if(profile == NULL) { 1.334 + /* else load the data and put the data in the cache */ 1.335 + LocalMemory<UStringPrepProfile> newProfile; 1.336 + if(newProfile.allocateInsteadAndReset() == NULL) { 1.337 + *status = U_MEMORY_ALLOCATION_ERROR; 1.338 + return NULL; 1.339 + } 1.340 + 1.341 + /* load the data */ 1.342 + if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 1.343 + return NULL; 1.344 + } 1.345 + 1.346 + /* get the options */ 1.347 + newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 1.348 + newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 1.349 + 1.350 + if(newProfile->checkBiDi) { 1.351 + newProfile->bdp = ubidi_getSingleton(); 1.352 + } 1.353 + 1.354 + LocalMemory<UStringPrepKey> key; 1.355 + LocalMemory<char> keyName; 1.356 + LocalMemory<char> keyPath; 1.357 + if( key.allocateInsteadAndReset() == NULL || 1.358 + keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || 1.359 + (path != NULL && 1.360 + keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) 1.361 + ) { 1.362 + *status = U_MEMORY_ALLOCATION_ERROR; 1.363 + usprep_unload(newProfile.getAlias()); 1.364 + return NULL; 1.365 + } 1.366 + 1.367 + umtx_lock(&usprepMutex); 1.368 + // If another thread already inserted the same key/value, refcount and cleanup our thread data 1.369 + profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); 1.370 + if(profile != NULL) { 1.371 + profile->refCount++; 1.372 + usprep_unload(newProfile.getAlias()); 1.373 + } 1.374 + else { 1.375 + /* initialize the key members */ 1.376 + key->name = keyName.orphan(); 1.377 + uprv_strcpy(key->name, name); 1.378 + if(path != NULL){ 1.379 + key->path = keyPath.orphan(); 1.380 + uprv_strcpy(key->path, path); 1.381 + } 1.382 + profile = newProfile.orphan(); 1.383 + 1.384 + /* add the data object to the cache */ 1.385 + profile->refCount = 1; 1.386 + uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 1.387 + } 1.388 + umtx_unlock(&usprepMutex); 1.389 + } 1.390 + 1.391 + return profile; 1.392 +} 1.393 + 1.394 +U_CAPI UStringPrepProfile* U_EXPORT2 1.395 +usprep_open(const char* path, 1.396 + const char* name, 1.397 + UErrorCode* status){ 1.398 + 1.399 + if(status == NULL || U_FAILURE(*status)){ 1.400 + return NULL; 1.401 + } 1.402 + 1.403 + /* initialize the profile struct members */ 1.404 + return usprep_getProfile(path,name,status); 1.405 +} 1.406 + 1.407 +U_CAPI UStringPrepProfile* U_EXPORT2 1.408 +usprep_openByType(UStringPrepProfileType type, 1.409 + UErrorCode* status) { 1.410 + if(status == NULL || U_FAILURE(*status)){ 1.411 + return NULL; 1.412 + } 1.413 + int32_t index = (int32_t)type; 1.414 + if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) { 1.415 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.416 + return NULL; 1.417 + } 1.418 + return usprep_open(NULL, PROFILE_NAMES[index], status); 1.419 +} 1.420 + 1.421 +U_CAPI void U_EXPORT2 1.422 +usprep_close(UStringPrepProfile* profile){ 1.423 + if(profile==NULL){ 1.424 + return; 1.425 + } 1.426 + 1.427 + umtx_lock(&usprepMutex); 1.428 + /* decrement the ref count*/ 1.429 + if(profile->refCount > 0){ 1.430 + profile->refCount--; 1.431 + } 1.432 + umtx_unlock(&usprepMutex); 1.433 + 1.434 +} 1.435 + 1.436 +U_CFUNC void 1.437 +uprv_syntaxError(const UChar* rules, 1.438 + int32_t pos, 1.439 + int32_t rulesLen, 1.440 + UParseError* parseError){ 1.441 + if(parseError == NULL){ 1.442 + return; 1.443 + } 1.444 + parseError->offset = pos; 1.445 + parseError->line = 0 ; // we are not using line numbers 1.446 + 1.447 + // for pre-context 1.448 + int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 1.449 + int32_t limit = pos; 1.450 + 1.451 + u_memcpy(parseError->preContext,rules+start,limit-start); 1.452 + //null terminate the buffer 1.453 + parseError->preContext[limit-start] = 0; 1.454 + 1.455 + // for post-context; include error rules[pos] 1.456 + start = pos; 1.457 + limit = start + (U_PARSE_CONTEXT_LEN-1); 1.458 + if (limit > rulesLen) { 1.459 + limit = rulesLen; 1.460 + } 1.461 + if (start < rulesLen) { 1.462 + u_memcpy(parseError->postContext,rules+start,limit-start); 1.463 + } 1.464 + //null terminate the buffer 1.465 + parseError->postContext[limit-start]= 0; 1.466 +} 1.467 + 1.468 + 1.469 +static inline UStringPrepType 1.470 +getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 1.471 + 1.472 + UStringPrepType type; 1.473 + if(trieWord == 0){ 1.474 + /* 1.475 + * Initial value stored in the mapping table 1.476 + * just return USPREP_TYPE_LIMIT .. so that 1.477 + * the source codepoint is copied to the destination 1.478 + */ 1.479 + type = USPREP_TYPE_LIMIT; 1.480 + isIndex =FALSE; 1.481 + value = 0; 1.482 + }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 1.483 + type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); 1.484 + isIndex =FALSE; 1.485 + value = 0; 1.486 + }else{ 1.487 + /* get the type */ 1.488 + type = USPREP_MAP; 1.489 + /* ascertain if the value is index or delta */ 1.490 + if(trieWord & 0x02){ 1.491 + isIndex = TRUE; 1.492 + value = trieWord >> 2; //mask off the lower 2 bits and shift 1.493 + }else{ 1.494 + isIndex = FALSE; 1.495 + value = (int16_t)trieWord; 1.496 + value = (value >> 2); 1.497 + } 1.498 + 1.499 + if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 1.500 + type = USPREP_DELETE; 1.501 + isIndex =FALSE; 1.502 + value = 0; 1.503 + } 1.504 + } 1.505 + return type; 1.506 +} 1.507 + 1.508 + 1.509 + 1.510 +static int32_t 1.511 +usprep_map( const UStringPrepProfile* profile, 1.512 + const UChar* src, int32_t srcLength, 1.513 + UChar* dest, int32_t destCapacity, 1.514 + int32_t options, 1.515 + UParseError* parseError, 1.516 + UErrorCode* status ){ 1.517 + 1.518 + uint16_t result; 1.519 + int32_t destIndex=0; 1.520 + int32_t srcIndex; 1.521 + UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); 1.522 + UStringPrepType type; 1.523 + int16_t value; 1.524 + UBool isIndex; 1.525 + const int32_t* indexes = profile->indexes; 1.526 + 1.527 + // no error checking the caller check for error and arguments 1.528 + // no string length check the caller finds out the string length 1.529 + 1.530 + for(srcIndex=0;srcIndex<srcLength;){ 1.531 + UChar32 ch; 1.532 + 1.533 + U16_NEXT(src,srcIndex,srcLength,ch); 1.534 + 1.535 + result=0; 1.536 + 1.537 + UTRIE_GET16(&profile->sprepTrie,ch,result); 1.538 + 1.539 + type = getValues(result, value, isIndex); 1.540 + 1.541 + // check if the source codepoint is unassigned 1.542 + if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ 1.543 + 1.544 + uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 1.545 + *status = U_STRINGPREP_UNASSIGNED_ERROR; 1.546 + return 0; 1.547 + 1.548 + }else if(type == USPREP_MAP){ 1.549 + 1.550 + int32_t index, length; 1.551 + 1.552 + if(isIndex){ 1.553 + index = value; 1.554 + if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 1.555 + index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 1.556 + length = 1; 1.557 + }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 1.558 + index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 1.559 + length = 2; 1.560 + }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 1.561 + index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 1.562 + length = 3; 1.563 + }else{ 1.564 + length = profile->mappingData[index++]; 1.565 + 1.566 + } 1.567 + 1.568 + /* copy mapping to destination */ 1.569 + for(int32_t i=0; i< length; i++){ 1.570 + if(destIndex < destCapacity ){ 1.571 + dest[destIndex] = profile->mappingData[index+i]; 1.572 + } 1.573 + destIndex++; /* for pre-flighting */ 1.574 + } 1.575 + continue; 1.576 + }else{ 1.577 + // subtract the delta to arrive at the code point 1.578 + ch -= value; 1.579 + } 1.580 + 1.581 + }else if(type==USPREP_DELETE){ 1.582 + // just consume the codepoint and contine 1.583 + continue; 1.584 + } 1.585 + //copy the code point into destination 1.586 + if(ch <= 0xFFFF){ 1.587 + if(destIndex < destCapacity ){ 1.588 + dest[destIndex] = (UChar)ch; 1.589 + } 1.590 + destIndex++; 1.591 + }else{ 1.592 + if(destIndex+1 < destCapacity ){ 1.593 + dest[destIndex] = U16_LEAD(ch); 1.594 + dest[destIndex+1] = U16_TRAIL(ch); 1.595 + } 1.596 + destIndex +=2; 1.597 + } 1.598 + 1.599 + } 1.600 + 1.601 + return u_terminateUChars(dest, destCapacity, destIndex, status); 1.602 +} 1.603 + 1.604 + 1.605 +static int32_t 1.606 +usprep_normalize( const UChar* src, int32_t srcLength, 1.607 + UChar* dest, int32_t destCapacity, 1.608 + UErrorCode* status ){ 1.609 + return unorm_normalize( 1.610 + src, srcLength, 1.611 + UNORM_NFKC, UNORM_UNICODE_3_2, 1.612 + dest, destCapacity, 1.613 + status); 1.614 +} 1.615 + 1.616 + 1.617 + /* 1.618 + 1) Map -- For each character in the input, check if it has a mapping 1.619 + and, if so, replace it with its mapping. 1.620 + 1.621 + 2) Normalize -- Possibly normalize the result of step 1 using Unicode 1.622 + normalization. 1.623 + 1.624 + 3) Prohibit -- Check for any characters that are not allowed in the 1.625 + output. If any are found, return an error. 1.626 + 1.627 + 4) Check bidi -- Possibly check for right-to-left characters, and if 1.628 + any are found, make sure that the whole string satisfies the 1.629 + requirements for bidirectional strings. If the string does not 1.630 + satisfy the requirements for bidirectional strings, return an 1.631 + error. 1.632 + [Unicode3.2] defines several bidirectional categories; each character 1.633 + has one bidirectional category assigned to it. For the purposes of 1.634 + the requirements below, an "RandALCat character" is a character that 1.635 + has Unicode bidirectional categories "R" or "AL"; an "LCat character" 1.636 + is a character that has Unicode bidirectional category "L". Note 1.637 + 1.638 + 1.639 + that there are many characters which fall in neither of the above 1.640 + definitions; Latin digits (<U+0030> through <U+0039>) are examples of 1.641 + this because they have bidirectional category "EN". 1.642 + 1.643 + In any profile that specifies bidirectional character handling, all 1.644 + three of the following requirements MUST be met: 1.645 + 1.646 + 1) The characters in section 5.8 MUST be prohibited. 1.647 + 1.648 + 2) If a string contains any RandALCat character, the string MUST NOT 1.649 + contain any LCat character. 1.650 + 1.651 + 3) If a string contains any RandALCat character, a RandALCat 1.652 + character MUST be the first character of the string, and a 1.653 + RandALCat character MUST be the last character of the string. 1.654 +*/ 1.655 + 1.656 +#define MAX_STACK_BUFFER_SIZE 300 1.657 + 1.658 + 1.659 +U_CAPI int32_t U_EXPORT2 1.660 +usprep_prepare( const UStringPrepProfile* profile, 1.661 + const UChar* src, int32_t srcLength, 1.662 + UChar* dest, int32_t destCapacity, 1.663 + int32_t options, 1.664 + UParseError* parseError, 1.665 + UErrorCode* status ){ 1.666 + 1.667 + // check error status 1.668 + if(status == NULL || U_FAILURE(*status)){ 1.669 + return 0; 1.670 + } 1.671 + 1.672 + //check arguments 1.673 + if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { 1.674 + *status=U_ILLEGAL_ARGUMENT_ERROR; 1.675 + return 0; 1.676 + } 1.677 + 1.678 + UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; 1.679 + UChar *b1 = b1Stack, *b2 = b2Stack; 1.680 + int32_t b1Len, b2Len=0, 1.681 + b1Capacity = MAX_STACK_BUFFER_SIZE , 1.682 + b2Capacity = MAX_STACK_BUFFER_SIZE; 1.683 + uint16_t result; 1.684 + int32_t b2Index = 0; 1.685 + UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 1.686 + UBool leftToRight=FALSE, rightToLeft=FALSE; 1.687 + int32_t rtlPos =-1, ltrPos =-1; 1.688 + 1.689 + //get the string length 1.690 + if(srcLength == -1){ 1.691 + srcLength = u_strlen(src); 1.692 + } 1.693 + // map 1.694 + b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status); 1.695 + 1.696 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.697 + // redo processing of string 1.698 + /* we do not have enough room so grow the buffer*/ 1.699 + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1.700 + if(b1==NULL){ 1.701 + *status = U_MEMORY_ALLOCATION_ERROR; 1.702 + goto CLEANUP; 1.703 + } 1.704 + 1.705 + *status = U_ZERO_ERROR; // reset error 1.706 + 1.707 + b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status); 1.708 + 1.709 + } 1.710 + 1.711 + // normalize 1.712 + if(profile->doNFKC == TRUE){ 1.713 + b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); 1.714 + 1.715 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.716 + // redo processing of string 1.717 + /* we do not have enough room so grow the buffer*/ 1.718 + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1.719 + if(b2==NULL){ 1.720 + *status = U_MEMORY_ALLOCATION_ERROR; 1.721 + goto CLEANUP; 1.722 + } 1.723 + 1.724 + *status = U_ZERO_ERROR; // reset error 1.725 + 1.726 + b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); 1.727 + 1.728 + } 1.729 + 1.730 + }else{ 1.731 + b2 = b1; 1.732 + b2Len = b1Len; 1.733 + } 1.734 + 1.735 + 1.736 + if(U_FAILURE(*status)){ 1.737 + goto CLEANUP; 1.738 + } 1.739 + 1.740 + UChar32 ch; 1.741 + UStringPrepType type; 1.742 + int16_t value; 1.743 + UBool isIndex; 1.744 + 1.745 + // Prohibit and checkBiDi in one pass 1.746 + for(b2Index=0; b2Index<b2Len;){ 1.747 + 1.748 + ch = 0; 1.749 + 1.750 + U16_NEXT(b2, b2Index, b2Len, ch); 1.751 + 1.752 + UTRIE_GET16(&profile->sprepTrie,ch,result); 1.753 + 1.754 + type = getValues(result, value, isIndex); 1.755 + 1.756 + if( type == USPREP_PROHIBITED || 1.757 + ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 1.758 + ){ 1.759 + *status = U_STRINGPREP_PROHIBITED_ERROR; 1.760 + uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 1.761 + goto CLEANUP; 1.762 + } 1.763 + 1.764 + if(profile->checkBiDi) { 1.765 + direction = ubidi_getClass(profile->bdp, ch); 1.766 + if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 1.767 + firstCharDir = direction; 1.768 + } 1.769 + if(direction == U_LEFT_TO_RIGHT){ 1.770 + leftToRight = TRUE; 1.771 + ltrPos = b2Index-1; 1.772 + } 1.773 + if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 1.774 + rightToLeft = TRUE; 1.775 + rtlPos = b2Index-1; 1.776 + } 1.777 + } 1.778 + } 1.779 + if(profile->checkBiDi == TRUE){ 1.780 + // satisfy 2 1.781 + if( leftToRight == TRUE && rightToLeft == TRUE){ 1.782 + *status = U_STRINGPREP_CHECK_BIDI_ERROR; 1.783 + uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 1.784 + goto CLEANUP; 1.785 + } 1.786 + 1.787 + //satisfy 3 1.788 + if( rightToLeft == TRUE && 1.789 + !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 1.790 + (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 1.791 + ){ 1.792 + *status = U_STRINGPREP_CHECK_BIDI_ERROR; 1.793 + uprv_syntaxError(b2, rtlPos, b2Len, parseError); 1.794 + return FALSE; 1.795 + } 1.796 + } 1.797 + if(b2Len>0 && b2Len <= destCapacity){ 1.798 + uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); 1.799 + } 1.800 + 1.801 +CLEANUP: 1.802 + if(b1!=b1Stack){ 1.803 + uprv_free(b1); 1.804 + b1=NULL; 1.805 + } 1.806 + 1.807 + if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ 1.808 + uprv_free(b2); 1.809 + b2=NULL; 1.810 + } 1.811 + return u_terminateUChars(dest, destCapacity, b2Len, status); 1.812 +} 1.813 + 1.814 + 1.815 +/* data swapping ------------------------------------------------------------ */ 1.816 + 1.817 +U_CAPI int32_t U_EXPORT2 1.818 +usprep_swap(const UDataSwapper *ds, 1.819 + const void *inData, int32_t length, void *outData, 1.820 + UErrorCode *pErrorCode) { 1.821 + const UDataInfo *pInfo; 1.822 + int32_t headerSize; 1.823 + 1.824 + const uint8_t *inBytes; 1.825 + uint8_t *outBytes; 1.826 + 1.827 + const int32_t *inIndexes; 1.828 + int32_t indexes[16]; 1.829 + 1.830 + int32_t i, offset, count, size; 1.831 + 1.832 + /* udata_swapDataHeader checks the arguments */ 1.833 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.834 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.835 + return 0; 1.836 + } 1.837 + 1.838 + /* check data format and format version */ 1.839 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.840 + if(!( 1.841 + pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 1.842 + pInfo->dataFormat[1]==0x50 && 1.843 + pInfo->dataFormat[2]==0x52 && 1.844 + pInfo->dataFormat[3]==0x50 && 1.845 + pInfo->formatVersion[0]==3 1.846 + )) { 1.847 + udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 1.848 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.849 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.850 + pInfo->formatVersion[0]); 1.851 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.852 + return 0; 1.853 + } 1.854 + 1.855 + inBytes=(const uint8_t *)inData+headerSize; 1.856 + outBytes=(uint8_t *)outData+headerSize; 1.857 + 1.858 + inIndexes=(const int32_t *)inBytes; 1.859 + 1.860 + if(length>=0) { 1.861 + length-=headerSize; 1.862 + if(length<16*4) { 1.863 + udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 1.864 + length); 1.865 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.866 + return 0; 1.867 + } 1.868 + } 1.869 + 1.870 + /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 1.871 + for(i=0; i<16; ++i) { 1.872 + indexes[i]=udata_readInt32(ds, inIndexes[i]); 1.873 + } 1.874 + 1.875 + /* calculate the total length of the data */ 1.876 + size= 1.877 + 16*4+ /* size of indexes[] */ 1.878 + indexes[_SPREP_INDEX_TRIE_SIZE]+ 1.879 + indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 1.880 + 1.881 + if(length>=0) { 1.882 + if(length<size) { 1.883 + udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 1.884 + length); 1.885 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.886 + return 0; 1.887 + } 1.888 + 1.889 + /* copy the data for inaccessible bytes */ 1.890 + if(inBytes!=outBytes) { 1.891 + uprv_memcpy(outBytes, inBytes, size); 1.892 + } 1.893 + 1.894 + offset=0; 1.895 + 1.896 + /* swap the int32_t indexes[] */ 1.897 + count=16*4; 1.898 + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 1.899 + offset+=count; 1.900 + 1.901 + /* swap the UTrie */ 1.902 + count=indexes[_SPREP_INDEX_TRIE_SIZE]; 1.903 + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.904 + offset+=count; 1.905 + 1.906 + /* swap the uint16_t mappingTable[] */ 1.907 + count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 1.908 + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.909 + offset+=count; 1.910 + } 1.911 + 1.912 + return headerSize+size; 1.913 +} 1.914 + 1.915 +#endif /* #if !UCONFIG_NO_IDNA */