Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_IDNA
21 #include "unicode/usprep.h"
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "ubidi_props.h"
37 U_NAMESPACE_USE
39 U_CDECL_BEGIN
41 /*
42 Static cache for already opened StringPrep profiles
43 */
44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
45 static icu::UInitOnce gSharedDataInitOnce;
47 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
49 /* format version of spp file */
50 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
52 /* the Unicode version of the sprep data */
53 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
/*
 * Data file names of the predefined profiles.
 * usprep_openByType() indexes this table with the numeric value of a
 * UStringPrepProfileType, so the order must stay aligned with that enum.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
73 static UBool U_CALLCONV
74 isSPrepAcceptable(void * /* context */,
75 const char * /* type */,
76 const char * /* name */,
77 const UDataInfo *pInfo) {
78 if(
79 pInfo->size>=20 &&
80 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
81 pInfo->charsetFamily==U_CHARSET_FAMILY &&
82 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
83 pInfo->dataFormat[1]==0x50 &&
84 pInfo->dataFormat[2]==0x52 &&
85 pInfo->dataFormat[3]==0x50 &&
86 pInfo->formatVersion[0]==3 &&
87 pInfo->formatVersion[2]==UTRIE_SHIFT &&
88 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
89 ) {
90 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
91 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
92 return TRUE;
93 } else {
94 return FALSE;
95 }
96 }
98 static int32_t U_CALLCONV
99 getSPrepFoldingOffset(uint32_t data) {
101 return (int32_t)data;
103 }
105 /* hashes an entry */
106 static int32_t U_CALLCONV
107 hashEntry(const UHashTok parm) {
108 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
109 UHashTok namekey, pathkey;
110 namekey.pointer = b->name;
111 pathkey.pointer = b->path;
112 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
113 }
115 /* compares two entries */
116 static UBool U_CALLCONV
117 compareEntries(const UHashTok p1, const UHashTok p2) {
118 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
119 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
120 UHashTok name1, name2, path1, path2;
121 name1.pointer = b1->name;
122 name2.pointer = b2->name;
123 path1.pointer = b1->path;
124 path2.pointer = b2->path;
125 return ((UBool)(uhash_compareChars(name1, name2) &
126 uhash_compareChars(path1, path2)));
127 }
129 static void
130 usprep_unload(UStringPrepProfile* data){
131 udata_close(data->sprepData);
132 }
134 static int32_t
135 usprep_internal_flushCache(UBool noRefCount){
136 UStringPrepProfile *profile = NULL;
137 UStringPrepKey *key = NULL;
138 int32_t pos = -1;
139 int32_t deletedNum = 0;
140 const UHashElement *e;
142 /*
143 * if shared data hasn't even been lazy evaluated yet
144 * return 0
145 */
146 umtx_lock(&usprepMutex);
147 if (SHARED_DATA_HASHTABLE == NULL) {
148 umtx_unlock(&usprepMutex);
149 return 0;
150 }
152 /*creates an enumeration to iterate through every element in the table */
153 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
154 {
155 profile = (UStringPrepProfile *) e->value.pointer;
156 key = (UStringPrepKey *) e->key.pointer;
158 if ((noRefCount== FALSE && profile->refCount == 0) ||
159 noRefCount== TRUE) {
160 deletedNum++;
161 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
163 /* unload the data */
164 usprep_unload(profile);
166 if(key->name != NULL) {
167 uprv_free(key->name);
168 key->name=NULL;
169 }
170 if(key->path != NULL) {
171 uprv_free(key->path);
172 key->path=NULL;
173 }
174 uprv_free(profile);
175 uprv_free(key);
176 }
178 }
179 umtx_unlock(&usprepMutex);
181 return deletedNum;
182 }
184 /* Works just like ucnv_flushCache()
185 static int32_t
186 usprep_flushCache(){
187 return usprep_internal_flushCache(FALSE);
188 }
189 */
191 static UBool U_CALLCONV usprep_cleanup(void){
192 if (SHARED_DATA_HASHTABLE != NULL) {
193 usprep_internal_flushCache(TRUE);
194 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
195 uhash_close(SHARED_DATA_HASHTABLE);
196 SHARED_DATA_HASHTABLE = NULL;
197 }
198 }
199 gSharedDataInitOnce.reset();
200 return (SHARED_DATA_HASHTABLE == NULL);
201 }
202 U_CDECL_END
205 /** Initializes the cache for resources */
206 static void U_CALLCONV
207 createCache(UErrorCode &status) {
208 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
209 if (U_FAILURE(status)) {
210 SHARED_DATA_HASHTABLE = NULL;
211 }
212 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
213 }
215 static void
216 initCache(UErrorCode *status) {
217 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
218 }
220 static UBool U_CALLCONV
221 loadData(UStringPrepProfile* profile,
222 const char* path,
223 const char* name,
224 const char* type,
225 UErrorCode* errorCode) {
226 /* load Unicode SPREP data from file */
227 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
228 UDataMemory *dataMemory;
229 const int32_t *p=NULL;
230 const uint8_t *pb;
231 UVersionInfo normUnicodeVersion;
232 int32_t normUniVer, sprepUniVer, normCorrVer;
234 if(errorCode==NULL || U_FAILURE(*errorCode)) {
235 return 0;
236 }
238 /* open the data outside the mutex block */
239 //TODO: change the path
240 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
241 if(U_FAILURE(*errorCode)) {
242 return FALSE;
243 }
245 p=(const int32_t *)udata_getMemory(dataMemory);
246 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
247 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
248 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
251 if(U_FAILURE(*errorCode)) {
252 udata_close(dataMemory);
253 return FALSE;
254 }
256 /* in the mutex block, set the data for this process */
257 umtx_lock(&usprepMutex);
258 if(profile->sprepData==NULL) {
259 profile->sprepData=dataMemory;
260 dataMemory=NULL;
261 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
262 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
263 } else {
264 p=(const int32_t *)udata_getMemory(profile->sprepData);
265 }
266 umtx_unlock(&usprepMutex);
267 /* initialize some variables */
268 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
270 u_getUnicodeVersion(normUnicodeVersion);
271 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
272 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
273 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
274 (dataVersion[2] << 8 ) + (dataVersion[3]);
275 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
277 if(U_FAILURE(*errorCode)){
278 udata_close(dataMemory);
279 return FALSE;
280 }
281 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
282 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
283 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
284 ){
285 *errorCode = U_INVALID_FORMAT_ERROR;
286 udata_close(dataMemory);
287 return FALSE;
288 }
289 profile->isDataLoaded = TRUE;
291 /* if a different thread set it first, then close the extra data */
292 if(dataMemory!=NULL) {
293 udata_close(dataMemory); /* NULL if it was set correctly */
294 }
297 return profile->isDataLoaded;
298 }
300 static UStringPrepProfile*
301 usprep_getProfile(const char* path,
302 const char* name,
303 UErrorCode *status){
305 UStringPrepProfile* profile = NULL;
307 initCache(status);
309 if(U_FAILURE(*status)){
310 return NULL;
311 }
313 UStringPrepKey stackKey;
314 /*
315 * const is cast way to save malloc, strcpy and free calls
316 * we use the passed in pointers for fetching the data from the
317 * hash table which is safe
318 */
319 stackKey.name = (char*) name;
320 stackKey.path = (char*) path;
322 /* fetch the data from the cache */
323 umtx_lock(&usprepMutex);
324 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
325 if(profile != NULL) {
326 profile->refCount++;
327 }
328 umtx_unlock(&usprepMutex);
330 if(profile == NULL) {
331 /* else load the data and put the data in the cache */
332 LocalMemory<UStringPrepProfile> newProfile;
333 if(newProfile.allocateInsteadAndReset() == NULL) {
334 *status = U_MEMORY_ALLOCATION_ERROR;
335 return NULL;
336 }
338 /* load the data */
339 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
340 return NULL;
341 }
343 /* get the options */
344 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
345 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
347 if(newProfile->checkBiDi) {
348 newProfile->bdp = ubidi_getSingleton();
349 }
351 LocalMemory<UStringPrepKey> key;
352 LocalMemory<char> keyName;
353 LocalMemory<char> keyPath;
354 if( key.allocateInsteadAndReset() == NULL ||
355 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
356 (path != NULL &&
357 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
358 ) {
359 *status = U_MEMORY_ALLOCATION_ERROR;
360 usprep_unload(newProfile.getAlias());
361 return NULL;
362 }
364 umtx_lock(&usprepMutex);
365 // If another thread already inserted the same key/value, refcount and cleanup our thread data
366 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
367 if(profile != NULL) {
368 profile->refCount++;
369 usprep_unload(newProfile.getAlias());
370 }
371 else {
372 /* initialize the key members */
373 key->name = keyName.orphan();
374 uprv_strcpy(key->name, name);
375 if(path != NULL){
376 key->path = keyPath.orphan();
377 uprv_strcpy(key->path, path);
378 }
379 profile = newProfile.orphan();
381 /* add the data object to the cache */
382 profile->refCount = 1;
383 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
384 }
385 umtx_unlock(&usprepMutex);
386 }
388 return profile;
389 }
391 U_CAPI UStringPrepProfile* U_EXPORT2
392 usprep_open(const char* path,
393 const char* name,
394 UErrorCode* status){
396 if(status == NULL || U_FAILURE(*status)){
397 return NULL;
398 }
400 /* initialize the profile struct members */
401 return usprep_getProfile(path,name,status);
402 }
404 U_CAPI UStringPrepProfile* U_EXPORT2
405 usprep_openByType(UStringPrepProfileType type,
406 UErrorCode* status) {
407 if(status == NULL || U_FAILURE(*status)){
408 return NULL;
409 }
410 int32_t index = (int32_t)type;
411 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
412 *status = U_ILLEGAL_ARGUMENT_ERROR;
413 return NULL;
414 }
415 return usprep_open(NULL, PROFILE_NAMES[index], status);
416 }
418 U_CAPI void U_EXPORT2
419 usprep_close(UStringPrepProfile* profile){
420 if(profile==NULL){
421 return;
422 }
424 umtx_lock(&usprepMutex);
425 /* decrement the ref count*/
426 if(profile->refCount > 0){
427 profile->refCount--;
428 }
429 umtx_unlock(&usprepMutex);
431 }
433 U_CFUNC void
434 uprv_syntaxError(const UChar* rules,
435 int32_t pos,
436 int32_t rulesLen,
437 UParseError* parseError){
438 if(parseError == NULL){
439 return;
440 }
441 parseError->offset = pos;
442 parseError->line = 0 ; // we are not using line numbers
444 // for pre-context
445 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
446 int32_t limit = pos;
448 u_memcpy(parseError->preContext,rules+start,limit-start);
449 //null terminate the buffer
450 parseError->preContext[limit-start] = 0;
452 // for post-context; include error rules[pos]
453 start = pos;
454 limit = start + (U_PARSE_CONTEXT_LEN-1);
455 if (limit > rulesLen) {
456 limit = rulesLen;
457 }
458 if (start < rulesLen) {
459 u_memcpy(parseError->postContext,rules+start,limit-start);
460 }
461 //null terminate the buffer
462 parseError->postContext[limit-start]= 0;
463 }
466 static inline UStringPrepType
467 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
469 UStringPrepType type;
470 if(trieWord == 0){
471 /*
472 * Initial value stored in the mapping table
473 * just return USPREP_TYPE_LIMIT .. so that
474 * the source codepoint is copied to the destination
475 */
476 type = USPREP_TYPE_LIMIT;
477 isIndex =FALSE;
478 value = 0;
479 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
480 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
481 isIndex =FALSE;
482 value = 0;
483 }else{
484 /* get the type */
485 type = USPREP_MAP;
486 /* ascertain if the value is index or delta */
487 if(trieWord & 0x02){
488 isIndex = TRUE;
489 value = trieWord >> 2; //mask off the lower 2 bits and shift
490 }else{
491 isIndex = FALSE;
492 value = (int16_t)trieWord;
493 value = (value >> 2);
494 }
496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
497 type = USPREP_DELETE;
498 isIndex =FALSE;
499 value = 0;
500 }
501 }
502 return type;
503 }
507 static int32_t
508 usprep_map( const UStringPrepProfile* profile,
509 const UChar* src, int32_t srcLength,
510 UChar* dest, int32_t destCapacity,
511 int32_t options,
512 UParseError* parseError,
513 UErrorCode* status ){
515 uint16_t result;
516 int32_t destIndex=0;
517 int32_t srcIndex;
518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519 UStringPrepType type;
520 int16_t value;
521 UBool isIndex;
522 const int32_t* indexes = profile->indexes;
524 // no error checking the caller check for error and arguments
525 // no string length check the caller finds out the string length
527 for(srcIndex=0;srcIndex<srcLength;){
528 UChar32 ch;
530 U16_NEXT(src,srcIndex,srcLength,ch);
532 result=0;
534 UTRIE_GET16(&profile->sprepTrie,ch,result);
536 type = getValues(result, value, isIndex);
538 // check if the source codepoint is unassigned
539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542 *status = U_STRINGPREP_UNASSIGNED_ERROR;
543 return 0;
545 }else if(type == USPREP_MAP){
547 int32_t index, length;
549 if(isIndex){
550 index = value;
551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553 length = 1;
554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556 length = 2;
557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559 length = 3;
560 }else{
561 length = profile->mappingData[index++];
563 }
565 /* copy mapping to destination */
566 for(int32_t i=0; i< length; i++){
567 if(destIndex < destCapacity ){
568 dest[destIndex] = profile->mappingData[index+i];
569 }
570 destIndex++; /* for pre-flighting */
571 }
572 continue;
573 }else{
574 // subtract the delta to arrive at the code point
575 ch -= value;
576 }
578 }else if(type==USPREP_DELETE){
579 // just consume the codepoint and contine
580 continue;
581 }
582 //copy the code point into destination
583 if(ch <= 0xFFFF){
584 if(destIndex < destCapacity ){
585 dest[destIndex] = (UChar)ch;
586 }
587 destIndex++;
588 }else{
589 if(destIndex+1 < destCapacity ){
590 dest[destIndex] = U16_LEAD(ch);
591 dest[destIndex+1] = U16_TRAIL(ch);
592 }
593 destIndex +=2;
594 }
596 }
598 return u_terminateUChars(dest, destCapacity, destIndex, status);
599 }
602 static int32_t
603 usprep_normalize( const UChar* src, int32_t srcLength,
604 UChar* dest, int32_t destCapacity,
605 UErrorCode* status ){
606 return unorm_normalize(
607 src, srcLength,
608 UNORM_NFKC, UNORM_UNICODE_3_2,
609 dest, destCapacity,
610 status);
611 }
614 /*
615 1) Map -- For each character in the input, check if it has a mapping
616 and, if so, replace it with its mapping.
618 2) Normalize -- Possibly normalize the result of step 1 using Unicode
619 normalization.
621 3) Prohibit -- Check for any characters that are not allowed in the
622 output. If any are found, return an error.
624 4) Check bidi -- Possibly check for right-to-left characters, and if
625 any are found, make sure that the whole string satisfies the
626 requirements for bidirectional strings. If the string does not
627 satisfy the requirements for bidirectional strings, return an
628 error.
629 [Unicode3.2] defines several bidirectional categories; each character
630 has one bidirectional category assigned to it. For the purposes of
631 the requirements below, an "RandALCat character" is a character that
632 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
633 is a character that has Unicode bidirectional category "L". Note
636 that there are many characters which fall in neither of the above
637 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
638 this because they have bidirectional category "EN".
640 In any profile that specifies bidirectional character handling, all
641 three of the following requirements MUST be met:
643 1) The characters in section 5.8 MUST be prohibited.
645 2) If a string contains any RandALCat character, the string MUST NOT
646 contain any LCat character.
648 3) If a string contains any RandALCat character, a RandALCat
649 character MUST be the first character of the string, and a
650 RandALCat character MUST be the last character of the string.
651 */
653 #define MAX_STACK_BUFFER_SIZE 300
656 U_CAPI int32_t U_EXPORT2
657 usprep_prepare( const UStringPrepProfile* profile,
658 const UChar* src, int32_t srcLength,
659 UChar* dest, int32_t destCapacity,
660 int32_t options,
661 UParseError* parseError,
662 UErrorCode* status ){
664 // check error status
665 if(status == NULL || U_FAILURE(*status)){
666 return 0;
667 }
669 //check arguments
670 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
671 *status=U_ILLEGAL_ARGUMENT_ERROR;
672 return 0;
673 }
675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
676 UChar *b1 = b1Stack, *b2 = b2Stack;
677 int32_t b1Len, b2Len=0,
678 b1Capacity = MAX_STACK_BUFFER_SIZE ,
679 b2Capacity = MAX_STACK_BUFFER_SIZE;
680 uint16_t result;
681 int32_t b2Index = 0;
682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
683 UBool leftToRight=FALSE, rightToLeft=FALSE;
684 int32_t rtlPos =-1, ltrPos =-1;
686 //get the string length
687 if(srcLength == -1){
688 srcLength = u_strlen(src);
689 }
690 // map
691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
693 if(*status == U_BUFFER_OVERFLOW_ERROR){
694 // redo processing of string
695 /* we do not have enough room so grow the buffer*/
696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
697 if(b1==NULL){
698 *status = U_MEMORY_ALLOCATION_ERROR;
699 goto CLEANUP;
700 }
702 *status = U_ZERO_ERROR; // reset error
704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
706 }
708 // normalize
709 if(profile->doNFKC == TRUE){
710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
712 if(*status == U_BUFFER_OVERFLOW_ERROR){
713 // redo processing of string
714 /* we do not have enough room so grow the buffer*/
715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
716 if(b2==NULL){
717 *status = U_MEMORY_ALLOCATION_ERROR;
718 goto CLEANUP;
719 }
721 *status = U_ZERO_ERROR; // reset error
723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
725 }
727 }else{
728 b2 = b1;
729 b2Len = b1Len;
730 }
733 if(U_FAILURE(*status)){
734 goto CLEANUP;
735 }
737 UChar32 ch;
738 UStringPrepType type;
739 int16_t value;
740 UBool isIndex;
742 // Prohibit and checkBiDi in one pass
743 for(b2Index=0; b2Index<b2Len;){
745 ch = 0;
747 U16_NEXT(b2, b2Index, b2Len, ch);
749 UTRIE_GET16(&profile->sprepTrie,ch,result);
751 type = getValues(result, value, isIndex);
753 if( type == USPREP_PROHIBITED ||
754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
755 ){
756 *status = U_STRINGPREP_PROHIBITED_ERROR;
757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
758 goto CLEANUP;
759 }
761 if(profile->checkBiDi) {
762 direction = ubidi_getClass(profile->bdp, ch);
763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
764 firstCharDir = direction;
765 }
766 if(direction == U_LEFT_TO_RIGHT){
767 leftToRight = TRUE;
768 ltrPos = b2Index-1;
769 }
770 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
771 rightToLeft = TRUE;
772 rtlPos = b2Index-1;
773 }
774 }
775 }
776 if(profile->checkBiDi == TRUE){
777 // satisfy 2
778 if( leftToRight == TRUE && rightToLeft == TRUE){
779 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
781 goto CLEANUP;
782 }
784 //satisfy 3
785 if( rightToLeft == TRUE &&
786 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
787 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
788 ){
789 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
790 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
791 return FALSE;
792 }
793 }
794 if(b2Len>0 && b2Len <= destCapacity){
795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
796 }
798 CLEANUP:
799 if(b1!=b1Stack){
800 uprv_free(b1);
801 b1=NULL;
802 }
804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
805 uprv_free(b2);
806 b2=NULL;
807 }
808 return u_terminateUChars(dest, destCapacity, b2Len, status);
809 }
812 /* data swapping ------------------------------------------------------------ */
814 U_CAPI int32_t U_EXPORT2
815 usprep_swap(const UDataSwapper *ds,
816 const void *inData, int32_t length, void *outData,
817 UErrorCode *pErrorCode) {
818 const UDataInfo *pInfo;
819 int32_t headerSize;
821 const uint8_t *inBytes;
822 uint8_t *outBytes;
824 const int32_t *inIndexes;
825 int32_t indexes[16];
827 int32_t i, offset, count, size;
829 /* udata_swapDataHeader checks the arguments */
830 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
831 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
832 return 0;
833 }
835 /* check data format and format version */
836 pInfo=(const UDataInfo *)((const char *)inData+4);
837 if(!(
838 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
839 pInfo->dataFormat[1]==0x50 &&
840 pInfo->dataFormat[2]==0x52 &&
841 pInfo->dataFormat[3]==0x50 &&
842 pInfo->formatVersion[0]==3
843 )) {
844 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
845 pInfo->dataFormat[0], pInfo->dataFormat[1],
846 pInfo->dataFormat[2], pInfo->dataFormat[3],
847 pInfo->formatVersion[0]);
848 *pErrorCode=U_UNSUPPORTED_ERROR;
849 return 0;
850 }
852 inBytes=(const uint8_t *)inData+headerSize;
853 outBytes=(uint8_t *)outData+headerSize;
855 inIndexes=(const int32_t *)inBytes;
857 if(length>=0) {
858 length-=headerSize;
859 if(length<16*4) {
860 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
861 length);
862 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
863 return 0;
864 }
865 }
867 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
868 for(i=0; i<16; ++i) {
869 indexes[i]=udata_readInt32(ds, inIndexes[i]);
870 }
872 /* calculate the total length of the data */
873 size=
874 16*4+ /* size of indexes[] */
875 indexes[_SPREP_INDEX_TRIE_SIZE]+
876 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
878 if(length>=0) {
879 if(length<size) {
880 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
881 length);
882 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
883 return 0;
884 }
886 /* copy the data for inaccessible bytes */
887 if(inBytes!=outBytes) {
888 uprv_memcpy(outBytes, inBytes, size);
889 }
891 offset=0;
893 /* swap the int32_t indexes[] */
894 count=16*4;
895 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
896 offset+=count;
898 /* swap the UTrie */
899 count=indexes[_SPREP_INDEX_TRIE_SIZE];
900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
901 offset+=count;
903 /* swap the uint16_t mappingTable[] */
904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
906 offset+=count;
907 }
909 return headerSize+size;
910 }
912 #endif /* #if !UCONFIG_NO_IDNA */