intl/icu/source/common/usprep.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  *******************************************************************************
     3  *
     4  *   Copyright (C) 2003-2013, International Business Machines
     5  *   Corporation and others.  All Rights Reserved.
     6  *
     7  *******************************************************************************
     8  *   file name:  usprep.cpp
     9  *   encoding:   US-ASCII
    10  *   tab size:   8 (not used)
    11  *   indentation:4
    12  *
    13  *   created on: 2003jul2
    14  *   created by: Ram Viswanadha
    15  */
    17 #include "unicode/utypes.h"
    19 #if !UCONFIG_NO_IDNA
    21 #include "unicode/usprep.h"
    23 #include "unicode/unorm.h"
    24 #include "unicode/ustring.h"
    25 #include "unicode/uchar.h"
    26 #include "unicode/uversion.h"
    27 #include "umutex.h"
    28 #include "cmemory.h"
    29 #include "sprpimpl.h"
    30 #include "ustr_imp.h"
    31 #include "uhash.h"
    32 #include "cstring.h"
    33 #include "udataswp.h"
    34 #include "ucln_cmn.h"
    35 #include "ubidi_props.h"
    37 U_NAMESPACE_USE
    39 U_CDECL_BEGIN
    41 /*
    42 Static cache for already opened StringPrep profiles
    43 */
    44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
    45 static icu::UInitOnce gSharedDataInitOnce;
    47 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
    49 /* format version of spp file */
    50 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
    52 /* the Unicode version of the sprep data */
    53 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
    /*
     * Data file names of the built-in profiles.
     * The entries must stay aligned with UStringPrepProfileType, because
     * usprep_openByType() uses the enum value as the index into this table.
     * Several enum values intentionally share the "rfc3491" data.
     */
    static const char * const PROFILE_NAMES[] = {
        "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
        "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
        "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
        "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
        "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
        "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
        "rfc3722",      /* USPREP_RFC3722_ISCSI */
        "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
        "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
        "rfc4011",      /* USPREP_RFC4011_MIB */
        "rfc4013",      /* USPREP_RFC4013_SASLPREP */
        "rfc4505",      /* USPREP_RFC4505_TRACE */
        "rfc4518",      /* USPREP_RFC4518_LDAP */
        "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
    };
    73 static UBool U_CALLCONV
    74 isSPrepAcceptable(void * /* context */,
    75              const char * /* type */, 
    76              const char * /* name */,
    77              const UDataInfo *pInfo) {
    78     if(
    79         pInfo->size>=20 &&
    80         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    81         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    82         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
    83         pInfo->dataFormat[1]==0x50 &&
    84         pInfo->dataFormat[2]==0x52 &&
    85         pInfo->dataFormat[3]==0x50 &&
    86         pInfo->formatVersion[0]==3 &&
    87         pInfo->formatVersion[2]==UTRIE_SHIFT &&
    88         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
    89     ) {
    90         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
    91         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
    92         return TRUE;
    93     } else {
    94         return FALSE;
    95     }
    96 }
    98 static int32_t U_CALLCONV
    99 getSPrepFoldingOffset(uint32_t data) {
   101     return (int32_t)data;
   103 }
   105 /* hashes an entry  */
   106 static int32_t U_CALLCONV 
   107 hashEntry(const UHashTok parm) {
   108     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
   109     UHashTok namekey, pathkey;
   110     namekey.pointer = b->name;
   111     pathkey.pointer = b->path;
   112     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
   113 }
   115 /* compares two entries */
   116 static UBool U_CALLCONV 
   117 compareEntries(const UHashTok p1, const UHashTok p2) {
   118     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
   119     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
   120     UHashTok name1, name2, path1, path2;
   121     name1.pointer = b1->name;
   122     name2.pointer = b2->name;
   123     path1.pointer = b1->path;
   124     path2.pointer = b2->path;
   125     return ((UBool)(uhash_compareChars(name1, name2) & 
   126         uhash_compareChars(path1, path2)));
   127 }
   129 static void 
   130 usprep_unload(UStringPrepProfile* data){
   131     udata_close(data->sprepData);
   132 }
   134 static int32_t 
   135 usprep_internal_flushCache(UBool noRefCount){
   136     UStringPrepProfile *profile = NULL;
   137     UStringPrepKey  *key  = NULL;
   138     int32_t pos = -1;
   139     int32_t deletedNum = 0;
   140     const UHashElement *e;
   142     /*
   143      * if shared data hasn't even been lazy evaluated yet
   144      * return 0
   145      */
   146     umtx_lock(&usprepMutex);
   147     if (SHARED_DATA_HASHTABLE == NULL) {
   148         umtx_unlock(&usprepMutex);
   149         return 0;
   150     }
   152     /*creates an enumeration to iterate through every element in the table */
   153     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
   154     {
   155         profile = (UStringPrepProfile *) e->value.pointer;
   156         key  = (UStringPrepKey *) e->key.pointer;
   158         if ((noRefCount== FALSE && profile->refCount == 0) || 
   159              noRefCount== TRUE) {
   160             deletedNum++;
   161             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
   163             /* unload the data */
   164             usprep_unload(profile);
   166             if(key->name != NULL) {
   167                 uprv_free(key->name);
   168                 key->name=NULL;
   169             }
   170             if(key->path != NULL) {
   171                 uprv_free(key->path);
   172                 key->path=NULL;
   173             }
   174             uprv_free(profile);
   175             uprv_free(key);
   176         }
   178     }
   179     umtx_unlock(&usprepMutex);
   181     return deletedNum;
   182 }
   184 /* Works just like ucnv_flushCache() 
   185 static int32_t 
   186 usprep_flushCache(){
   187     return usprep_internal_flushCache(FALSE);
   188 }
   189 */
   191 static UBool U_CALLCONV usprep_cleanup(void){
   192     if (SHARED_DATA_HASHTABLE != NULL) {
   193         usprep_internal_flushCache(TRUE);
   194         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
   195             uhash_close(SHARED_DATA_HASHTABLE);
   196             SHARED_DATA_HASHTABLE = NULL;
   197         }
   198     }
   199     gSharedDataInitOnce.reset();
   200     return (SHARED_DATA_HASHTABLE == NULL);
   201 }
   202 U_CDECL_END
   205 /** Initializes the cache for resources */
   206 static void U_CALLCONV
   207 createCache(UErrorCode &status) {
   208     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
   209     if (U_FAILURE(status)) {
   210         SHARED_DATA_HASHTABLE = NULL;
   211     }
   212     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
   213 }
   215 static void 
   216 initCache(UErrorCode *status) {
   217     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
   218 }
   220 static UBool U_CALLCONV
   221 loadData(UStringPrepProfile* profile, 
   222          const char* path, 
   223          const char* name, 
   224          const char* type, 
   225          UErrorCode* errorCode) {
   226     /* load Unicode SPREP data from file */    
   227     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
   228     UDataMemory *dataMemory;
   229     const int32_t *p=NULL;
   230     const uint8_t *pb;
   231     UVersionInfo normUnicodeVersion;
   232     int32_t normUniVer, sprepUniVer, normCorrVer;
   234     if(errorCode==NULL || U_FAILURE(*errorCode)) {
   235         return 0;
   236     }
   238     /* open the data outside the mutex block */
   239     //TODO: change the path
   240     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
   241     if(U_FAILURE(*errorCode)) {
   242         return FALSE;
   243     }
   245     p=(const int32_t *)udata_getMemory(dataMemory);
   246     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
   247     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
   248     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
   251     if(U_FAILURE(*errorCode)) {
   252         udata_close(dataMemory);
   253         return FALSE;
   254     }
   256     /* in the mutex block, set the data for this process */
   257     umtx_lock(&usprepMutex);
   258     if(profile->sprepData==NULL) {
   259         profile->sprepData=dataMemory;
   260         dataMemory=NULL;
   261         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
   262         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
   263     } else {
   264         p=(const int32_t *)udata_getMemory(profile->sprepData);
   265     }
   266     umtx_unlock(&usprepMutex);
   267     /* initialize some variables */
   268     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
   270     u_getUnicodeVersion(normUnicodeVersion);
   271     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 
   272                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
   273     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 
   274                   (dataVersion[2] << 8 ) + (dataVersion[3]);
   275     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
   277     if(U_FAILURE(*errorCode)){
   278         udata_close(dataMemory);
   279         return FALSE;
   280     }
   281     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
   282         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
   283         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
   284       ){
   285         *errorCode = U_INVALID_FORMAT_ERROR;
   286         udata_close(dataMemory);
   287         return FALSE;
   288     }
   289     profile->isDataLoaded = TRUE;
   291     /* if a different thread set it first, then close the extra data */
   292     if(dataMemory!=NULL) {
   293         udata_close(dataMemory); /* NULL if it was set correctly */
   294     }
   297     return profile->isDataLoaded;
   298 }
   300 static UStringPrepProfile* 
   301 usprep_getProfile(const char* path, 
   302                   const char* name,
   303                   UErrorCode *status){
   305     UStringPrepProfile* profile = NULL;
   307     initCache(status);
   309     if(U_FAILURE(*status)){
   310         return NULL;
   311     }
   313     UStringPrepKey stackKey;
   314     /* 
   315      * const is cast way to save malloc, strcpy and free calls 
   316      * we use the passed in pointers for fetching the data from the 
   317      * hash table which is safe
   318      */
   319     stackKey.name = (char*) name;
   320     stackKey.path = (char*) path;
   322     /* fetch the data from the cache */
   323     umtx_lock(&usprepMutex);
   324     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
   325     if(profile != NULL) {
   326         profile->refCount++;
   327     }
   328     umtx_unlock(&usprepMutex);
   330     if(profile == NULL) {
   331         /* else load the data and put the data in the cache */
   332         LocalMemory<UStringPrepProfile> newProfile;
   333         if(newProfile.allocateInsteadAndReset() == NULL) {
   334             *status = U_MEMORY_ALLOCATION_ERROR;
   335             return NULL;
   336         }
   338         /* load the data */
   339         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
   340             return NULL;
   341         }
   343         /* get the options */
   344         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
   345         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
   347         if(newProfile->checkBiDi) {
   348             newProfile->bdp = ubidi_getSingleton();
   349         }
   351         LocalMemory<UStringPrepKey> key;
   352         LocalMemory<char> keyName;
   353         LocalMemory<char> keyPath;
   354         if( key.allocateInsteadAndReset() == NULL ||
   355             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
   356             (path != NULL &&
   357              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
   358          ) {
   359             *status = U_MEMORY_ALLOCATION_ERROR;
   360             usprep_unload(newProfile.getAlias());
   361             return NULL;
   362         }
   364         umtx_lock(&usprepMutex);
   365         // If another thread already inserted the same key/value, refcount and cleanup our thread data
   366         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
   367         if(profile != NULL) {
   368             profile->refCount++;
   369             usprep_unload(newProfile.getAlias());
   370         }
   371         else {
   372             /* initialize the key members */
   373             key->name = keyName.orphan();
   374             uprv_strcpy(key->name, name);
   375             if(path != NULL){
   376                 key->path = keyPath.orphan();
   377                 uprv_strcpy(key->path, path);
   378             }        
   379             profile = newProfile.orphan();
   381             /* add the data object to the cache */
   382             profile->refCount = 1;
   383             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
   384         }
   385         umtx_unlock(&usprepMutex);
   386     }
   388     return profile;
   389 }
   391 U_CAPI UStringPrepProfile* U_EXPORT2
   392 usprep_open(const char* path, 
   393             const char* name,
   394             UErrorCode* status){
   396     if(status == NULL || U_FAILURE(*status)){
   397         return NULL;
   398     }
   400     /* initialize the profile struct members */
   401     return usprep_getProfile(path,name,status);
   402 }
   404 U_CAPI UStringPrepProfile* U_EXPORT2
   405 usprep_openByType(UStringPrepProfileType type,
   406 				  UErrorCode* status) {
   407     if(status == NULL || U_FAILURE(*status)){
   408         return NULL;
   409     }
   410     int32_t index = (int32_t)type;
   411     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
   412         *status = U_ILLEGAL_ARGUMENT_ERROR;
   413         return NULL;
   414     }
   415     return usprep_open(NULL, PROFILE_NAMES[index], status);
   416 }
   418 U_CAPI void U_EXPORT2
   419 usprep_close(UStringPrepProfile* profile){
   420     if(profile==NULL){
   421         return;
   422     }
   424     umtx_lock(&usprepMutex);
   425     /* decrement the ref count*/
   426     if(profile->refCount > 0){
   427         profile->refCount--;
   428     }
   429     umtx_unlock(&usprepMutex);
   431 }
   433 U_CFUNC void 
   434 uprv_syntaxError(const UChar* rules, 
   435                  int32_t pos,
   436                  int32_t rulesLen,
   437                  UParseError* parseError){
   438     if(parseError == NULL){
   439         return;
   440     }
   441     parseError->offset = pos;
   442     parseError->line = 0 ; // we are not using line numbers 
   444     // for pre-context
   445     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
   446     int32_t limit = pos;
   448     u_memcpy(parseError->preContext,rules+start,limit-start);
   449     //null terminate the buffer
   450     parseError->preContext[limit-start] = 0;
   452     // for post-context; include error rules[pos]
   453     start = pos;
   454     limit = start + (U_PARSE_CONTEXT_LEN-1);
   455     if (limit > rulesLen) {
   456         limit = rulesLen;
   457     }
   458     if (start < rulesLen) {
   459         u_memcpy(parseError->postContext,rules+start,limit-start);
   460     }
   461     //null terminate the buffer
   462     parseError->postContext[limit-start]= 0;
   463 }
   466 static inline UStringPrepType
   467 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
   469     UStringPrepType type;
   470     if(trieWord == 0){
   471         /* 
   472          * Initial value stored in the mapping table 
   473          * just return USPREP_TYPE_LIMIT .. so that
   474          * the source codepoint is copied to the destination
   475          */
   476         type = USPREP_TYPE_LIMIT;
   477         isIndex =FALSE;
   478         value = 0;
   479     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
   480         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
   481         isIndex =FALSE;
   482         value = 0;
   483     }else{
   484         /* get the type */
   485         type = USPREP_MAP;
   486         /* ascertain if the value is index or delta */
   487         if(trieWord & 0x02){
   488             isIndex = TRUE;
   489             value = trieWord  >> 2; //mask off the lower 2 bits and shift
   490         }else{
   491             isIndex = FALSE;
   492             value = (int16_t)trieWord;
   493             value =  (value >> 2);
   494         }
   496         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
   497             type = USPREP_DELETE;
   498             isIndex =FALSE;
   499             value = 0;
   500         }
   501     }
   502     return type;
   503 }
   507 static int32_t 
   508 usprep_map(  const UStringPrepProfile* profile, 
   509              const UChar* src, int32_t srcLength, 
   510              UChar* dest, int32_t destCapacity,
   511              int32_t options,
   512              UParseError* parseError,
   513              UErrorCode* status ){
   515     uint16_t result;
   516     int32_t destIndex=0;
   517     int32_t srcIndex;
   518     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
   519     UStringPrepType type;
   520     int16_t value;
   521     UBool isIndex;
   522     const int32_t* indexes = profile->indexes;
   524     // no error checking the caller check for error and arguments
   525     // no string length check the caller finds out the string length
   527     for(srcIndex=0;srcIndex<srcLength;){
   528         UChar32 ch;
   530         U16_NEXT(src,srcIndex,srcLength,ch);
   532         result=0;
   534         UTRIE_GET16(&profile->sprepTrie,ch,result);
   536         type = getValues(result, value, isIndex);
   538         // check if the source codepoint is unassigned
   539         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
   541             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
   542             *status = U_STRINGPREP_UNASSIGNED_ERROR;
   543             return 0;
   545         }else if(type == USPREP_MAP){
   547             int32_t index, length;
   549             if(isIndex){
   550                 index = value;
   551                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
   552                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
   553                     length = 1;
   554                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
   555                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
   556                     length = 2;
   557                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
   558                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
   559                     length = 3;
   560                 }else{
   561                     length = profile->mappingData[index++];
   563                 }
   565                 /* copy mapping to destination */
   566                 for(int32_t i=0; i< length; i++){
   567                     if(destIndex < destCapacity  ){
   568                         dest[destIndex] = profile->mappingData[index+i];
   569                     }
   570                     destIndex++; /* for pre-flighting */
   571                 }  
   572                 continue;
   573             }else{
   574                 // subtract the delta to arrive at the code point
   575                 ch -= value;
   576             }
   578         }else if(type==USPREP_DELETE){
   579              // just consume the codepoint and contine
   580             continue;
   581         }
   582         //copy the code point into destination
   583         if(ch <= 0xFFFF){
   584             if(destIndex < destCapacity ){
   585                 dest[destIndex] = (UChar)ch;
   586             }
   587             destIndex++;
   588         }else{
   589             if(destIndex+1 < destCapacity ){
   590                 dest[destIndex]   = U16_LEAD(ch);
   591                 dest[destIndex+1] = U16_TRAIL(ch);
   592             }
   593             destIndex +=2;
   594         }
   596     }
   598     return u_terminateUChars(dest, destCapacity, destIndex, status);
   599 }
   602 static int32_t 
   603 usprep_normalize(   const UChar* src, int32_t srcLength, 
   604                     UChar* dest, int32_t destCapacity,
   605                     UErrorCode* status ){
   606     return unorm_normalize(
   607         src, srcLength,
   608         UNORM_NFKC, UNORM_UNICODE_3_2,
   609         dest, destCapacity,
   610         status);
   611 }
   614  /*
   615    1) Map -- For each character in the input, check if it has a mapping
   616       and, if so, replace it with its mapping.  
   618    2) Normalize -- Possibly normalize the result of step 1 using Unicode
   619       normalization. 
   621    3) Prohibit -- Check for any characters that are not allowed in the
   622       output.  If any are found, return an error.  
   624    4) Check bidi -- Possibly check for right-to-left characters, and if
   625       any are found, make sure that the whole string satisfies the
   626       requirements for bidirectional strings.  If the string does not
   627       satisfy the requirements for bidirectional strings, return an
   628       error.  
   629       [Unicode3.2] defines several bidirectional categories; each character
   630        has one bidirectional category assigned to it.  For the purposes of
   631        the requirements below, an "RandALCat character" is a character that
   632        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
   633        is a character that has Unicode bidirectional category "L".  Note
   636        that there are many characters which fall in neither of the above
   637        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
   638        this because they have bidirectional category "EN".
   640        In any profile that specifies bidirectional character handling, all
   641        three of the following requirements MUST be met:
   643        1) The characters in section 5.8 MUST be prohibited.
   645        2) If a string contains any RandALCat character, the string MUST NOT
   646           contain any LCat character.
   648        3) If a string contains any RandALCat character, a RandALCat
   649           character MUST be the first character of the string, and a
   650           RandALCat character MUST be the last character of the string.
   651 */
   653 #define MAX_STACK_BUFFER_SIZE 300
   656 U_CAPI int32_t U_EXPORT2
   657 usprep_prepare(   const UStringPrepProfile* profile,
   658                   const UChar* src, int32_t srcLength, 
   659                   UChar* dest, int32_t destCapacity,
   660                   int32_t options,
   661                   UParseError* parseError,
   662                   UErrorCode* status ){
   664     // check error status
   665     if(status == NULL || U_FAILURE(*status)){
   666         return 0;
   667     }
   669     //check arguments
   670     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
   671         *status=U_ILLEGAL_ARGUMENT_ERROR;
   672         return 0;
   673     }
   675     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
   676     UChar *b1 = b1Stack, *b2 = b2Stack;
   677     int32_t b1Len, b2Len=0,
   678             b1Capacity = MAX_STACK_BUFFER_SIZE , 
   679             b2Capacity = MAX_STACK_BUFFER_SIZE;
   680     uint16_t result;
   681     int32_t b2Index = 0;
   682     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
   683     UBool leftToRight=FALSE, rightToLeft=FALSE;
   684     int32_t rtlPos =-1, ltrPos =-1;
   686     //get the string length
   687     if(srcLength == -1){
   688         srcLength = u_strlen(src);
   689     }
   690     // map
   691     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
   693     if(*status == U_BUFFER_OVERFLOW_ERROR){
   694         // redo processing of string
   695         /* we do not have enough room so grow the buffer*/
   696         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   697         if(b1==NULL){
   698             *status = U_MEMORY_ALLOCATION_ERROR;
   699             goto CLEANUP;
   700         }
   702         *status = U_ZERO_ERROR; // reset error
   704         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
   706     }
   708     // normalize
   709     if(profile->doNFKC == TRUE){
   710         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
   712         if(*status == U_BUFFER_OVERFLOW_ERROR){
   713             // redo processing of string
   714             /* we do not have enough room so grow the buffer*/
   715             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
   716             if(b2==NULL){
   717                 *status = U_MEMORY_ALLOCATION_ERROR;
   718                 goto CLEANUP;
   719             }
   721             *status = U_ZERO_ERROR; // reset error
   723             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
   725         }
   727     }else{
   728         b2 = b1;
   729         b2Len = b1Len;
   730     }
   733     if(U_FAILURE(*status)){
   734         goto CLEANUP;
   735     }
   737     UChar32 ch;
   738     UStringPrepType type;
   739     int16_t value;
   740     UBool isIndex;
   742     // Prohibit and checkBiDi in one pass
   743     for(b2Index=0; b2Index<b2Len;){
   745         ch = 0;
   747         U16_NEXT(b2, b2Index, b2Len, ch);
   749         UTRIE_GET16(&profile->sprepTrie,ch,result);
   751         type = getValues(result, value, isIndex);
   753         if( type == USPREP_PROHIBITED || 
   754             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
   755            ){
   756             *status = U_STRINGPREP_PROHIBITED_ERROR;
   757             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
   758             goto CLEANUP;
   759         }
   761         if(profile->checkBiDi) {
   762             direction = ubidi_getClass(profile->bdp, ch);
   763             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
   764                 firstCharDir = direction;
   765             }
   766             if(direction == U_LEFT_TO_RIGHT){
   767                 leftToRight = TRUE;
   768                 ltrPos = b2Index-1;
   769             }
   770             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
   771                 rightToLeft = TRUE;
   772                 rtlPos = b2Index-1;
   773             }
   774         }
   775     }           
   776     if(profile->checkBiDi == TRUE){
   777         // satisfy 2
   778         if( leftToRight == TRUE && rightToLeft == TRUE){
   779             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
   780             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
   781             goto CLEANUP;
   782         }
   784         //satisfy 3
   785         if( rightToLeft == TRUE && 
   786             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
   787               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
   788            ){
   789             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
   790             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
   791             return FALSE;
   792         }
   793     }
   794     if(b2Len>0 && b2Len <= destCapacity){
   795         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
   796     }
   798 CLEANUP:
   799     if(b1!=b1Stack){
   800         uprv_free(b1);
   801         b1=NULL;
   802     }
   804     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
   805         uprv_free(b2);
   806         b2=NULL;
   807     }
   808     return u_terminateUChars(dest, destCapacity, b2Len, status);
   809 }
   812 /* data swapping ------------------------------------------------------------ */
   814 U_CAPI int32_t U_EXPORT2
   815 usprep_swap(const UDataSwapper *ds,
   816             const void *inData, int32_t length, void *outData,
   817             UErrorCode *pErrorCode) {
   818     const UDataInfo *pInfo;
   819     int32_t headerSize;
   821     const uint8_t *inBytes;
   822     uint8_t *outBytes;
   824     const int32_t *inIndexes;
   825     int32_t indexes[16];
   827     int32_t i, offset, count, size;
   829     /* udata_swapDataHeader checks the arguments */
   830     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   831     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   832         return 0;
   833     }
   835     /* check data format and format version */
   836     pInfo=(const UDataInfo *)((const char *)inData+4);
   837     if(!(
   838         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
   839         pInfo->dataFormat[1]==0x50 &&
   840         pInfo->dataFormat[2]==0x52 &&
   841         pInfo->dataFormat[3]==0x50 &&
   842         pInfo->formatVersion[0]==3
   843     )) {
   844         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
   845                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   846                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   847                          pInfo->formatVersion[0]);
   848         *pErrorCode=U_UNSUPPORTED_ERROR;
   849         return 0;
   850     }
   852     inBytes=(const uint8_t *)inData+headerSize;
   853     outBytes=(uint8_t *)outData+headerSize;
   855     inIndexes=(const int32_t *)inBytes;
   857     if(length>=0) {
   858         length-=headerSize;
   859         if(length<16*4) {
   860             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
   861                              length);
   862             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   863             return 0;
   864         }
   865     }
   867     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
   868     for(i=0; i<16; ++i) {
   869         indexes[i]=udata_readInt32(ds, inIndexes[i]);
   870     }
   872     /* calculate the total length of the data */
   873     size=
   874         16*4+ /* size of indexes[] */
   875         indexes[_SPREP_INDEX_TRIE_SIZE]+
   876         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
   878     if(length>=0) {
   879         if(length<size) {
   880             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
   881                              length);
   882             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   883             return 0;
   884         }
   886         /* copy the data for inaccessible bytes */
   887         if(inBytes!=outBytes) {
   888             uprv_memcpy(outBytes, inBytes, size);
   889         }
   891         offset=0;
   893         /* swap the int32_t indexes[] */
   894         count=16*4;
   895         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   896         offset+=count;
   898         /* swap the UTrie */
   899         count=indexes[_SPREP_INDEX_TRIE_SIZE];
   900         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   901         offset+=count;
   903         /* swap the uint16_t mappingTable[] */
   904         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
   905         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   906         offset+=count;
   907     }
   909     return headerSize+size;
   910 }
   912 #endif /* #if !UCONFIG_NO_IDNA */

mercurial