intl/icu/source/common/uidna.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2  *******************************************************************************
     3  *
     4  *   Copyright (C) 2003-2009, International Business Machines
     5  *   Corporation and others.  All Rights Reserved.
     6  *
     7  *******************************************************************************
     8  *   file name:  uidna.cpp
     9  *   encoding:   US-ASCII
    10  *   tab size:   8 (not used)
    11  *   indentation:4
    12  *
    13  *   created on: 2003feb1
    14  *   created by: Ram Viswanadha
    15  */
    17 #include "unicode/utypes.h"
    19 #if !UCONFIG_NO_IDNA
    21 #include "unicode/uidna.h"
    22 #include "unicode/ustring.h"
    23 #include "unicode/usprep.h"
    24 #include "punycode.h"
    25 #include "ustr_imp.h"
    26 #include "cmemory.h"
    27 #include "uassert.h"
    28 #include "sprpimpl.h"
    30 /* it is official IDNA ACE Prefix is "xn--" */
    31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
    32 #define ACE_PREFIX_LENGTH 4
    /* Longest label allowed; _internal_toASCII reports
     * U_IDNA_LABEL_TOO_LONG_ERROR when the required length exceeds it. */
    #define MAX_LABEL_LENGTH 63
    /* Size (in UChars) of the per-label stack buffers. Deliberately larger than
     * MAX_LABEL_LENGTH; longer intermediate results fall back to the heap. */
    #define MAX_LABEL_BUFFER_SIZE 100
    /* Longest domain name allowed; the uidna_IDNTo* functions report
     * U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR when the required length exceeds it. */
    #define MAX_DOMAIN_NAME_LENGTH 255
    /* Size (in UChars) of the whole-name stack buffers used by uidna_compare.
     * Parenthesized so the macro expands safely inside larger expressions
     * (e.g. MAX_IDN_BUFFER_SIZE * 2). */
    #define MAX_IDN_BUFFER_SIZE   (MAX_DOMAIN_NAME_LENGTH+1)
    /* Code unit values used by the ASCII helpers in this file. They are written
     * as numbers rather than character literals. */
    #define LOWER_CASE_DELTA 0x20   /* added to a capital letter to lowercase it */
    #define HYPHEN           0x2D   /* U+002D */
    #define FULL_STOP        0x2E   /* U+002E */
    #define CAPITAL_A        0x41   /* U+0041 */
    #define CAPITAL_Z        0x5A   /* U+005A */
    48 inline static UChar 
    49 toASCIILower(UChar ch){
    50     if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
    51         return ch + LOWER_CASE_DELTA;
    52     }
    53     return ch;
    54 }
    56 inline static UBool 
    57 startsWithPrefix(const UChar* src , int32_t srcLength){
    58     UBool startsWithPrefix = TRUE;
    60     if(srcLength < ACE_PREFIX_LENGTH){
    61         return FALSE;
    62     }
    64     for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
    65         if(toASCIILower(src[i]) != ACE_PREFIX[i]){
    66             startsWithPrefix = FALSE;
    67         }
    68     }
    69     return startsWithPrefix;
    70 }
    73 inline static int32_t
    74 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 
    75                             const UChar* s2, int32_t s2Len){
    77     int32_t minLength;
    78     int32_t lengthResult;
    80     // are we comparing different lengths?
    81     if(s1Len != s2Len) {
    82         if(s1Len < s2Len) {
    83             minLength = s1Len;
    84             lengthResult = -1;
    85         } else {
    86             minLength = s2Len;
    87             lengthResult = 1;
    88         }
    89     } else {
    90         // ok the lengths are equal
    91         minLength = s1Len;
    92         lengthResult = 0;
    93     }
    95     UChar c1,c2;
    96     int32_t rc;
    98     for(int32_t i =0;/* no condition */;i++) {
   100         /* If we reach the ends of both strings then they match */
   101         if(i == minLength) {
   102             return lengthResult;
   103         }
   105         c1 = s1[i];
   106         c2 = s2[i];
   108         /* Case-insensitive comparison */
   109         if(c1!=c2) {
   110             rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
   111             if(rc!=0) {
   112                 lengthResult=rc;
   113                 break;
   114             }
   115         }
   116     }
   117     return lengthResult;
   118 }
   121 /**
   122  * Ascertain if the given code point is a label separator as 
   123  * defined by the IDNA RFC
   124  * 
   125  * @param ch The code point to be ascertained
   126  * @return true if the char is a label separator
   127  * @stable ICU 2.8
   128  */
   129 static inline UBool isLabelSeparator(UChar ch){
   130     switch(ch){
   131         case 0x002e:
   132         case 0x3002:
   133         case 0xFF0E:
   134         case 0xFF61:
   135             return TRUE;
   136         default:
   137             return FALSE;           
   138     }
   139 }
   141 // returns the length of the label excluding the separator
   142 // if *limit == separator then the length returned does not include 
   143 // the separtor.
   144 static inline int32_t
   145 getNextSeparator(UChar *src, int32_t srcLength,
   146                  UChar **limit, UBool *done){
   147     if(srcLength == -1){
   148         int32_t i;
   149         for(i=0 ; ;i++){
   150             if(src[i] == 0){
   151                 *limit = src + i; // point to null
   152                 *done = TRUE;
   153                 return i;
   154             }
   155             if(isLabelSeparator(src[i])){
   156                 *limit = src + (i+1); // go past the delimiter
   157                 return i;
   159             }
   160         }
   161     }else{
   162         int32_t i;
   163         for(i=0;i<srcLength;i++){
   164             if(isLabelSeparator(src[i])){
   165                 *limit = src + (i+1); // go past the delimiter
   166                 return i;
   167             }
   168         }
   169         // we have not found the delimiter
   170         // if(i==srcLength)
   171         *limit = src+srcLength;
   172         *done = TRUE;
   174         return i;
   175     }
   176 }
   177 static inline UBool isLDHChar(UChar ch){
   178     // high runner case
   179     if(ch>0x007A){
   180         return FALSE;
   181     }
   182     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
   183     if( (ch==0x002D) || 
   184         (0x0030 <= ch && ch <= 0x0039) ||
   185         (0x0041 <= ch && ch <= 0x005A) ||
   186         (0x0061 <= ch && ch <= 0x007A)
   187       ){
   188         return TRUE;
   189     }
   190     return FALSE;
   191 }
   193 static int32_t 
   194 _internal_toASCII(const UChar* src, int32_t srcLength, 
   195                   UChar* dest, int32_t destCapacity,
   196                   int32_t options,
   197                   UStringPrepProfile* nameprep,
   198                   UParseError* parseError,
   199                   UErrorCode* status)
   200 {
   202     // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
   203     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
   204     //initialize pointers to stack buffers
   205     UChar  *b1 = b1Stack, *b2 = b2Stack;
   206     int32_t b1Len=0, b2Len, 
   207             b1Capacity = MAX_LABEL_BUFFER_SIZE, 
   208             b2Capacity = MAX_LABEL_BUFFER_SIZE ,
   209             reqLength=0;
   211     int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
   212     UBool* caseFlags = NULL;
   214     // the source contains all ascii codepoints
   215     UBool srcIsASCII  = TRUE;
   216     // assume the source contains all LDH codepoints
   217     UBool srcIsLDH = TRUE; 
   219     int32_t j=0;
   221     //get the options
   222     UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
   224     int32_t failPos = -1;
   226     if(srcLength == -1){
   227         srcLength = u_strlen(src);
   228     }
   230     if(srcLength > b1Capacity){
   231         b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
   232         if(b1==NULL){
   233             *status = U_MEMORY_ALLOCATION_ERROR;
   234             goto CLEANUP;
   235         }
   236         b1Capacity = srcLength;
   237     }
   239     // step 1 
   240     for( j=0;j<srcLength;j++){
   241         if(src[j] > 0x7F){
   242             srcIsASCII = FALSE;
   243         }
   244         b1[b1Len++] = src[j];
   245     }
   247     // step 2 is performed only if the source contains non ASCII
   248     if(srcIsASCII == FALSE){
   250         // step 2    
   251         b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
   253         if(*status == U_BUFFER_OVERFLOW_ERROR){
   254             // redo processing of string
   255             // we do not have enough room so grow the buffer
   256             if(b1 != b1Stack){
   257                 uprv_free(b1);
   258             }
   259             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   260             if(b1==NULL){
   261                 *status = U_MEMORY_ALLOCATION_ERROR;
   262                 goto CLEANUP;
   263             }
   265             *status = U_ZERO_ERROR; // reset error
   267             b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
   268         }
   269     }
   270     // error bail out
   271     if(U_FAILURE(*status)){
   272         goto CLEANUP;
   273     }
   274     if(b1Len == 0){
   275         *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
   276         goto CLEANUP;
   277     }
   279     // for step 3 & 4
   280     srcIsASCII = TRUE;
   281     for( j=0;j<b1Len;j++){
   282         // check if output of usprep_prepare is all ASCII 
   283         if(b1[j] > 0x7F){
   284             srcIsASCII = FALSE;
   285         }else if(isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character
   286             srcIsLDH = FALSE;
   287             failPos = j;
   288         }
   289     }
   290     if(useSTD3ASCIIRules == TRUE){
   291         // verify 3a and 3b
   292         // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
   293         //  absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
   294         // 3(b) Verify the absence of leading and trailing hyphen-minus; that
   295         //  is, the absence of U+002D at the beginning and end of the
   296         //  sequence.
   297         if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
   298             || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
   299             *status = U_IDNA_STD3_ASCII_RULES_ERROR;
   301             /* populate the parseError struct */
   302             if(srcIsLDH==FALSE){
   303                 // failPos is always set the index of failure
   304                 uprv_syntaxError(b1,failPos, b1Len,parseError);
   305             }else if(b1[0] == HYPHEN){
   306                 // fail position is 0 
   307                 uprv_syntaxError(b1,0,b1Len,parseError);
   308             }else{
   309                 // the last index in the source is always length-1
   310                 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
   311             }
   313             goto CLEANUP;
   314         }
   315     }
   316     // Step 4: if the source is ASCII then proceed to step 8
   317     if(srcIsASCII){
   318         if(b1Len <= destCapacity){
   319             uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
   320             reqLength = b1Len;
   321         }else{
   322             reqLength = b1Len;
   323             goto CLEANUP;
   324         }
   325     }else{
   326         // step 5 : verify the sequence does not begin with ACE prefix
   327         if(!startsWithPrefix(b1,b1Len)){
   329             //step 6: encode the sequence with punycode
   331             // do not preserve the case flags for now!
   332             // TODO: Preserve the case while implementing the RFE
   333             // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
   334             // uprv_memset(caseFlags,TRUE,b1Len);
   336             b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
   338             if(*status == U_BUFFER_OVERFLOW_ERROR){
   339                 // redo processing of string
   340                 /* we do not have enough room so grow the buffer*/
   341                 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 
   342                 if(b2 == NULL){
   343                     *status = U_MEMORY_ALLOCATION_ERROR;
   344                     goto CLEANUP;
   345                 }
   347                 *status = U_ZERO_ERROR; // reset error
   349                 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
   350             }
   351             //error bail out
   352             if(U_FAILURE(*status)){
   353                 goto CLEANUP;
   354             }
   355             // TODO : Reconsider while implementing the case preserve RFE
   356             // convert all codepoints to lower case ASCII
   357             // toASCIILower(b2,b2Len);
   358             reqLength = b2Len+ACE_PREFIX_LENGTH;
   360             if(reqLength > destCapacity){
   361                 *status = U_BUFFER_OVERFLOW_ERROR;
   362                 goto CLEANUP;
   363             }
   364             //Step 7: prepend the ACE prefix
   365             uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
   366             //Step 6: copy the contents in b2 into dest
   367             uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
   369         }else{
   370             *status = U_IDNA_ACE_PREFIX_ERROR; 
   371             //position of failure is 0
   372             uprv_syntaxError(b1,0,b1Len,parseError);
   373             goto CLEANUP;
   374         }
   375     }
   376     // step 8: verify the length of label
   377     if(reqLength > MAX_LABEL_LENGTH){
   378         *status = U_IDNA_LABEL_TOO_LONG_ERROR;
   379     }
   381 CLEANUP:
   382     if(b1 != b1Stack){
   383         uprv_free(b1);
   384     }
   385     if(b2 != b2Stack){
   386         uprv_free(b2);
   387     }
   388     uprv_free(caseFlags);
   390     return u_terminateUChars(dest, destCapacity, reqLength, status);
   391 }
   393 static int32_t
   394 _internal_toUnicode(const UChar* src, int32_t srcLength,
   395                     UChar* dest, int32_t destCapacity,
   396                     int32_t options,
   397                     UStringPrepProfile* nameprep,
   398                     UParseError* parseError,
   399                     UErrorCode* status)
   400 {
   402     //get the options
   403     //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
   404     int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 
   406     // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
   407     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
   409     //initialize pointers to stack buffers
   410     UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
   411     int32_t b1Len, b2Len, b1PrimeLen, b3Len,
   412             b1Capacity = MAX_LABEL_BUFFER_SIZE, 
   413             b2Capacity = MAX_LABEL_BUFFER_SIZE,
   414             b3Capacity = MAX_LABEL_BUFFER_SIZE,
   415             reqLength=0;
   417     b1Len = 0;
   418     UBool* caseFlags = NULL;
   420     UBool srcIsASCII = TRUE;
   421     /*UBool srcIsLDH = TRUE;
   422     int32_t failPos =0;*/
   424     // step 1: find out if all the codepoints in src are ASCII  
   425     if(srcLength==-1){
   426         srcLength = 0;
   427         for(;src[srcLength]!=0;){
   428             if(src[srcLength]> 0x7f){
   429                 srcIsASCII = FALSE;
   430             }/*else if(isLDHChar(src[srcLength])==FALSE){
   431                 // here we do not assemble surrogates
   432                 // since we know that LDH code points
   433                 // are in the ASCII range only
   434                 srcIsLDH = FALSE;
   435                 failPos = srcLength;
   436             }*/
   437             srcLength++;
   438         }
   439     }else if(srcLength > 0){
   440         for(int32_t j=0; j<srcLength; j++){
   441             if(src[j]> 0x7f){
   442                 srcIsASCII = FALSE;
   443             }/*else if(isLDHChar(src[j])==FALSE){
   444                 // here we do not assemble surrogates
   445                 // since we know that LDH code points
   446                 // are in the ASCII range only
   447                 srcIsLDH = FALSE;
   448                 failPos = j;
   449             }*/
   450         }
   451     }else{
   452         return 0;
   453     }
   455     if(srcIsASCII == FALSE){
   456         // step 2: process the string
   457         b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
   458         if(*status == U_BUFFER_OVERFLOW_ERROR){
   459             // redo processing of string
   460             /* we do not have enough room so grow the buffer*/
   461             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   462             if(b1==NULL){
   463                 *status = U_MEMORY_ALLOCATION_ERROR;
   464                 goto CLEANUP;
   465             }
   467             *status = U_ZERO_ERROR; // reset error
   469             b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
   470         }
   471         //bail out on error
   472         if(U_FAILURE(*status)){
   473             goto CLEANUP;
   474         }
   475     }else{
   477         //just point src to b1
   478         b1 = (UChar*) src;
   479         b1Len = srcLength;
   480     }
   482     // The RFC states that 
   483     // <quote>
   484     // ToUnicode never fails. If any step fails, then the original input
   485     // is returned immediately in that step.
   486     // </quote>
   488     //step 3: verify ACE Prefix
   489     if(startsWithPrefix(b1,b1Len)){
   491         //step 4: Remove the ACE Prefix
   492         b1Prime = b1 + ACE_PREFIX_LENGTH;
   493         b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
   495         //step 5: Decode using punycode
   496         b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
   498         if(*status == U_BUFFER_OVERFLOW_ERROR){
   499             // redo processing of string
   500             /* we do not have enough room so grow the buffer*/
   501             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
   502             if(b2==NULL){
   503                 *status = U_MEMORY_ALLOCATION_ERROR;
   504                 goto CLEANUP;
   505             }
   507             *status = U_ZERO_ERROR; // reset error
   509             b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
   510         }
   513         //step 6:Apply toASCII
   514         b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
   516         if(*status == U_BUFFER_OVERFLOW_ERROR){
   517             // redo processing of string
   518             /* we do not have enough room so grow the buffer*/
   519             b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
   520             if(b3==NULL){
   521                 *status = U_MEMORY_ALLOCATION_ERROR;
   522                 goto CLEANUP;
   523             }
   525             *status = U_ZERO_ERROR; // reset error
   527             b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
   529         }
   530         //bail out on error
   531         if(U_FAILURE(*status)){
   532             goto CLEANUP;
   533         }
   535         //step 7: verify
   536         if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
   537             // Cause the original to be returned.
   538             *status = U_IDNA_VERIFICATION_ERROR;
   539             goto CLEANUP;
   540         }
   542         //step 8: return output of step 5
   543         reqLength = b2Len;
   544         if(b2Len <= destCapacity) {
   545             uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
   546         }
   547     }
   548     else{
   549         // See the start of this if statement for why this is commented out.
   550         // verify that STD3 ASCII rules are satisfied
   551         /*if(useSTD3ASCIIRules == TRUE){
   552             if( srcIsLDH == FALSE // source contains some non-LDH characters
   553                 || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
   554                 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
   556                 // populate the parseError struct
   557                 if(srcIsLDH==FALSE){
   558                     // failPos is always set the index of failure
   559                     uprv_syntaxError(src,failPos, srcLength,parseError);
   560                 }else if(src[0] == HYPHEN){
   561                     // fail position is 0 
   562                     uprv_syntaxError(src,0,srcLength,parseError);
   563                 }else{
   564                     // the last index in the source is always length-1
   565                     uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
   566                 }
   568                 goto CLEANUP;
   569             }
   570         }*/
   571         // just return the source
   572         //copy the source to destination
   573         if(srcLength <= destCapacity){
   574             uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
   575         }
   576         reqLength = srcLength;
   577     }
   580 CLEANUP:
   582     if(b1 != b1Stack && b1!=src){
   583         uprv_free(b1);
   584     }
   585     if(b2 != b2Stack){
   586         uprv_free(b2);
   587     }
   588     uprv_free(caseFlags);
   590     // The RFC states that 
   591     // <quote>
   592     // ToUnicode never fails. If any step fails, then the original input
   593     // is returned immediately in that step.
   594     // </quote>
   595     // So if any step fails lets copy source to destination
   596     if(U_FAILURE(*status)){
   597         //copy the source to destination
   598         if(dest && srcLength <= destCapacity){
   599             // srcLength should have already been set earlier.
   600             U_ASSERT(srcLength >= 0);
   601             uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
   602         }
   603         reqLength = srcLength;
   604         *status = U_ZERO_ERROR;
   605     }
   607     return u_terminateUChars(dest, destCapacity, reqLength, status);
   608 }
   610 U_CAPI int32_t U_EXPORT2
   611 uidna_toASCII(const UChar* src, int32_t srcLength, 
   612               UChar* dest, int32_t destCapacity,
   613               int32_t options,
   614               UParseError* parseError,
   615               UErrorCode* status){
   617     if(status == NULL || U_FAILURE(*status)){
   618         return 0;
   619     }
   620     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   621         *status = U_ILLEGAL_ARGUMENT_ERROR;
   622         return 0;
   623     }
   625     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   627     if(U_FAILURE(*status)){
   628         return -1;
   629     }
   631     int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
   633     /* close the profile*/
   634     usprep_close(nameprep);
   636     return retLen;
   637 }
   639 U_CAPI int32_t U_EXPORT2
   640 uidna_toUnicode(const UChar* src, int32_t srcLength,
   641                 UChar* dest, int32_t destCapacity,
   642                 int32_t options,
   643                 UParseError* parseError,
   644                 UErrorCode* status){
   646     if(status == NULL || U_FAILURE(*status)){
   647         return 0;
   648     }
   649     if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   650         *status = U_ILLEGAL_ARGUMENT_ERROR;
   651         return 0;
   652     }  
   654     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   656     if(U_FAILURE(*status)){
   657         return -1;
   658     }
   660     int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
   662     usprep_close(nameprep);
   664     return retLen;
   665 }
   668 U_CAPI int32_t U_EXPORT2
   669 uidna_IDNToASCII(  const UChar *src, int32_t srcLength,
   670                    UChar* dest, int32_t destCapacity,
   671                    int32_t options,
   672                    UParseError *parseError,
   673                    UErrorCode *status){
   675     if(status == NULL || U_FAILURE(*status)){
   676         return 0;
   677     }
   678     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   679         *status = U_ILLEGAL_ARGUMENT_ERROR;
   680         return 0;
   681     }
   683     int32_t reqLength = 0;
   685     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   687     if(U_FAILURE(*status)){
   688         return 0;
   689     }
   691     //initialize pointers 
   692     UChar *delimiter = (UChar*)src;
   693     UChar *labelStart = (UChar*)src;
   694     UChar *currentDest = (UChar*) dest;
   695     int32_t remainingLen = srcLength;
   696     int32_t remainingDestCapacity = destCapacity;
   697     int32_t labelLen = 0, labelReqLength = 0;
   698     UBool done = FALSE;
   701     for(;;){
   703         labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
   704         labelReqLength = 0;
   705         if(!(labelLen==0 && done)){// make sure this is not a root label separator.
   707             labelReqLength = _internal_toASCII( labelStart, labelLen, 
   708                                                 currentDest, remainingDestCapacity, 
   709                                                 options, nameprep, 
   710                                                 parseError, status);
   712             if(*status == U_BUFFER_OVERFLOW_ERROR){
   714                 *status = U_ZERO_ERROR; // reset error
   715                 remainingDestCapacity = 0;
   716             }
   717         }
   720         if(U_FAILURE(*status)){
   721             break;
   722         }
   724         reqLength +=labelReqLength;
   725         // adjust the destination pointer
   726         if(labelReqLength < remainingDestCapacity){
   727             currentDest = currentDest + labelReqLength;
   728             remainingDestCapacity -= labelReqLength;
   729         }else{
   730             // should never occur
   731             remainingDestCapacity = 0;
   732         }
   734         if(done == TRUE){
   735             break;
   736         }
   738         // add the label separator
   739         if(remainingDestCapacity > 0){
   740             *currentDest++ = FULL_STOP;
   741             remainingDestCapacity--;
   742         }
   743         reqLength++;
   745         labelStart = delimiter;
   746         if(remainingLen >0 ){
   747             remainingLen = (int32_t)(srcLength - (delimiter - src));
   748         }
   750     }
   752     if(reqLength > MAX_DOMAIN_NAME_LENGTH){
   753         *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
   754     }
   756     usprep_close(nameprep);
   758     return u_terminateUChars(dest, destCapacity, reqLength, status);
   759 }
   761 U_CAPI int32_t U_EXPORT2
   762 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
   763                      UChar* dest, int32_t destCapacity,
   764                      int32_t options,
   765                      UParseError* parseError,
   766                      UErrorCode* status){
   768     if(status == NULL || U_FAILURE(*status)){
   769         return 0;
   770     }
   771     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   772         *status = U_ILLEGAL_ARGUMENT_ERROR;
   773         return 0;
   774     }
   776     int32_t reqLength = 0;
   778     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   780     if(U_FAILURE(*status)){
   781         return 0;
   782     }
   784     //initialize pointers
   785     UChar *delimiter = (UChar*)src;
   786     UChar *labelStart = (UChar*)src;
   787     UChar *currentDest = (UChar*) dest;
   788     int32_t remainingLen = srcLength;
   789     int32_t remainingDestCapacity = destCapacity;
   790     int32_t labelLen = 0, labelReqLength = 0;
   791     UBool done = FALSE;
   793     for(;;){
   795         labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
   797         // The RFC states that 
   798         // <quote>
   799         // ToUnicode never fails. If any step fails, then the original input
   800         // is returned immediately in that step.
   801         // </quote>
   802         // _internal_toUnicode will copy the label.
   803         /*if(labelLen==0 && done==FALSE){ 
   804             *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
   805             break;
   806         }*/
   808         labelReqLength = _internal_toUnicode(labelStart, labelLen, 
   809                                              currentDest, remainingDestCapacity, 
   810                                              options, nameprep, 
   811                                              parseError, status);
   813         if(*status == U_BUFFER_OVERFLOW_ERROR){
   814             *status = U_ZERO_ERROR; // reset error
   815             remainingDestCapacity = 0;
   816         }
   818         if(U_FAILURE(*status)){
   819             break;
   820         }
   822         reqLength +=labelReqLength;
   823         // adjust the destination pointer
   824         if(labelReqLength < remainingDestCapacity){
   825             currentDest = currentDest + labelReqLength;
   826             remainingDestCapacity -= labelReqLength;
   827         }else{
   828             // should never occur
   829             remainingDestCapacity = 0;
   830         }
   832         if(done == TRUE){
   833             break;
   834         }
   836         // add the label separator
   837         // Unlike the ToASCII operation we don't normalize the label separators
   838         if(remainingDestCapacity > 0){
   839             *currentDest++ = *(labelStart + labelLen);
   840             remainingDestCapacity--;
   841         }
   842         reqLength++;
   844         labelStart = delimiter;
   845         if(remainingLen >0 ){
   846             remainingLen = (int32_t)(srcLength - (delimiter - src));
   847         }
   849     }
   851     if(reqLength > MAX_DOMAIN_NAME_LENGTH){
   852         *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
   853     }
   855     usprep_close(nameprep);
   857     return u_terminateUChars(dest, destCapacity, reqLength, status);
   858 }
   860 U_CAPI int32_t U_EXPORT2
   861 uidna_compare(  const UChar *s1, int32_t length1,
   862                 const UChar *s2, int32_t length2,
   863                 int32_t options,
   864                 UErrorCode* status){
   866     if(status == NULL || U_FAILURE(*status)){
   867         return -1;
   868     }
   870     UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
   871     UChar *b1 = b1Stack, *b2 = b2Stack;
   872     int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
   873     int32_t result=-1;
   875     UParseError parseError;
   877     b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
   878     if(*status == U_BUFFER_OVERFLOW_ERROR){
   879         // redo processing of string
   880         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   881         if(b1==NULL){
   882             *status = U_MEMORY_ALLOCATION_ERROR;
   883             goto CLEANUP;
   884         }
   886         *status = U_ZERO_ERROR; // reset error
   888         b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
   890     }
   892     b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
   893     if(*status == U_BUFFER_OVERFLOW_ERROR){
   894         // redo processing of string
   895         b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
   896         if(b2==NULL){
   897             *status = U_MEMORY_ALLOCATION_ERROR;
   898             goto CLEANUP;
   899         }
   901         *status = U_ZERO_ERROR; // reset error
   903         b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
   905     }
   906     // when toASCII is applied all label separators are replaced with FULL_STOP
   907     result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
   909 CLEANUP:
   910     if(b1 != b1Stack){
   911         uprv_free(b1);
   912     }
   914     if(b2 != b2Stack){
   915         uprv_free(b2);
   916     }
   918     return result;
   919 }
   921 #endif /* #if !UCONFIG_NO_IDNA */

mercurial