intl/icu/source/common/uidna.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/uidna.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,921 @@
     1.4 +/*
     1.5 + *******************************************************************************
     1.6 + *
     1.7 + *   Copyright (C) 2003-2009, International Business Machines
     1.8 + *   Corporation and others.  All Rights Reserved.
     1.9 + *
    1.10 + *******************************************************************************
    1.11 + *   file name:  uidna.cpp
    1.12 + *   encoding:   US-ASCII
    1.13 + *   tab size:   8 (not used)
    1.14 + *   indentation:4
    1.15 + *
    1.16 + *   created on: 2003feb1
    1.17 + *   created by: Ram Viswanadha
    1.18 + */
    1.19 +
    1.20 +#include "unicode/utypes.h"
    1.21 +
    1.22 +#if !UCONFIG_NO_IDNA
    1.23 +
    1.24 +#include "unicode/uidna.h"
    1.25 +#include "unicode/ustring.h"
    1.26 +#include "unicode/usprep.h"
    1.27 +#include "punycode.h"
    1.28 +#include "ustr_imp.h"
    1.29 +#include "cmemory.h"
    1.30 +#include "uassert.h"
    1.31 +#include "sprpimpl.h"
    1.32 +
    1.33 +/* it is official IDNA ACE Prefix is "xn--" */
    1.34 +static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
    1.35 +#define ACE_PREFIX_LENGTH 4
    1.36 +
    1.37 +#define MAX_LABEL_LENGTH 63
    1.38 +/* The Max length of the labels should not be more than MAX_LABEL_LENGTH */
    1.39 +#define MAX_LABEL_BUFFER_SIZE 100
    1.40 +
    1.41 +#define MAX_DOMAIN_NAME_LENGTH 255
    1.42 +/* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */
    1.43 +#define MAX_IDN_BUFFER_SIZE   MAX_DOMAIN_NAME_LENGTH+1
    1.44 +
    1.45 +#define LOWER_CASE_DELTA 0x0020
    1.46 +#define HYPHEN           0x002D
    1.47 +#define FULL_STOP        0x002E
    1.48 +#define CAPITAL_A        0x0041
    1.49 +#define CAPITAL_Z        0x005A
    1.50 +
    1.51 +inline static UChar 
    1.52 +toASCIILower(UChar ch){
    1.53 +    if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
    1.54 +        return ch + LOWER_CASE_DELTA;
    1.55 +    }
    1.56 +    return ch;
    1.57 +}
    1.58 +
    1.59 +inline static UBool 
    1.60 +startsWithPrefix(const UChar* src , int32_t srcLength){
    1.61 +    UBool startsWithPrefix = TRUE;
    1.62 +
    1.63 +    if(srcLength < ACE_PREFIX_LENGTH){
    1.64 +        return FALSE;
    1.65 +    }
    1.66 +
    1.67 +    for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
    1.68 +        if(toASCIILower(src[i]) != ACE_PREFIX[i]){
    1.69 +            startsWithPrefix = FALSE;
    1.70 +        }
    1.71 +    }
    1.72 +    return startsWithPrefix;
    1.73 +}
    1.74 +
    1.75 +
    1.76 +inline static int32_t
    1.77 +compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 
    1.78 +                            const UChar* s2, int32_t s2Len){
    1.79 +    
    1.80 +    int32_t minLength;
    1.81 +    int32_t lengthResult;
    1.82 +
    1.83 +    // are we comparing different lengths?
    1.84 +    if(s1Len != s2Len) {
    1.85 +        if(s1Len < s2Len) {
    1.86 +            minLength = s1Len;
    1.87 +            lengthResult = -1;
    1.88 +        } else {
    1.89 +            minLength = s2Len;
    1.90 +            lengthResult = 1;
    1.91 +        }
    1.92 +    } else {
    1.93 +        // ok the lengths are equal
    1.94 +        minLength = s1Len;
    1.95 +        lengthResult = 0;
    1.96 +    }
    1.97 +
    1.98 +    UChar c1,c2;
    1.99 +    int32_t rc;
   1.100 +
   1.101 +    for(int32_t i =0;/* no condition */;i++) {
   1.102 +
   1.103 +        /* If we reach the ends of both strings then they match */
   1.104 +        if(i == minLength) {
   1.105 +            return lengthResult;
   1.106 +        }
   1.107 +        
   1.108 +        c1 = s1[i];
   1.109 +        c2 = s2[i];
   1.110 +        
   1.111 +        /* Case-insensitive comparison */
   1.112 +        if(c1!=c2) {
   1.113 +            rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
   1.114 +            if(rc!=0) {
   1.115 +                lengthResult=rc;
   1.116 +                break;
   1.117 +            }
   1.118 +        }
   1.119 +    }
   1.120 +    return lengthResult;
   1.121 +}
   1.122 +
   1.123 +
   1.124 +/**
   1.125 + * Ascertain if the given code point is a label separator as 
   1.126 + * defined by the IDNA RFC
   1.127 + * 
   1.128 + * @param ch The code point to be ascertained
   1.129 + * @return true if the char is a label separator
   1.130 + * @stable ICU 2.8
   1.131 + */
   1.132 +static inline UBool isLabelSeparator(UChar ch){
   1.133 +    switch(ch){
   1.134 +        case 0x002e:
   1.135 +        case 0x3002:
   1.136 +        case 0xFF0E:
   1.137 +        case 0xFF61:
   1.138 +            return TRUE;
   1.139 +        default:
   1.140 +            return FALSE;           
   1.141 +    }
   1.142 +}
   1.143 +
   1.144 +// returns the length of the label excluding the separator
   1.145 +// if *limit == separator then the length returned does not include 
   1.146 +// the separtor.
   1.147 +static inline int32_t
   1.148 +getNextSeparator(UChar *src, int32_t srcLength,
   1.149 +                 UChar **limit, UBool *done){
   1.150 +    if(srcLength == -1){
   1.151 +        int32_t i;
   1.152 +        for(i=0 ; ;i++){
   1.153 +            if(src[i] == 0){
   1.154 +                *limit = src + i; // point to null
   1.155 +                *done = TRUE;
   1.156 +                return i;
   1.157 +            }
   1.158 +            if(isLabelSeparator(src[i])){
   1.159 +                *limit = src + (i+1); // go past the delimiter
   1.160 +                return i;
   1.161 +                
   1.162 +            }
   1.163 +        }
   1.164 +    }else{
   1.165 +        int32_t i;
   1.166 +        for(i=0;i<srcLength;i++){
   1.167 +            if(isLabelSeparator(src[i])){
   1.168 +                *limit = src + (i+1); // go past the delimiter
   1.169 +                return i;
   1.170 +            }
   1.171 +        }
   1.172 +        // we have not found the delimiter
   1.173 +        // if(i==srcLength)
   1.174 +        *limit = src+srcLength;
   1.175 +        *done = TRUE;
   1.176 +
   1.177 +        return i;
   1.178 +    }
   1.179 +}
   1.180 +static inline UBool isLDHChar(UChar ch){
   1.181 +    // high runner case
   1.182 +    if(ch>0x007A){
   1.183 +        return FALSE;
   1.184 +    }
   1.185 +    //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
   1.186 +    if( (ch==0x002D) || 
   1.187 +        (0x0030 <= ch && ch <= 0x0039) ||
   1.188 +        (0x0041 <= ch && ch <= 0x005A) ||
   1.189 +        (0x0061 <= ch && ch <= 0x007A)
   1.190 +      ){
   1.191 +        return TRUE;
   1.192 +    }
   1.193 +    return FALSE;
   1.194 +}
   1.195 +
   1.196 +static int32_t 
   1.197 +_internal_toASCII(const UChar* src, int32_t srcLength, 
   1.198 +                  UChar* dest, int32_t destCapacity,
   1.199 +                  int32_t options,
   1.200 +                  UStringPrepProfile* nameprep,
   1.201 +                  UParseError* parseError,
   1.202 +                  UErrorCode* status)
   1.203 +{
   1.204 +
   1.205 +    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
   1.206 +    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
   1.207 +    //initialize pointers to stack buffers
   1.208 +    UChar  *b1 = b1Stack, *b2 = b2Stack;
   1.209 +    int32_t b1Len=0, b2Len, 
   1.210 +            b1Capacity = MAX_LABEL_BUFFER_SIZE, 
   1.211 +            b2Capacity = MAX_LABEL_BUFFER_SIZE ,
   1.212 +            reqLength=0;
   1.213 +
   1.214 +    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
   1.215 +    UBool* caseFlags = NULL;
   1.216 +    
   1.217 +    // the source contains all ascii codepoints
   1.218 +    UBool srcIsASCII  = TRUE;
   1.219 +    // assume the source contains all LDH codepoints
   1.220 +    UBool srcIsLDH = TRUE; 
   1.221 +
   1.222 +    int32_t j=0;
   1.223 +
   1.224 +    //get the options
   1.225 +    UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
   1.226 +
   1.227 +    int32_t failPos = -1;
   1.228 +    
   1.229 +    if(srcLength == -1){
   1.230 +        srcLength = u_strlen(src);
   1.231 +    }
   1.232 +    
   1.233 +    if(srcLength > b1Capacity){
   1.234 +        b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
   1.235 +        if(b1==NULL){
   1.236 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.237 +            goto CLEANUP;
   1.238 +        }
   1.239 +        b1Capacity = srcLength;
   1.240 +    }
   1.241 +
   1.242 +    // step 1 
   1.243 +    for( j=0;j<srcLength;j++){
   1.244 +        if(src[j] > 0x7F){
   1.245 +            srcIsASCII = FALSE;
   1.246 +        }
   1.247 +        b1[b1Len++] = src[j];
   1.248 +    }
   1.249 +    
   1.250 +    // step 2 is performed only if the source contains non ASCII
   1.251 +    if(srcIsASCII == FALSE){
   1.252 +        
   1.253 +        // step 2    
   1.254 +        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
   1.255 +
   1.256 +        if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.257 +            // redo processing of string
   1.258 +            // we do not have enough room so grow the buffer
   1.259 +            if(b1 != b1Stack){
   1.260 +                uprv_free(b1);
   1.261 +            }
   1.262 +            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   1.263 +            if(b1==NULL){
   1.264 +                *status = U_MEMORY_ALLOCATION_ERROR;
   1.265 +                goto CLEANUP;
   1.266 +            }
   1.267 +
   1.268 +            *status = U_ZERO_ERROR; // reset error
   1.269 +            
   1.270 +            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
   1.271 +        }
   1.272 +    }
   1.273 +    // error bail out
   1.274 +    if(U_FAILURE(*status)){
   1.275 +        goto CLEANUP;
   1.276 +    }
   1.277 +    if(b1Len == 0){
   1.278 +        *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
   1.279 +        goto CLEANUP;
   1.280 +    }
   1.281 +
   1.282 +    // for step 3 & 4
   1.283 +    srcIsASCII = TRUE;
   1.284 +    for( j=0;j<b1Len;j++){
   1.285 +        // check if output of usprep_prepare is all ASCII 
   1.286 +        if(b1[j] > 0x7F){
   1.287 +            srcIsASCII = FALSE;
   1.288 +        }else if(isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character
   1.289 +            srcIsLDH = FALSE;
   1.290 +            failPos = j;
   1.291 +        }
   1.292 +    }
   1.293 +    if(useSTD3ASCIIRules == TRUE){
   1.294 +        // verify 3a and 3b
   1.295 +        // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
   1.296 +        //  absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
   1.297 +        // 3(b) Verify the absence of leading and trailing hyphen-minus; that
   1.298 +        //  is, the absence of U+002D at the beginning and end of the
   1.299 +        //  sequence.
   1.300 +        if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
   1.301 +            || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
   1.302 +            *status = U_IDNA_STD3_ASCII_RULES_ERROR;
   1.303 +
   1.304 +            /* populate the parseError struct */
   1.305 +            if(srcIsLDH==FALSE){
   1.306 +                // failPos is always set the index of failure
   1.307 +                uprv_syntaxError(b1,failPos, b1Len,parseError);
   1.308 +            }else if(b1[0] == HYPHEN){
   1.309 +                // fail position is 0 
   1.310 +                uprv_syntaxError(b1,0,b1Len,parseError);
   1.311 +            }else{
   1.312 +                // the last index in the source is always length-1
   1.313 +                uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
   1.314 +            }
   1.315 +
   1.316 +            goto CLEANUP;
   1.317 +        }
   1.318 +    }
   1.319 +    // Step 4: if the source is ASCII then proceed to step 8
   1.320 +    if(srcIsASCII){
   1.321 +        if(b1Len <= destCapacity){
   1.322 +            uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
   1.323 +            reqLength = b1Len;
   1.324 +        }else{
   1.325 +            reqLength = b1Len;
   1.326 +            goto CLEANUP;
   1.327 +        }
   1.328 +    }else{
   1.329 +        // step 5 : verify the sequence does not begin with ACE prefix
   1.330 +        if(!startsWithPrefix(b1,b1Len)){
   1.331 +
   1.332 +            //step 6: encode the sequence with punycode
   1.333 +
   1.334 +            // do not preserve the case flags for now!
   1.335 +            // TODO: Preserve the case while implementing the RFE
   1.336 +            // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
   1.337 +            // uprv_memset(caseFlags,TRUE,b1Len);
   1.338 +
   1.339 +            b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
   1.340 +
   1.341 +            if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.342 +                // redo processing of string
   1.343 +                /* we do not have enough room so grow the buffer*/
   1.344 +                b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 
   1.345 +                if(b2 == NULL){
   1.346 +                    *status = U_MEMORY_ALLOCATION_ERROR;
   1.347 +                    goto CLEANUP;
   1.348 +                }
   1.349 +
   1.350 +                *status = U_ZERO_ERROR; // reset error
   1.351 +                
   1.352 +                b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
   1.353 +            }
   1.354 +            //error bail out
   1.355 +            if(U_FAILURE(*status)){
   1.356 +                goto CLEANUP;
   1.357 +            }
   1.358 +            // TODO : Reconsider while implementing the case preserve RFE
   1.359 +            // convert all codepoints to lower case ASCII
   1.360 +            // toASCIILower(b2,b2Len);
   1.361 +            reqLength = b2Len+ACE_PREFIX_LENGTH;
   1.362 +
   1.363 +            if(reqLength > destCapacity){
   1.364 +                *status = U_BUFFER_OVERFLOW_ERROR;
   1.365 +                goto CLEANUP;
   1.366 +            }
   1.367 +            //Step 7: prepend the ACE prefix
   1.368 +            uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
   1.369 +            //Step 6: copy the contents in b2 into dest
   1.370 +            uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
   1.371 +
   1.372 +        }else{
   1.373 +            *status = U_IDNA_ACE_PREFIX_ERROR; 
   1.374 +            //position of failure is 0
   1.375 +            uprv_syntaxError(b1,0,b1Len,parseError);
   1.376 +            goto CLEANUP;
   1.377 +        }
   1.378 +    }
   1.379 +    // step 8: verify the length of label
   1.380 +    if(reqLength > MAX_LABEL_LENGTH){
   1.381 +        *status = U_IDNA_LABEL_TOO_LONG_ERROR;
   1.382 +    }
   1.383 +
   1.384 +CLEANUP:
   1.385 +    if(b1 != b1Stack){
   1.386 +        uprv_free(b1);
   1.387 +    }
   1.388 +    if(b2 != b2Stack){
   1.389 +        uprv_free(b2);
   1.390 +    }
   1.391 +    uprv_free(caseFlags);
   1.392 +    
   1.393 +    return u_terminateUChars(dest, destCapacity, reqLength, status);
   1.394 +}
   1.395 +
   1.396 +static int32_t
   1.397 +_internal_toUnicode(const UChar* src, int32_t srcLength,
   1.398 +                    UChar* dest, int32_t destCapacity,
   1.399 +                    int32_t options,
   1.400 +                    UStringPrepProfile* nameprep,
   1.401 +                    UParseError* parseError,
   1.402 +                    UErrorCode* status)
   1.403 +{
   1.404 +
   1.405 +    //get the options
   1.406 +    //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
   1.407 +    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 
   1.408 +
   1.409 +    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
   1.410 +    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
   1.411 +
   1.412 +    //initialize pointers to stack buffers
   1.413 +    UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
   1.414 +    int32_t b1Len, b2Len, b1PrimeLen, b3Len,
   1.415 +            b1Capacity = MAX_LABEL_BUFFER_SIZE, 
   1.416 +            b2Capacity = MAX_LABEL_BUFFER_SIZE,
   1.417 +            b3Capacity = MAX_LABEL_BUFFER_SIZE,
   1.418 +            reqLength=0;
   1.419 +
   1.420 +    b1Len = 0;
   1.421 +    UBool* caseFlags = NULL;
   1.422 +
   1.423 +    UBool srcIsASCII = TRUE;
   1.424 +    /*UBool srcIsLDH = TRUE;
   1.425 +    int32_t failPos =0;*/
   1.426 +
   1.427 +    // step 1: find out if all the codepoints in src are ASCII  
   1.428 +    if(srcLength==-1){
   1.429 +        srcLength = 0;
   1.430 +        for(;src[srcLength]!=0;){
   1.431 +            if(src[srcLength]> 0x7f){
   1.432 +                srcIsASCII = FALSE;
   1.433 +            }/*else if(isLDHChar(src[srcLength])==FALSE){
   1.434 +                // here we do not assemble surrogates
   1.435 +                // since we know that LDH code points
   1.436 +                // are in the ASCII range only
   1.437 +                srcIsLDH = FALSE;
   1.438 +                failPos = srcLength;
   1.439 +            }*/
   1.440 +            srcLength++;
   1.441 +        }
   1.442 +    }else if(srcLength > 0){
   1.443 +        for(int32_t j=0; j<srcLength; j++){
   1.444 +            if(src[j]> 0x7f){
   1.445 +                srcIsASCII = FALSE;
   1.446 +            }/*else if(isLDHChar(src[j])==FALSE){
   1.447 +                // here we do not assemble surrogates
   1.448 +                // since we know that LDH code points
   1.449 +                // are in the ASCII range only
   1.450 +                srcIsLDH = FALSE;
   1.451 +                failPos = j;
   1.452 +            }*/
   1.453 +        }
   1.454 +    }else{
   1.455 +        return 0;
   1.456 +    }
   1.457 +    
   1.458 +    if(srcIsASCII == FALSE){
   1.459 +        // step 2: process the string
   1.460 +        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
   1.461 +        if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.462 +            // redo processing of string
   1.463 +            /* we do not have enough room so grow the buffer*/
   1.464 +            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   1.465 +            if(b1==NULL){
   1.466 +                *status = U_MEMORY_ALLOCATION_ERROR;
   1.467 +                goto CLEANUP;
   1.468 +            }
   1.469 +
   1.470 +            *status = U_ZERO_ERROR; // reset error
   1.471 +            
   1.472 +            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
   1.473 +        }
   1.474 +        //bail out on error
   1.475 +        if(U_FAILURE(*status)){
   1.476 +            goto CLEANUP;
   1.477 +        }
   1.478 +    }else{
   1.479 +
   1.480 +        //just point src to b1
   1.481 +        b1 = (UChar*) src;
   1.482 +        b1Len = srcLength;
   1.483 +    }
   1.484 +
   1.485 +    // The RFC states that 
   1.486 +    // <quote>
   1.487 +    // ToUnicode never fails. If any step fails, then the original input
   1.488 +    // is returned immediately in that step.
   1.489 +    // </quote>
   1.490 +
   1.491 +    //step 3: verify ACE Prefix
   1.492 +    if(startsWithPrefix(b1,b1Len)){
   1.493 +
   1.494 +        //step 4: Remove the ACE Prefix
   1.495 +        b1Prime = b1 + ACE_PREFIX_LENGTH;
   1.496 +        b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
   1.497 +
   1.498 +        //step 5: Decode using punycode
   1.499 +        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
   1.500 +
   1.501 +        if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.502 +            // redo processing of string
   1.503 +            /* we do not have enough room so grow the buffer*/
   1.504 +            b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
   1.505 +            if(b2==NULL){
   1.506 +                *status = U_MEMORY_ALLOCATION_ERROR;
   1.507 +                goto CLEANUP;
   1.508 +            }
   1.509 +
   1.510 +            *status = U_ZERO_ERROR; // reset error
   1.511 +
   1.512 +            b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
   1.513 +        }
   1.514 +
   1.515 +
   1.516 +        //step 6:Apply toASCII
   1.517 +        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
   1.518 +
   1.519 +        if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.520 +            // redo processing of string
   1.521 +            /* we do not have enough room so grow the buffer*/
   1.522 +            b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
   1.523 +            if(b3==NULL){
   1.524 +                *status = U_MEMORY_ALLOCATION_ERROR;
   1.525 +                goto CLEANUP;
   1.526 +            }
   1.527 +
   1.528 +            *status = U_ZERO_ERROR; // reset error
   1.529 +
   1.530 +            b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
   1.531 +
   1.532 +        }
   1.533 +        //bail out on error
   1.534 +        if(U_FAILURE(*status)){
   1.535 +            goto CLEANUP;
   1.536 +        }
   1.537 +
   1.538 +        //step 7: verify
   1.539 +        if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
   1.540 +            // Cause the original to be returned.
   1.541 +            *status = U_IDNA_VERIFICATION_ERROR;
   1.542 +            goto CLEANUP;
   1.543 +        }
   1.544 +
   1.545 +        //step 8: return output of step 5
   1.546 +        reqLength = b2Len;
   1.547 +        if(b2Len <= destCapacity) {
   1.548 +            uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
   1.549 +        }
   1.550 +    }
   1.551 +    else{
   1.552 +        // See the start of this if statement for why this is commented out.
   1.553 +        // verify that STD3 ASCII rules are satisfied
   1.554 +        /*if(useSTD3ASCIIRules == TRUE){
   1.555 +            if( srcIsLDH == FALSE // source contains some non-LDH characters
   1.556 +                || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
   1.557 +                *status = U_IDNA_STD3_ASCII_RULES_ERROR;
   1.558 +
   1.559 +                // populate the parseError struct
   1.560 +                if(srcIsLDH==FALSE){
   1.561 +                    // failPos is always set the index of failure
   1.562 +                    uprv_syntaxError(src,failPos, srcLength,parseError);
   1.563 +                }else if(src[0] == HYPHEN){
   1.564 +                    // fail position is 0 
   1.565 +                    uprv_syntaxError(src,0,srcLength,parseError);
   1.566 +                }else{
   1.567 +                    // the last index in the source is always length-1
   1.568 +                    uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
   1.569 +                }
   1.570 +
   1.571 +                goto CLEANUP;
   1.572 +            }
   1.573 +        }*/
   1.574 +        // just return the source
   1.575 +        //copy the source to destination
   1.576 +        if(srcLength <= destCapacity){
   1.577 +            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
   1.578 +        }
   1.579 +        reqLength = srcLength;
   1.580 +    }
   1.581 +
   1.582 +
   1.583 +CLEANUP:
   1.584 +
   1.585 +    if(b1 != b1Stack && b1!=src){
   1.586 +        uprv_free(b1);
   1.587 +    }
   1.588 +    if(b2 != b2Stack){
   1.589 +        uprv_free(b2);
   1.590 +    }
   1.591 +    uprv_free(caseFlags);
   1.592 +
   1.593 +    // The RFC states that 
   1.594 +    // <quote>
   1.595 +    // ToUnicode never fails. If any step fails, then the original input
   1.596 +    // is returned immediately in that step.
   1.597 +    // </quote>
   1.598 +    // So if any step fails lets copy source to destination
   1.599 +    if(U_FAILURE(*status)){
   1.600 +        //copy the source to destination
   1.601 +        if(dest && srcLength <= destCapacity){
   1.602 +            // srcLength should have already been set earlier.
   1.603 +            U_ASSERT(srcLength >= 0);
   1.604 +            uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
   1.605 +        }
   1.606 +        reqLength = srcLength;
   1.607 +        *status = U_ZERO_ERROR;
   1.608 +    }
   1.609 +
   1.610 +    return u_terminateUChars(dest, destCapacity, reqLength, status);
   1.611 +}
   1.612 +
   1.613 +U_CAPI int32_t U_EXPORT2
   1.614 +uidna_toASCII(const UChar* src, int32_t srcLength, 
   1.615 +              UChar* dest, int32_t destCapacity,
   1.616 +              int32_t options,
   1.617 +              UParseError* parseError,
   1.618 +              UErrorCode* status){
   1.619 +    
   1.620 +    if(status == NULL || U_FAILURE(*status)){
   1.621 +        return 0;
   1.622 +    }
   1.623 +    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   1.624 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.625 +        return 0;
   1.626 +    }
   1.627 +
   1.628 +    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   1.629 +    
   1.630 +    if(U_FAILURE(*status)){
   1.631 +        return -1;
   1.632 +    }
   1.633 +    
   1.634 +    int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
   1.635 +    
   1.636 +    /* close the profile*/
   1.637 +    usprep_close(nameprep);
   1.638 +    
   1.639 +    return retLen;
   1.640 +}
   1.641 +
   1.642 +U_CAPI int32_t U_EXPORT2
   1.643 +uidna_toUnicode(const UChar* src, int32_t srcLength,
   1.644 +                UChar* dest, int32_t destCapacity,
   1.645 +                int32_t options,
   1.646 +                UParseError* parseError,
   1.647 +                UErrorCode* status){
   1.648 +
   1.649 +    if(status == NULL || U_FAILURE(*status)){
   1.650 +        return 0;
   1.651 +    }
   1.652 +    if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   1.653 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.654 +        return 0;
   1.655 +    }  
   1.656 +
   1.657 +    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   1.658 +    
   1.659 +    if(U_FAILURE(*status)){
   1.660 +        return -1;
   1.661 +    }
   1.662 +    
   1.663 +    int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
   1.664 +
   1.665 +    usprep_close(nameprep);
   1.666 +    
   1.667 +    return retLen;
   1.668 +}
   1.669 +
   1.670 +
   1.671 +U_CAPI int32_t U_EXPORT2
   1.672 +uidna_IDNToASCII(  const UChar *src, int32_t srcLength,
   1.673 +                   UChar* dest, int32_t destCapacity,
   1.674 +                   int32_t options,
   1.675 +                   UParseError *parseError,
   1.676 +                   UErrorCode *status){
   1.677 +
   1.678 +    if(status == NULL || U_FAILURE(*status)){
   1.679 +        return 0;
   1.680 +    }
   1.681 +    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   1.682 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.683 +        return 0;
   1.684 +    }
   1.685 +
   1.686 +    int32_t reqLength = 0;
   1.687 +
   1.688 +    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   1.689 +    
   1.690 +    if(U_FAILURE(*status)){
   1.691 +        return 0;
   1.692 +    }
   1.693 +
   1.694 +    //initialize pointers 
   1.695 +    UChar *delimiter = (UChar*)src;
   1.696 +    UChar *labelStart = (UChar*)src;
   1.697 +    UChar *currentDest = (UChar*) dest;
   1.698 +    int32_t remainingLen = srcLength;
   1.699 +    int32_t remainingDestCapacity = destCapacity;
   1.700 +    int32_t labelLen = 0, labelReqLength = 0;
   1.701 +    UBool done = FALSE;
   1.702 +
   1.703 +
   1.704 +    for(;;){
   1.705 +
   1.706 +        labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
   1.707 +        labelReqLength = 0;
   1.708 +        if(!(labelLen==0 && done)){// make sure this is not a root label separator.
   1.709 +        
   1.710 +            labelReqLength = _internal_toASCII( labelStart, labelLen, 
   1.711 +                                                currentDest, remainingDestCapacity, 
   1.712 +                                                options, nameprep, 
   1.713 +                                                parseError, status);
   1.714 +    
   1.715 +            if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.716 +                
   1.717 +                *status = U_ZERO_ERROR; // reset error
   1.718 +                remainingDestCapacity = 0;
   1.719 +            }
   1.720 +        }
   1.721 +
   1.722 +    
   1.723 +        if(U_FAILURE(*status)){
   1.724 +            break;
   1.725 +        }
   1.726 +        
   1.727 +        reqLength +=labelReqLength;
   1.728 +        // adjust the destination pointer
   1.729 +        if(labelReqLength < remainingDestCapacity){
   1.730 +            currentDest = currentDest + labelReqLength;
   1.731 +            remainingDestCapacity -= labelReqLength;
   1.732 +        }else{
   1.733 +            // should never occur
   1.734 +            remainingDestCapacity = 0;
   1.735 +        }
   1.736 +
   1.737 +        if(done == TRUE){
   1.738 +            break;
   1.739 +        }
   1.740 +
   1.741 +        // add the label separator
   1.742 +        if(remainingDestCapacity > 0){
   1.743 +            *currentDest++ = FULL_STOP;
   1.744 +            remainingDestCapacity--;
   1.745 +        }
   1.746 +        reqLength++;
   1.747 +
   1.748 +        labelStart = delimiter;
   1.749 +        if(remainingLen >0 ){
   1.750 +            remainingLen = (int32_t)(srcLength - (delimiter - src));
   1.751 +        }
   1.752 +
   1.753 +    }
   1.754 +
   1.755 +    if(reqLength > MAX_DOMAIN_NAME_LENGTH){
   1.756 +        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
   1.757 +    }
   1.758 +
   1.759 +    usprep_close(nameprep);
   1.760 +    
   1.761 +    return u_terminateUChars(dest, destCapacity, reqLength, status);
   1.762 +}
   1.763 +
   1.764 +U_CAPI int32_t U_EXPORT2
   1.765 +uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
   1.766 +                     UChar* dest, int32_t destCapacity,
   1.767 +                     int32_t options,
   1.768 +                     UParseError* parseError,
   1.769 +                     UErrorCode* status){
   1.770 +    
   1.771 +    if(status == NULL || U_FAILURE(*status)){
   1.772 +        return 0;
   1.773 +    }
   1.774 +    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
   1.775 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.776 +        return 0;
   1.777 +    }
   1.778 +
   1.779 +    int32_t reqLength = 0;
   1.780 +
   1.781 +    UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
   1.782 +    
   1.783 +    if(U_FAILURE(*status)){
   1.784 +        return 0;
   1.785 +    }
   1.786 +
   1.787 +    //initialize pointers
   1.788 +    UChar *delimiter = (UChar*)src;
   1.789 +    UChar *labelStart = (UChar*)src;
   1.790 +    UChar *currentDest = (UChar*) dest;
   1.791 +    int32_t remainingLen = srcLength;
   1.792 +    int32_t remainingDestCapacity = destCapacity;
   1.793 +    int32_t labelLen = 0, labelReqLength = 0;
   1.794 +    UBool done = FALSE;
   1.795 +
   1.796 +    for(;;){
   1.797 +
   1.798 +        labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
   1.799 +        
   1.800 +        // The RFC states that 
   1.801 +        // <quote>
   1.802 +        // ToUnicode never fails. If any step fails, then the original input
   1.803 +        // is returned immediately in that step.
   1.804 +        // </quote>
   1.805 +        // _internal_toUnicode will copy the label.
   1.806 +        /*if(labelLen==0 && done==FALSE){ 
   1.807 +            *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
   1.808 +            break;
   1.809 +        }*/
   1.810 +        
   1.811 +        labelReqLength = _internal_toUnicode(labelStart, labelLen, 
   1.812 +                                             currentDest, remainingDestCapacity, 
   1.813 +                                             options, nameprep, 
   1.814 +                                             parseError, status);
   1.815 +
   1.816 +        if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.817 +            *status = U_ZERO_ERROR; // reset error
   1.818 +            remainingDestCapacity = 0;
   1.819 +        }
   1.820 +
   1.821 +        if(U_FAILURE(*status)){
   1.822 +            break;
   1.823 +        }
   1.824 +        
   1.825 +        reqLength +=labelReqLength;
   1.826 +        // adjust the destination pointer
   1.827 +        if(labelReqLength < remainingDestCapacity){
   1.828 +            currentDest = currentDest + labelReqLength;
   1.829 +            remainingDestCapacity -= labelReqLength;
   1.830 +        }else{
   1.831 +            // should never occur
   1.832 +            remainingDestCapacity = 0;
   1.833 +        }
   1.834 +
   1.835 +        if(done == TRUE){
   1.836 +            break;
   1.837 +        }
   1.838 +
   1.839 +        // add the label separator
   1.840 +        // Unlike the ToASCII operation we don't normalize the label separators
   1.841 +        if(remainingDestCapacity > 0){
   1.842 +            *currentDest++ = *(labelStart + labelLen);
   1.843 +            remainingDestCapacity--;
   1.844 +        }
   1.845 +        reqLength++;
   1.846 +
   1.847 +        labelStart = delimiter;
   1.848 +        if(remainingLen >0 ){
   1.849 +            remainingLen = (int32_t)(srcLength - (delimiter - src));
   1.850 +        }
   1.851 +
   1.852 +    }
   1.853 +
   1.854 +    if(reqLength > MAX_DOMAIN_NAME_LENGTH){
   1.855 +        *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
   1.856 +    }
   1.857 +
   1.858 +    usprep_close(nameprep);
   1.859 +    
   1.860 +    return u_terminateUChars(dest, destCapacity, reqLength, status);
   1.861 +}
   1.862 +
   1.863 +U_CAPI int32_t U_EXPORT2
   1.864 +uidna_compare(  const UChar *s1, int32_t length1,
   1.865 +                const UChar *s2, int32_t length2,
   1.866 +                int32_t options,
   1.867 +                UErrorCode* status){
   1.868 +
   1.869 +    if(status == NULL || U_FAILURE(*status)){
   1.870 +        return -1;
   1.871 +    }
   1.872 +
   1.873 +    UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
   1.874 +    UChar *b1 = b1Stack, *b2 = b2Stack;
   1.875 +    int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
   1.876 +    int32_t result=-1;
   1.877 +    
   1.878 +    UParseError parseError;
   1.879 +
   1.880 +    b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
   1.881 +    if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.882 +        // redo processing of string
   1.883 +        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
   1.884 +        if(b1==NULL){
   1.885 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.886 +            goto CLEANUP;
   1.887 +        }
   1.888 +
   1.889 +        *status = U_ZERO_ERROR; // reset error
   1.890 +        
   1.891 +        b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
   1.892 +        
   1.893 +    }
   1.894 +
   1.895 +    b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
   1.896 +    if(*status == U_BUFFER_OVERFLOW_ERROR){
   1.897 +        // redo processing of string
   1.898 +        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
   1.899 +        if(b2==NULL){
   1.900 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.901 +            goto CLEANUP;
   1.902 +        }
   1.903 +
   1.904 +        *status = U_ZERO_ERROR; // reset error
   1.905 +        
   1.906 +        b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
   1.907 +        
   1.908 +    }
   1.909 +    // when toASCII is applied all label separators are replaced with FULL_STOP
   1.910 +    result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
   1.911 +
   1.912 +CLEANUP:
   1.913 +    if(b1 != b1Stack){
   1.914 +        uprv_free(b1);
   1.915 +    }
   1.916 +
   1.917 +    if(b2 != b2Stack){
   1.918 +        uprv_free(b2);
   1.919 +    }
   1.920 +
   1.921 +    return result;
   1.922 +}
   1.923 +
   1.924 +#endif /* #if !UCONFIG_NO_IDNA */

mercurial