1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/uidna.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,921 @@ 1.4 +/* 1.5 + ******************************************************************************* 1.6 + * 1.7 + * Copyright (C) 2003-2009, International Business Machines 1.8 + * Corporation and others. All Rights Reserved. 1.9 + * 1.10 + ******************************************************************************* 1.11 + * file name: uidna.cpp 1.12 + * encoding: US-ASCII 1.13 + * tab size: 8 (not used) 1.14 + * indentation:4 1.15 + * 1.16 + * created on: 2003feb1 1.17 + * created by: Ram Viswanadha 1.18 + */ 1.19 + 1.20 +#include "unicode/utypes.h" 1.21 + 1.22 +#if !UCONFIG_NO_IDNA 1.23 + 1.24 +#include "unicode/uidna.h" 1.25 +#include "unicode/ustring.h" 1.26 +#include "unicode/usprep.h" 1.27 +#include "punycode.h" 1.28 +#include "ustr_imp.h" 1.29 +#include "cmemory.h" 1.30 +#include "uassert.h" 1.31 +#include "sprpimpl.h" 1.32 + 1.33 +/* it is official IDNA ACE Prefix is "xn--" */ 1.34 +static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; 1.35 +#define ACE_PREFIX_LENGTH 4 1.36 + 1.37 +#define MAX_LABEL_LENGTH 63 1.38 +/* The Max length of the labels should not be more than MAX_LABEL_LENGTH */ 1.39 +#define MAX_LABEL_BUFFER_SIZE 100 1.40 + 1.41 +#define MAX_DOMAIN_NAME_LENGTH 255 1.42 +/* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */ 1.43 +#define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1 1.44 + 1.45 +#define LOWER_CASE_DELTA 0x0020 1.46 +#define HYPHEN 0x002D 1.47 +#define FULL_STOP 0x002E 1.48 +#define CAPITAL_A 0x0041 1.49 +#define CAPITAL_Z 0x005A 1.50 + 1.51 +inline static UChar 1.52 +toASCIILower(UChar ch){ 1.53 + if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 1.54 + return ch + LOWER_CASE_DELTA; 1.55 + } 1.56 + return ch; 1.57 +} 1.58 + 1.59 +inline static UBool 1.60 +startsWithPrefix(const UChar* src , int32_t srcLength){ 1.61 + UBool startsWithPrefix = TRUE; 1.62 + 1.63 + if(srcLength < ACE_PREFIX_LENGTH){ 1.64 + return FALSE; 1.65 + } 1.66 + 1.67 + for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ 1.68 + if(toASCIILower(src[i]) != ACE_PREFIX[i]){ 1.69 + startsWithPrefix = FALSE; 1.70 + } 1.71 + } 1.72 + return startsWithPrefix; 1.73 +} 1.74 + 1.75 + 1.76 +inline static int32_t 1.77 +compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 1.78 + const UChar* s2, int32_t s2Len){ 1.79 + 1.80 + int32_t minLength; 1.81 + int32_t lengthResult; 1.82 + 1.83 + // are we comparing different lengths? 1.84 + if(s1Len != s2Len) { 1.85 + if(s1Len < s2Len) { 1.86 + minLength = s1Len; 1.87 + lengthResult = -1; 1.88 + } else { 1.89 + minLength = s2Len; 1.90 + lengthResult = 1; 1.91 + } 1.92 + } else { 1.93 + // ok the lengths are equal 1.94 + minLength = s1Len; 1.95 + lengthResult = 0; 1.96 + } 1.97 + 1.98 + UChar c1,c2; 1.99 + int32_t rc; 1.100 + 1.101 + for(int32_t i =0;/* no condition */;i++) { 1.102 + 1.103 + /* If we reach the ends of both strings then they match */ 1.104 + if(i == minLength) { 1.105 + return lengthResult; 1.106 + } 1.107 + 1.108 + c1 = s1[i]; 1.109 + c2 = s2[i]; 1.110 + 1.111 + /* Case-insensitive comparison */ 1.112 + if(c1!=c2) { 1.113 + rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); 1.114 + if(rc!=0) { 1.115 + lengthResult=rc; 1.116 + break; 1.117 + } 1.118 + } 1.119 + } 1.120 + return lengthResult; 1.121 +} 1.122 + 1.123 + 1.124 +/** 1.125 + * Ascertain if the given code point is a label separator as 1.126 + * defined by the IDNA RFC 1.127 + * 1.128 + * @param ch The code point to be ascertained 1.129 + * @return true if the char is a label separator 1.130 + * @stable ICU 2.8 1.131 + */ 1.132 +static inline UBool isLabelSeparator(UChar ch){ 1.133 + switch(ch){ 1.134 + case 0x002e: 1.135 + case 0x3002: 1.136 + case 0xFF0E: 1.137 + case 0xFF61: 1.138 + return TRUE; 1.139 + default: 1.140 + return FALSE; 1.141 + } 1.142 +} 1.143 + 1.144 +// returns the length of the label excluding the separator 1.145 +// if *limit == separator then the length returned does not include 1.146 +// the separtor. 1.147 +static inline int32_t 1.148 +getNextSeparator(UChar *src, int32_t srcLength, 1.149 + UChar **limit, UBool *done){ 1.150 + if(srcLength == -1){ 1.151 + int32_t i; 1.152 + for(i=0 ; ;i++){ 1.153 + if(src[i] == 0){ 1.154 + *limit = src + i; // point to null 1.155 + *done = TRUE; 1.156 + return i; 1.157 + } 1.158 + if(isLabelSeparator(src[i])){ 1.159 + *limit = src + (i+1); // go past the delimiter 1.160 + return i; 1.161 + 1.162 + } 1.163 + } 1.164 + }else{ 1.165 + int32_t i; 1.166 + for(i=0;i<srcLength;i++){ 1.167 + if(isLabelSeparator(src[i])){ 1.168 + *limit = src + (i+1); // go past the delimiter 1.169 + return i; 1.170 + } 1.171 + } 1.172 + // we have not found the delimiter 1.173 + // if(i==srcLength) 1.174 + *limit = src+srcLength; 1.175 + *done = TRUE; 1.176 + 1.177 + return i; 1.178 + } 1.179 +} 1.180 +static inline UBool isLDHChar(UChar ch){ 1.181 + // high runner case 1.182 + if(ch>0x007A){ 1.183 + return FALSE; 1.184 + } 1.185 + //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 1.186 + if( (ch==0x002D) || 1.187 + (0x0030 <= ch && ch <= 0x0039) || 1.188 + (0x0041 <= ch && ch <= 0x005A) || 1.189 + (0x0061 <= ch && ch <= 0x007A) 1.190 + ){ 1.191 + return TRUE; 1.192 + } 1.193 + return FALSE; 1.194 +} 1.195 + 1.196 +static int32_t 1.197 +_internal_toASCII(const UChar* src, int32_t srcLength, 1.198 + UChar* dest, int32_t destCapacity, 1.199 + int32_t options, 1.200 + UStringPrepProfile* nameprep, 1.201 + UParseError* parseError, 1.202 + UErrorCode* status) 1.203 +{ 1.204 + 1.205 + // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. 1.206 + UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; 1.207 + //initialize pointers to stack buffers 1.208 + UChar *b1 = b1Stack, *b2 = b2Stack; 1.209 + int32_t b1Len=0, b2Len, 1.210 + b1Capacity = MAX_LABEL_BUFFER_SIZE, 1.211 + b2Capacity = MAX_LABEL_BUFFER_SIZE , 1.212 + reqLength=0; 1.213 + 1.214 + int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 1.215 + UBool* caseFlags = NULL; 1.216 + 1.217 + // the source contains all ascii codepoints 1.218 + UBool srcIsASCII = TRUE; 1.219 + // assume the source contains all LDH codepoints 1.220 + UBool srcIsLDH = TRUE; 1.221 + 1.222 + int32_t j=0; 1.223 + 1.224 + //get the options 1.225 + UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); 1.226 + 1.227 + int32_t failPos = -1; 1.228 + 1.229 + if(srcLength == -1){ 1.230 + srcLength = u_strlen(src); 1.231 + } 1.232 + 1.233 + if(srcLength > b1Capacity){ 1.234 + b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); 1.235 + if(b1==NULL){ 1.236 + *status = U_MEMORY_ALLOCATION_ERROR; 1.237 + goto CLEANUP; 1.238 + } 1.239 + b1Capacity = srcLength; 1.240 + } 1.241 + 1.242 + // step 1 1.243 + for( j=0;j<srcLength;j++){ 1.244 + if(src[j] > 0x7F){ 1.245 + srcIsASCII = FALSE; 1.246 + } 1.247 + b1[b1Len++] = src[j]; 1.248 + } 1.249 + 1.250 + // step 2 is performed only if the source contains non ASCII 1.251 + if(srcIsASCII == FALSE){ 1.252 + 1.253 + // step 2 1.254 + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); 1.255 + 1.256 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.257 + // redo processing of string 1.258 + // we do not have enough room so grow the buffer 1.259 + if(b1 != b1Stack){ 1.260 + uprv_free(b1); 1.261 + } 1.262 + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1.263 + if(b1==NULL){ 1.264 + *status = U_MEMORY_ALLOCATION_ERROR; 1.265 + goto CLEANUP; 1.266 + } 1.267 + 1.268 + *status = U_ZERO_ERROR; // reset error 1.269 + 1.270 + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); 1.271 + } 1.272 + } 1.273 + // error bail out 1.274 + if(U_FAILURE(*status)){ 1.275 + goto CLEANUP; 1.276 + } 1.277 + if(b1Len == 0){ 1.278 + *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 1.279 + goto CLEANUP; 1.280 + } 1.281 + 1.282 + // for step 3 & 4 1.283 + srcIsASCII = TRUE; 1.284 + for( j=0;j<b1Len;j++){ 1.285 + // check if output of usprep_prepare is all ASCII 1.286 + if(b1[j] > 0x7F){ 1.287 + srcIsASCII = FALSE; 1.288 + }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character 1.289 + srcIsLDH = FALSE; 1.290 + failPos = j; 1.291 + } 1.292 + } 1.293 + if(useSTD3ASCIIRules == TRUE){ 1.294 + // verify 3a and 3b 1.295 + // 3(a) Verify the absence of non-LDH ASCII code points; that is, the 1.296 + // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. 1.297 + // 3(b) Verify the absence of leading and trailing hyphen-minus; that 1.298 + // is, the absence of U+002D at the beginning and end of the 1.299 + // sequence. 1.300 + if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ 1.301 + || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ 1.302 + *status = U_IDNA_STD3_ASCII_RULES_ERROR; 1.303 + 1.304 + /* populate the parseError struct */ 1.305 + if(srcIsLDH==FALSE){ 1.306 + // failPos is always set the index of failure 1.307 + uprv_syntaxError(b1,failPos, b1Len,parseError); 1.308 + }else if(b1[0] == HYPHEN){ 1.309 + // fail position is 0 1.310 + uprv_syntaxError(b1,0,b1Len,parseError); 1.311 + }else{ 1.312 + // the last index in the source is always length-1 1.313 + uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); 1.314 + } 1.315 + 1.316 + goto CLEANUP; 1.317 + } 1.318 + } 1.319 + // Step 4: if the source is ASCII then proceed to step 8 1.320 + if(srcIsASCII){ 1.321 + if(b1Len <= destCapacity){ 1.322 + uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); 1.323 + reqLength = b1Len; 1.324 + }else{ 1.325 + reqLength = b1Len; 1.326 + goto CLEANUP; 1.327 + } 1.328 + }else{ 1.329 + // step 5 : verify the sequence does not begin with ACE prefix 1.330 + if(!startsWithPrefix(b1,b1Len)){ 1.331 + 1.332 + //step 6: encode the sequence with punycode 1.333 + 1.334 + // do not preserve the case flags for now! 1.335 + // TODO: Preserve the case while implementing the RFE 1.336 + // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); 1.337 + // uprv_memset(caseFlags,TRUE,b1Len); 1.338 + 1.339 + b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); 1.340 + 1.341 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.342 + // redo processing of string 1.343 + /* we do not have enough room so grow the buffer*/ 1.344 + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1.345 + if(b2 == NULL){ 1.346 + *status = U_MEMORY_ALLOCATION_ERROR; 1.347 + goto CLEANUP; 1.348 + } 1.349 + 1.350 + *status = U_ZERO_ERROR; // reset error 1.351 + 1.352 + b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); 1.353 + } 1.354 + //error bail out 1.355 + if(U_FAILURE(*status)){ 1.356 + goto CLEANUP; 1.357 + } 1.358 + // TODO : Reconsider while implementing the case preserve RFE 1.359 + // convert all codepoints to lower case ASCII 1.360 + // toASCIILower(b2,b2Len); 1.361 + reqLength = b2Len+ACE_PREFIX_LENGTH; 1.362 + 1.363 + if(reqLength > destCapacity){ 1.364 + *status = U_BUFFER_OVERFLOW_ERROR; 1.365 + goto CLEANUP; 1.366 + } 1.367 + //Step 7: prepend the ACE prefix 1.368 + uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); 1.369 + //Step 6: copy the contents in b2 into dest 1.370 + uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); 1.371 + 1.372 + }else{ 1.373 + *status = U_IDNA_ACE_PREFIX_ERROR; 1.374 + //position of failure is 0 1.375 + uprv_syntaxError(b1,0,b1Len,parseError); 1.376 + goto CLEANUP; 1.377 + } 1.378 + } 1.379 + // step 8: verify the length of label 1.380 + if(reqLength > MAX_LABEL_LENGTH){ 1.381 + *status = U_IDNA_LABEL_TOO_LONG_ERROR; 1.382 + } 1.383 + 1.384 +CLEANUP: 1.385 + if(b1 != b1Stack){ 1.386 + uprv_free(b1); 1.387 + } 1.388 + if(b2 != b2Stack){ 1.389 + uprv_free(b2); 1.390 + } 1.391 + uprv_free(caseFlags); 1.392 + 1.393 + return u_terminateUChars(dest, destCapacity, reqLength, status); 1.394 +} 1.395 + 1.396 +static int32_t 1.397 +_internal_toUnicode(const UChar* src, int32_t srcLength, 1.398 + UChar* dest, int32_t destCapacity, 1.399 + int32_t options, 1.400 + UStringPrepProfile* nameprep, 1.401 + UParseError* parseError, 1.402 + UErrorCode* status) 1.403 +{ 1.404 + 1.405 + //get the options 1.406 + //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); 1.407 + int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 1.408 + 1.409 + // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. 1.410 + UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; 1.411 + 1.412 + //initialize pointers to stack buffers 1.413 + UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; 1.414 + int32_t b1Len, b2Len, b1PrimeLen, b3Len, 1.415 + b1Capacity = MAX_LABEL_BUFFER_SIZE, 1.416 + b2Capacity = MAX_LABEL_BUFFER_SIZE, 1.417 + b3Capacity = MAX_LABEL_BUFFER_SIZE, 1.418 + reqLength=0; 1.419 + 1.420 + b1Len = 0; 1.421 + UBool* caseFlags = NULL; 1.422 + 1.423 + UBool srcIsASCII = TRUE; 1.424 + /*UBool srcIsLDH = TRUE; 1.425 + int32_t failPos =0;*/ 1.426 + 1.427 + // step 1: find out if all the codepoints in src are ASCII 1.428 + if(srcLength==-1){ 1.429 + srcLength = 0; 1.430 + for(;src[srcLength]!=0;){ 1.431 + if(src[srcLength]> 0x7f){ 1.432 + srcIsASCII = FALSE; 1.433 + }/*else if(isLDHChar(src[srcLength])==FALSE){ 1.434 + // here we do not assemble surrogates 1.435 + // since we know that LDH code points 1.436 + // are in the ASCII range only 1.437 + srcIsLDH = FALSE; 1.438 + failPos = srcLength; 1.439 + }*/ 1.440 + srcLength++; 1.441 + } 1.442 + }else if(srcLength > 0){ 1.443 + for(int32_t j=0; j<srcLength; j++){ 1.444 + if(src[j]> 0x7f){ 1.445 + srcIsASCII = FALSE; 1.446 + }/*else if(isLDHChar(src[j])==FALSE){ 1.447 + // here we do not assemble surrogates 1.448 + // since we know that LDH code points 1.449 + // are in the ASCII range only 1.450 + srcIsLDH = FALSE; 1.451 + failPos = j; 1.452 + }*/ 1.453 + } 1.454 + }else{ 1.455 + return 0; 1.456 + } 1.457 + 1.458 + if(srcIsASCII == FALSE){ 1.459 + // step 2: process the string 1.460 + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); 1.461 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.462 + // redo processing of string 1.463 + /* we do not have enough room so grow the buffer*/ 1.464 + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1.465 + if(b1==NULL){ 1.466 + *status = U_MEMORY_ALLOCATION_ERROR; 1.467 + goto CLEANUP; 1.468 + } 1.469 + 1.470 + *status = U_ZERO_ERROR; // reset error 1.471 + 1.472 + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); 1.473 + } 1.474 + //bail out on error 1.475 + if(U_FAILURE(*status)){ 1.476 + goto CLEANUP; 1.477 + } 1.478 + }else{ 1.479 + 1.480 + //just point src to b1 1.481 + b1 = (UChar*) src; 1.482 + b1Len = srcLength; 1.483 + } 1.484 + 1.485 + // The RFC states that 1.486 + // <quote> 1.487 + // ToUnicode never fails. If any step fails, then the original input 1.488 + // is returned immediately in that step. 1.489 + // </quote> 1.490 + 1.491 + //step 3: verify ACE Prefix 1.492 + if(startsWithPrefix(b1,b1Len)){ 1.493 + 1.494 + //step 4: Remove the ACE Prefix 1.495 + b1Prime = b1 + ACE_PREFIX_LENGTH; 1.496 + b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; 1.497 + 1.498 + //step 5: Decode using punycode 1.499 + b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); 1.500 + 1.501 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.502 + // redo processing of string 1.503 + /* we do not have enough room so grow the buffer*/ 1.504 + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1.505 + if(b2==NULL){ 1.506 + *status = U_MEMORY_ALLOCATION_ERROR; 1.507 + goto CLEANUP; 1.508 + } 1.509 + 1.510 + *status = U_ZERO_ERROR; // reset error 1.511 + 1.512 + b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); 1.513 + } 1.514 + 1.515 + 1.516 + //step 6:Apply toASCII 1.517 + b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); 1.518 + 1.519 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.520 + // redo processing of string 1.521 + /* we do not have enough room so grow the buffer*/ 1.522 + b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); 1.523 + if(b3==NULL){ 1.524 + *status = U_MEMORY_ALLOCATION_ERROR; 1.525 + goto CLEANUP; 1.526 + } 1.527 + 1.528 + *status = U_ZERO_ERROR; // reset error 1.529 + 1.530 + b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); 1.531 + 1.532 + } 1.533 + //bail out on error 1.534 + if(U_FAILURE(*status)){ 1.535 + goto CLEANUP; 1.536 + } 1.537 + 1.538 + //step 7: verify 1.539 + if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ 1.540 + // Cause the original to be returned. 1.541 + *status = U_IDNA_VERIFICATION_ERROR; 1.542 + goto CLEANUP; 1.543 + } 1.544 + 1.545 + //step 8: return output of step 5 1.546 + reqLength = b2Len; 1.547 + if(b2Len <= destCapacity) { 1.548 + uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); 1.549 + } 1.550 + } 1.551 + else{ 1.552 + // See the start of this if statement for why this is commented out. 1.553 + // verify that STD3 ASCII rules are satisfied 1.554 + /*if(useSTD3ASCIIRules == TRUE){ 1.555 + if( srcIsLDH == FALSE // source contains some non-LDH characters 1.556 + || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ 1.557 + *status = U_IDNA_STD3_ASCII_RULES_ERROR; 1.558 + 1.559 + // populate the parseError struct 1.560 + if(srcIsLDH==FALSE){ 1.561 + // failPos is always set the index of failure 1.562 + uprv_syntaxError(src,failPos, srcLength,parseError); 1.563 + }else if(src[0] == HYPHEN){ 1.564 + // fail position is 0 1.565 + uprv_syntaxError(src,0,srcLength,parseError); 1.566 + }else{ 1.567 + // the last index in the source is always length-1 1.568 + uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); 1.569 + } 1.570 + 1.571 + goto CLEANUP; 1.572 + } 1.573 + }*/ 1.574 + // just return the source 1.575 + //copy the source to destination 1.576 + if(srcLength <= destCapacity){ 1.577 + uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 1.578 + } 1.579 + reqLength = srcLength; 1.580 + } 1.581 + 1.582 + 1.583 +CLEANUP: 1.584 + 1.585 + if(b1 != b1Stack && b1!=src){ 1.586 + uprv_free(b1); 1.587 + } 1.588 + if(b2 != b2Stack){ 1.589 + uprv_free(b2); 1.590 + } 1.591 + uprv_free(caseFlags); 1.592 + 1.593 + // The RFC states that 1.594 + // <quote> 1.595 + // ToUnicode never fails. If any step fails, then the original input 1.596 + // is returned immediately in that step. 1.597 + // </quote> 1.598 + // So if any step fails lets copy source to destination 1.599 + if(U_FAILURE(*status)){ 1.600 + //copy the source to destination 1.601 + if(dest && srcLength <= destCapacity){ 1.602 + // srcLength should have already been set earlier. 1.603 + U_ASSERT(srcLength >= 0); 1.604 + uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 1.605 + } 1.606 + reqLength = srcLength; 1.607 + *status = U_ZERO_ERROR; 1.608 + } 1.609 + 1.610 + return u_terminateUChars(dest, destCapacity, reqLength, status); 1.611 +} 1.612 + 1.613 +U_CAPI int32_t U_EXPORT2 1.614 +uidna_toASCII(const UChar* src, int32_t srcLength, 1.615 + UChar* dest, int32_t destCapacity, 1.616 + int32_t options, 1.617 + UParseError* parseError, 1.618 + UErrorCode* status){ 1.619 + 1.620 + if(status == NULL || U_FAILURE(*status)){ 1.621 + return 0; 1.622 + } 1.623 + if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 1.624 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.625 + return 0; 1.626 + } 1.627 + 1.628 + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 1.629 + 1.630 + if(U_FAILURE(*status)){ 1.631 + return -1; 1.632 + } 1.633 + 1.634 + int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); 1.635 + 1.636 + /* close the profile*/ 1.637 + usprep_close(nameprep); 1.638 + 1.639 + return retLen; 1.640 +} 1.641 + 1.642 +U_CAPI int32_t U_EXPORT2 1.643 +uidna_toUnicode(const UChar* src, int32_t srcLength, 1.644 + UChar* dest, int32_t destCapacity, 1.645 + int32_t options, 1.646 + UParseError* parseError, 1.647 + UErrorCode* status){ 1.648 + 1.649 + if(status == NULL || U_FAILURE(*status)){ 1.650 + return 0; 1.651 + } 1.652 + if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 1.653 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.654 + return 0; 1.655 + } 1.656 + 1.657 + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 1.658 + 1.659 + if(U_FAILURE(*status)){ 1.660 + return -1; 1.661 + } 1.662 + 1.663 + int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); 1.664 + 1.665 + usprep_close(nameprep); 1.666 + 1.667 + return retLen; 1.668 +} 1.669 + 1.670 + 1.671 +U_CAPI int32_t U_EXPORT2 1.672 +uidna_IDNToASCII( const UChar *src, int32_t srcLength, 1.673 + UChar* dest, int32_t destCapacity, 1.674 + int32_t options, 1.675 + UParseError *parseError, 1.676 + UErrorCode *status){ 1.677 + 1.678 + if(status == NULL || U_FAILURE(*status)){ 1.679 + return 0; 1.680 + } 1.681 + if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 1.682 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.683 + return 0; 1.684 + } 1.685 + 1.686 + int32_t reqLength = 0; 1.687 + 1.688 + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 1.689 + 1.690 + if(U_FAILURE(*status)){ 1.691 + return 0; 1.692 + } 1.693 + 1.694 + //initialize pointers 1.695 + UChar *delimiter = (UChar*)src; 1.696 + UChar *labelStart = (UChar*)src; 1.697 + UChar *currentDest = (UChar*) dest; 1.698 + int32_t remainingLen = srcLength; 1.699 + int32_t remainingDestCapacity = destCapacity; 1.700 + int32_t labelLen = 0, labelReqLength = 0; 1.701 + UBool done = FALSE; 1.702 + 1.703 + 1.704 + for(;;){ 1.705 + 1.706 + labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); 1.707 + labelReqLength = 0; 1.708 + if(!(labelLen==0 && done)){// make sure this is not a root label separator. 1.709 + 1.710 + labelReqLength = _internal_toASCII( labelStart, labelLen, 1.711 + currentDest, remainingDestCapacity, 1.712 + options, nameprep, 1.713 + parseError, status); 1.714 + 1.715 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.716 + 1.717 + *status = U_ZERO_ERROR; // reset error 1.718 + remainingDestCapacity = 0; 1.719 + } 1.720 + } 1.721 + 1.722 + 1.723 + if(U_FAILURE(*status)){ 1.724 + break; 1.725 + } 1.726 + 1.727 + reqLength +=labelReqLength; 1.728 + // adjust the destination pointer 1.729 + if(labelReqLength < remainingDestCapacity){ 1.730 + currentDest = currentDest + labelReqLength; 1.731 + remainingDestCapacity -= labelReqLength; 1.732 + }else{ 1.733 + // should never occur 1.734 + remainingDestCapacity = 0; 1.735 + } 1.736 + 1.737 + if(done == TRUE){ 1.738 + break; 1.739 + } 1.740 + 1.741 + // add the label separator 1.742 + if(remainingDestCapacity > 0){ 1.743 + *currentDest++ = FULL_STOP; 1.744 + remainingDestCapacity--; 1.745 + } 1.746 + reqLength++; 1.747 + 1.748 + labelStart = delimiter; 1.749 + if(remainingLen >0 ){ 1.750 + remainingLen = (int32_t)(srcLength - (delimiter - src)); 1.751 + } 1.752 + 1.753 + } 1.754 + 1.755 + if(reqLength > MAX_DOMAIN_NAME_LENGTH){ 1.756 + *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; 1.757 + } 1.758 + 1.759 + usprep_close(nameprep); 1.760 + 1.761 + return u_terminateUChars(dest, destCapacity, reqLength, status); 1.762 +} 1.763 + 1.764 +U_CAPI int32_t U_EXPORT2 1.765 +uidna_IDNToUnicode( const UChar* src, int32_t srcLength, 1.766 + UChar* dest, int32_t destCapacity, 1.767 + int32_t options, 1.768 + UParseError* parseError, 1.769 + UErrorCode* status){ 1.770 + 1.771 + if(status == NULL || U_FAILURE(*status)){ 1.772 + return 0; 1.773 + } 1.774 + if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 1.775 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.776 + return 0; 1.777 + } 1.778 + 1.779 + int32_t reqLength = 0; 1.780 + 1.781 + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 1.782 + 1.783 + if(U_FAILURE(*status)){ 1.784 + return 0; 1.785 + } 1.786 + 1.787 + //initialize pointers 1.788 + UChar *delimiter = (UChar*)src; 1.789 + UChar *labelStart = (UChar*)src; 1.790 + UChar *currentDest = (UChar*) dest; 1.791 + int32_t remainingLen = srcLength; 1.792 + int32_t remainingDestCapacity = destCapacity; 1.793 + int32_t labelLen = 0, labelReqLength = 0; 1.794 + UBool done = FALSE; 1.795 + 1.796 + for(;;){ 1.797 + 1.798 + labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); 1.799 + 1.800 + // The RFC states that 1.801 + // <quote> 1.802 + // ToUnicode never fails. If any step fails, then the original input 1.803 + // is returned immediately in that step. 1.804 + // </quote> 1.805 + // _internal_toUnicode will copy the label. 1.806 + /*if(labelLen==0 && done==FALSE){ 1.807 + *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 1.808 + break; 1.809 + }*/ 1.810 + 1.811 + labelReqLength = _internal_toUnicode(labelStart, labelLen, 1.812 + currentDest, remainingDestCapacity, 1.813 + options, nameprep, 1.814 + parseError, status); 1.815 + 1.816 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.817 + *status = U_ZERO_ERROR; // reset error 1.818 + remainingDestCapacity = 0; 1.819 + } 1.820 + 1.821 + if(U_FAILURE(*status)){ 1.822 + break; 1.823 + } 1.824 + 1.825 + reqLength +=labelReqLength; 1.826 + // adjust the destination pointer 1.827 + if(labelReqLength < remainingDestCapacity){ 1.828 + currentDest = currentDest + labelReqLength; 1.829 + remainingDestCapacity -= labelReqLength; 1.830 + }else{ 1.831 + // should never occur 1.832 + remainingDestCapacity = 0; 1.833 + } 1.834 + 1.835 + if(done == TRUE){ 1.836 + break; 1.837 + } 1.838 + 1.839 + // add the label separator 1.840 + // Unlike the ToASCII operation we don't normalize the label separators 1.841 + if(remainingDestCapacity > 0){ 1.842 + *currentDest++ = *(labelStart + labelLen); 1.843 + remainingDestCapacity--; 1.844 + } 1.845 + reqLength++; 1.846 + 1.847 + labelStart = delimiter; 1.848 + if(remainingLen >0 ){ 1.849 + remainingLen = (int32_t)(srcLength - (delimiter - src)); 1.850 + } 1.851 + 1.852 + } 1.853 + 1.854 + if(reqLength > MAX_DOMAIN_NAME_LENGTH){ 1.855 + *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; 1.856 + } 1.857 + 1.858 + usprep_close(nameprep); 1.859 + 1.860 + return u_terminateUChars(dest, destCapacity, reqLength, status); 1.861 +} 1.862 + 1.863 +U_CAPI int32_t U_EXPORT2 1.864 +uidna_compare( const UChar *s1, int32_t length1, 1.865 + const UChar *s2, int32_t length2, 1.866 + int32_t options, 1.867 + UErrorCode* status){ 1.868 + 1.869 + if(status == NULL || U_FAILURE(*status)){ 1.870 + return -1; 1.871 + } 1.872 + 1.873 + UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; 1.874 + UChar *b1 = b1Stack, *b2 = b2Stack; 1.875 + int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; 1.876 + int32_t result=-1; 1.877 + 1.878 + UParseError parseError; 1.879 + 1.880 + b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); 1.881 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.882 + // redo processing of string 1.883 + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1.884 + if(b1==NULL){ 1.885 + *status = U_MEMORY_ALLOCATION_ERROR; 1.886 + goto CLEANUP; 1.887 + } 1.888 + 1.889 + *status = U_ZERO_ERROR; // reset error 1.890 + 1.891 + b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); 1.892 + 1.893 + } 1.894 + 1.895 + b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); 1.896 + if(*status == U_BUFFER_OVERFLOW_ERROR){ 1.897 + // redo processing of string 1.898 + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1.899 + if(b2==NULL){ 1.900 + *status = U_MEMORY_ALLOCATION_ERROR; 1.901 + goto CLEANUP; 1.902 + } 1.903 + 1.904 + *status = U_ZERO_ERROR; // reset error 1.905 + 1.906 + b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); 1.907 + 1.908 + } 1.909 + // when toASCII is applied all label separators are replaced with FULL_STOP 1.910 + result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); 1.911 + 1.912 +CLEANUP: 1.913 + if(b1 != b1Stack){ 1.914 + uprv_free(b1); 1.915 + } 1.916 + 1.917 + if(b2 != b2Stack){ 1.918 + uprv_free(b2); 1.919 + } 1.920 + 1.921 + return result; 1.922 +} 1.923 + 1.924 +#endif /* #if !UCONFIG_NO_IDNA */