1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/uts46.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1459 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2010-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: uts46.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2010mar09 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#include "unicode/utypes.h" 1.19 + 1.20 +#if !UCONFIG_NO_IDNA 1.21 + 1.22 +#include "unicode/idna.h" 1.23 +#include "unicode/normalizer2.h" 1.24 +#include "unicode/uscript.h" 1.25 +#include "unicode/ustring.h" 1.26 +#include "unicode/utf16.h" 1.27 +#include "cmemory.h" 1.28 +#include "cstring.h" 1.29 +#include "punycode.h" 1.30 +#include "ubidi_props.h" 1.31 +#include "ustr_imp.h" 1.32 + 1.33 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.34 + 1.35 +// Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: 1.36 +// 1.37 +// The domain name length limit is 255 octets in an internal DNS representation 1.38 +// where the last ("root") label is the empty label 1.39 +// represented by length byte 0 alone. 1.40 +// In a conventional string, this translates to 253 characters, or 254 1.41 +// if there is a trailing dot for the root label. 1.42 + 1.43 +U_NAMESPACE_BEGIN 1.44 + 1.45 +// Severe errors which usually result in a U+FFFD replacement character in the result string. 
1.46 +const uint32_t severeErrors= 1.47 + UIDNA_ERROR_LEADING_COMBINING_MARK| 1.48 + UIDNA_ERROR_DISALLOWED| 1.49 + UIDNA_ERROR_PUNYCODE| 1.50 + UIDNA_ERROR_LABEL_HAS_DOT| 1.51 + UIDNA_ERROR_INVALID_ACE_LABEL; 1.52 + 1.53 +static inline UBool 1.54 +isASCIIString(const UnicodeString &dest) { 1.55 + const UChar *s=dest.getBuffer(); 1.56 + const UChar *limit=s+dest.length(); 1.57 + while(s<limit) { 1.58 + if(*s++>0x7f) { 1.59 + return FALSE; 1.60 + } 1.61 + } 1.62 + return TRUE; 1.63 +} 1.64 + 1.65 +static UBool 1.66 +isASCIIOkBiDi(const UChar *s, int32_t length); 1.67 + 1.68 +static UBool 1.69 +isASCIIOkBiDi(const char *s, int32_t length); 1.70 + 1.71 +// IDNA class default implementations -------------------------------------- *** 1.72 + 1.73 +IDNA::~IDNA() {} 1.74 + 1.75 +void 1.76 +IDNA::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, 1.77 + IDNAInfo &info, UErrorCode &errorCode) const { 1.78 + if(U_SUCCESS(errorCode)) { 1.79 + UnicodeString destString; 1.80 + labelToASCII(UnicodeString::fromUTF8(label), destString, 1.81 + info, errorCode).toUTF8(dest); 1.82 + } 1.83 +} 1.84 + 1.85 +void 1.86 +IDNA::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, 1.87 + IDNAInfo &info, UErrorCode &errorCode) const { 1.88 + if(U_SUCCESS(errorCode)) { 1.89 + UnicodeString destString; 1.90 + labelToUnicode(UnicodeString::fromUTF8(label), destString, 1.91 + info, errorCode).toUTF8(dest); 1.92 + } 1.93 +} 1.94 + 1.95 +void 1.96 +IDNA::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, 1.97 + IDNAInfo &info, UErrorCode &errorCode) const { 1.98 + if(U_SUCCESS(errorCode)) { 1.99 + UnicodeString destString; 1.100 + nameToASCII(UnicodeString::fromUTF8(name), destString, 1.101 + info, errorCode).toUTF8(dest); 1.102 + } 1.103 +} 1.104 + 1.105 +void 1.106 +IDNA::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, 1.107 + IDNAInfo &info, UErrorCode &errorCode) const { 1.108 + if(U_SUCCESS(errorCode)) { 1.109 + UnicodeString destString; 1.110 + 
nameToUnicode(UnicodeString::fromUTF8(name), destString, 1.111 + info, errorCode).toUTF8(dest); 1.112 + } 1.113 +} 1.114 + 1.115 +// UTS46 class declaration ------------------------------------------------- *** 1.116 + 1.117 +class UTS46 : public IDNA { 1.118 +public: 1.119 + UTS46(uint32_t options, UErrorCode &errorCode); 1.120 + virtual ~UTS46(); 1.121 + 1.122 + virtual UnicodeString & 1.123 + labelToASCII(const UnicodeString &label, UnicodeString &dest, 1.124 + IDNAInfo &info, UErrorCode &errorCode) const; 1.125 + 1.126 + virtual UnicodeString & 1.127 + labelToUnicode(const UnicodeString &label, UnicodeString &dest, 1.128 + IDNAInfo &info, UErrorCode &errorCode) const; 1.129 + 1.130 + virtual UnicodeString & 1.131 + nameToASCII(const UnicodeString &name, UnicodeString &dest, 1.132 + IDNAInfo &info, UErrorCode &errorCode) const; 1.133 + 1.134 + virtual UnicodeString & 1.135 + nameToUnicode(const UnicodeString &name, UnicodeString &dest, 1.136 + IDNAInfo &info, UErrorCode &errorCode) const; 1.137 + 1.138 + virtual void 1.139 + labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, 1.140 + IDNAInfo &info, UErrorCode &errorCode) const; 1.141 + 1.142 + virtual void 1.143 + labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, 1.144 + IDNAInfo &info, UErrorCode &errorCode) const; 1.145 + 1.146 + virtual void 1.147 + nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, 1.148 + IDNAInfo &info, UErrorCode &errorCode) const; 1.149 + 1.150 + virtual void 1.151 + nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, 1.152 + IDNAInfo &info, UErrorCode &errorCode) const; 1.153 + 1.154 +private: 1.155 + UnicodeString & 1.156 + process(const UnicodeString &src, 1.157 + UBool isLabel, UBool toASCII, 1.158 + UnicodeString &dest, 1.159 + IDNAInfo &info, UErrorCode &errorCode) const; 1.160 + 1.161 + void 1.162 + processUTF8(const StringPiece &src, 1.163 + UBool isLabel, UBool toASCII, 1.164 + ByteSink &dest, 1.165 + IDNAInfo &info, UErrorCode &errorCode) 
const; 1.166 + 1.167 + UnicodeString & 1.168 + processUnicode(const UnicodeString &src, 1.169 + int32_t labelStart, int32_t mappingStart, 1.170 + UBool isLabel, UBool toASCII, 1.171 + UnicodeString &dest, 1.172 + IDNAInfo &info, UErrorCode &errorCode) const; 1.173 + 1.174 + // returns the new dest.length() 1.175 + int32_t 1.176 + mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, 1.177 + UErrorCode &errorCode) const; 1.178 + 1.179 + // returns the new label length 1.180 + int32_t 1.181 + processLabel(UnicodeString &dest, 1.182 + int32_t labelStart, int32_t labelLength, 1.183 + UBool toASCII, 1.184 + IDNAInfo &info, UErrorCode &errorCode) const; 1.185 + int32_t 1.186 + markBadACELabel(UnicodeString &dest, 1.187 + int32_t labelStart, int32_t labelLength, 1.188 + UBool toASCII, IDNAInfo &info) const; 1.189 + 1.190 + void 1.191 + checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; 1.192 + 1.193 + UBool 1.194 + isLabelOkContextJ(const UChar *label, int32_t labelLength) const; 1.195 + 1.196 + void 1.197 + checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; 1.198 + 1.199 + const Normalizer2 &uts46Norm2; // uts46.nrm 1.200 + uint32_t options; 1.201 +}; 1.202 + 1.203 +IDNA * 1.204 +IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { 1.205 + if(U_SUCCESS(errorCode)) { 1.206 + IDNA *idna=new UTS46(options, errorCode); 1.207 + if(idna==NULL) { 1.208 + errorCode=U_MEMORY_ALLOCATION_ERROR; 1.209 + } else if(U_FAILURE(errorCode)) { 1.210 + delete idna; 1.211 + idna=NULL; 1.212 + } 1.213 + return idna; 1.214 + } else { 1.215 + return NULL; 1.216 + } 1.217 +} 1.218 + 1.219 +// UTS46 implementation ---------------------------------------------------- *** 1.220 + 1.221 +UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) 1.222 + : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), 1.223 + options(opt) {} 1.224 + 1.225 +UTS46::~UTS46() {} 1.226 + 1.227 
// The UTF-16 public functions all forward to process();
// the two UBool arguments are (isLabel, toASCII).

UnicodeString &
UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest,
                    IDNAInfo &info, UErrorCode &errorCode) const {
    return process(label, TRUE, TRUE, dest, info, errorCode);
}

UnicodeString &
UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest,
                      IDNAInfo &info, UErrorCode &errorCode) const {
    return process(label, TRUE, FALSE, dest, info, errorCode);
}

// In addition to process(), checks the total length of an all-ASCII result:
// more than 254 code units, or exactly 254 without a trailing dot, is too long.
UnicodeString &
UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest,
                   IDNAInfo &info, UErrorCode &errorCode) const {
    process(name, FALSE, TRUE, dest, info, errorCode);
    if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 &&
        isASCIIString(dest) &&
        (dest.length()>254 || dest[253]!=0x2e)
    ) {
        info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
    }
    return dest;
}

UnicodeString &
UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest,
                     IDNAInfo &info, UErrorCode &errorCode) const {
    return process(name, FALSE, FALSE, dest, info, errorCode);
}

// The UTF-8 public functions all forward to processUTF8();
// the two UBool arguments are (isLabel, toASCII).

void
UTS46::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
                         IDNAInfo &info, UErrorCode &errorCode) const {
    processUTF8(label, TRUE, TRUE, dest, info, errorCode);
}

void
UTS46::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
                          IDNAInfo &info, UErrorCode &errorCode) const {
    processUTF8(label, TRUE, FALSE, dest, info, errorCode);
}

void
UTS46::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
                        IDNAInfo &info, UErrorCode &errorCode) const {
    processUTF8(name, FALSE, TRUE, dest, info, errorCode);
}

void
UTS46::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
                         IDNAInfo &info, UErrorCode &errorCode) const {
    processUTF8(name, FALSE, FALSE, dest, info, errorCode);
}

// UTS #46 data for ASCII characters.
// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
// and passes through all other ASCII characters.
// If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed
// using this data.
// The ASCII fastpath also uses this data.
// Values: -1=disallowed 0==valid 1==mapped (lowercase)
static const int8_t asciiData[128]={
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 002D..002E; valid  #  HYPHEN-MINUS..FULL STOP
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,
    // 0030..0039; valid  #  DIGIT ZERO..DIGIT NINE
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,
    // 0041..005A; mapped  #  LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
    -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1,
    // 0061..007A; valid  #  LATIN SMALL LETTER A..LATIN SMALL LETTER Z
    -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1
};

// UTF-16 worker for the label/name conversion functions.
// isLabel: src is a single label, so a dot in it is not a label separator.
// toASCII: apply the ToASCII-only checks (empty label, label/name length).
// Copies the leading run of simple ASCII into dest, lowercasing A-Z and
// recording per-label hyphen and length errors. If all of src is handled this
// way, the function returns from inside the loop. Otherwise it hands the
// rest to processUnicode(), starting at the label that contains the first
// character that the fastpath does not handle.
// On failure or bad arguments dest is set to bogus, except that a failed
// buffer allocation only sets errorCode.
UnicodeString &
UTS46::process(const UnicodeString &src,
               UBool isLabel, UBool toASCII,
               UnicodeString &dest,
               IDNAInfo &info, UErrorCode &errorCode) const {
    // uts46Norm2.normalize() would do all of this error checking and setup,
    // but with the ASCII fastpath we do not always call it, and do not
    // call it first.
    if(U_FAILURE(errorCode)) {
        dest.setToBogus();
        return dest;
    }
    const UChar *srcArray=src.getBuffer();
    if(&dest==&src || srcArray==NULL) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        dest.setToBogus();
        return dest;
    }
    // Arguments are fine, reset output values.
    dest.remove();
    info.reset();
    int32_t srcLength=src.length();
    if(srcLength==0) {
        if(toASCII) {
            info.errors|=UIDNA_ERROR_EMPTY_LABEL;
        }
        return dest;
    }
    UChar *destArray=dest.getBuffer(srcLength);
    if(destArray==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return dest;
    }
    // ASCII fastpath
    UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
    int32_t labelStart=0;
    int32_t i;
    for(i=0;; ++i) {
        if(i==srcLength) {
            // The whole input was handled by the fastpath.
            if(toASCII) {
                if((i-labelStart)>63) {
                    info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                }
                // There is a trailing dot if labelStart==i.
                if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
                    info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
                }
            }
            info.errors|=info.labelErrors;
            dest.releaseBuffer(i);
            return dest;
        }
        UChar c=srcArray[i];
        if(c>0x7f) {
            break;
        }
        int cData=asciiData[c];
        if(cData>0) {
            destArray[i]=c+0x20;  // Lowercase an uppercase ASCII letter.
        } else if(cData<0 && disallowNonLDHDot) {
            break;  // Replacing with U+FFFD can be complicated for toASCII.
        } else {
            destArray[i]=c;
            if(c==0x2d) {  // hyphen
                if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
                    // "??--..." is Punycode or forbidden.
                    ++i;  // '-' was copied to dest already
                    break;
                }
                if(i==labelStart) {
                    // label starts with "-"
                    info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
                }
                if((i+1)==srcLength || srcArray[i+1]==0x2e) {
                    // label ends with "-"
                    info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
                }
            } else if(c==0x2e) {  // dot
                if(isLabel) {
                    // Replacing with U+FFFD can be complicated for toASCII.
                    ++i;  // '.' was copied to dest already
                    break;
                }
                if(toASCII) {
                    // Permit an empty label at the end but not elsewhere.
                    if(i==labelStart && i<(srcLength-1)) {
                        info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
                    } else if((i-labelStart)>63) {
                        info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                    }
                }
                // Fold this label's errors into the overall result
                // and start collecting errors for the next label.
                info.errors|=info.labelErrors;
                info.labelErrors=0;
                labelStart=i+1;
            }
        }
    }
    // Slow path: dest holds the first i code units;
    // processUnicode() continues from src index i.
    info.errors|=info.labelErrors;
    dest.releaseBuffer(i);
    processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode);
    // The labels before labelStart were only seen by the fastpath,
    // so they are checked here with isASCIIOkBiDi().
    if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
        (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart)))
    ) {
        info.errors|=UIDNA_ERROR_BIDI;
    }
    return dest;
}

// UTF-8 worker for the label/name conversion functions; same flags as process().
// Inputs of at most 256 bytes go through an ASCII fastpath that writes into
// a buffer obtained from the sink. Completed ASCII labels are appended to the
// sink as bytes, and the current label plus the rest of src is converted to
// UTF-16 for processUnicode(). Longer inputs are converted to UTF-16 as a whole.
void
UTS46::processUTF8(const StringPiece &src,
                   UBool isLabel, UBool toASCII,
                   ByteSink &dest,
                   IDNAInfo &info, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return;
    }
    const char *srcArray=src.data();
    int32_t srcLength=src.length();
    if(srcArray==NULL && srcLength!=0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    // Arguments are fine, reset output values.
    info.reset();
    if(srcLength==0) {
        if(toASCII) {
            info.errors|=UIDNA_ERROR_EMPTY_LABEL;
        }
        dest.Flush();
        return;
    }
    UnicodeString destString;
    int32_t labelStart=0;
    if(srcLength<=256) {  // length of stackArray[]
        // ASCII fastpath
        char stackArray[256];
        int32_t destCapacity;
        char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
                                             stackArray, LENGTHOF(stackArray), &destCapacity);
        UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
        int32_t i;
        for(i=0;; ++i) {
            if(i==srcLength) {
                // The whole input was handled by the fastpath.
                if(toASCII) {
                    if((i-labelStart)>63) {
                        info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                    }
                    // There is a trailing dot if labelStart==i.
                    if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
                        info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
                    }
                }
                info.errors|=info.labelErrors;
                dest.Append(destArray, i);
                dest.Flush();
                return;
            }
            char c=srcArray[i];
            if((int8_t)c<0) {  // (uint8_t)c>0x7f
                break;
            }
            int cData=asciiData[(int)c];  // Cast: gcc warns about indexing with a char.
            if(cData>0) {
                destArray[i]=c+0x20;  // Lowercase an uppercase ASCII letter.
            } else if(cData<0 && disallowNonLDHDot) {
                break;  // Replacing with U+FFFD can be complicated for toASCII.
            } else {
                destArray[i]=c;
                if(c==0x2d) {  // hyphen
                    if(i==(labelStart+3) && srcArray[i-1]==0x2d) {
                        // "??--..." is Punycode or forbidden.
                        break;
                    }
                    if(i==labelStart) {
                        // label starts with "-"
                        info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
                    }
                    if((i+1)==srcLength || srcArray[i+1]==0x2e) {
                        // label ends with "-"
                        info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
                    }
                } else if(c==0x2e) {  // dot
                    if(isLabel) {
                        break;  // Replacing with U+FFFD can be complicated for toASCII.
                    }
                    if(toASCII) {
                        // Permit an empty label at the end but not elsewhere.
                        if(i==labelStart && i<(srcLength-1)) {
                            info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
                        } else if((i-labelStart)>63) {
                            info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                        }
                    }
                    info.errors|=info.labelErrors;
                    info.labelErrors=0;
                    labelStart=i+1;
                }
            }
        }
        info.errors|=info.labelErrors;
        // Convert the processed ASCII prefix of the current label to UTF-16.
        int32_t mappingStart=i-labelStart;
        destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart));
        // Output the previous ASCII labels and process the rest of src in UTF-16.
        dest.Append(destArray, labelStart);
        processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart,
                       isLabel, toASCII,
                       destString, info, errorCode);
    } else {
        // src is too long for the ASCII fastpath implementation.
        processUnicode(UnicodeString::fromUTF8(src), 0, 0,
                       isLabel, toASCII,
                       destString, info, errorCode);
    }
    destString.toUTF8(dest);  // calls dest.Flush()
    if(toASCII && !isLabel) {
        // length==labelStart==254 means that there is a trailing dot (ok) and
        // destString is empty (do not index at 253-labelStart).
        int32_t length=labelStart+destString.length();
        if( length>=254 && isASCIIString(destString) &&
            (length>254 ||
                (labelStart<254 && destString[253-labelStart]!=0x2e))
        ) {
            info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
        }
    }
    // The first labelStart bytes of src were only seen by the fastpath,
    // so they are checked here with isASCIIOkBiDi().
    if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 &&
        (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart)))
    ) {
        info.errors|=UIDNA_ERROR_BIDI;
    }
}

// Normalizes src from mappingStart on with the UTS #46 normalizer
// (into dest, or appended to what dest already holds when mappingStart!=0),
// then walks dest from labelStart: runs processLabel() on each dot-separated
// label (dots are not separators when isLabel) and, in transitional
// processing, maps the deviation characters once via mapDevChars().
// Returns dest.
UnicodeString &
UTS46::processUnicode(const UnicodeString &src,
                      int32_t labelStart, int32_t mappingStart,
                      UBool isLabel, UBool toASCII,
                      UnicodeString &dest,
                      IDNAInfo &info, UErrorCode &errorCode) const {
    if(mappingStart==0) {
        uts46Norm2.normalize(src, dest, errorCode);
    } else {
        uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode);
    }
    if(U_FAILURE(errorCode)) {
        return dest;
    }
    // Deviation characters are mapped unless the NONTRANSITIONAL option
    // for this direction is set.
    UBool doMapDevChars=
        toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 :
                  (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0;
    const UChar *destArray=dest.getBuffer();
    int32_t destLength=dest.length();
    int32_t labelLimit=labelStart;
    while(labelLimit<destLength) {
        UChar c=destArray[labelLimit];
        if(c==0x2e && !isLabel) {
            int32_t labelLength=labelLimit-labelStart;
            int32_t newLength=processLabel(dest, labelStart, labelLength,
                                            toASCII, info, errorCode);
            info.errors|=info.labelErrors;
            info.labelErrors=0;
            if(U_FAILURE(errorCode)) {
                return dest;
            }
            // processLabel() may have changed dest: refetch the buffer,
            // adjust the length, and continue after the dot.
            destArray=dest.getBuffer();
            destLength+=newLength-labelLength;
            labelLimit=labelStart+=newLength+1;
        } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) {
            // Deviation character: U+00DF, U+03C2, U+200C or U+200D.
            info.isTransDiff=TRUE;
            if(doMapDevChars) {
                destLength=mapDevChars(dest, labelStart, labelLimit, errorCode);
                if(U_FAILURE(errorCode)) {
                    return dest;
                }
                destArray=dest.getBuffer();
                // Do not increment labelLimit in case c was removed.
                // All deviation characters have been mapped, no need to check for them again.
                doMapDevChars=FALSE;
            } else {
                ++labelLimit;
            }
        } else {
            ++labelLimit;
        }
    }
    // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok)
    // but not an empty label elsewhere nor a completely empty domain name.
    // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0.
    if(0==labelStart || labelStart<labelLimit) {
        processLabel(dest, labelStart, labelLimit-labelStart,
                      toASCII, info, errorCode);
        info.errors|=info.labelErrors;
    }
    return dest;
}

// Maps the deviation characters in dest in place, from mappingStart to the end:
// sharp s becomes "ss", final sigma becomes nonfinal sigma, ZWNJ/ZWJ are removed.
// If anything was mapped, dest is re-normalized from labelStart on.
// Returns the new dest.length().
int32_t
UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
                   UErrorCode &errorCode) const {
    int32_t length=dest.length();
    // Ask for one extra code unit if the first character to map is a sharp s.
    UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);
    if(s==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return length;
    }
    int32_t capacity=dest.getCapacity();
    UBool didMapDevChars=FALSE;
    // readIndex is ahead of writeIndex once characters have been removed.
    int32_t readIndex=mappingStart, writeIndex=mappingStart;
    do {
        UChar c=s[readIndex++];
        switch(c) {
        case 0xdf:
            // Map sharp s to ss.
            didMapDevChars=TRUE;
            s[writeIndex++]=0x73;  // Replace sharp s with first s.
            // Insert second s and account for possible buffer reallocation.
            if(writeIndex==readIndex) {
                // No gap from earlier removals: make room by shifting the tail.
                if(length==capacity) {
                    dest.releaseBuffer(length);
                    s=dest.getBuffer(length+1);
                    if(s==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return length;
                    }
                    capacity=dest.getCapacity();
                }
                u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex);
                ++readIndex;
            }
            s[writeIndex++]=0x73;
            ++length;
            break;
        case 0x3c2:  // Map final sigma to nonfinal sigma.
            didMapDevChars=TRUE;
            s[writeIndex++]=0x3c3;
            break;
        case 0x200c:  // Ignore/remove ZWNJ.
        case 0x200d:  // Ignore/remove ZWJ.
            didMapDevChars=TRUE;
            --length;
            break;
        default:
            // Only really necessary if writeIndex was different from readIndex.
            s[writeIndex++]=c;
            break;
        }
    } while(writeIndex<length);
    dest.releaseBuffer(length);
    if(didMapDevChars) {
        // Mapping deviation characters might have resulted in an un-NFC string.
        // We could use either the NFC or the UTS #46 normalizer.
        // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
        UnicodeString normalized;
        uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
        if(U_SUCCESS(errorCode)) {
            dest.replace(labelStart, 0x7fffffff, normalized);
            return dest.length();
        }
    }
    return length;
}

// Some non-ASCII characters are equivalent to sequences with
// non-LDH ASCII characters. To find them:
// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
static inline UBool
isNonASCIIDisallowedSTD3Valid(UChar32 c) {
    return c==0x2260 || c==0x226E || c==0x226F;
}

// Replace the label in dest with the label string, if the label was modified.
// If &label==&dest then the label was modified in-place and labelLength
// is the new label length, different from label.length().
// If &label!=&dest then labelLength==label.length().
// Returns labelLength (= the new label length).
static int32_t
replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
             const UnicodeString &label, int32_t labelLength) {
    if(&label!=&dest) {
        dest.replace(destLabelStart, destLabelLength, label);
    }
    return labelLength;
}

// Validates one label, dest[labelStart..labelStart+labelLength), and possibly rewrites it.
// An "xn--" label is decoded from Punycode and checked with the normalizer.
// Then the hyphen, dot, STD3, U+FFFD and leading-combining-mark checks run,
// followed by the optional BiDi/CONTEXTJ/CONTEXTO checks if nothing severe was found.
// With toASCII, a label with non-ASCII characters is replaced by its Punycode form,
// and labels longer than 63 are flagged.
// Errors are ORed into info.labelErrors.
// Returns the new length of the label in dest.
int32_t
UTS46::processLabel(UnicodeString &dest,
                    int32_t labelStart, int32_t labelLength,
                    UBool toASCII,
                    IDNAInfo &info, UErrorCode &errorCode) const {
    UnicodeString fromPunycode;
    UnicodeString *labelString;
    const UChar *label=dest.getBuffer()+labelStart;
    // Remember where the label sits in dest:
    // labelStart/labelLength are redirected to fromPunycode for an ACE label.
    int32_t destLabelStart=labelStart;
    int32_t destLabelLength=labelLength;
    UBool wasPunycode;
    if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) {
        // Label starts with "xn--", try to un-Punycode it.
        wasPunycode=TRUE;
        UChar *unicodeBuffer=fromPunycode.getBuffer(-1);  // capacity==-1: most labels should fit
        if(unicodeBuffer==NULL) {
            // Should never occur if we used capacity==-1 which uses the internal buffer.
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return labelLength;
        }
        UErrorCode punycodeErrorCode=U_ZERO_ERROR;
        int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4,
                                                unicodeBuffer, fromPunycode.getCapacity(),
                                                NULL, &punycodeErrorCode);
        if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            // Retry once with a buffer of the length reported by the first call.
            fromPunycode.releaseBuffer(0);
            unicodeBuffer=fromPunycode.getBuffer(unicodeLength);
            if(unicodeBuffer==NULL) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                return labelLength;
            }
            punycodeErrorCode=U_ZERO_ERROR;
            unicodeLength=u_strFromPunycode(label+4, labelLength-4,
                                            unicodeBuffer, fromPunycode.getCapacity(),
                                            NULL, &punycodeErrorCode);
        }
        fromPunycode.releaseBuffer(unicodeLength);
        if(U_FAILURE(punycodeErrorCode)) {
            info.labelErrors|=UIDNA_ERROR_PUNYCODE;
            return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
        }
        // Check for NFC, and for characters that are not
        // valid or deviation characters according to the normalizer.
        // If there is something wrong, then the string will change.
        // Note that the normalizer passes through non-LDH ASCII and deviation characters.
        // Deviation characters are ok in Punycode even in transitional processing.
        // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
        // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
        UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode);
        if(U_FAILURE(errorCode)) {
            return labelLength;
        }
        if(!isValid) {
            info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
            return markBadACELabel(dest, labelStart, labelLength, toASCII, info);
        }
        // From here on, work on the decoded label.
        labelString=&fromPunycode;
        label=fromPunycode.getBuffer();
        labelStart=0;
        labelLength=fromPunycode.length();
    } else {
        wasPunycode=FALSE;
        labelString=&dest;
    }
    // Validity check
    if(labelLength==0) {
        if(toASCII) {
            info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
        }
        return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength);
    }
    // labelLength>0
    if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
        // label starts with "??--"
        info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4;
    }
    if(label[0]==0x2d) {
        // label starts with "-"
        info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
    }
    if(label[labelLength-1]==0x2d) {
        // label ends with "-"
        info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
    }
    // If the label was not a Punycode label, then it was the result of
    // mapping, normalization and label segmentation.
    // If the label was in Punycode, then we mapped it again above
    // and checked its validity.
    // Now we handle the STD3 restriction to LDH characters (if set)
    // and we look for U+FFFD which indicates disallowed characters
    // in a non-Punycode label or U+FFFD itself in a Punycode label.
    // We also check for dots which can come from the input to a single-label function.
    // Ok to cast away const because we own the UnicodeString.
    UChar *s=(UChar *)label;
    const UChar *limit=label+labelLength;
    // Bitwise OR of all non-ASCII code units in the label,
    // used below to skip checks that cannot apply.
    UChar oredChars=0;
    // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
    UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
    do {
        UChar c=*s;
        if(c<=0x7f) {
            if(c==0x2e) {
                info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
                *s=0xfffd;
            } else if(disallowNonLDHDot && asciiData[c]<0) {
                info.labelErrors|=UIDNA_ERROR_DISALLOWED;
                *s=0xfffd;
            }
        } else {
            oredChars|=c;
            if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
                info.labelErrors|=UIDNA_ERROR_DISALLOWED;
                *s=0xfffd;
            } else if(c==0xfffd) {
                info.labelErrors|=UIDNA_ERROR_DISALLOWED;
            }
        }
        ++s;
    } while(s<limit);
    // Check for a leading combining mark after other validity checks
    // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here.
    UChar32 c;
    int32_t cpLength=0;
    // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
    U16_NEXT_UNSAFE(label, cpLength, c);
    if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) {
        info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK;
        // Replace the leading mark with U+FFFD; the label shrinks by one
        // if the mark was a surrogate pair.
        labelString->replace(labelStart, cpLength, (UChar)0xfffd);
        label=labelString->getBuffer()+labelStart;
        labelLength+=1-cpLength;
        if(labelString==&dest) {
            destLabelLength=labelLength;
        }
    }
    if((info.labelErrors&severeErrors)==0) {
        // Do contextual checks only if we do not have U+FFFD from a severe error
        // because U+FFFD can make these checks fail.
        if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) {
            checkLabelBiDi(label, labelLength, info);
        }
        // (oredChars&0x200c)==0x200c: cheap prefilter, true whenever
        // the label contains U+200C or U+200D.
        if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c &&
            !isLabelOkContextJ(label, labelLength)
        ) {
            info.labelErrors|=UIDNA_ERROR_CONTEXTJ;
        }
        if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) {
            checkLabelContextO(label, labelLength, info);
        }
        if(toASCII) {
            if(wasPunycode) {
                // Leave a Punycode label unchanged if it has no severe errors.
                if(destLabelLength>63) {
                    info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                }
                return destLabelLength;
            } else if(oredChars>=0x80) {
                // Contains non-ASCII characters.
                UnicodeString punycode;
                UChar *buffer=punycode.getBuffer(63);  // 63==maximum DNS label length
                if(buffer==NULL) {
                    errorCode=U_MEMORY_ALLOCATION_ERROR;
                    return destLabelLength;
                }
                buffer[0]=0x78;  // Write "xn--".
                buffer[1]=0x6e;
                buffer[2]=0x2d;
                buffer[3]=0x2d;
                int32_t punycodeLength=u_strToPunycode(label, labelLength,
                                                      buffer+4, punycode.getCapacity()-4,
                                                      NULL, &errorCode);
                if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                    // Retry once with a buffer that fits the reported length;
                    // releaseBuffer(4) keeps the "xn--" prefix.
                    errorCode=U_ZERO_ERROR;
                    punycode.releaseBuffer(4);
                    buffer=punycode.getBuffer(4+punycodeLength);
                    if(buffer==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return destLabelLength;
                    }
                    punycodeLength=u_strToPunycode(label, labelLength,
                                                  buffer+4, punycode.getCapacity()-4,
                                                  NULL, &errorCode);
                }
                punycodeLength+=4;
                punycode.releaseBuffer(punycodeLength);
                if(U_FAILURE(errorCode)) {
                    return destLabelLength;
                }
                if(punycodeLength>63) {
                    info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                }
                return replaceLabel(dest, destLabelStart, destLabelLength,
                                    punycode, punycodeLength);
            } else {
                // all-ASCII label
                if(labelLength>63) {
                    info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
                }
            }
        }
    } else {
        // If a Punycode label has severe errors,
        // then leave it but make sure it does not look valid.
        if(wasPunycode) {
            info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
            return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info);
        }
    }
    return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength);
}

// Make sure an ACE label does not look valid.
// Append U+FFFD if the label has only LDH characters.
// If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD.
1.901 +int32_t 1.902 +UTS46::markBadACELabel(UnicodeString &dest, 1.903 + int32_t labelStart, int32_t labelLength, 1.904 + UBool toASCII, IDNAInfo &info) const { 1.905 + UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; 1.906 + UBool isASCII=TRUE; 1.907 + UBool onlyLDH=TRUE; 1.908 + const UChar *label=dest.getBuffer()+labelStart; 1.909 + // Ok to cast away const because we own the UnicodeString. 1.910 + UChar *s=(UChar *)label+4; // After the initial "xn--". 1.911 + const UChar *limit=label+labelLength; 1.912 + do { 1.913 + UChar c=*s; 1.914 + if(c<=0x7f) { 1.915 + if(c==0x2e) { 1.916 + info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; 1.917 + *s=0xfffd; 1.918 + isASCII=onlyLDH=FALSE; 1.919 + } else if(asciiData[c]<0) { 1.920 + onlyLDH=FALSE; 1.921 + if(disallowNonLDHDot) { 1.922 + *s=0xfffd; 1.923 + isASCII=FALSE; 1.924 + } 1.925 + } 1.926 + } else { 1.927 + isASCII=onlyLDH=FALSE; 1.928 + } 1.929 + } while(++s<limit); 1.930 + if(onlyLDH) { 1.931 + dest.insert(labelStart+labelLength, (UChar)0xfffd); 1.932 + ++labelLength; 1.933 + } else { 1.934 + if(toASCII && isASCII && labelLength>63) { 1.935 + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; 1.936 + } 1.937 + } 1.938 + return labelLength; 1.939 +} 1.940 + 1.941 +const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); 1.942 +const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); 1.943 +const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; 1.944 + 1.945 +const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); 1.946 + 1.947 +const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); 1.948 +const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; 1.949 +const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); 1.950 + 1.951 +const uint32_t ES_CS_ET_ON_BN_NSM_MASK= 1.952 + U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| 1.953 + U_MASK(U_COMMON_NUMBER_SEPARATOR)| 1.954 + U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| 1.955 + U_MASK(U_OTHER_NEUTRAL)| 1.956 + U_MASK(U_BOUNDARY_NEUTRAL)| 1.957 + 
U_MASK(U_DIR_NON_SPACING_MARK); 1.958 +const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; 1.959 +const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; 1.960 + 1.961 +// We scan the whole label and check both for whether it contains RTL characters 1.962 +// and whether it passes the BiDi Rule. 1.963 +// In a BiDi domain name, all labels must pass the BiDi Rule, but we might find 1.964 +// that a domain name is a BiDi domain name (has an RTL label) only after 1.965 +// processing several earlier labels. 1.966 +void 1.967 +UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { 1.968 + // IDNA2008 BiDi rule 1.969 + // Get the directionality of the first character. 1.970 + UChar32 c; 1.971 + int32_t i=0; 1.972 + U16_NEXT_UNSAFE(label, i, c); 1.973 + uint32_t firstMask=U_MASK(u_charDirection(c)); 1.974 + // 1. The first character must be a character with BIDI property L, R 1.975 + // or AL. If it has the R or AL property, it is an RTL label; if it 1.976 + // has the L property, it is an LTR label. 1.977 + if((firstMask&~L_R_AL_MASK)!=0) { 1.978 + info.isOkBiDi=FALSE; 1.979 + } 1.980 + // Get the directionality of the last non-NSM character. 1.981 + uint32_t lastMask; 1.982 + for(;;) { 1.983 + if(i>=labelLength) { 1.984 + lastMask=firstMask; 1.985 + break; 1.986 + } 1.987 + U16_PREV_UNSAFE(label, labelLength, c); 1.988 + UCharDirection dir=u_charDirection(c); 1.989 + if(dir!=U_DIR_NON_SPACING_MARK) { 1.990 + lastMask=U_MASK(dir); 1.991 + break; 1.992 + } 1.993 + } 1.994 + // 3. In an RTL label, the end of the label must be a character with 1.995 + // BIDI property R, AL, EN or AN, followed by zero or more 1.996 + // characters with BIDI property NSM. 1.997 + // 6. In an LTR label, the end of the label must be a character with 1.998 + // BIDI property L or EN, followed by zero or more characters with 1.999 + // BIDI property NSM. 1.1000 + if( (firstMask&L_MASK)!=0 ? 
1.1001 + (lastMask&~L_EN_MASK)!=0 : 1.1002 + (lastMask&~R_AL_EN_AN_MASK)!=0 1.1003 + ) { 1.1004 + info.isOkBiDi=FALSE; 1.1005 + } 1.1006 + // Get the directionalities of the intervening characters. 1.1007 + uint32_t mask=0; 1.1008 + while(i<labelLength) { 1.1009 + U16_NEXT_UNSAFE(label, i, c); 1.1010 + mask|=U_MASK(u_charDirection(c)); 1.1011 + } 1.1012 + if(firstMask&L_MASK) { 1.1013 + // 5. In an LTR label, only characters with the BIDI properties L, EN, 1.1014 + // ES, CS, ET, ON, BN and NSM are allowed. 1.1015 + if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { 1.1016 + info.isOkBiDi=FALSE; 1.1017 + } 1.1018 + } else { 1.1019 + // 2. In an RTL label, only characters with the BIDI properties R, AL, 1.1020 + // AN, EN, ES, CS, ET, ON, BN and NSM are allowed. 1.1021 + if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { 1.1022 + info.isOkBiDi=FALSE; 1.1023 + } 1.1024 + // 4. In an RTL label, if an EN is present, no AN may be present, and 1.1025 + // vice versa. 1.1026 + if((mask&EN_AN_MASK)==EN_AN_MASK) { 1.1027 + info.isOkBiDi=FALSE; 1.1028 + } 1.1029 + } 1.1030 + // An RTL label is a label that contains at least one character of type 1.1031 + // R, AL or AN. [...] 1.1032 + // A "BIDI domain name" is a domain name that contains at least one RTL 1.1033 + // label. [...] 1.1034 + // The following rule, consisting of six conditions, applies to labels 1.1035 + // in BIDI domain names. 1.1036 + if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) { 1.1037 + info.isBiDi=TRUE; 1.1038 + } 1.1039 +} 1.1040 + 1.1041 +// Special code for the ASCII prefix of a BiDi domain name. 1.1042 +// The ASCII prefix is all-LTR. 1.1043 + 1.1044 +// IDNA2008 BiDi rule, parts relevant to ASCII labels: 1.1045 +// 1. The first character must be a character with BIDI property L [...] 1.1046 +// 5. In an LTR label, only characters with the BIDI properties L, EN, 1.1047 +// ES, CS, ET, ON, BN and NSM are allowed. 1.1048 +// 6. 
In an LTR label, the end of the label must be a character with 1.1049 +// BIDI property L or EN [...] 1.1050 + 1.1051 +// UTF-16 version, called for mapped ASCII prefix. 1.1052 +// Cannot contain uppercase A-Z. 1.1053 +// s[length-1] must be the trailing dot. 1.1054 +static UBool 1.1055 +isASCIIOkBiDi(const UChar *s, int32_t length) { 1.1056 + int32_t labelStart=0; 1.1057 + for(int32_t i=0; i<length; ++i) { 1.1058 + UChar c=s[i]; 1.1059 + if(c==0x2e) { // dot 1.1060 + if(i>labelStart) { 1.1061 + c=s[i-1]; 1.1062 + if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { 1.1063 + // Last character in the label is not an L or EN. 1.1064 + return FALSE; 1.1065 + } 1.1066 + } 1.1067 + labelStart=i+1; 1.1068 + } else if(i==labelStart) { 1.1069 + if(!(0x61<=c && c<=0x7a)) { 1.1070 + // First character in the label is not an L. 1.1071 + return FALSE; 1.1072 + } 1.1073 + } else { 1.1074 + if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { 1.1075 + // Intermediate character in the label is a B, S or WS. 1.1076 + return FALSE; 1.1077 + } 1.1078 + } 1.1079 + } 1.1080 + return TRUE; 1.1081 +} 1.1082 + 1.1083 +// UTF-8 version, called for source ASCII prefix. 1.1084 +// Can contain uppercase A-Z. 1.1085 +// s[length-1] must be the trailing dot. 1.1086 +static UBool 1.1087 +isASCIIOkBiDi(const char *s, int32_t length) { 1.1088 + int32_t labelStart=0; 1.1089 + for(int32_t i=0; i<length; ++i) { 1.1090 + char c=s[i]; 1.1091 + if(c==0x2e) { // dot 1.1092 + if(i>labelStart) { 1.1093 + c=s[i-1]; 1.1094 + if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { 1.1095 + // Last character in the label is not an L or EN. 1.1096 + return FALSE; 1.1097 + } 1.1098 + } 1.1099 + labelStart=i+1; 1.1100 + } else if(i==labelStart) { 1.1101 + if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { 1.1102 + // First character in the label is not an L. 
1.1103 + return FALSE; 1.1104 + } 1.1105 + } else { 1.1106 + if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { 1.1107 + // Intermediate character in the label is a B, S or WS. 1.1108 + return FALSE; 1.1109 + } 1.1110 + } 1.1111 + } 1.1112 + return TRUE; 1.1113 +} 1.1114 + 1.1115 +UBool 1.1116 +UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { 1.1117 + const UBiDiProps *bdp=ubidi_getSingleton(); 1.1118 + // [IDNA2008-Tables] 1.1119 + // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 1.1120 + for(int32_t i=0; i<labelLength; ++i) { 1.1121 + if(label[i]==0x200c) { 1.1122 + // Appendix A.1. ZERO WIDTH NON-JOINER 1.1123 + // Rule Set: 1.1124 + // False; 1.1125 + // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; 1.1126 + // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C 1.1127 + // (Joining_Type:T)*(Joining_Type:{R,D})) Then True; 1.1128 + if(i==0) { 1.1129 + return FALSE; 1.1130 + } 1.1131 + UChar32 c; 1.1132 + int32_t j=i; 1.1133 + U16_PREV_UNSAFE(label, j, c); 1.1134 + if(uts46Norm2.getCombiningClass(c)==9) { 1.1135 + continue; 1.1136 + } 1.1137 + // check precontext (Joining_Type:{L,D})(Joining_Type:T)* 1.1138 + for(;;) { 1.1139 + UJoiningType type=ubidi_getJoiningType(bdp, c); 1.1140 + if(type==U_JT_TRANSPARENT) { 1.1141 + if(j==0) { 1.1142 + return FALSE; 1.1143 + } 1.1144 + U16_PREV_UNSAFE(label, j, c); 1.1145 + } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) { 1.1146 + break; // precontext fulfilled 1.1147 + } else { 1.1148 + return FALSE; 1.1149 + } 1.1150 + } 1.1151 + // check postcontext (Joining_Type:T)*(Joining_Type:{R,D}) 1.1152 + for(j=i+1;;) { 1.1153 + if(j==labelLength) { 1.1154 + return FALSE; 1.1155 + } 1.1156 + U16_NEXT_UNSAFE(label, j, c); 1.1157 + UJoiningType type=ubidi_getJoiningType(bdp, c); 1.1158 + if(type==U_JT_TRANSPARENT) { 1.1159 + // just skip this character 1.1160 + } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { 1.1161 + break; // 
postcontext fulfilled 1.1162 + } else { 1.1163 + return FALSE; 1.1164 + } 1.1165 + } 1.1166 + } else if(label[i]==0x200d) { 1.1167 + // Appendix A.2. ZERO WIDTH JOINER (U+200D) 1.1168 + // Rule Set: 1.1169 + // False; 1.1170 + // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; 1.1171 + if(i==0) { 1.1172 + return FALSE; 1.1173 + } 1.1174 + UChar32 c; 1.1175 + int32_t j=i; 1.1176 + U16_PREV_UNSAFE(label, j, c); 1.1177 + if(uts46Norm2.getCombiningClass(c)!=9) { 1.1178 + return FALSE; 1.1179 + } 1.1180 + } 1.1181 + } 1.1182 + return TRUE; 1.1183 +} 1.1184 + 1.1185 +void 1.1186 +UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const { 1.1187 + int32_t labelEnd=labelLength-1; // inclusive 1.1188 + int32_t arabicDigits=0; // -1 for 066x, +1 for 06Fx 1.1189 + for(int32_t i=0; i<=labelEnd; ++i) { 1.1190 + UChar32 c=label[i]; 1.1191 + if(c<0xb7) { 1.1192 + // ASCII fastpath 1.1193 + } else if(c<=0x6f9) { 1.1194 + if(c==0xb7) { 1.1195 + // Appendix A.3. MIDDLE DOT (U+00B7) 1.1196 + // Rule Set: 1.1197 + // False; 1.1198 + // If Before(cp) .eq. U+006C And 1.1199 + // After(cp) .eq. U+006C Then True; 1.1200 + if(!(0<i && label[i-1]==0x6c && 1.1201 + i<labelEnd && label[i+1]==0x6c)) { 1.1202 + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; 1.1203 + } 1.1204 + } else if(c==0x375) { 1.1205 + // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375) 1.1206 + // Rule Set: 1.1207 + // False; 1.1208 + // If Script(After(cp)) .eq. Greek Then True; 1.1209 + UScriptCode script=USCRIPT_INVALID_CODE; 1.1210 + if(i<labelEnd) { 1.1211 + UErrorCode errorCode=U_ZERO_ERROR; 1.1212 + int32_t j=i+1; 1.1213 + U16_NEXT(label, j, labelLength, c); 1.1214 + script=uscript_getScript(c, &errorCode); 1.1215 + } 1.1216 + if(script!=USCRIPT_GREEK) { 1.1217 + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; 1.1218 + } 1.1219 + } else if(c==0x5f3 || c==0x5f4) { 1.1220 + // Appendix A.5. 
HEBREW PUNCTUATION GERESH (U+05F3) 1.1221 + // Rule Set: 1.1222 + // False; 1.1223 + // If Script(Before(cp)) .eq. Hebrew Then True; 1.1224 + // 1.1225 + // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4) 1.1226 + // Rule Set: 1.1227 + // False; 1.1228 + // If Script(Before(cp)) .eq. Hebrew Then True; 1.1229 + UScriptCode script=USCRIPT_INVALID_CODE; 1.1230 + if(0<i) { 1.1231 + UErrorCode errorCode=U_ZERO_ERROR; 1.1232 + int32_t j=i; 1.1233 + U16_PREV(label, 0, j, c); 1.1234 + script=uscript_getScript(c, &errorCode); 1.1235 + } 1.1236 + if(script!=USCRIPT_HEBREW) { 1.1237 + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; 1.1238 + } 1.1239 + } else if(0x660<=c /* && c<=0x6f9 */) { 1.1240 + // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669) 1.1241 + // Rule Set: 1.1242 + // True; 1.1243 + // For All Characters: 1.1244 + // If cp .in. 06F0..06F9 Then False; 1.1245 + // End For; 1.1246 + // 1.1247 + // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9) 1.1248 + // Rule Set: 1.1249 + // True; 1.1250 + // For All Characters: 1.1251 + // If cp .in. 0660..0669 Then False; 1.1252 + // End For; 1.1253 + if(c<=0x669) { 1.1254 + if(arabicDigits>0) { 1.1255 + info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; 1.1256 + } 1.1257 + arabicDigits=-1; 1.1258 + } else if(0x6f0<=c) { 1.1259 + if(arabicDigits<0) { 1.1260 + info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; 1.1261 + } 1.1262 + arabicDigits=1; 1.1263 + } 1.1264 + } 1.1265 + } else if(c==0x30fb) { 1.1266 + // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) 1.1267 + // Rule Set: 1.1268 + // False; 1.1269 + // For All Characters: 1.1270 + // If Script(cp) .in. 
{Hiragana, Katakana, Han} Then True; 1.1271 + // End For; 1.1272 + UErrorCode errorCode=U_ZERO_ERROR; 1.1273 + for(int j=0;;) { 1.1274 + if(j>labelEnd) { 1.1275 + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; 1.1276 + break; 1.1277 + } 1.1278 + U16_NEXT(label, j, labelLength, c); 1.1279 + UScriptCode script=uscript_getScript(c, &errorCode); 1.1280 + if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { 1.1281 + break; 1.1282 + } 1.1283 + } 1.1284 + } 1.1285 + } 1.1286 +} 1.1287 + 1.1288 +U_NAMESPACE_END 1.1289 + 1.1290 +// C API ------------------------------------------------------------------- *** 1.1291 + 1.1292 +U_NAMESPACE_USE 1.1293 + 1.1294 +U_CAPI UIDNA * U_EXPORT2 1.1295 +uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { 1.1296 + return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode)); 1.1297 +} 1.1298 + 1.1299 +U_CAPI void U_EXPORT2 1.1300 +uidna_close(UIDNA *idna) { 1.1301 + delete reinterpret_cast<IDNA *>(idna); 1.1302 +} 1.1303 + 1.1304 +static UBool 1.1305 +checkArgs(const void *label, int32_t length, 1.1306 + void *dest, int32_t capacity, 1.1307 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1308 + if(U_FAILURE(*pErrorCode)) { 1.1309 + return FALSE; 1.1310 + } 1.1311 + // sizeof(UIDNAInfo)=16 in the first API version. 1.1312 + if(pInfo==NULL || pInfo->size<16) { 1.1313 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.1314 + return FALSE; 1.1315 + } 1.1316 + if( (label==NULL ? length!=0 : length<-1) || 1.1317 + (dest==NULL ? capacity!=0 : capacity<0) || 1.1318 + (dest==label && label!=NULL) 1.1319 + ) { 1.1320 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.1321 + return FALSE; 1.1322 + } 1.1323 + // Set all *pInfo bytes to 0 except for the size field itself. 
1.1324 + uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); 1.1325 + return TRUE; 1.1326 +} 1.1327 + 1.1328 +static void 1.1329 +idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { 1.1330 + pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); 1.1331 + pInfo->errors=info.getErrors(); 1.1332 +} 1.1333 + 1.1334 +U_CAPI int32_t U_EXPORT2 1.1335 +uidna_labelToASCII(const UIDNA *idna, 1.1336 + const UChar *label, int32_t length, 1.1337 + UChar *dest, int32_t capacity, 1.1338 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1339 + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { 1.1340 + return 0; 1.1341 + } 1.1342 + UnicodeString src((UBool)(length<0), label, length); 1.1343 + UnicodeString destString(dest, 0, capacity); 1.1344 + IDNAInfo info; 1.1345 + reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode); 1.1346 + idnaInfoToStruct(info, pInfo); 1.1347 + return destString.extract(dest, capacity, *pErrorCode); 1.1348 +} 1.1349 + 1.1350 +U_CAPI int32_t U_EXPORT2 1.1351 +uidna_labelToUnicode(const UIDNA *idna, 1.1352 + const UChar *label, int32_t length, 1.1353 + UChar *dest, int32_t capacity, 1.1354 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1355 + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { 1.1356 + return 0; 1.1357 + } 1.1358 + UnicodeString src((UBool)(length<0), label, length); 1.1359 + UnicodeString destString(dest, 0, capacity); 1.1360 + IDNAInfo info; 1.1361 + reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode); 1.1362 + idnaInfoToStruct(info, pInfo); 1.1363 + return destString.extract(dest, capacity, *pErrorCode); 1.1364 +} 1.1365 + 1.1366 +U_CAPI int32_t U_EXPORT2 1.1367 +uidna_nameToASCII(const UIDNA *idna, 1.1368 + const UChar *name, int32_t length, 1.1369 + UChar *dest, int32_t capacity, 1.1370 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1371 + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { 1.1372 + 
return 0; 1.1373 + } 1.1374 + UnicodeString src((UBool)(length<0), name, length); 1.1375 + UnicodeString destString(dest, 0, capacity); 1.1376 + IDNAInfo info; 1.1377 + reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode); 1.1378 + idnaInfoToStruct(info, pInfo); 1.1379 + return destString.extract(dest, capacity, *pErrorCode); 1.1380 +} 1.1381 + 1.1382 +U_CAPI int32_t U_EXPORT2 1.1383 +uidna_nameToUnicode(const UIDNA *idna, 1.1384 + const UChar *name, int32_t length, 1.1385 + UChar *dest, int32_t capacity, 1.1386 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1387 + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { 1.1388 + return 0; 1.1389 + } 1.1390 + UnicodeString src((UBool)(length<0), name, length); 1.1391 + UnicodeString destString(dest, 0, capacity); 1.1392 + IDNAInfo info; 1.1393 + reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode); 1.1394 + idnaInfoToStruct(info, pInfo); 1.1395 + return destString.extract(dest, capacity, *pErrorCode); 1.1396 +} 1.1397 + 1.1398 +U_CAPI int32_t U_EXPORT2 1.1399 +uidna_labelToASCII_UTF8(const UIDNA *idna, 1.1400 + const char *label, int32_t length, 1.1401 + char *dest, int32_t capacity, 1.1402 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1403 + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { 1.1404 + return 0; 1.1405 + } 1.1406 + StringPiece src(label, length<0 ? 
uprv_strlen(label) : length); 1.1407 + CheckedArrayByteSink sink(dest, capacity); 1.1408 + IDNAInfo info; 1.1409 + reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); 1.1410 + idnaInfoToStruct(info, pInfo); 1.1411 + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); 1.1412 +} 1.1413 + 1.1414 +U_CAPI int32_t U_EXPORT2 1.1415 +uidna_labelToUnicodeUTF8(const UIDNA *idna, 1.1416 + const char *label, int32_t length, 1.1417 + char *dest, int32_t capacity, 1.1418 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1419 + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { 1.1420 + return 0; 1.1421 + } 1.1422 + StringPiece src(label, length<0 ? uprv_strlen(label) : length); 1.1423 + CheckedArrayByteSink sink(dest, capacity); 1.1424 + IDNAInfo info; 1.1425 + reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); 1.1426 + idnaInfoToStruct(info, pInfo); 1.1427 + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); 1.1428 +} 1.1429 + 1.1430 +U_CAPI int32_t U_EXPORT2 1.1431 +uidna_nameToASCII_UTF8(const UIDNA *idna, 1.1432 + const char *name, int32_t length, 1.1433 + char *dest, int32_t capacity, 1.1434 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1435 + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { 1.1436 + return 0; 1.1437 + } 1.1438 + StringPiece src(name, length<0 ? 
uprv_strlen(name) : length); 1.1439 + CheckedArrayByteSink sink(dest, capacity); 1.1440 + IDNAInfo info; 1.1441 + reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); 1.1442 + idnaInfoToStruct(info, pInfo); 1.1443 + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); 1.1444 +} 1.1445 + 1.1446 +U_CAPI int32_t U_EXPORT2 1.1447 +uidna_nameToUnicodeUTF8(const UIDNA *idna, 1.1448 + const char *name, int32_t length, 1.1449 + char *dest, int32_t capacity, 1.1450 + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { 1.1451 + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { 1.1452 + return 0; 1.1453 + } 1.1454 + StringPiece src(name, length<0 ? uprv_strlen(name) : length); 1.1455 + CheckedArrayByteSink sink(dest, capacity); 1.1456 + IDNAInfo info; 1.1457 + reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); 1.1458 + idnaInfoToStruct(info, pInfo); 1.1459 + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); 1.1460 +} 1.1461 + 1.1462 +#endif // UCONFIG_NO_IDNA