michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2010-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: uts46.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010mar09 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_IDNA michael@0: michael@0: #include "unicode/idna.h" michael@0: #include "unicode/normalizer2.h" michael@0: #include "unicode/uscript.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/utf16.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "punycode.h" michael@0: #include "ubidi_props.h" michael@0: #include "ustr_imp.h" michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: michael@0: // michael@0: // The domain name length limit is 255 octets in an internal DNS representation michael@0: // where the last ("root") label is the empty label michael@0: // represented by length byte 0 alone. michael@0: // In a conventional string, this translates to 253 characters, or 254 michael@0: // if there is a trailing dot for the root label. michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: // Severe errors which usually result in a U+FFFD replacement character in the result string. michael@0: const uint32_t severeErrors= michael@0: UIDNA_ERROR_LEADING_COMBINING_MARK| michael@0: UIDNA_ERROR_DISALLOWED| michael@0: UIDNA_ERROR_PUNYCODE| michael@0: UIDNA_ERROR_LABEL_HAS_DOT| michael@0: UIDNA_ERROR_INVALID_ACE_LABEL; michael@0: michael@0: static inline UBool michael@0: isASCIIString(const UnicodeString &dest) { michael@0: const UChar *s=dest.getBuffer(); michael@0: const UChar *limit=s+dest.length(); michael@0: while(s0x7f) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: static UBool michael@0: isASCIIOkBiDi(const UChar *s, int32_t length); michael@0: michael@0: static UBool michael@0: isASCIIOkBiDi(const char *s, int32_t length); michael@0: michael@0: // IDNA class default implementations -------------------------------------- *** michael@0: michael@0: IDNA::~IDNA() {} michael@0: michael@0: void michael@0: IDNA::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: UnicodeString destString; michael@0: labelToASCII(UnicodeString::fromUTF8(label), destString, michael@0: info, errorCode).toUTF8(dest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: IDNA::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: UnicodeString destString; michael@0: labelToUnicode(UnicodeString::fromUTF8(label), destString, michael@0: info, errorCode).toUTF8(dest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: IDNA::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: UnicodeString destString; michael@0: nameToASCII(UnicodeString::fromUTF8(name), destString, michael@0: info, errorCode).toUTF8(dest); michael@0: } michael@0: } michael@0: michael@0: void michael@0: IDNA::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: UnicodeString destString; michael@0: nameToUnicode(UnicodeString::fromUTF8(name), destString, michael@0: info, errorCode).toUTF8(dest); michael@0: } michael@0: } michael@0: michael@0: // UTS46 class declaration ------------------------------------------------- *** michael@0: michael@0: class UTS46 : public IDNA { michael@0: public: michael@0: UTS46(uint32_t options, UErrorCode &errorCode); michael@0: virtual ~UTS46(); michael@0: michael@0: virtual UnicodeString & michael@0: labelToASCII(const UnicodeString &label, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual UnicodeString & michael@0: labelToUnicode(const UnicodeString &label, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual UnicodeString & michael@0: nameToASCII(const UnicodeString &name, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual UnicodeString & michael@0: nameToUnicode(const UnicodeString &name, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual void michael@0: labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual void michael@0: labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual void michael@0: nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: virtual void michael@0: nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: private: michael@0: UnicodeString & michael@0: process(const UnicodeString &src, michael@0: UBool isLabel, UBool toASCII, michael@0: UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: void michael@0: processUTF8(const StringPiece &src, michael@0: UBool isLabel, UBool toASCII, michael@0: ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: UnicodeString & michael@0: processUnicode(const UnicodeString &src, michael@0: int32_t labelStart, int32_t mappingStart, michael@0: UBool isLabel, UBool toASCII, michael@0: UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: // returns the new dest.length() michael@0: int32_t michael@0: mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, michael@0: UErrorCode &errorCode) const; michael@0: michael@0: // returns the new label length michael@0: int32_t michael@0: processLabel(UnicodeString &dest, michael@0: int32_t labelStart, int32_t labelLength, michael@0: UBool toASCII, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: int32_t michael@0: markBadACELabel(UnicodeString &dest, michael@0: int32_t labelStart, int32_t labelLength, michael@0: UBool toASCII, IDNAInfo &info) const; michael@0: michael@0: void michael@0: checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; michael@0: michael@0: UBool michael@0: isLabelOkContextJ(const UChar *label, int32_t labelLength) const; michael@0: michael@0: void michael@0: checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; michael@0: michael@0: const Normalizer2 &uts46Norm2; // uts46.nrm michael@0: uint32_t options; michael@0: }; michael@0: michael@0: IDNA * michael@0: IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { michael@0: if(U_SUCCESS(errorCode)) { michael@0: IDNA *idna=new UTS46(options, errorCode); michael@0: if(idna==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } else if(U_FAILURE(errorCode)) { michael@0: delete idna; michael@0: idna=NULL; michael@0: } michael@0: return idna; michael@0: } else { michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: // UTS46 implementation ---------------------------------------------------- *** michael@0: michael@0: UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) michael@0: : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), michael@0: options(opt) {} michael@0: michael@0: UTS46::~UTS46() {} michael@0: michael@0: UnicodeString & michael@0: UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: return process(label, TRUE, TRUE, dest, info, errorCode); michael@0: } michael@0: michael@0: UnicodeString & michael@0: UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: return process(label, TRUE, FALSE, dest, info, errorCode); michael@0: } michael@0: michael@0: UnicodeString & michael@0: UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: process(name, FALSE, TRUE, dest, info, errorCode); michael@0: if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 && michael@0: isASCIIString(dest) && michael@0: (dest.length()>254 || dest[253]!=0x2e) michael@0: ) { michael@0: info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; michael@0: } michael@0: return dest; michael@0: } michael@0: michael@0: UnicodeString & michael@0: UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: return process(name, FALSE, FALSE, dest, info, errorCode); michael@0: } michael@0: michael@0: void michael@0: UTS46::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: processUTF8(label, TRUE, TRUE, dest, info, errorCode); michael@0: } michael@0: michael@0: void michael@0: UTS46::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: processUTF8(label, TRUE, FALSE, dest, info, errorCode); michael@0: } michael@0: michael@0: void michael@0: UTS46::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: processUTF8(name, FALSE, TRUE, dest, info, errorCode); michael@0: } michael@0: michael@0: void michael@0: UTS46::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: processUTF8(name, FALSE, FALSE, dest, info, errorCode); michael@0: } michael@0: michael@0: // UTS #46 data for ASCII characters. michael@0: // The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase michael@0: // and passes through all other ASCII characters. michael@0: // If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed michael@0: // using this data. michael@0: // The ASCII fastpath also uses this data. michael@0: // Values: -1=disallowed 0==valid 1==mapped (lowercase) michael@0: static const int8_t asciiData[128]={ michael@0: -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, michael@0: -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, michael@0: // 002D..002E; valid # HYPHEN-MINUS..FULL STOP michael@0: -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, michael@0: // 0030..0039; valid # DIGIT ZERO..DIGIT NINE michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, michael@0: // 0041..005A; mapped # LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z michael@0: -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, michael@0: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, michael@0: // 0061..007A; valid # LATIN SMALL LETTER A..LATIN SMALL LETTER Z michael@0: -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1 michael@0: }; michael@0: michael@0: UnicodeString & michael@0: UTS46::process(const UnicodeString &src, michael@0: UBool isLabel, UBool toASCII, michael@0: UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: // uts46Norm2.normalize() would do all of this error checking and setup, michael@0: // but with the ASCII fastpath we do not always call it, and do not michael@0: // call it first. michael@0: if(U_FAILURE(errorCode)) { michael@0: dest.setToBogus(); michael@0: return dest; michael@0: } michael@0: const UChar *srcArray=src.getBuffer(); michael@0: if(&dest==&src || srcArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: dest.setToBogus(); michael@0: return dest; michael@0: } michael@0: // Arguments are fine, reset output values. michael@0: dest.remove(); michael@0: info.reset(); michael@0: int32_t srcLength=src.length(); michael@0: if(srcLength==0) { michael@0: if(toASCII) { michael@0: info.errors|=UIDNA_ERROR_EMPTY_LABEL; michael@0: } michael@0: return dest; michael@0: } michael@0: UChar *destArray=dest.getBuffer(srcLength); michael@0: if(destArray==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return dest; michael@0: } michael@0: // ASCII fastpath michael@0: UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; michael@0: int32_t labelStart=0; michael@0: int32_t i; michael@0: for(i=0;; ++i) { michael@0: if(i==srcLength) { michael@0: if(toASCII) { michael@0: if((i-labelStart)>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: // There is a trailing dot if labelStart==i. michael@0: if(!isLabel && i>=254 && (i>254 || labelStart0x7f) { michael@0: break; michael@0: } michael@0: int cData=asciiData[c]; michael@0: if(cData>0) { michael@0: destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. michael@0: } else if(cData<0 && disallowNonLDHDot) { michael@0: break; // Replacing with U+FFFD can be complicated for toASCII. michael@0: } else { michael@0: destArray[i]=c; michael@0: if(c==0x2d) { // hyphen michael@0: if(i==(labelStart+3) && srcArray[i-1]==0x2d) { michael@0: // "??--..." is Punycode or forbidden. michael@0: ++i; // '-' was copied to dest already michael@0: break; michael@0: } michael@0: if(i==labelStart) { michael@0: // label starts with "-" michael@0: info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; michael@0: } michael@0: if((i+1)==srcLength || srcArray[i+1]==0x2e) { michael@0: // label ends with "-" michael@0: info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; michael@0: } michael@0: } else if(c==0x2e) { // dot michael@0: if(isLabel) { michael@0: // Replacing with U+FFFD can be complicated for toASCII. michael@0: ++i; // '.' was copied to dest already michael@0: break; michael@0: } michael@0: if(toASCII) { michael@0: // Permit an empty label at the end but not elsewhere. michael@0: if(i==labelStart && i<(srcLength-1)) { michael@0: info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; michael@0: } else if((i-labelStart)>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: } michael@0: info.errors|=info.labelErrors; michael@0: info.labelErrors=0; michael@0: labelStart=i+1; michael@0: } michael@0: } michael@0: } michael@0: info.errors|=info.labelErrors; michael@0: dest.releaseBuffer(i); michael@0: processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); michael@0: if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && michael@0: (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart))) michael@0: ) { michael@0: info.errors|=UIDNA_ERROR_BIDI; michael@0: } michael@0: return dest; michael@0: } michael@0: michael@0: void michael@0: UTS46::processUTF8(const StringPiece &src, michael@0: UBool isLabel, UBool toASCII, michael@0: ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: const char *srcArray=src.data(); michael@0: int32_t srcLength=src.length(); michael@0: if(srcArray==NULL && srcLength!=0) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: // Arguments are fine, reset output values. michael@0: info.reset(); michael@0: if(srcLength==0) { michael@0: if(toASCII) { michael@0: info.errors|=UIDNA_ERROR_EMPTY_LABEL; michael@0: } michael@0: dest.Flush(); michael@0: return; michael@0: } michael@0: UnicodeString destString; michael@0: int32_t labelStart=0; michael@0: if(srcLength<=256) { // length of stackArray[] michael@0: // ASCII fastpath michael@0: char stackArray[256]; michael@0: int32_t destCapacity; michael@0: char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, michael@0: stackArray, LENGTHOF(stackArray), &destCapacity); michael@0: UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; michael@0: int32_t i; michael@0: for(i=0;; ++i) { michael@0: if(i==srcLength) { michael@0: if(toASCII) { michael@0: if((i-labelStart)>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: // There is a trailing dot if labelStart==i. michael@0: if(!isLabel && i>=254 && (i>254 || labelStart0x7f michael@0: break; michael@0: } michael@0: int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char. michael@0: if(cData>0) { michael@0: destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. michael@0: } else if(cData<0 && disallowNonLDHDot) { michael@0: break; // Replacing with U+FFFD can be complicated for toASCII. michael@0: } else { michael@0: destArray[i]=c; michael@0: if(c==0x2d) { // hyphen michael@0: if(i==(labelStart+3) && srcArray[i-1]==0x2d) { michael@0: // "??--..." is Punycode or forbidden. michael@0: break; michael@0: } michael@0: if(i==labelStart) { michael@0: // label starts with "-" michael@0: info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; michael@0: } michael@0: if((i+1)==srcLength || srcArray[i+1]==0x2e) { michael@0: // label ends with "-" michael@0: info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; michael@0: } michael@0: } else if(c==0x2e) { // dot michael@0: if(isLabel) { michael@0: break; // Replacing with U+FFFD can be complicated for toASCII. michael@0: } michael@0: if(toASCII) { michael@0: // Permit an empty label at the end but not elsewhere. michael@0: if(i==labelStart && i<(srcLength-1)) { michael@0: info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; michael@0: } else if((i-labelStart)>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: } michael@0: info.errors|=info.labelErrors; michael@0: info.labelErrors=0; michael@0: labelStart=i+1; michael@0: } michael@0: } michael@0: } michael@0: info.errors|=info.labelErrors; michael@0: // Convert the processed ASCII prefix of the current label to UTF-16. michael@0: int32_t mappingStart=i-labelStart; michael@0: destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart)); michael@0: // Output the previous ASCII labels and process the rest of src in UTF-16. michael@0: dest.Append(destArray, labelStart); michael@0: processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart, michael@0: isLabel, toASCII, michael@0: destString, info, errorCode); michael@0: } else { michael@0: // src is too long for the ASCII fastpath implementation. michael@0: processUnicode(UnicodeString::fromUTF8(src), 0, 0, michael@0: isLabel, toASCII, michael@0: destString, info, errorCode); michael@0: } michael@0: destString.toUTF8(dest); // calls dest.Flush() michael@0: if(toASCII && !isLabel) { michael@0: // length==labelStart==254 means that there is a trailing dot (ok) and michael@0: // destString is empty (do not index at 253-labelStart). michael@0: int32_t length=labelStart+destString.length(); michael@0: if( length>=254 && isASCIIString(destString) && michael@0: (length>254 || michael@0: (labelStart<254 && destString[253-labelStart]!=0x2e)) michael@0: ) { michael@0: info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; michael@0: } michael@0: } michael@0: if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && michael@0: (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart))) michael@0: ) { michael@0: info.errors|=UIDNA_ERROR_BIDI; michael@0: } michael@0: } michael@0: michael@0: UnicodeString & michael@0: UTS46::processUnicode(const UnicodeString &src, michael@0: int32_t labelStart, int32_t mappingStart, michael@0: UBool isLabel, UBool toASCII, michael@0: UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const { michael@0: if(mappingStart==0) { michael@0: uts46Norm2.normalize(src, dest, errorCode); michael@0: } else { michael@0: uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode); michael@0: } michael@0: if(U_FAILURE(errorCode)) { michael@0: return dest; michael@0: } michael@0: UBool doMapDevChars= michael@0: toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 : michael@0: (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0; michael@0: const UChar *destArray=dest.getBuffer(); michael@0: int32_t destLength=dest.length(); michael@0: int32_t labelLimit=labelStart; michael@0: while(labelLimit=0x200c)) { michael@0: info.isTransDiff=TRUE; michael@0: if(doMapDevChars) { michael@0: destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return dest; michael@0: } michael@0: destArray=dest.getBuffer(); michael@0: // Do not increment labelLimit in case c was removed. michael@0: // All deviation characters have been mapped, no need to check for them again. michael@0: doMapDevChars=FALSE; michael@0: } else { michael@0: ++labelLimit; michael@0: } michael@0: } else { michael@0: ++labelLimit; michael@0: } michael@0: } michael@0: // Permit an empty label at the end (0=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { michael@0: // Label starts with "xn--", try to un-Punycode it. michael@0: wasPunycode=TRUE; michael@0: UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit michael@0: if(unicodeBuffer==NULL) { michael@0: // Should never occur if we used capacity==-1 which uses the internal buffer. michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return labelLength; michael@0: } michael@0: UErrorCode punycodeErrorCode=U_ZERO_ERROR; michael@0: int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4, michael@0: unicodeBuffer, fromPunycode.getCapacity(), michael@0: NULL, &punycodeErrorCode); michael@0: if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) { michael@0: fromPunycode.releaseBuffer(0); michael@0: unicodeBuffer=fromPunycode.getBuffer(unicodeLength); michael@0: if(unicodeBuffer==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return labelLength; michael@0: } michael@0: punycodeErrorCode=U_ZERO_ERROR; michael@0: unicodeLength=u_strFromPunycode(label+4, labelLength-4, michael@0: unicodeBuffer, fromPunycode.getCapacity(), michael@0: NULL, &punycodeErrorCode); michael@0: } michael@0: fromPunycode.releaseBuffer(unicodeLength); michael@0: if(U_FAILURE(punycodeErrorCode)) { michael@0: info.labelErrors|=UIDNA_ERROR_PUNYCODE; michael@0: return markBadACELabel(dest, labelStart, labelLength, toASCII, info); michael@0: } michael@0: // Check for NFC, and for characters that are not michael@0: // valid or deviation characters according to the normalizer. michael@0: // If there is something wrong, then the string will change. michael@0: // Note that the normalizer passes through non-LDH ASCII and deviation characters. michael@0: // Deviation characters are ok in Punycode even in transitional processing. michael@0: // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES michael@0: // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. michael@0: UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return labelLength; michael@0: } michael@0: if(!isValid) { michael@0: info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; michael@0: return markBadACELabel(dest, labelStart, labelLength, toASCII, info); michael@0: } michael@0: labelString=&fromPunycode; michael@0: label=fromPunycode.getBuffer(); michael@0: labelStart=0; michael@0: labelLength=fromPunycode.length(); michael@0: } else { michael@0: wasPunycode=FALSE; michael@0: labelString=&dest; michael@0: } michael@0: // Validity check michael@0: if(labelLength==0) { michael@0: if(toASCII) { michael@0: info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; michael@0: } michael@0: return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); michael@0: } michael@0: // labelLength>0 michael@0: if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { michael@0: // label starts with "??--" michael@0: info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; michael@0: } michael@0: if(label[0]==0x2d) { michael@0: // label starts with "-" michael@0: info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; michael@0: } michael@0: if(label[labelLength-1]==0x2d) { michael@0: // label ends with "-" michael@0: info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; michael@0: } michael@0: // If the label was not a Punycode label, then it was the result of michael@0: // mapping, normalization and label segmentation. michael@0: // If the label was in Punycode, then we mapped it again above michael@0: // and checked its validity. michael@0: // Now we handle the STD3 restriction to LDH characters (if set) michael@0: // and we look for U+FFFD which indicates disallowed characters michael@0: // in a non-Punycode label or U+FFFD itself in a Punycode label. michael@0: // We also check for dots which can come from the input to a single-label function. michael@0: // Ok to cast away const because we own the UnicodeString. michael@0: UChar *s=(UChar *)label; michael@0: const UChar *limit=label+labelLength; michael@0: UChar oredChars=0; michael@0: // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed. michael@0: UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; michael@0: do { michael@0: UChar c=*s; michael@0: if(c<=0x7f) { michael@0: if(c==0x2e) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; michael@0: *s=0xfffd; michael@0: } else if(disallowNonLDHDot && asciiData[c]<0) { michael@0: info.labelErrors|=UIDNA_ERROR_DISALLOWED; michael@0: *s=0xfffd; michael@0: } michael@0: } else { michael@0: oredChars|=c; michael@0: if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { michael@0: info.labelErrors|=UIDNA_ERROR_DISALLOWED; michael@0: *s=0xfffd; michael@0: } else if(c==0xfffd) { michael@0: info.labelErrors|=UIDNA_ERROR_DISALLOWED; michael@0: } michael@0: } michael@0: ++s; michael@0: } while(sreplace(labelStart, cpLength, (UChar)0xfffd); michael@0: label=labelString->getBuffer()+labelStart; michael@0: labelLength+=1-cpLength; michael@0: if(labelString==&dest) { michael@0: destLabelLength=labelLength; michael@0: } michael@0: } michael@0: if((info.labelErrors&severeErrors)==0) { michael@0: // Do contextual checks only if we do not have U+FFFD from a severe error michael@0: // because U+FFFD can make these checks fail. michael@0: if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) { michael@0: checkLabelBiDi(label, labelLength, info); michael@0: } michael@0: if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c && michael@0: !isLabelOkContextJ(label, labelLength) michael@0: ) { michael@0: info.labelErrors|=UIDNA_ERROR_CONTEXTJ; michael@0: } michael@0: if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { michael@0: checkLabelContextO(label, labelLength, info); michael@0: } michael@0: if(toASCII) { michael@0: if(wasPunycode) { michael@0: // Leave a Punycode label unchanged if it has no severe errors. michael@0: if(destLabelLength>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: return destLabelLength; michael@0: } else if(oredChars>=0x80) { michael@0: // Contains non-ASCII characters. michael@0: UnicodeString punycode; michael@0: UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length michael@0: if(buffer==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return destLabelLength; michael@0: } michael@0: buffer[0]=0x78; // Write "xn--". michael@0: buffer[1]=0x6e; michael@0: buffer[2]=0x2d; michael@0: buffer[3]=0x2d; michael@0: int32_t punycodeLength=u_strToPunycode(label, labelLength, michael@0: buffer+4, punycode.getCapacity()-4, michael@0: NULL, &errorCode); michael@0: if(errorCode==U_BUFFER_OVERFLOW_ERROR) { michael@0: errorCode=U_ZERO_ERROR; michael@0: punycode.releaseBuffer(4); michael@0: buffer=punycode.getBuffer(4+punycodeLength); michael@0: if(buffer==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return destLabelLength; michael@0: } michael@0: punycodeLength=u_strToPunycode(label, labelLength, michael@0: buffer+4, punycode.getCapacity()-4, michael@0: NULL, &errorCode); michael@0: } michael@0: punycodeLength+=4; michael@0: punycode.releaseBuffer(punycodeLength); michael@0: if(U_FAILURE(errorCode)) { michael@0: return destLabelLength; michael@0: } michael@0: if(punycodeLength>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: return replaceLabel(dest, destLabelStart, destLabelLength, michael@0: punycode, punycodeLength); michael@0: } else { michael@0: // all-ASCII label michael@0: if(labelLength>63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: } michael@0: } michael@0: } else { michael@0: // If a Punycode label has severe errors, michael@0: // then leave it but make sure it does not look valid. michael@0: if(wasPunycode) { michael@0: info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; michael@0: return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info); michael@0: } michael@0: } michael@0: return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); michael@0: } michael@0: michael@0: // Make sure an ACE label does not look valid. michael@0: // Append U+FFFD if the label has only LDH characters. michael@0: // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. michael@0: int32_t michael@0: UTS46::markBadACELabel(UnicodeString &dest, michael@0: int32_t labelStart, int32_t labelLength, michael@0: UBool toASCII, IDNAInfo &info) const { michael@0: UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; michael@0: UBool isASCII=TRUE; michael@0: UBool onlyLDH=TRUE; michael@0: const UChar *label=dest.getBuffer()+labelStart; michael@0: // Ok to cast away const because we own the UnicodeString. michael@0: UChar *s=(UChar *)label+4; // After the initial "xn--". michael@0: const UChar *limit=label+labelLength; michael@0: do { michael@0: UChar c=*s; michael@0: if(c<=0x7f) { michael@0: if(c==0x2e) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; michael@0: *s=0xfffd; michael@0: isASCII=onlyLDH=FALSE; michael@0: } else if(asciiData[c]<0) { michael@0: onlyLDH=FALSE; michael@0: if(disallowNonLDHDot) { michael@0: *s=0xfffd; michael@0: isASCII=FALSE; michael@0: } michael@0: } michael@0: } else { michael@0: isASCII=onlyLDH=FALSE; michael@0: } michael@0: } while(++s63) { michael@0: info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; michael@0: } michael@0: } michael@0: return labelLength; michael@0: } michael@0: michael@0: const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); michael@0: const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); michael@0: const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; michael@0: michael@0: const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); michael@0: michael@0: const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); michael@0: const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; michael@0: const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); michael@0: michael@0: const uint32_t ES_CS_ET_ON_BN_NSM_MASK= michael@0: U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| michael@0: U_MASK(U_COMMON_NUMBER_SEPARATOR)| michael@0: U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| michael@0: U_MASK(U_OTHER_NEUTRAL)| michael@0: U_MASK(U_BOUNDARY_NEUTRAL)| michael@0: U_MASK(U_DIR_NON_SPACING_MARK); michael@0: const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; michael@0: const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; michael@0: michael@0: // We scan the whole label and check both for whether it contains RTL characters michael@0: // and whether it passes the BiDi Rule. michael@0: // In a BiDi domain name, all labels must pass the BiDi Rule, but we might find michael@0: // that a domain name is a BiDi domain name (has an RTL label) only after michael@0: // processing several earlier labels. michael@0: void michael@0: UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { michael@0: // IDNA2008 BiDi rule michael@0: // Get the directionality of the first character. michael@0: UChar32 c; michael@0: int32_t i=0; michael@0: U16_NEXT_UNSAFE(label, i, c); michael@0: uint32_t firstMask=U_MASK(u_charDirection(c)); michael@0: // 1. The first character must be a character with BIDI property L, R michael@0: // or AL. If it has the R or AL property, it is an RTL label; if it michael@0: // has the L property, it is an LTR label. michael@0: if((firstMask&~L_R_AL_MASK)!=0) { michael@0: info.isOkBiDi=FALSE; michael@0: } michael@0: // Get the directionality of the last non-NSM character. michael@0: uint32_t lastMask; michael@0: for(;;) { michael@0: if(i>=labelLength) { michael@0: lastMask=firstMask; michael@0: break; michael@0: } michael@0: U16_PREV_UNSAFE(label, labelLength, c); michael@0: UCharDirection dir=u_charDirection(c); michael@0: if(dir!=U_DIR_NON_SPACING_MARK) { michael@0: lastMask=U_MASK(dir); michael@0: break; michael@0: } michael@0: } michael@0: // 3. In an RTL label, the end of the label must be a character with michael@0: // BIDI property R, AL, EN or AN, followed by zero or more michael@0: // characters with BIDI property NSM. michael@0: // 6. In an LTR label, the end of the label must be a character with michael@0: // BIDI property L or EN, followed by zero or more characters with michael@0: // BIDI property NSM. michael@0: if( (firstMask&L_MASK)!=0 ? michael@0: (lastMask&~L_EN_MASK)!=0 : michael@0: (lastMask&~R_AL_EN_AN_MASK)!=0 michael@0: ) { michael@0: info.isOkBiDi=FALSE; michael@0: } michael@0: // Get the directionalities of the intervening characters. michael@0: uint32_t mask=0; michael@0: while(ilabelStart) { michael@0: c=s[i-1]; michael@0: if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { michael@0: // Last character in the label is not an L or EN. michael@0: return FALSE; michael@0: } michael@0: } michael@0: labelStart=i+1; michael@0: } else if(i==labelStart) { michael@0: if(!(0x61<=c && c<=0x7a)) { michael@0: // First character in the label is not an L. michael@0: return FALSE; michael@0: } michael@0: } else { michael@0: if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { michael@0: // Intermediate character in the label is a B, S or WS. michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: // UTF-8 version, called for source ASCII prefix. michael@0: // Can contain uppercase A-Z. michael@0: // s[length-1] must be the trailing dot. michael@0: static UBool michael@0: isASCIIOkBiDi(const char *s, int32_t length) { michael@0: int32_t labelStart=0; michael@0: for(int32_t i=0; ilabelStart) { michael@0: c=s[i-1]; michael@0: if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { michael@0: // Last character in the label is not an L or EN. michael@0: return FALSE; michael@0: } michael@0: } michael@0: labelStart=i+1; michael@0: } else if(i==labelStart) { michael@0: if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { michael@0: // First character in the label is not an L. michael@0: return FALSE; michael@0: } michael@0: } else { michael@0: if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { michael@0: // Intermediate character in the label is a B, S or WS. michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: UBool michael@0: UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { michael@0: const UBiDiProps *bdp=ubidi_getSingleton(); michael@0: // [IDNA2008-Tables] michael@0: // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER michael@0: for(int32_t i=0; i0) { michael@0: info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; michael@0: } michael@0: arabicDigits=-1; michael@0: } else if(0x6f0<=c) { michael@0: if(arabicDigits<0) { michael@0: info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; michael@0: } michael@0: arabicDigits=1; michael@0: } michael@0: } michael@0: } else if(c==0x30fb) { michael@0: // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) michael@0: // Rule Set: michael@0: // False; michael@0: // For All Characters: michael@0: // If Script(cp) .in. {Hiragana, Katakana, Han} Then True; michael@0: // End For; michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: for(int j=0;;) { michael@0: if(j>labelEnd) { michael@0: info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; michael@0: break; michael@0: } michael@0: U16_NEXT(label, j, labelLength, c); michael@0: UScriptCode script=uscript_getScript(c, &errorCode); michael@0: if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: // C API ------------------------------------------------------------------- *** michael@0: michael@0: U_NAMESPACE_USE michael@0: michael@0: U_CAPI UIDNA * U_EXPORT2 michael@0: uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { michael@0: return reinterpret_cast(IDNA::createUTS46Instance(options, *pErrorCode)); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: uidna_close(UIDNA *idna) { michael@0: delete reinterpret_cast(idna); michael@0: } michael@0: michael@0: static UBool michael@0: checkArgs(const void *label, int32_t length, michael@0: void *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return FALSE; michael@0: } michael@0: // sizeof(UIDNAInfo)=16 in the first API version. michael@0: if(pInfo==NULL || pInfo->size<16) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: if( (label==NULL ? length!=0 : length<-1) || michael@0: (dest==NULL ? capacity!=0 : capacity<0) || michael@0: (dest==label && label!=NULL) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: // Set all *pInfo bytes to 0 except for the size field itself. michael@0: uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); michael@0: return TRUE; michael@0: } michael@0: michael@0: static void michael@0: idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { michael@0: pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); michael@0: pInfo->errors=info.getErrors(); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_labelToASCII(const UIDNA *idna, michael@0: const UChar *label, int32_t length, michael@0: UChar *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: UnicodeString src((UBool)(length<0), label, length); michael@0: UnicodeString destString(dest, 0, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->labelToASCII(src, destString, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return destString.extract(dest, capacity, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_labelToUnicode(const UIDNA *idna, michael@0: const UChar *label, int32_t length, michael@0: UChar *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: UnicodeString src((UBool)(length<0), label, length); michael@0: UnicodeString destString(dest, 0, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->labelToUnicode(src, destString, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return destString.extract(dest, capacity, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_nameToASCII(const UIDNA *idna, michael@0: const UChar *name, int32_t length, michael@0: UChar *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: UnicodeString src((UBool)(length<0), name, length); michael@0: UnicodeString destString(dest, 0, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->nameToASCII(src, destString, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return destString.extract(dest, capacity, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_nameToUnicode(const UIDNA *idna, michael@0: const UChar *name, int32_t length, michael@0: UChar *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: UnicodeString src((UBool)(length<0), name, length); michael@0: UnicodeString destString(dest, 0, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->nameToUnicode(src, destString, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return destString.extract(dest, capacity, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_labelToASCII_UTF8(const UIDNA *idna, michael@0: const char *label, int32_t length, michael@0: char *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: StringPiece src(label, length<0 ? uprv_strlen(label) : length); michael@0: CheckedArrayByteSink sink(dest, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_labelToUnicodeUTF8(const UIDNA *idna, michael@0: const char *label, int32_t length, michael@0: char *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: StringPiece src(label, length<0 ? uprv_strlen(label) : length); michael@0: CheckedArrayByteSink sink(dest, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_nameToASCII_UTF8(const UIDNA *idna, michael@0: const char *name, int32_t length, michael@0: char *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: StringPiece src(name, length<0 ? uprv_strlen(name) : length); michael@0: CheckedArrayByteSink sink(dest, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uidna_nameToUnicodeUTF8(const UIDNA *idna, michael@0: const char *name, int32_t length, michael@0: char *dest, int32_t capacity, michael@0: UIDNAInfo *pInfo, UErrorCode *pErrorCode) { michael@0: if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { michael@0: return 0; michael@0: } michael@0: StringPiece src(name, length<0 ? uprv_strlen(name) : length); michael@0: CheckedArrayByteSink sink(dest, capacity); michael@0: IDNAInfo info; michael@0: reinterpret_cast(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); michael@0: idnaInfoToStruct(info, pInfo); michael@0: return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); michael@0: } michael@0: michael@0: #endif // UCONFIG_NO_IDNA