The Tor Browser: diff intl/icu/source/common/ucnv

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnv_u7.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1484 @@
     1.4 +/*  
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2002-2011, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   file name:  ucnv_u7.c
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2002jul01
    1.15 +*   created by: Markus W. Scherer
    1.16 +*
    1.17 +*   UTF-7 converter implementation. Used to be in ucnv_utf.c.
    1.18 +*/
    1.19 +
    1.20 +#include "unicode/utypes.h"
    1.21 +
    1.22 +#if !UCONFIG_NO_CONVERSION
    1.23 +
    1.24 +#include "unicode/ucnv.h"
    1.25 +#include "ucnv_bld.h"
    1.26 +#include "ucnv_cnv.h"
    1.27 +#include "uassert.h"
    1.28 +
    1.29 +/* UTF-7 -------------------------------------------------------------------- */
    1.30 +
    1.31 +/*
    1.32 + * UTF-7 is a stateful encoding of Unicode.
    1.33 + * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
    1.34 + * It was intended for use in Internet email systems, using in its bytewise
    1.35 + * encoding only a subset of 7-bit US-ASCII.
    1.36 + * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
    1.37 + * occasionally used.
    1.38 + *
    1.39 + * When converting Unicode to UTF-7, the RFC allows some US-ASCII characters
    1.40 + * to be encoded either directly or in base64. In particular, the characters in set O
    1.41 + * as defined in the RFC (see below) may be encoded directly but are not
    1.42 + * allowed in, e.g., email headers.
    1.43 + * By default, the ICU UTF-7 converter encodes set O directly.
    1.44 + * By choosing the option "version=1", set O will be escaped instead.
    1.45 + * For example:
    1.46 + *     utf7Converter=ucnv_open("UTF-7,version=1");
    1.47 + *
    1.48 + * For details about email headers see RFC 2047.
    1.49 + */
    1.50 +
    1.51 +/*
    1.52 + * Tests for US-ASCII characters belonging to character classes
    1.53 + * defined in UTF-7.
    1.54 + *
    1.55 + * Set D (directly encoded characters) consists of the following
    1.56 + * characters: the upper and lower case letters A through Z
    1.57 + * and a through z, the 10 digits 0-9, and the following nine special
    1.58 + * characters (note that "+" and "=" are omitted):
    1.59 + *     '(),-./:?
    1.60 + *
    1.61 + * Set O (optional direct characters) consists of the following
    1.62 + * characters (note that "\" and "~" are omitted):
    1.63 + *     !"#$%&*;<=>@[]^_`{|}
    1.64 + *
    1.65 + * According to the rules in RFC 2152, the byte values for the following
    1.66 + * US-ASCII characters are not used in UTF-7 and are therefore illegal:
    1.67 + * - all C0 control codes except for CR LF TAB
    1.68 + * - BACKSLASH
    1.69 + * - TILDE
    1.70 + * - DEL
    1.71 + * - all codes beyond US-ASCII, i.e. all >127
    1.72 + */
    1.73 +#define inSetD(c) /* 1 if c is a set D character, else 0; note that c is evaluated several times */ \
    1.74 +    ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters a-z, A-Z; (uint8_t)((c)-lo)<n is a wraparound range test for lo..lo+n-1 */ \
    1.75 +     (uint8_t)((c)-48)<10 ||    /* digits 0-9 */ \
    1.76 +     (uint8_t)((c)-39)<3 ||     /* '() i.e. codes 39..41 */ \
    1.77 +     (uint8_t)((c)-44)<4 ||     /* ,-./ i.e. codes 44..47 */ \
    1.78 +     (c)==58 || (c)==63         /* :? */ \
    1.79 +    )
    1.80 +
    1.81 +#define inSetO(c) /* 1 if c is a set O character, else 0; note that c is evaluated several times */ \
    1.82 +    ((uint8_t)((c)-33)<6 ||         /* !"#$%& i.e. codes 33..38 */ \
    1.83 +     (uint8_t)((c)-59)<4 ||         /* ;<=> i.e. codes 59..62 */ \
    1.84 +     (uint8_t)((c)-93)<4 ||         /* ]^_` i.e. codes 93..96 */ \
    1.85 +     (uint8_t)((c)-123)<3 ||        /* {|} i.e. codes 123..125 */ \
    1.86 +     (c)==42 || (c)==64 || (c)==91  /* *@[ */ \
    1.87 +    )
    1.88 +
    1.89 +#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) /* CR, LF or TAB */
    1.90 +#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) /* SPACE, CR, LF or TAB */
    1.91 +
    1.92 +#define PLUS  43 /* '+' : switches from direct mode to base64 (Unicode) mode */
    1.93 +#define MINUS 45 /* '-' : explicitly terminates a base64 sequence */
    1.94 +#define BACKSLASH 92 /* never legal in UTF-7 */
    1.95 +#define TILDE 126 /* never legal in UTF-7 */
    1.96 +
    1.97 +/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB; the range test accepts 32..125, so TILDE (126) and DEL (127) are rejected by the range itself and only BACKSLASH needs an explicit exclusion; PLUS counts as legal here */
    1.98 +#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
    1.99 +
   1.100 +/* encode directly sets D and O and CR LF SP TAB; indexed by US-ASCII code (0..127), nonzero means the character is written as-is; the fromUnicode code selects this table when the version bits of fromUnicodeStatus are 0 */
   1.101 +static const UBool encodeDirectlyMaximum[128]={
   1.102 + /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
   1.103 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00: only TAB LF CR */
   1.104 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10: no C0 controls */
   1.105 +
   1.106 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, /* 0x20: everything except '+' (0x2b) */
   1.107 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30 */
   1.108 +
   1.109 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
   1.110 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* 0x50: everything except backslash (0x5c) */
   1.111 +
   1.112 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 */
   1.113 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0  /* 0x70: everything except tilde (0x7e) and DEL (0x7f) */
   1.114 +};
   1.115 +
   1.116 +/* encode directly set D and CR LF SP TAB but not set O; indexed by US-ASCII code (0..127), nonzero means the character is written as-is; the fromUnicode code selects this table when the version bits of fromUnicodeStatus are nonzero */
   1.117 +static const UBool encodeDirectlyRestricted[128]={
   1.118 + /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
   1.119 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00: only TAB LF CR */
   1.120 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10: no C0 controls */
   1.121 +
   1.122 +    1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, /* 0x20: SP ' ( ) , - . / */
   1.123 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x30: digits, ':' and '?' */
   1.124 +
   1.125 +    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40: A-O ('@' is in set O) */
   1.126 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50: P-Z only */
   1.127 +
   1.128 +    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60: a-o ('`' is in set O) */
   1.129 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  /* 0x70: p-z only */
   1.130 +};
   1.131 +
   1.132 +static const uint8_t
   1.133 +toBase64[64]={ /* maps a 6-bit value (0..63) to the US-ASCII code of its base64 digit */
   1.134 +    /* A-Z: values 0..25 */
   1.135 +    65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
   1.136 +    78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
   1.137 +    /* a-z: values 26..51 */
   1.138 +    97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
   1.139 +    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
   1.140 +    /* 0-9: values 52..61 */
   1.141 +    48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
   1.142 +    /* +/ : values 62 and 63 */
   1.143 +    43, 47
   1.144 +};
   1.145 +
   1.146 +static const int8_t
   1.147 +fromBase64[128]={ /* indexed by US-ASCII code: 0..63 = value of a base64 digit, -1 = legal but not base64, -2 = minus sign, -3 = illegal in UTF-7 */
   1.148 +    /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
   1.149 +    -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
   1.150 +    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
   1.151 +
   1.152 +    /* general punctuation with + (62) and / (63) and a special value (-2) for - */
   1.153 +    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
   1.154 +    /* digits (52..61), followed by punctuation :;<=>? */
   1.155 +    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
   1.156 +
   1.157 +    /* A-Z (0..25); backslash at 0x5c is illegal (-3) */
   1.158 +    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   1.159 +    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
   1.160 +
   1.161 +    /* a-z (26..51); tilde at 0x7e and DEL at 0x7f are illegal (-3) */
   1.162 +    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
   1.163 +    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
   1.164 +};
   1.165 +
   1.166 +/*
   1.167 + * converter status values:
   1.168 + *
   1.169 + * toUnicodeStatus:
   1.170 + *     24 inDirectMode (boolean)
   1.171 + * 23..16 base64Counter (-1..7)
   1.172 + * 15..0  bits (up to 14 bits incoming base64)
   1.173 + *
   1.174 + * fromUnicodeStatus:
   1.175 + * 31..28 version (0: set O direct  1: set O escaped)
   1.176 + *     24 inDirectMode (boolean)
   1.177 + * 23..16 base64Counter (0..2)
   1.178 + *  7..0  bits (6 bits outgoing base64)
   1.179 + *
   1.180 + */
   1.181 +
   1.182 +static void
   1.183 +_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) { /* put the UTF-7 state of cnv back to its initial value for the direction(s) selected by choice */
   1.184 +    if(choice<=UCNV_RESET_TO_UNICODE) { /* NOTE(review): presumably also true for UCNV_RESET_BOTH, which relies on the enum ordering - confirm against the enum definition */
   1.185 +        /* reset toUnicode: direct mode, base64Counter and bits zero, no buffered input bytes */
   1.186 +        cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE (bit 24) */
   1.187 +        cnv->toULength=0;
   1.188 +    }
   1.189 +    if(choice!=UCNV_RESET_TO_UNICODE) {
   1.190 +        /* reset fromUnicode: only bits 31..28 survive, everything else returns to the initial direct-mode state */
   1.191 +        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
   1.192 +    }
   1.193 +}
   1.194 +
   1.195 +static void
   1.196 +_UTF7Open(UConverter *cnv,
   1.197 +          UConverterLoadArgs *pArgs, /* not used by this function */
   1.198 +          UErrorCode *pErrorCode) { /* initialize the UTF-7 state of cnv; only versions 0 and 1 are accepted */
   1.199 +    if(UCNV_GET_VERSION(cnv)<=1) {
   1.200 +        /* TODO(markus): Should just use cnv->options rather than copying the version number. */
   1.201 +        cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28; /* version goes into bits 31..28, which _UTF7Reset preserves */
   1.202 +        _UTF7Reset(cnv, UCNV_RESET_BOTH);
   1.203 +    } else {
   1.204 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; /* unsupported version; the converter state is left untouched */
   1.205 +    }
   1.206 +}
   1.207 +
   1.208 +static void
   1.209 +_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
   1.210 +                          UErrorCode *pErrorCode) { /* decode UTF-7 bytes from pArgs->source into UChars at pArgs->target; the decoder state is carried between calls in cnv->toUnicodeStatus, cnv->toUBytes and cnv->toULength */
   1.211 +    UConverter *cnv;
   1.212 +    const uint8_t *source, *sourceLimit;
   1.213 +    UChar *target;
   1.214 +    const UChar *targetLimit;
   1.215 +    int32_t *offsets; /* may be NULL; otherwise receives one source index per UChar written */
   1.216 +
   1.217 +    uint8_t *bytes; /* cnv->toUBytes: input bytes of the sequence currently in progress or in error */
   1.218 +    uint8_t byteIndex; /* number of bytes currently stored in bytes[] */
   1.219 +
   1.220 +    int32_t length, targetCapacity; /* direct mode only: bound for the one-byte-to-one-UChar loop */
   1.221 +
   1.222 +    /* UTF-7 state, unpacked from cnv->toUnicodeStatus */
   1.223 +    uint16_t bits; /* base64 bits collected so far that have not yet been output as a UChar */
   1.224 +    int8_t base64Counter; /* position within the cycle of 8 base64 bytes that yields 3 UChars; -1 right after a '+' */
   1.225 +    UBool inDirectMode;
   1.226 +
   1.227 +    int8_t base64Value; /* fromBase64[] result: >=0 base64 digit value, -1 other legal char, -2 minus sign, -3 illegal */
   1.228 +
   1.229 +    int32_t sourceIndex, nextSourceIndex;
   1.230 +
   1.231 +    uint8_t b;
   1.232 +    /* set up the local pointers */
   1.233 +    cnv=pArgs->converter;
   1.234 +
   1.235 +    source=(const uint8_t *)pArgs->source;
   1.236 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
   1.237 +    target=pArgs->target;
   1.238 +    targetLimit=pArgs->targetLimit;
   1.239 +    offsets=pArgs->offsets;
   1.240 +    /* get the state machine state as it was packed at the end of the previous call */
   1.241 +    {
   1.242 +        uint32_t status=cnv->toUnicodeStatus;
   1.243 +        inDirectMode=(UBool)((status>>24)&1);
   1.244 +        base64Counter=(int8_t)(status>>16); /* only the low 8 bits survive the cast; -1 was stored as 0xff */
   1.245 +        bits=(uint16_t)status;
   1.246 +    }
   1.247 +    bytes=cnv->toUBytes;
   1.248 +    byteIndex=cnv->toULength;
   1.249 +
   1.250 +    /* sourceIndex=-1 if the current character began in the previous buffer */
   1.251 +    sourceIndex=byteIndex==0 ? 0 : -1;
   1.252 +    nextSourceIndex=0;
   1.253 +
   1.254 +    if(inDirectMode) {
   1.255 +directMode:
   1.256 +        /*
   1.257 +         * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
   1.258 +         * with their US-ASCII byte values.
   1.259 +         * Backslash and Tilde and most control characters are not allowed in UTF-7.
   1.260 +         * A plus sign starts Unicode (or "escape") Mode.
   1.261 +         *
   1.262 +         * In Direct Mode, only the sourceIndex is used.
   1.263 +         */
   1.264 +        byteIndex=0;
   1.265 +        length=(int32_t)(sourceLimit-source);
   1.266 +        targetCapacity=(int32_t)(targetLimit-target);
   1.267 +        if(length>targetCapacity) { /* each direct byte produces one UChar, so cap the loop at the free target space */
   1.268 +            length=targetCapacity;
   1.269 +        }
   1.270 +        while(length>0) {
   1.271 +            b=*source++;
   1.272 +            if(!isLegalUTF7(b)) {
   1.273 +                /* illegal: hand the single offending byte back in bytes[] together with the error code */
   1.274 +                bytes[0]=b;
   1.275 +                byteIndex=1;
   1.276 +                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.277 +                break;
   1.278 +            } else if(b!=PLUS) {
   1.279 +                /* write directly encoded character */
   1.280 +                *target++=b;
   1.281 +                if(offsets!=NULL) {
   1.282 +                    *offsets++=sourceIndex++;
   1.283 +                }
   1.284 +            } else /* PLUS */ {
   1.285 +                /* switch to Unicode mode */
   1.286 +                nextSourceIndex=++sourceIndex; /* step past the '+' */
   1.287 +                inDirectMode=FALSE;
   1.288 +                byteIndex=0;
   1.289 +                bits=0;
   1.290 +                base64Counter=-1;
   1.291 +                goto unicodeMode;
   1.292 +            }
   1.293 +            --length;
   1.294 +        }
   1.295 +        if(source<sourceLimit && target>=targetLimit) {
   1.296 +            /* target is full */
   1.297 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.298 +        }
   1.299 +    } else {
   1.300 +unicodeMode:
   1.301 +        /*
   1.302 +         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
   1.303 +         * The base64 sequence ends with any character that is not in the base64 alphabet.
   1.304 +         * A terminating minus sign is consumed.
   1.305 +         *
   1.306 +         * In Unicode Mode, the sourceIndex has the index to the start of the current
   1.307 +         * base64 bytes, while nextSourceIndex is precisely parallel to source,
   1.308 +         * keeping the index to the following byte.
   1.309 +         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
   1.310 +         */
   1.311 +        while(source<sourceLimit) {
   1.312 +            if(target<targetLimit) {
   1.313 +                bytes[byteIndex++]=b=*source++;
   1.314 +                ++nextSourceIndex;
   1.315 +                base64Value = -3; /* initialize as illegal; stays -3 when b>=126 short-circuits the table lookup below */
   1.316 +                if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
   1.317 +                    /* either
   1.318 +                     * base64Value==-1 for any legal character except base64 and minus sign, or
   1.319 +                     * base64Value==-3 for illegal characters:
   1.320 +                     * 1. In either case, leave Unicode mode.
   1.321 +                     * 2.1. If we ended with an incomplete UChar or none after the +, then
   1.322 +                     *      generate an error for the preceding erroneous sequence and deal with
   1.323 +                     *      the current (possibly illegal) character next time through.
   1.324 +                     * 2.2. Else the current char comes after a complete UChar, which was already
   1.325 +                     *      pushed to the output buf, so:
   1.326 +                     * 2.2.1. If the current char is legal, just save it for processing next time.
   1.327 +                     *        It may be for example, a plus which we need to deal with in direct mode.
   1.328 +                     * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
   1.329 +                     */
   1.330 +                    inDirectMode=TRUE;
   1.331 +                    if(base64Counter==-1) {
   1.332 +                        /* illegal: + immediately followed by something other than base64 or minus sign */
   1.333 +                        /* include the plus sign in the reported sequence, but not the subsequent char */
   1.334 +                        --source;
   1.335 +                        bytes[0]=PLUS;
   1.336 +                        byteIndex=1;
   1.337 +                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.338 +                        break;
   1.339 +                    } else if(bits!=0) {
   1.340 +                        /* bits are illegally left over, a UChar is incomplete */
   1.341 +                        /* don't include current char (legal or illegal) in error seq */
   1.342 +                        --source;
   1.343 +                        --byteIndex;
   1.344 +                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.345 +                        break;
   1.346 +                    } else {
   1.347 +                        /* previous UChar was complete */
   1.348 +                        if(base64Value==-3) {
   1.349 +                            /* current character is illegal, deal with it here */
   1.350 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.351 +                            break;
   1.352 +                        } else {
   1.353 +                            /* un-read the current character in case it is a plus sign */
   1.354 +                            --source;
   1.355 +                            sourceIndex=nextSourceIndex-1;
   1.356 +                            goto directMode;
   1.357 +                        }
   1.358 +                    }
   1.359 +                } else if(base64Value>=0) {
   1.360 +                    /* collect base64 bytes into UChars: 8 base64 bytes carry 48 bits, i.e. exactly 3 UChars */
   1.361 +                    switch(base64Counter) {
   1.362 +                    case -1: /* -1 is immediately after the + */
   1.363 +                    case 0: /* first base64 byte of a cycle: start collecting */
   1.364 +                        bits=base64Value;
   1.365 +                        base64Counter=1;
   1.366 +                        break;
   1.367 +                    case 1:
   1.368 +                    case 3:
   1.369 +                    case 4:
   1.370 +                    case 6: /* cases 1, 3, 4, 6: no UChar completes here, just append 6 more bits */
   1.371 +                        bits=(uint16_t)((bits<<6)|base64Value);
   1.372 +                        ++base64Counter;
   1.373 +                        break;
   1.374 +                    case 2: /* 12 collected bits + top 4 bits of this byte's value = one UChar; the low 2 bits carry over */
   1.375 +                        *target++=(UChar)((bits<<4)|(base64Value>>2));
   1.376 +                        if(offsets!=NULL) {
   1.377 +                            *offsets++=sourceIndex;
   1.378 +                            sourceIndex=nextSourceIndex-1; /* the next UChar starts inside the current byte */
   1.379 +                        }
   1.380 +                        bytes[0]=b; /* keep this byte in case an error occurs */
   1.381 +                        byteIndex=1;
   1.382 +                        bits=(uint16_t)(base64Value&3);
   1.383 +                        base64Counter=3;
   1.384 +                        break;
   1.385 +                    case 5: /* 14 collected bits + top 2 bits of this byte's value = one UChar; the low 4 bits carry over */
   1.386 +                        *target++=(UChar)((bits<<2)|(base64Value>>4));
   1.387 +                        if(offsets!=NULL) {
   1.388 +                            *offsets++=sourceIndex;
   1.389 +                            sourceIndex=nextSourceIndex-1; /* the next UChar starts inside the current byte */
   1.390 +                        }
   1.391 +                        bytes[0]=b; /* keep this byte in case an error occurs */
   1.392 +                        byteIndex=1;
   1.393 +                        bits=(uint16_t)(base64Value&15);
   1.394 +                        base64Counter=6;
   1.395 +                        break;
   1.396 +                    case 7: /* 10 collected bits + all 6 bits of this byte's value = one UChar; nothing carries over */
   1.397 +                        *target++=(UChar)((bits<<6)|base64Value);
   1.398 +                        if(offsets!=NULL) {
   1.399 +                            *offsets++=sourceIndex;
   1.400 +                            sourceIndex=nextSourceIndex; /* this byte is fully used, so the next UChar starts at the following byte */
   1.401 +                        }
   1.402 +                        byteIndex=0;
   1.403 +                        bits=0;
   1.404 +                        base64Counter=0;
   1.405 +                        break;
   1.406 +                    default:
   1.407 +                        /* will never occur */
   1.408 +                        break;
   1.409 +                    }
   1.410 +                } else /*base64Value==-2*/ {
   1.411 +                    /* minus sign terminates the base64 sequence */
   1.412 +                    inDirectMode=TRUE;
   1.413 +                    if(base64Counter==-1) {
   1.414 +                        /* +- i.e. a minus immediately following a plus: the pair stands for a literal plus sign */
   1.415 +                        *target++=PLUS;
   1.416 +                        if(offsets!=NULL) {
   1.417 +                            *offsets++=sourceIndex-1; /* sourceIndex is just past the '+', so this is the '+' itself (-1 if the '+' ended the previous buffer) */
   1.418 +                        }
   1.419 +                    } else {
   1.420 +                        /* absorb the minus and leave the Unicode Mode */
   1.421 +                        if(bits!=0) {
   1.422 +                            /* bits are illegally left over, a UChar is incomplete */
   1.423 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.424 +                            break;
   1.425 +                        }
   1.426 +                    }
   1.427 +                    sourceIndex=nextSourceIndex;
   1.428 +                    goto directMode;
   1.429 +                }
   1.430 +            } else {
   1.431 +                /* target is full */
   1.432 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.433 +                break;
   1.434 +            }
   1.435 +        }
   1.436 +    }
   1.437 +
   1.438 +    if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
   1.439 +        /*
   1.440 +         * if we are in Unicode mode, then the byteIndex might not be 0,
   1.441 +         * but that is ok if bits==0
   1.442 +         * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
   1.443 +         * (not true for IMAP-mailbox-name where we must end in direct mode)
   1.444 +         */
   1.445 +        byteIndex=0;
   1.446 +    }
   1.447 +
   1.448 +    /* set the converter state back into UConverter: bit 24 inDirectMode, bits 23..16 base64Counter (as an unsigned byte), bits 15..0 bits */
   1.449 +    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
   1.450 +    cnv->toULength=byteIndex;
   1.451 +
   1.452 +    /* write back the updated pointers */
   1.453 +    pArgs->source=(const char *)source;
   1.454 +    pArgs->target=target;
   1.455 +    pArgs->offsets=offsets;
   1.456 +    return;
   1.457 +}
   1.458 +
   1.459 +static void
   1.460 +_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
   1.461 +                            UErrorCode *pErrorCode) {
   1.462 +    UConverter *cnv;
   1.463 +    const UChar *source, *sourceLimit;
   1.464 +    uint8_t *target, *targetLimit;
   1.465 +    int32_t *offsets;
   1.466 +
   1.467 +    int32_t length, targetCapacity, sourceIndex;
   1.468 +    UChar c;
   1.469 +
   1.470 +    /* UTF-7 state */
   1.471 +    const UBool *encodeDirectly;
   1.472 +    uint8_t bits;
   1.473 +    int8_t base64Counter;
   1.474 +    UBool inDirectMode;
   1.475 +
   1.476 +    /* set up the local pointers */
   1.477 +    cnv=pArgs->converter;
   1.478 +
   1.479 +    /* set up the local pointers */
   1.480 +    source=pArgs->source;
   1.481 +    sourceLimit=pArgs->sourceLimit;
   1.482 +    target=(uint8_t *)pArgs->target;
   1.483 +    targetLimit=(uint8_t *)pArgs->targetLimit;
   1.484 +    offsets=pArgs->offsets;
   1.485 +
   1.486 +    /* get the state machine state */
   1.487 +    {
   1.488 +        uint32_t status=cnv->fromUnicodeStatus;
   1.489 +        encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
   1.490 +        inDirectMode=(UBool)((status>>24)&1);
   1.491 +        base64Counter=(int8_t)(status>>16);
   1.492 +        bits=(uint8_t)status;
   1.493 +        U_ASSERT(bits<=sizeof(toBase64)/sizeof(toBase64[0]));
   1.494 +    }
   1.495 +
   1.496 +    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
   1.497 +    sourceIndex=0;
   1.498 +
   1.499 +    if(inDirectMode) {
   1.500 +directMode:
   1.501 +        length=(int32_t)(sourceLimit-source);
   1.502 +        targetCapacity=(int32_t)(targetLimit-target);
   1.503 +        if(length>targetCapacity) {
   1.504 +            length=targetCapacity;
   1.505 +        }
   1.506 +        while(length>0) {
   1.507 +            c=*source++;
   1.508 +            /* currently always encode CR LF SP TAB directly */
   1.509 +            if(c<=127 && encodeDirectly[c]) {
   1.510 +                /* encode directly */
   1.511 +                *target++=(uint8_t)c;
   1.512 +                if(offsets!=NULL) {
   1.513 +                    *offsets++=sourceIndex++;
   1.514 +                }
   1.515 +            } else if(c==PLUS) {
   1.516 +                /* output +- for + */
   1.517 +                *target++=PLUS;
   1.518 +                if(target<targetLimit) {
   1.519 +                    *target++=MINUS;
   1.520 +                    if(offsets!=NULL) {
   1.521 +                        *offsets++=sourceIndex;
   1.522 +                        *offsets++=sourceIndex++;
   1.523 +                    }
   1.524 +                    /* realign length and targetCapacity */
   1.525 +                    goto directMode;
   1.526 +                } else {
   1.527 +                    if(offsets!=NULL) {
   1.528 +                        *offsets++=sourceIndex++;
   1.529 +                    }
   1.530 +                    cnv->charErrorBuffer[0]=MINUS;
   1.531 +                    cnv->charErrorBufferLength=1;
   1.532 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.533 +                    break;
   1.534 +                }
   1.535 +            } else {
   1.536 +                /* un-read this character and switch to Unicode Mode */
   1.537 +                --source;
   1.538 +                *target++=PLUS;
   1.539 +                if(offsets!=NULL) {
   1.540 +                    *offsets++=sourceIndex;
   1.541 +                }
   1.542 +                inDirectMode=FALSE;
   1.543 +                base64Counter=0;
   1.544 +                goto unicodeMode;
   1.545 +            }
   1.546 +            --length;
   1.547 +        }
   1.548 +        if(source<sourceLimit && target>=targetLimit) {
   1.549 +            /* target is full */
   1.550 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.551 +        }
   1.552 +    } else {
   1.553 +unicodeMode:
   1.554 +        while(source<sourceLimit) {
   1.555 +            if(target<targetLimit) {
   1.556 +                c=*source++;
   1.557 +                if(c<=127 && encodeDirectly[c]) {
   1.558 +                    /* encode directly */
   1.559 +                    inDirectMode=TRUE;
   1.560 +
   1.561 +                    /* trick: back out this character to make this easier */
   1.562 +                    --source;
   1.563 +
   1.564 +                    /* terminate the base64 sequence */
   1.565 +                    if(base64Counter!=0) {
   1.566 +                        /* write remaining bits for the previous character */
   1.567 +                        *target++=toBase64[bits];
   1.568 +                        if(offsets!=NULL) {
   1.569 +                            *offsets++=sourceIndex-1;
   1.570 +                        }
   1.571 +                    }
   1.572 +                    if(fromBase64[c]!=-1) {
   1.573 +                        /* need to terminate with a minus */
   1.574 +                        if(target<targetLimit) {
   1.575 +                            *target++=MINUS;
   1.576 +                            if(offsets!=NULL) {
   1.577 +                                *offsets++=sourceIndex-1;
   1.578 +                            }
   1.579 +                        } else {
   1.580 +                            cnv->charErrorBuffer[0]=MINUS;
   1.581 +                            cnv->charErrorBufferLength=1;
   1.582 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.583 +                            break;
   1.584 +                        }
   1.585 +                    }
   1.586 +                    goto directMode;
   1.587 +                } else {
   1.588 +                    /*
   1.589 +                     * base64 this character:
   1.590 +                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
   1.591 +                     * and the bits of this character, each implicitly in UTF-16BE.
   1.592 +                     *
   1.593 +                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
   1.594 +                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
   1.595 +                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
   1.596 +                     */
   1.597 +                    switch(base64Counter) {
   1.598 +                    case 0:
   1.599 +                        *target++=toBase64[c>>10];
   1.600 +                        if(target<targetLimit) {
   1.601 +                            *target++=toBase64[(c>>4)&0x3f];
   1.602 +                            if(offsets!=NULL) {
   1.603 +                                *offsets++=sourceIndex;
   1.604 +                                *offsets++=sourceIndex++;
   1.605 +                            }
   1.606 +                        } else {
   1.607 +                            if(offsets!=NULL) {
   1.608 +                                *offsets++=sourceIndex++;
   1.609 +                            }
   1.610 +                            cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
   1.611 +                            cnv->charErrorBufferLength=1;
   1.612 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.613 +                        }
   1.614 +                        bits=(uint8_t)((c&15)<<2);
   1.615 +                        base64Counter=1;
   1.616 +                        break;
   1.617 +                    case 1:
   1.618 +                        *target++=toBase64[bits|(c>>14)];
   1.619 +                        if(target<targetLimit) {
   1.620 +                            *target++=toBase64[(c>>8)&0x3f];
   1.621 +                            if(target<targetLimit) {
   1.622 +                                *target++=toBase64[(c>>2)&0x3f];
   1.623 +                                if(offsets!=NULL) {
   1.624 +                                    *offsets++=sourceIndex;
   1.625 +                                    *offsets++=sourceIndex;
   1.626 +                                    *offsets++=sourceIndex++;
   1.627 +                                }
   1.628 +                            } else {
   1.629 +                                if(offsets!=NULL) {
   1.630 +                                    *offsets++=sourceIndex;
   1.631 +                                    *offsets++=sourceIndex++;
   1.632 +                                }
   1.633 +                                cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
   1.634 +                                cnv->charErrorBufferLength=1;
   1.635 +                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.636 +                            }
   1.637 +                        } else {
   1.638 +                            if(offsets!=NULL) {
   1.639 +                                *offsets++=sourceIndex++;
   1.640 +                            }
   1.641 +                            cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
   1.642 +                            cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
   1.643 +                            cnv->charErrorBufferLength=2;
   1.644 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.645 +                        }
   1.646 +                        bits=(uint8_t)((c&3)<<4);
   1.647 +                        base64Counter=2;
   1.648 +                        break;
   1.649 +                    case 2:
   1.650 +                        *target++=toBase64[bits|(c>>12)];
   1.651 +                        if(target<targetLimit) {
   1.652 +                            *target++=toBase64[(c>>6)&0x3f];
   1.653 +                            if(target<targetLimit) {
   1.654 +                                *target++=toBase64[c&0x3f];
   1.655 +                                if(offsets!=NULL) {
   1.656 +                                    *offsets++=sourceIndex;
   1.657 +                                    *offsets++=sourceIndex;
   1.658 +                                    *offsets++=sourceIndex++;
   1.659 +                                }
   1.660 +                            } else {
   1.661 +                                if(offsets!=NULL) {
   1.662 +                                    *offsets++=sourceIndex;
   1.663 +                                    *offsets++=sourceIndex++;
   1.664 +                                }
   1.665 +                                cnv->charErrorBuffer[0]=toBase64[c&0x3f];
   1.666 +                                cnv->charErrorBufferLength=1;
   1.667 +                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.668 +                            }
   1.669 +                        } else {
   1.670 +                            if(offsets!=NULL) {
   1.671 +                                *offsets++=sourceIndex++;
   1.672 +                            }
   1.673 +                            cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
   1.674 +                            cnv->charErrorBuffer[1]=toBase64[c&0x3f];
   1.675 +                            cnv->charErrorBufferLength=2;
   1.676 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.677 +                        }
   1.678 +                        bits=0;
   1.679 +                        base64Counter=0;
   1.680 +                        break;
   1.681 +                    default:
   1.682 +                        /* will never occur */
   1.683 +                        break;
   1.684 +                    }
   1.685 +                }
   1.686 +            } else {
   1.687 +                /* target is full */
   1.688 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.689 +                break;
   1.690 +            }
   1.691 +        }
   1.692 +    }
   1.693 +
   1.694 +    if(pArgs->flush && source>=sourceLimit) {
   1.695 +        /* flush remaining bits to the target */
   1.696 +        if(!inDirectMode) {
   1.697 +            if (base64Counter!=0) {
   1.698 +                if(target<targetLimit) {
   1.699 +                    *target++=toBase64[bits];
   1.700 +                    if(offsets!=NULL) {
   1.701 +                        *offsets++=sourceIndex-1;
   1.702 +                    }
   1.703 +                } else {
   1.704 +                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
   1.705 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.706 +                }
   1.707 +            }
   1.708 +            /* Add final MINUS to terminate unicodeMode */
   1.709 +            if(target<targetLimit) {
   1.710 +                *target++=MINUS;
   1.711 +                if(offsets!=NULL) {
   1.712 +                    *offsets++=sourceIndex-1;
   1.713 +                }
   1.714 +            } else {
   1.715 +                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
   1.716 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.717 +            }
   1.718 +        }
   1.719 +        /* reset the state for the next conversion */
   1.720 +        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
   1.721 +    } else {
   1.722 +        /* set the converter state back into UConverter */
   1.723 +        cnv->fromUnicodeStatus=
   1.724 +            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
   1.725 +            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
   1.726 +    }
   1.727 +
   1.728 +    /* write back the updated pointers */
   1.729 +    pArgs->source=source;
   1.730 +    pArgs->target=(char *)target;
   1.731 +    pArgs->offsets=offsets;
   1.732 +    return;
   1.733 +}
   1.734 +
   1.735 +static const char * /* returns a string literal naming this converter variant */
   1.736 +_UTF7GetName(const UConverter *cnv) {
   1.737 +    switch(cnv->fromUnicodeStatus>>28) { /* the version option is kept in bits 31..28 of fromUnicodeStatus */
   1.738 +    case 1:
   1.739 +        return "UTF-7,version=1";
   1.740 +    default:
   1.741 +        return "UTF-7"; /* every version value other than 1 reports the plain name */
   1.742 +    }
   1.743 +}
   1.744 +
   1.745 +static const UConverterImpl _UTF7Impl={ /* function table for the UTF-7 converter; NOTE(review): slot meanings come from the UConverterImpl declaration, which is not in this file - confirm there */
   1.746 +    UCNV_UTF7,
   1.747 +
   1.748 +    NULL,
   1.749 +    NULL,
   1.750 +
   1.751 +    _UTF7Open,
   1.752 +    NULL,
   1.753 +    _UTF7Reset,
   1.754 +
   1.755 +    _UTF7ToUnicodeWithOffsets, /* the single toUnicode routine fills two consecutive slots */
   1.756 +    _UTF7ToUnicodeWithOffsets,
   1.757 +    _UTF7FromUnicodeWithOffsets, /* likewise the single fromUnicode routine */
   1.758 +    _UTF7FromUnicodeWithOffsets,
   1.759 +    NULL,
   1.760 +
   1.761 +    NULL,
   1.762 +    _UTF7GetName,
   1.763 +    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
   1.764 +    NULL,
   1.765 +    ucnv_getCompleteUnicodeSet
   1.766 +};
   1.767 +
   1.768 +static const UConverterStaticData _UTF7StaticData={ /* constant description of the UTF-7 converter, referenced by _UTF7Data */
   1.769 +    sizeof(UConverterStaticData),
   1.770 +    "UTF-7",
   1.771 +    0, /* TODO CCSID for UTF-7 */
   1.772 +    UCNV_IBM, UCNV_UTF7,
   1.773 +    1, 4, /* presumably minimum and maximum bytes per character - confirm against UConverterStaticData */
   1.774 +    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
   1.775 +    FALSE, FALSE,
   1.776 +    0,
   1.777 +    0,
   1.778 +    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   1.779 +};
   1.780 +
   1.781 +const UConverterSharedData _UTF7Data={ /* not static, so visible to other translation units; ties the static data to the function table */
   1.782 +    sizeof(UConverterSharedData), ~((uint32_t)0),
   1.783 +    NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
   1.784 +    0
   1.785 +};
   1.786 +
   1.787 +/* IMAP mailbox name encoding ----------------------------------------------- */
   1.788 +
   1.789 +/*
   1.790 + * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
   1.791 + * http://www.ietf.org/rfc/rfc2060.txt
   1.792 + *
   1.793 + * 5.1.3.  Mailbox International Naming Convention
   1.794 + *
   1.795 + * By convention, international mailbox names are specified using a
   1.796 + * modified version of the UTF-7 encoding described in [UTF-7].  The
   1.797 + * purpose of these modifications is to correct the following problems
   1.798 + * with UTF-7:
   1.799 + *
   1.800 + *    1) UTF-7 uses the "+" character for shifting; this conflicts with
   1.801 + *       the common use of "+" in mailbox names, in particular USENET
   1.802 + *       newsgroup names.
   1.803 + *
   1.804 + *    2) UTF-7's encoding is BASE64 which uses the "/" character; this
   1.805 + *       conflicts with the use of "/" as a popular hierarchy delimiter.
   1.806 + *
   1.807 + *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
   1.808 + *       the use of "\" as a popular hierarchy delimiter.
   1.809 + *
   1.810 + *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
   1.811 + *       the use of "~" in some servers as a home directory indicator.
   1.812 + *
   1.813 + *    5) UTF-7 permits multiple alternate forms to represent the same
   1.814 + *       string; in particular, printable US-ASCII characters can be
   1.815 + *       represented in encoded form.
   1.816 + *
   1.817 + * In modified UTF-7, printable US-ASCII characters except for "&"
   1.818 + * represent themselves; that is, characters with octet values 0x20-0x25
   1.819 + * and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
   1.820 + * octet sequence "&-".
   1.821 + *
   1.822 + * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
   1.823 + * Unicode 16-bit octets) are represented in modified BASE64, with a
   1.824 + * further modification from [UTF-7] that "," is used instead of "/".
   1.825 + * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
   1.826 + * character which can represent itself.
   1.827 + *
   1.828 + * "&" is used to shift to modified BASE64 and "-" to shift back to US-
   1.829 + * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
   1.830 + * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
   1.831 + * ").
   1.832 + *
   1.833 + * For example, here is a mailbox name which mixes English, Japanese,
   1.834 + * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
   1.835 + */
   1.836 +
   1.837 +/*
   1.838 + * Tests for US-ASCII characters belonging to character classes
   1.839 + * defined in UTF-7 (RFC 2152); the IMAP macros below accept all of 0x20..0x7e instead.
   1.840 + *
   1.841 + * Set D (directly encoded characters) consists of the following
   1.842 + * characters: the upper and lower case letters A through Z
   1.843 + * and a through z, the 10 digits 0-9, and the following nine special
   1.844 + * characters (note that "+" and "=" are omitted):
   1.845 + *     '(),-./:?
   1.846 + *
   1.847 + * Set O (optional direct characters) consists of the following
   1.848 + * characters (note that "\" and "~" are omitted):
   1.849 + *     !"#$%&*;<=>@[]^_`{|}
   1.850 + *
   1.851 + * According to the rules in RFC 2152, the byte values for the following
   1.852 + * US-ASCII characters are not used in UTF-7 and are therefore illegal:
   1.853 + * - all C0 control codes except for CR LF TAB
   1.854 + * - BACKSLASH
   1.855 + * - TILDE
   1.856 + * - DEL
   1.857 + * - all codes beyond US-ASCII, i.e. all >127
   1.858 + */
   1.859 +
   1.860 +/* uses '&' not '+' to start a base64 sequence */
   1.861 +#define AMPERSAND 0x26
   1.862 +#define COMMA 0x2c
   1.863 +#define SLASH 0x2f
   1.864 +
   1.865 +/* legal byte values: all US-ASCII printable characters 0x20..0x7e (space through tilde) */
   1.866 +#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)
   1.867 +
   1.868 +/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26; the argument is parenthesized so expressions are safe */
   1.869 +#define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)
   1.870 +
   1.871 +#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) /* digit 63 is written as ',' in place of the table entry */
   1.872 +#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c]) /* ',' decodes to 63; '/' is forced to -1 (not a digit) */
   1.873 +
   1.874 +/*
   1.875 + * converter status values:
   1.876 + *
   1.877 + * toUnicodeStatus:
   1.878 + *     24 inDirectMode (boolean)
   1.879 + * 23..16 base64Counter (-1..7)
   1.880 + * 15..0  bits (up to 14 bits incoming base64)
   1.881 + *
   1.882 + * fromUnicodeStatus:
   1.883 + *     24 inDirectMode (boolean)
   1.884 + * 23..16 base64Counter (0..2)
   1.885 + *  7..0  bits (6 bits outgoing base64)
   1.886 + *
   1.887 + * ignore bits 31..25
   1.888 + */
   1.889 +
   1.890 +static void /* converts IMAP mailbox-name (modified UTF-7) bytes to UChars, optionally recording source offsets */
   1.891 +_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
   1.892 +                          UErrorCode *pErrorCode) {
   1.893 +    UConverter *cnv;
   1.894 +    const uint8_t *source, *sourceLimit;
   1.895 +    UChar *target;
   1.896 +    const UChar *targetLimit;
   1.897 +    int32_t *offsets;
   1.898 +
   1.899 +    uint8_t *bytes; /* cnv->toUBytes: bytes of the sequence in progress, kept in case an error must be reported */
   1.900 +    uint8_t byteIndex; /* number of bytes currently stored in bytes[] */
   1.901 +
   1.902 +    int32_t length, targetCapacity;
   1.903 +
   1.904 +    /* UTF-7 state */
   1.905 +    uint16_t bits;
   1.906 +    int8_t base64Counter;
   1.907 +    UBool inDirectMode;
   1.908 +
   1.909 +    int8_t base64Value; /* result of FROM_BASE64_IMAP(): >=0 is a base64 digit, -2 is the minus sign, anything else is rejected */
   1.910 +
   1.911 +    int32_t sourceIndex, nextSourceIndex;
   1.912 +
   1.913 +    UChar c;
   1.914 +    uint8_t b;
   1.915 +
   1.916 +    /* set up the local pointers */
   1.917 +    cnv=pArgs->converter;
   1.918 +
   1.919 +    source=(const uint8_t *)pArgs->source;
   1.920 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
   1.921 +    target=pArgs->target;
   1.922 +    targetLimit=pArgs->targetLimit;
   1.923 +    offsets=pArgs->offsets;
   1.924 +    /* get the state machine state */
   1.925 +    {
   1.926 +        uint32_t status=cnv->toUnicodeStatus;
   1.927 +        inDirectMode=(UBool)((status>>24)&1); /* bit 24 */
   1.928 +        base64Counter=(int8_t)(status>>16); /* bits 23..16, reinterpreted as signed so that -1 survives the round trip */
   1.929 +        bits=(uint16_t)status; /* bits 15..0 */
   1.930 +    }
   1.931 +    bytes=cnv->toUBytes;
   1.932 +    byteIndex=cnv->toULength;
   1.933 +
   1.934 +    /* sourceIndex=-1 if the current character began in the previous buffer */
   1.935 +    sourceIndex=byteIndex==0 ? 0 : -1;
   1.936 +    nextSourceIndex=0;
   1.937 +
   1.938 +    if(inDirectMode) {
   1.939 +directMode:
   1.940 +        /*
   1.941 +         * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
   1.942 +         * with their US-ASCII byte values.
   1.943 +         * An ampersand starts Unicode (or "escape") Mode.
   1.944 +         *
   1.945 +         * In Direct Mode, only the sourceIndex is used.
   1.946 +         */
   1.947 +        byteIndex=0;
   1.948 +        length=(int32_t)(sourceLimit-source);
   1.949 +        targetCapacity=(int32_t)(targetLimit-target);
   1.950 +        if(length>targetCapacity) { /* each direct byte yields one UChar, so the loop count is capped by the room left in the target */
   1.951 +            length=targetCapacity;
   1.952 +        }
   1.953 +        while(length>0) {
   1.954 +            b=*source++;
   1.955 +            if(!isLegalIMAP(b)) {
   1.956 +                /* illegal */
   1.957 +                bytes[0]=b;
   1.958 +                byteIndex=1;
   1.959 +                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.960 +                break;
   1.961 +            } else if(b!=AMPERSAND) {
   1.962 +                /* write directly encoded character */
   1.963 +                *target++=b;
   1.964 +                if(offsets!=NULL) {
   1.965 +                    *offsets++=sourceIndex++;
   1.966 +                }
   1.967 +            } else /* AMPERSAND */ {
   1.968 +                /* switch to Unicode mode */
   1.969 +                nextSourceIndex=++sourceIndex; /* both indexes now refer to the byte after the '&' */
   1.970 +                inDirectMode=FALSE;
   1.971 +                byteIndex=0;
   1.972 +                bits=0;
   1.973 +                base64Counter=-1; /* -1 marks "just saw '&'" so that "&-" can be recognized */
   1.974 +                goto unicodeMode;
   1.975 +            }
   1.976 +            --length;
   1.977 +        }
   1.978 +        if(source<sourceLimit && target>=targetLimit) {
   1.979 +            /* target is full */
   1.980 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.981 +        }
   1.982 +    } else {
   1.983 +unicodeMode:
   1.984 +        /*
   1.985 +         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
   1.986 +         * The base64 sequence must be ended by a minus sign; any other non-base64 byte is reported as illegal below.
   1.987 +         * A terminating minus sign is consumed.
   1.988 +         * US-ASCII must not be base64-ed.
   1.989 +         *
   1.990 +         * In Unicode Mode, the sourceIndex has the index to the start of the current
   1.991 +         * base64 bytes, while nextSourceIndex is precisely parallel to source,
   1.992 +         * keeping the index to the following byte.
   1.993 +         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
   1.994 +         */
   1.995 +        while(source<sourceLimit) {
   1.996 +            if(target<targetLimit) {
   1.997 +                bytes[byteIndex++]=b=*source++;
   1.998 +                ++nextSourceIndex;
   1.999 +                if(b>0x7e) {
  1.1000 +                    /* illegal - test other illegal US-ASCII values by base64Value==-3 */
  1.1001 +                    inDirectMode=TRUE;
  1.1002 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1003 +                    break;
  1.1004 +                } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
  1.1005 +                    /* collect base64 bytes into UChars */
  1.1006 +                    switch(base64Counter) { /* counter = number of base64 digits consumed in the current group of 8 digits (3 UChars) */
  1.1007 +                    case -1: /* -1 is immediately after the & */
  1.1008 +                    case 0:
  1.1009 +                        bits=base64Value;
  1.1010 +                        base64Counter=1;
  1.1011 +                        break;
  1.1012 +                    case 1:
  1.1013 +                    case 3:
  1.1014 +                    case 4:
  1.1015 +                    case 6:
  1.1016 +                        bits=(uint16_t)((bits<<6)|base64Value);
  1.1017 +                        ++base64Counter;
  1.1018 +                        break;
  1.1019 +                    case 2:
  1.1020 +                        c=(UChar)((bits<<4)|(base64Value>>2)); /* 12 collected bits plus the top 4 bits of this digit */
  1.1021 +                        if(isLegalIMAP(c)) { /* a character that could have been written directly must not arrive base64-encoded */
  1.1022 +                            /* illegal */
  1.1023 +                            inDirectMode=TRUE;
  1.1024 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1025 +                            goto endloop;
  1.1026 +                        }
  1.1027 +                        *target++=c;
  1.1028 +                        if(offsets!=NULL) {
  1.1029 +                            *offsets++=sourceIndex;
  1.1030 +                            sourceIndex=nextSourceIndex-1;
  1.1031 +                        }
  1.1032 +                        bytes[0]=b; /* keep this byte in case an error occurs */
  1.1033 +                        byteIndex=1;
  1.1034 +                        bits=(uint16_t)(base64Value&3); /* the low 2 bits of this digit start the next UChar */
  1.1035 +                        base64Counter=3;
  1.1036 +                        break;
  1.1037 +                    case 5:
  1.1038 +                        c=(UChar)((bits<<2)|(base64Value>>4)); /* 14 collected bits plus the top 2 bits of this digit */
  1.1039 +                        if(isLegalIMAP(c)) {
  1.1040 +                            /* illegal */
  1.1041 +                            inDirectMode=TRUE;
  1.1042 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1043 +                            goto endloop;
  1.1044 +                        }
  1.1045 +                        *target++=c;
  1.1046 +                        if(offsets!=NULL) {
  1.1047 +                            *offsets++=sourceIndex;
  1.1048 +                            sourceIndex=nextSourceIndex-1;
  1.1049 +                        }
  1.1050 +                        bytes[0]=b; /* keep this byte in case an error occurs */
  1.1051 +                        byteIndex=1;
  1.1052 +                        bits=(uint16_t)(base64Value&15); /* the low 4 bits of this digit start the next UChar */
  1.1053 +                        base64Counter=6;
  1.1054 +                        break;
  1.1055 +                    case 7:
  1.1056 +                        c=(UChar)((bits<<6)|base64Value); /* 10 collected bits plus all 6 bits of this digit; nothing carries over */
  1.1057 +                        if(isLegalIMAP(c)) {
  1.1058 +                            /* illegal */
  1.1059 +                            inDirectMode=TRUE;
  1.1060 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1061 +                            goto endloop;
  1.1062 +                        }
  1.1063 +                        *target++=c;
  1.1064 +                        if(offsets!=NULL) {
  1.1065 +                            *offsets++=sourceIndex;
  1.1066 +                            sourceIndex=nextSourceIndex;
  1.1067 +                        }
  1.1068 +                        byteIndex=0;
  1.1069 +                        bits=0;
  1.1070 +                        base64Counter=0;
  1.1071 +                        break;
  1.1072 +                    default:
  1.1073 +                        /* will never occur */
  1.1074 +                        break;
  1.1075 +                    }
  1.1076 +                } else if(base64Value==-2) {
  1.1077 +                    /* minus sign terminates the base64 sequence */
  1.1078 +                    inDirectMode=TRUE;
  1.1079 +                    if(base64Counter==-1) {
  1.1080 +                        /* &- i.e. a minus immediately following an ampersand */
  1.1081 +                        *target++=AMPERSAND;
  1.1082 +                        if(offsets!=NULL) {
  1.1083 +                            *offsets++=sourceIndex-1;
  1.1084 +                        }
  1.1085 +                    } else {
  1.1086 +                        /* absorb the minus and leave the Unicode Mode */
  1.1087 +                        if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
  1.1088 +                            /* bits are illegally left over, a UChar is incomplete */
  1.1089 +                            /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
  1.1090 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1091 +                            break;
  1.1092 +                        }
  1.1093 +                    }
  1.1094 +                    sourceIndex=nextSourceIndex;
  1.1095 +                    goto directMode;
  1.1096 +                } else {
  1.1097 +                    if(base64Counter==-1) {
  1.1098 +                        /* illegal: & immediately followed by something other than base64 or minus sign */
  1.1099 +                        /* include the ampersand in the reported sequence */
  1.1100 +                        --sourceIndex;
  1.1101 +                        bytes[0]=AMPERSAND;
  1.1102 +                        bytes[1]=b;
  1.1103 +                        byteIndex=2;
  1.1104 +                    }
  1.1105 +                    /* base64Value==-1 for characters that are illegal only in Unicode mode */
  1.1106 +                    /* base64Value==-3 for illegal characters */
  1.1107 +                    /* illegal */
  1.1108 +                    inDirectMode=TRUE;
  1.1109 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1110 +                    break;
  1.1111 +                }
  1.1112 +            } else {
  1.1113 +                /* target is full */
  1.1114 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1115 +                break;
  1.1116 +            }
  1.1117 +        }
  1.1118 +    }
  1.1119 +endloop:
  1.1120 +
  1.1121 +    /*
  1.1122 +     * the end of the input stream and detection of truncated input
  1.1123 +     * are handled by the framework, but here we must check if we are in Unicode
  1.1124 +     * mode and byteIndex==0 because we must end in direct mode
  1.1125 +     *
  1.1126 +     * conditions:
  1.1127 +     *   successful
  1.1128 +     *   in Unicode mode and byteIndex==0
  1.1129 +     *   end of input and no truncated input
  1.1130 +     */
  1.1131 +    if( U_SUCCESS(*pErrorCode) &&
  1.1132 +        !inDirectMode && byteIndex==0 &&
  1.1133 +        pArgs->flush && source>=sourceLimit
  1.1134 +    ) {
  1.1135 +        if(base64Counter==-1) {
  1.1136 +            /* & at the very end of the input */
  1.1137 +            /* make the ampersand the reported sequence */
  1.1138 +            bytes[0]=AMPERSAND;
  1.1139 +            byteIndex=1;
  1.1140 +        }
  1.1141 +        /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
  1.1142 +
  1.1143 +        inDirectMode=TRUE; /* avoid looping */
  1.1144 +        *pErrorCode=U_TRUNCATED_CHAR_FOUND;
  1.1145 +    }
  1.1146 +
  1.1147 +    /* set the converter state back into UConverter */
  1.1148 +    cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; /* the uint8_t cast keeps a negative counter from spilling into bit 24 and above */
  1.1149 +    cnv->toULength=byteIndex;
  1.1150 +
  1.1151 +    /* write back the updated pointers */
  1.1152 +    pArgs->source=(const char *)source;
  1.1153 +    pArgs->target=target;
  1.1154 +    pArgs->offsets=offsets;
  1.1155 +    return;
  1.1156 +}
  1.1157 +
  1.1158 +static void
  1.1159 +_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  1.1160 +                            UErrorCode *pErrorCode) {
  1.1161 +    UConverter *cnv;
  1.1162 +    const UChar *source, *sourceLimit;
  1.1163 +    uint8_t *target, *targetLimit;
  1.1164 +    int32_t *offsets;
  1.1165 +
  1.1166 +    int32_t length, targetCapacity, sourceIndex;
  1.1167 +    UChar c;
  1.1168 +    uint8_t b;
  1.1169 +
  1.1170 +    /* UTF-7 state */
  1.1171 +    uint8_t bits;
  1.1172 +    int8_t base64Counter;
  1.1173 +    UBool inDirectMode;
  1.1174 +
  1.1175 +    /* set up the local pointers */
  1.1176 +    cnv=pArgs->converter;
  1.1177 +
  1.1178 +    /* set up the local pointers */
  1.1179 +    source=pArgs->source;
  1.1180 +    sourceLimit=pArgs->sourceLimit;
  1.1181 +    target=(uint8_t *)pArgs->target;
  1.1182 +    targetLimit=(uint8_t *)pArgs->targetLimit;
  1.1183 +    offsets=pArgs->offsets;
  1.1184 +
  1.1185 +    /* get the state machine state */
  1.1186 +    {
  1.1187 +        uint32_t status=cnv->fromUnicodeStatus;
  1.1188 +        inDirectMode=(UBool)((status>>24)&1);
  1.1189 +        base64Counter=(int8_t)(status>>16);
  1.1190 +        bits=(uint8_t)status;
  1.1191 +    }
  1.1192 +
  1.1193 +    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
  1.1194 +    sourceIndex=0;
  1.1195 +
  1.1196 +    if(inDirectMode) {
  1.1197 +directMode:
  1.1198 +        length=(int32_t)(sourceLimit-source);
  1.1199 +        targetCapacity=(int32_t)(targetLimit-target);
  1.1200 +        if(length>targetCapacity) {
  1.1201 +            length=targetCapacity;
  1.1202 +        }
  1.1203 +        while(length>0) {
  1.1204 +            c=*source++;
  1.1205 +            /* encode 0x20..0x7e except '&' directly */
  1.1206 +            if(inSetDIMAP(c)) {
  1.1207 +                /* encode directly */
  1.1208 +                *target++=(uint8_t)c;
  1.1209 +                if(offsets!=NULL) {
  1.1210 +                    *offsets++=sourceIndex++;
  1.1211 +                }
  1.1212 +            } else if(c==AMPERSAND) {
  1.1213 +                /* output &- for & */
  1.1214 +                *target++=AMPERSAND;
  1.1215 +                if(target<targetLimit) {
  1.1216 +                    *target++=MINUS;
  1.1217 +                    if(offsets!=NULL) {
  1.1218 +                        *offsets++=sourceIndex;
  1.1219 +                        *offsets++=sourceIndex++;
  1.1220 +                    }
  1.1221 +                    /* realign length and targetCapacity */
  1.1222 +                    goto directMode;
  1.1223 +                } else {
  1.1224 +                    if(offsets!=NULL) {
  1.1225 +                        *offsets++=sourceIndex++;
  1.1226 +                    }
  1.1227 +                    cnv->charErrorBuffer[0]=MINUS;
  1.1228 +                    cnv->charErrorBufferLength=1;
  1.1229 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1230 +                    break;
  1.1231 +                }
  1.1232 +            } else {
  1.1233 +                /* un-read this character and switch to Unicode Mode */
  1.1234 +                --source;
  1.1235 +                *target++=AMPERSAND;
  1.1236 +                if(offsets!=NULL) {
  1.1237 +                    *offsets++=sourceIndex;
  1.1238 +                }
  1.1239 +                inDirectMode=FALSE;
  1.1240 +                base64Counter=0;
  1.1241 +                goto unicodeMode;
  1.1242 +            }
  1.1243 +            --length;
  1.1244 +        }
  1.1245 +        if(source<sourceLimit && target>=targetLimit) {
  1.1246 +            /* target is full */
  1.1247 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1248 +        }
  1.1249 +    } else {
  1.1250 +unicodeMode:
  1.1251 +        while(source<sourceLimit) {
  1.1252 +            if(target<targetLimit) {
  1.1253 +                c=*source++;
  1.1254 +                if(isLegalIMAP(c)) {
  1.1255 +                    /* encode directly */
  1.1256 +                    inDirectMode=TRUE;
  1.1257 +
  1.1258 +                    /* trick: back out this character to make this easier */
  1.1259 +                    --source;
  1.1260 +
  1.1261 +                    /* terminate the base64 sequence */
  1.1262 +                    if(base64Counter!=0) {
  1.1263 +                        /* write remaining bits for the previous character */
  1.1264 +                        *target++=TO_BASE64_IMAP(bits);
  1.1265 +                        if(offsets!=NULL) {
  1.1266 +                            *offsets++=sourceIndex-1;
  1.1267 +                        }
  1.1268 +                    }
  1.1269 +                    /* need to terminate with a minus */
  1.1270 +                    if(target<targetLimit) {
  1.1271 +                        *target++=MINUS;
  1.1272 +                        if(offsets!=NULL) {
  1.1273 +                            *offsets++=sourceIndex-1;
  1.1274 +                        }
  1.1275 +                    } else {
  1.1276 +                        cnv->charErrorBuffer[0]=MINUS;
  1.1277 +                        cnv->charErrorBufferLength=1;
  1.1278 +                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1279 +                        break;
  1.1280 +                    }
  1.1281 +                    goto directMode;
  1.1282 +                } else {
  1.1283 +                    /*
  1.1284 +                     * base64 this character:
  1.1285 +                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
  1.1286 +                     * and the bits of this character, each implicitly in UTF-16BE.
  1.1287 +                     *
  1.1288 +                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
  1.1289 +                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
  1.1290 +                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
  1.1291 +                     */
  1.1292 +                    switch(base64Counter) {
  1.1293 +                    case 0:
  1.1294 +                        b=(uint8_t)(c>>10);
  1.1295 +                        *target++=TO_BASE64_IMAP(b);
  1.1296 +                        if(target<targetLimit) {
  1.1297 +                            b=(uint8_t)((c>>4)&0x3f);
  1.1298 +                            *target++=TO_BASE64_IMAP(b);
  1.1299 +                            if(offsets!=NULL) {
  1.1300 +                                *offsets++=sourceIndex;
  1.1301 +                                *offsets++=sourceIndex++;
  1.1302 +                            }
  1.1303 +                        } else {
  1.1304 +                            if(offsets!=NULL) {
  1.1305 +                                *offsets++=sourceIndex++;
  1.1306 +                            }
  1.1307 +                            b=(uint8_t)((c>>4)&0x3f);
  1.1308 +                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
  1.1309 +                            cnv->charErrorBufferLength=1;
  1.1310 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1311 +                        }
  1.1312 +                        bits=(uint8_t)((c&15)<<2);
  1.1313 +                        base64Counter=1;
  1.1314 +                        break;
  1.1315 +                    case 1:
  1.1316 +                        b=(uint8_t)(bits|(c>>14));
  1.1317 +                        *target++=TO_BASE64_IMAP(b);
  1.1318 +                        if(target<targetLimit) {
  1.1319 +                            b=(uint8_t)((c>>8)&0x3f);
  1.1320 +                            *target++=TO_BASE64_IMAP(b);
  1.1321 +                            if(target<targetLimit) {
  1.1322 +                                b=(uint8_t)((c>>2)&0x3f);
  1.1323 +                                *target++=TO_BASE64_IMAP(b);
  1.1324 +                                if(offsets!=NULL) {
  1.1325 +                                    *offsets++=sourceIndex;
  1.1326 +                                    *offsets++=sourceIndex;
  1.1327 +                                    *offsets++=sourceIndex++;
  1.1328 +                                }
  1.1329 +                            } else {
  1.1330 +                                if(offsets!=NULL) {
  1.1331 +                                    *offsets++=sourceIndex;
  1.1332 +                                    *offsets++=sourceIndex++;
  1.1333 +                                }
  1.1334 +                                b=(uint8_t)((c>>2)&0x3f);
  1.1335 +                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
  1.1336 +                                cnv->charErrorBufferLength=1;
  1.1337 +                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1338 +                            }
  1.1339 +                        } else {
  1.1340 +                            if(offsets!=NULL) {
  1.1341 +                                *offsets++=sourceIndex++;
  1.1342 +                            }
  1.1343 +                            b=(uint8_t)((c>>8)&0x3f);
  1.1344 +                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
  1.1345 +                            b=(uint8_t)((c>>2)&0x3f);
  1.1346 +                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
  1.1347 +                            cnv->charErrorBufferLength=2;
  1.1348 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1349 +                        }
  1.1350 +                        bits=(uint8_t)((c&3)<<4);
  1.1351 +                        base64Counter=2;
  1.1352 +                        break;
  1.1353 +                    case 2:
  1.1354 +                        b=(uint8_t)(bits|(c>>12));
  1.1355 +                        *target++=TO_BASE64_IMAP(b);
  1.1356 +                        if(target<targetLimit) {
  1.1357 +                            b=(uint8_t)((c>>6)&0x3f);
  1.1358 +                            *target++=TO_BASE64_IMAP(b);
  1.1359 +                            if(target<targetLimit) {
  1.1360 +                                b=(uint8_t)(c&0x3f);
  1.1361 +                                *target++=TO_BASE64_IMAP(b);
  1.1362 +                                if(offsets!=NULL) {
  1.1363 +                                    *offsets++=sourceIndex;
  1.1364 +                                    *offsets++=sourceIndex;
  1.1365 +                                    *offsets++=sourceIndex++;
  1.1366 +                                }
  1.1367 +                            } else {
  1.1368 +                                if(offsets!=NULL) {
  1.1369 +                                    *offsets++=sourceIndex;
  1.1370 +                                    *offsets++=sourceIndex++;
  1.1371 +                                }
  1.1372 +                                b=(uint8_t)(c&0x3f);
  1.1373 +                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
  1.1374 +                                cnv->charErrorBufferLength=1;
  1.1375 +                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1376 +                            }
  1.1377 +                        } else {
  1.1378 +                            if(offsets!=NULL) {
  1.1379 +                                *offsets++=sourceIndex++;
  1.1380 +                            }
  1.1381 +                            b=(uint8_t)((c>>6)&0x3f);
  1.1382 +                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
  1.1383 +                            b=(uint8_t)(c&0x3f);
  1.1384 +                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
  1.1385 +                            cnv->charErrorBufferLength=2;
  1.1386 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1387 +                        }
  1.1388 +                        bits=0;
  1.1389 +                        base64Counter=0;
  1.1390 +                        break;
  1.1391 +                    default:
  1.1392 +                        /* will never occur */
  1.1393 +                        break;
  1.1394 +                    }
  1.1395 +                }
  1.1396 +            } else {
  1.1397 +                /* target is full */
  1.1398 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1399 +                break;
  1.1400 +            }
  1.1401 +        }
  1.1402 +    }
  1.1403 +
  1.1404 +    if(pArgs->flush && source>=sourceLimit) {
  1.1405 +        /* flush remaining bits to the target */
  1.1406 +        if(!inDirectMode) {
  1.1407 +            if(base64Counter!=0) {
  1.1408 +                if(target<targetLimit) {
  1.1409 +                    *target++=TO_BASE64_IMAP(bits);
  1.1410 +                    if(offsets!=NULL) {
  1.1411 +                        *offsets++=sourceIndex-1;
  1.1412 +                    }
  1.1413 +                } else {
  1.1414 +                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
  1.1415 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1416 +                }
  1.1417 +            }
  1.1418 +            /* need to terminate with a minus */
  1.1419 +            if(target<targetLimit) {
  1.1420 +                *target++=MINUS;
  1.1421 +                if(offsets!=NULL) {
  1.1422 +                    *offsets++=sourceIndex-1;
  1.1423 +                }
  1.1424 +            } else {
  1.1425 +                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
  1.1426 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1427 +            }
  1.1428 +        }
  1.1429 +        /* reset the state for the next conversion */
  1.1430 +        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
  1.1431 +    } else {
  1.1432 +        /* set the converter state back into UConverter */
  1.1433 +        cnv->fromUnicodeStatus=
  1.1434 +            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
  1.1435 +            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
  1.1436 +    }
  1.1437 +
  1.1438 +    /* write back the updated pointers */
  1.1439 +    pArgs->source=source;
  1.1440 +    pArgs->target=(char *)target;
  1.1441 +    pArgs->offsets=offsets;
  1.1442 +    return;
  1.1443 +}
  1.1444 +
  1.1445 +static const UConverterImpl _IMAPImpl={ /* function table for the IMAP-mailbox-name converter; all initializers are positional */
  1.1446 +    UCNV_IMAP_MAILBOX, /* same constant that _IMAPStaticData uses */
  1.1447 +
  1.1448 +    NULL, /* NOTE(review): this and the next slot are unused; presumably load/unload hooks -- confirm against the UConverterImpl declaration */
  1.1449 +    NULL,
  1.1450 +
  1.1451 +    _UTF7Open, /* the _UTF7* open and reset functions are used here; the table has no IMAP-specific ones */
  1.1452 +    NULL,
  1.1453 +    _UTF7Reset,
  1.1454 +
  1.1455 +    _IMAPToUnicodeWithOffsets, /* one function fills two consecutive slots (NOTE(review): presumably the without-offsets and with-offsets entries -- confirm) */
  1.1456 +    _IMAPToUnicodeWithOffsets,
  1.1457 +    _IMAPFromUnicodeWithOffsets, /* likewise, one function fills both of these slots */
  1.1458 +    _IMAPFromUnicodeWithOffsets,
  1.1459 +    NULL,
  1.1460 +
  1.1461 +    NULL,
  1.1462 +    NULL,
  1.1463 +    NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
  1.1464 +    NULL,
  1.1465 +    ucnv_getCompleteUnicodeSet /* NOTE(review): presumably the Unicode-set query slot -- confirm against the UConverterImpl declaration */
  1.1466 +};
  1.1467 +
  1.1468 +static const UConverterStaticData _IMAPStaticData={ /* constant descriptive data for the IMAP-mailbox-name converter; all initializers are positional */
  1.1469 +    sizeof(UConverterStaticData), /* size of this structure type */
  1.1470 +    "IMAP-mailbox-name", /* name string for this converter */
  1.1471 +    0, /* TODO CCSID for IMAP-mailbox-name */
  1.1472 +    UCNV_IBM, UCNV_IMAP_MAILBOX, /* NOTE(review): presumably platform and converter type -- confirm against UConverterStaticData */
  1.1473 +    1, 4, /* NOTE(review): presumably minimum and maximum bytes per character -- confirm */
  1.1474 +    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used (0x3f is ASCII '?'; NOTE(review): the trailing 1 is presumably its length -- confirm) */
  1.1475 +    FALSE, FALSE,
  1.1476 +    0,
  1.1477 +    0,
  1.1478 +    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
  1.1479 +};
  1.1480 +
  1.1481 +const UConverterSharedData _IMAPData={ /* not static, unlike the two tables above; ties _IMAPStaticData and _IMAPImpl together */
  1.1482 +    sizeof(UConverterSharedData), ~((uint32_t)0), /* structure size, then an all-ones 32-bit value (NOTE(review): presumably a reference counter preset so it never drops to zero -- confirm) */
  1.1483 +    NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl, /* pointers to the static data and the function table defined above */
  1.1484 +    0
  1.1485 +};
  1.1486 +
  1.1487 +#endif
The Tor Browser / file diff

diff: intl/icu/source/common/ucnv_u7.c

intl/icu/source/common/ucnv_u7.c