intl/icu/source/common/ucnv_u7.c

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2002-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * file name: ucnv_u7.c
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2002jul01
michael@0 12 * created by: Markus W. Scherer
michael@0 13 *
michael@0 14 * UTF-7 converter implementation. Used to be in ucnv_utf.c.
michael@0 15 */
michael@0 16
michael@0 17 #include "unicode/utypes.h"
michael@0 18
michael@0 19 #if !UCONFIG_NO_CONVERSION
michael@0 20
michael@0 21 #include "unicode/ucnv.h"
michael@0 22 #include "ucnv_bld.h"
michael@0 23 #include "ucnv_cnv.h"
michael@0 24 #include "uassert.h"
michael@0 25
michael@0 26 /* UTF-7 -------------------------------------------------------------------- */
michael@0 27
michael@0 28 /*
michael@0 29 * UTF-7 is a stateful encoding of Unicode.
michael@0 30 * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
michael@0 31 * It was intended for use in Internet email systems, using in its bytewise
michael@0 32 * encoding only a subset of 7-bit US-ASCII.
michael@0 33 * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
michael@0 34 * occasionally used.
michael@0 35 *
michael@0 36 * For converting Unicode to UTF-7, the RFC allows encoding some US-ASCII
michael@0 37 * characters directly or in base64. In particular, the characters in set O
michael@0 38 * as defined in the RFC (see below) may be encoded directly but are not
michael@0 39 * allowed in, e.g., email headers.
michael@0 40 * By default, the ICU UTF-7 converter encodes set O directly.
michael@0 41 * By choosing the option "version=1", set O will be escaped instead.
michael@0 42 * For example:
michael@0 43 * utf7Converter=ucnv_open("UTF-7,version=1");
michael@0 44 *
michael@0 45 * For details about email headers see RFC 2047.
michael@0 46 */
michael@0 47
michael@0 48 /*
michael@0 49 * Tests for US-ASCII characters belonging to character classes
michael@0 50 * defined in UTF-7.
michael@0 51 *
michael@0 52 * Set D (directly encoded characters) consists of the following
michael@0 53 * characters: the upper and lower case letters A through Z
michael@0 54 * and a through z, the 10 digits 0-9, and the following nine special
michael@0 55 * characters (note that "+" and "=" are omitted):
michael@0 56 * '(),-./:?
michael@0 57 *
michael@0 58 * Set O (optional direct characters) consists of the following
michael@0 59 * characters (note that "\" and "~" are omitted):
michael@0 60 * !"#$%&*;<=>@[]^_`{|}
michael@0 61 *
michael@0 62 * According to the rules in RFC 2152, the byte values for the following
michael@0 63 * US-ASCII characters are not used in UTF-7 and are therefore illegal:
michael@0 64 * - all C0 control codes except for CR LF TAB
michael@0 65 * - BACKSLASH
michael@0 66 * - TILDE
michael@0 67 * - DEL
michael@0 68 * - all codes beyond US-ASCII, i.e. all >127
michael@0 69 */
/*
 * inSetD(c): true when c is one of the RFC 2152 "set D" characters, i.e. a
 * letter, a digit, or one of ' ( ) , - . / : ?
 *
 * Each range test subtracts the first code of the range and compares the
 * difference as uint8_t, so codes below the range wrap around to a large
 * value and fail the "<" comparison.
 * The argument is expanded several times; do not pass an expression with
 * side effects.
 * NOTE(review): because of the uint8_t casts, a value above 255 would be
 * truncated before the range tests - presumably only byte values are passed.
 */
michael@0 70 #define inSetD(c) \
michael@0 71 ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \
michael@0 72 (uint8_t)((c)-48)<10 || /* digits */ \
michael@0 73 (uint8_t)((c)-39)<3 || /* '() */ \
michael@0 74 (uint8_t)((c)-44)<4 || /* ,-./ */ \
michael@0 75 (c)==58 || (c)==63 /* :? */ \
michael@0 76 )
michael@0 77
/*
 * inSetO(c): true when c is one of the RFC 2152 "set O" (optional direct)
 * characters: ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
 *
 * Uses the same subtract-and-compare-as-uint8_t range test as inSetD.
 * The argument is expanded several times; do not pass an expression with
 * side effects.
 */
michael@0 78 #define inSetO(c) \
michael@0 79 ((uint8_t)((c)-33)<6 || /* !"#$%& */ \
michael@0 80 (uint8_t)((c)-59)<4 || /* ;<=> */ \
michael@0 81 (uint8_t)((c)-93)<4 || /* ]^_` */ \
michael@0 82 (uint8_t)((c)-123)<3 || /* {|} */ \
michael@0 83 (c)==42 || (c)==64 || (c)==91 /* *@[ */ \
michael@0 84 )
michael@0 85
/* whitespace tests on US-ASCII codes: CR=13, LF=10, TAB=9, SP=32 */
michael@0 86 #define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9)
michael@0 87 #define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9)
michael@0 88
/* US-ASCII codes of the characters that get special treatment: '+' '-' '\' '~' */
michael@0 89 #define PLUS 43
michael@0 90 #define MINUS 45
michael@0 91 #define BACKSLASH 92
michael@0 92 #define TILDE 126
michael@0 93
michael@0 94 /*
 * legal byte values: US-ASCII 32..125 (space up to, but not including, tilde)
 * except BACKSLASH, plus CR LF TAB.
 * The range test compares (c)-32 as uint8_t, so codes below 32 wrap around
 * and fail it.
 */
michael@0 95 #define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
michael@0 96
michael@0 97 /*
 * encode directly sets D and O and CR LF SP TAB
 *
 * Lookup table indexed by a US-ASCII code (0..127), 16 entries per row.
 * An entry is 1 when the character is written as itself and 0 when it has to
 * be escaped. The 0 entries are the C0 controls other than TAB/LF/CR,
 * '+' (0x2b), '\' (0x5c), '~' (0x7e) and DEL (0x7f).
 */
michael@0 98 static const UBool encodeDirectlyMaximum[128]={
michael@0 99 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
michael@0 100 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
michael@0 101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
michael@0 102
michael@0 103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
michael@0 104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
michael@0 105
michael@0 106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
michael@0 107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
michael@0 108
michael@0 109 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
michael@0 110 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
michael@0 111 };
michael@0 112
michael@0 113 /*
 * encode directly set D and CR LF SP TAB but not set O
 *
 * Same layout as encodeDirectlyMaximum (index = US-ASCII code, 16 entries per
 * row), but only TAB/LF/CR, space, letters, digits and ' ( ) , - . / : ? have
 * a 1 entry; every other code, including all of set O, is 0.
 */
michael@0 114 static const UBool encodeDirectlyRestricted[128]={
michael@0 115 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
michael@0 116 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
michael@0 117 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
michael@0 118
michael@0 119 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
michael@0 120 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
michael@0 121
michael@0 122 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
michael@0 123 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
michael@0 124
michael@0 125 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
michael@0 126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
michael@0 127 };
michael@0 128
/*
 * Maps a 6-bit value (0..63) to the US-ASCII code of its base64 digit:
 * 0..25 -> A-Z, 26..51 -> a-z, 52..61 -> 0-9, 62 -> '+', 63 -> '/'.
 * The table has exactly 64 entries, so an index must be below 64.
 */
michael@0 129 static const uint8_t
michael@0 130 toBase64[64]={
michael@0 131 /* A-Z */
michael@0 132 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
michael@0 133 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
michael@0 134 /* a-z */
michael@0 135 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
michael@0 136 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
michael@0 137 /* 0-9 */
michael@0 138 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
michael@0 139 /* +/ */
michael@0 140 43, 47
michael@0 141 };
michael@0 142
/*
 * Classifies a US-ASCII code (index 0..127), 16 entries per row:
 *   0..63  the value of a base64 digit (A-Z, a-z, 0-9, '+', '/')
 *   -1     a legal character that is not a base64 digit
 *   -2     the minus sign
 *   -3     an illegal byte (C0 controls other than TAB/LF/CR, '\', '~', DEL)
 * The table has 128 entries, so the index must be checked to be below 128
 * before the lookup.
 */
michael@0 143 static const int8_t
michael@0 144 fromBase64[128]={
michael@0 145 /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
michael@0 146 -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
michael@0 147 -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
michael@0 148
michael@0 149 /* general punctuation with + and / and a special value (-2) for - */
michael@0 150 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
michael@0 151 /* digits */
michael@0 152 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
michael@0 153
michael@0 154 /* A-Z */
michael@0 155 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
michael@0 156 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
michael@0 157
michael@0 158 /* a-z */
michael@0 159 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
michael@0 160 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
michael@0 161 };
michael@0 162
michael@0 163 /*
michael@0 164 * converter status values:
michael@0 165 *
michael@0 166 * toUnicodeStatus:
michael@0 167 * 24 inDirectMode (boolean)
michael@0 168 * 23..16 base64Counter (-1..7)
michael@0 169 * 15..0 bits (up to 14 bits incoming base64)
michael@0 170 *
michael@0 171 * fromUnicodeStatus:
michael@0 172 * 31..28 version (0: set O direct 1: set O escaped)
michael@0 173 * 24 inDirectMode (boolean)
michael@0 174 * 23..16 base64Counter (0..2)
michael@0 175 * 7..0 bits (6 bits outgoing base64)
michael@0 176 *
michael@0 177 */
michael@0 178
/*
 * Reset the UTF-7 state stored in the converter.
 *
 * cnv:    converter whose toUnicodeStatus/toULength and/or fromUnicodeStatus
 *         are rewritten.
 * choice: selects the direction(s) to reset; see the two tests below.
 *
 * NOTE(review): the "<=" test presumably relies on UCNV_RESET_BOTH being
 * ordered before UCNV_RESET_TO_UNICODE in UConverterResetChoice, so that
 * UCNV_RESET_BOTH passes both tests - confirm against the enum declaration.
 */
michael@0 179 static void
michael@0 180 _UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
michael@0 181 if(choice<=UCNV_RESET_TO_UNICODE) {
michael@0 182 /* reset toUnicode */
michael@0 183 cnv->toUnicodeStatus=0x1000000; /* bit 24 set: inDirectMode=TRUE; base64Counter and bits are 0 */
michael@0 184 cnv->toULength=0;
michael@0 185 }
michael@0 186 if(choice!=UCNV_RESET_TO_UNICODE) {
michael@0 187 /* reset fromUnicode */
michael@0 188 cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version (bits 31..28), inDirectMode=TRUE */
michael@0 189 }
michael@0 190 }
michael@0 191
/*
 * Open hook for the UTF-7 converter.
 *
 * Accepts converter versions 0 and 1 only: the version number is stored in
 * the top four bits of cnv->fromUnicodeStatus and both directions are reset.
 * For any other version *pErrorCode is set to U_ILLEGAL_ARGUMENT_ERROR and
 * the converter state is left untouched.
 *
 * pArgs is not used by this function.
 */
michael@0 192 static void
michael@0 193 _UTF7Open(UConverter *cnv,
michael@0 194 UConverterLoadArgs *pArgs,
michael@0 195 UErrorCode *pErrorCode) {
michael@0 196 if(UCNV_GET_VERSION(cnv)<=1) {
michael@0 197 /* TODO(markus): Should just use cnv->options rather than copying the version number. */
michael@0 198 cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
michael@0 199 _UTF7Reset(cnv, UCNV_RESET_BOTH);
michael@0 200 } else {
michael@0 201 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 202 }
michael@0 203 }
michael@0 204
/*
 * Decode UTF-7 bytes from pArgs->source into UTF-16 code units at
 * pArgs->target, optionally recording source offsets.
 *
 * The decoder state (inDirectMode, base64Counter, bits) is unpacked from
 * cnv->toUnicodeStatus on entry and packed back before returning; the bytes
 * of a sequence in progress are kept in cnv->toUBytes / cnv->toULength. This
 * lets a sequence continue across calls.
 * The body consists of two loops, labeled directMode and unicodeMode, that
 * hand over to each other with goto.
 *
 * pArgs:      reads converter, source, sourceLimit, target, targetLimit,
 *             offsets (may be NULL) and flush; source, target and offsets
 *             are written back on return.
 * pErrorCode: set to U_ILLEGAL_CHAR_FOUND for an illegal byte or a base64
 *             run that ends with an incomplete UChar, and to
 *             U_BUFFER_OVERFLOW_ERROR when the target is full while input
 *             remains.
 */
michael@0 205 static void
michael@0 206 _UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
michael@0 207 UErrorCode *pErrorCode) {
michael@0 208 UConverter *cnv;
michael@0 209 const uint8_t *source, *sourceLimit;
michael@0 210 UChar *target;
michael@0 211 const UChar *targetLimit;
michael@0 212 int32_t *offsets;
michael@0 213
michael@0 214 uint8_t *bytes;
michael@0 215 uint8_t byteIndex;
michael@0 216
michael@0 217 int32_t length, targetCapacity;
michael@0 218
michael@0 219 /* UTF-7 state */
michael@0 220 uint16_t bits;
michael@0 221 int8_t base64Counter;
michael@0 222 UBool inDirectMode;
michael@0 223
michael@0 224 int8_t base64Value;
michael@0 225
michael@0 226 int32_t sourceIndex, nextSourceIndex;
michael@0 227
michael@0 228 uint8_t b;
michael@0 229 /* set up the local pointers */
michael@0 230 cnv=pArgs->converter;
michael@0 231
michael@0 232 source=(const uint8_t *)pArgs->source;
michael@0 233 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
michael@0 234 target=pArgs->target;
michael@0 235 targetLimit=pArgs->targetLimit;
michael@0 236 offsets=pArgs->offsets;
michael@0 237 /* get the state machine state */
michael@0 238 {
michael@0 239 uint32_t status=cnv->toUnicodeStatus;
michael@0 240 inDirectMode=(UBool)((status>>24)&1);
michael@0 241 base64Counter=(int8_t)(status>>16);
michael@0 242 bits=(uint16_t)status;
michael@0 243 }
michael@0 244 bytes=cnv->toUBytes;
michael@0 245 byteIndex=cnv->toULength;
michael@0 246
michael@0 247 /* sourceIndex=-1 if the current character began in the previous buffer */
michael@0 248 sourceIndex=byteIndex==0 ? 0 : -1;
michael@0 249 nextSourceIndex=0;
michael@0 250
michael@0 251 if(inDirectMode) {
michael@0 252 directMode:
michael@0 253 /*
michael@0 254 * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
michael@0 255 * with their US-ASCII byte values.
michael@0 256 * Backslash and Tilde and most control characters are not allowed in UTF-7.
michael@0 257 * A plus sign starts Unicode (or "escape") Mode.
michael@0 258 *
michael@0 259 * In Direct Mode, only the sourceIndex is used.
michael@0 260 */
michael@0 261 byteIndex=0;
/* each input byte yields at most one UChar here, so the loop count is min(input left, output room) */
michael@0 262 length=(int32_t)(sourceLimit-source);
michael@0 263 targetCapacity=(int32_t)(targetLimit-target);
michael@0 264 if(length>targetCapacity) {
michael@0 265 length=targetCapacity;
michael@0 266 }
michael@0 267 while(length>0) {
michael@0 268 b=*source++;
michael@0 269 if(!isLegalUTF7(b)) {
michael@0 270 /* illegal */
michael@0 271 bytes[0]=b;
michael@0 272 byteIndex=1;
michael@0 273 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 274 break;
michael@0 275 } else if(b!=PLUS) {
michael@0 276 /* write directly encoded character */
michael@0 277 *target++=b;
michael@0 278 if(offsets!=NULL) {
michael@0 279 *offsets++=sourceIndex++;
michael@0 280 }
michael@0 281 } else /* PLUS */ {
michael@0 282 /* switch to Unicode mode */
michael@0 283 nextSourceIndex=++sourceIndex;
michael@0 284 inDirectMode=FALSE;
michael@0 285 byteIndex=0;
michael@0 286 bits=0;
michael@0 287 base64Counter=-1;
michael@0 288 goto unicodeMode;
michael@0 289 }
michael@0 290 --length;
michael@0 291 }
michael@0 292 if(source<sourceLimit && target>=targetLimit) {
michael@0 293 /* target is full */
michael@0 294 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 295 }
michael@0 296 } else {
michael@0 297 unicodeMode:
michael@0 298 /*
michael@0 299 * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
michael@0 300 * The base64 sequence ends with any character that is not in the base64 alphabet.
michael@0 301 * A terminating minus sign is consumed.
michael@0 302 *
michael@0 303 * In Unicode Mode, the sourceIndex has the index to the start of the current
michael@0 304 * base64 bytes, while nextSourceIndex is precisely parallel to source,
michael@0 305 * keeping the index to the following byte.
michael@0 306 * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
michael@0 307 */
michael@0 308 while(source<sourceLimit) {
michael@0 309 if(target<targetLimit) {
michael@0 310 bytes[byteIndex++]=b=*source++;
michael@0 311 ++nextSourceIndex;
michael@0 312 base64Value = -3; /* initialize as illegal */
/* the b>=126 test short-circuits, so fromBase64[] (128 entries) is only indexed with b<126 */
michael@0 313 if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
michael@0 314 /* either
michael@0 315 * base64Value==-1 for any legal character except base64 and minus sign, or
michael@0 316 * base64Value==-3 for illegal characters:
michael@0 317 * 1. In either case, leave Unicode mode.
michael@0 318 * 2.1. If we ended with an incomplete UChar or none after the +, then
michael@0 319 * generate an error for the preceding erroneous sequence and deal with
michael@0 320 * the current (possibly illegal) character next time through.
michael@0 321 * 2.2. Else the current char comes after a complete UChar, which was already
michael@0 322 * pushed to the output buf, so:
michael@0 323 * 2.2.1. If the current char is legal, just save it for processing next time.
michael@0 324 * It may be for example, a plus which we need to deal with in direct mode.
michael@0 325 * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
michael@0 326 */
michael@0 327 inDirectMode=TRUE;
michael@0 328 if(base64Counter==-1) {
michael@0 329 /* illegal: + immediately followed by something other than base64 or minus sign */
michael@0 330 /* include the plus sign in the reported sequence, but not the subsequent char */
michael@0 331 --source;
michael@0 332 bytes[0]=PLUS;
michael@0 333 byteIndex=1;
michael@0 334 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 335 break;
michael@0 336 } else if(bits!=0) {
michael@0 337 /* bits are illegally left over, a UChar is incomplete */
michael@0 338 /* don't include current char (legal or illegal) in error seq */
michael@0 339 --source;
michael@0 340 --byteIndex;
michael@0 341 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 342 break;
michael@0 343 } else {
michael@0 344 /* previous UChar was complete */
michael@0 345 if(base64Value==-3) {
michael@0 346 /* current character is illegal, deal with it here */
michael@0 347 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 348 break;
michael@0 349 } else {
michael@0 350 /* un-read the current character in case it is a plus sign */
michael@0 351 --source;
michael@0 352 sourceIndex=nextSourceIndex-1;
michael@0 353 goto directMode;
michael@0 354 }
michael@0 355 }
michael@0 356 } else if(base64Value>=0) {
michael@0 357 /*
 * collect base64 bytes into UChars
 *
 * base64Counter cycles through 0..7 for the eight base64 digits that
 * carry three UChars; a UChar is completed at counter values 2, 5 and 7.
 */
michael@0 358 switch(base64Counter) {
michael@0 359 case -1: /* -1 is immediately after the + */
michael@0 360 case 0:
michael@0 361 bits=base64Value;
michael@0 362 base64Counter=1;
michael@0 363 break;
michael@0 364 case 1:
michael@0 365 case 3:
michael@0 366 case 4:
michael@0 367 case 6:
michael@0 368 bits=(uint16_t)((bits<<6)|base64Value);
michael@0 369 ++base64Counter;
michael@0 370 break;
michael@0 371 case 2:
/* output the collected bits plus the top 4 bits of this digit; keep its low 2 bits */
michael@0 372 *target++=(UChar)((bits<<4)|(base64Value>>2));
michael@0 373 if(offsets!=NULL) {
michael@0 374 *offsets++=sourceIndex;
michael@0 375 sourceIndex=nextSourceIndex-1;
michael@0 376 }
michael@0 377 bytes[0]=b; /* keep this byte in case an error occurs */
michael@0 378 byteIndex=1;
michael@0 379 bits=(uint16_t)(base64Value&3);
michael@0 380 base64Counter=3;
michael@0 381 break;
michael@0 382 case 5:
/* output the collected bits plus the top 2 bits of this digit; keep its low 4 bits */
michael@0 383 *target++=(UChar)((bits<<2)|(base64Value>>4));
michael@0 384 if(offsets!=NULL) {
michael@0 385 *offsets++=sourceIndex;
michael@0 386 sourceIndex=nextSourceIndex-1;
michael@0 387 }
michael@0 388 bytes[0]=b; /* keep this byte in case an error occurs */
michael@0 389 byteIndex=1;
michael@0 390 bits=(uint16_t)(base64Value&15);
michael@0 391 base64Counter=6;
michael@0 392 break;
michael@0 393 case 7:
/* output the collected bits plus all 6 bits of this digit; nothing is left over */
michael@0 394 *target++=(UChar)((bits<<6)|base64Value);
michael@0 395 if(offsets!=NULL) {
michael@0 396 *offsets++=sourceIndex;
michael@0 397 sourceIndex=nextSourceIndex;
michael@0 398 }
michael@0 399 byteIndex=0;
michael@0 400 bits=0;
michael@0 401 base64Counter=0;
michael@0 402 break;
michael@0 403 default:
michael@0 404 /* will never occur */
michael@0 405 break;
michael@0 406 }
michael@0 407 } else /*base64Value==-2*/ {
michael@0 408 /* minus sign terminates the base64 sequence */
michael@0 409 inDirectMode=TRUE;
michael@0 410 if(base64Counter==-1) {
michael@0 411 /* +- i.e. a minus immediately following a plus */
michael@0 412 *target++=PLUS;
michael@0 413 if(offsets!=NULL) {
michael@0 414 *offsets++=sourceIndex-1;
michael@0 415 }
michael@0 416 } else {
michael@0 417 /* absorb the minus and leave the Unicode Mode */
michael@0 418 if(bits!=0) {
michael@0 419 /* bits are illegally left over, a UChar is incomplete */
michael@0 420 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 421 break;
michael@0 422 }
michael@0 423 }
michael@0 424 sourceIndex=nextSourceIndex;
michael@0 425 goto directMode;
michael@0 426 }
michael@0 427 } else {
michael@0 428 /* target is full */
michael@0 429 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 430 break;
michael@0 431 }
michael@0 432 }
michael@0 433 }
michael@0 434
michael@0 435 if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
michael@0 436 /*
michael@0 437 * if we are in Unicode mode, then the byteIndex might not be 0,
michael@0 438 * but that is ok if bits==0
michael@0 439 * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
michael@0 440 * (not true for IMAP-mailbox-name where we must end in direct mode)
michael@0 441 */
michael@0 442 byteIndex=0;
michael@0 443 }
michael@0 444
michael@0 445 /*
 * set the converter state back into UConverter
 * (base64Counter goes through uint8_t so that -1 occupies only bits 23..16)
 */
michael@0 446 cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
michael@0 447 cnv->toULength=byteIndex;
michael@0 448
michael@0 449 /* write back the updated pointers */
michael@0 450 pArgs->source=(const char *)source;
michael@0 451 pArgs->target=target;
michael@0 452 pArgs->offsets=offsets;
michael@0 453 return;
michael@0 454 }
michael@0 455
michael@0 456 static void
michael@0 457 _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
michael@0 458 UErrorCode *pErrorCode) {
michael@0 459 UConverter *cnv;
michael@0 460 const UChar *source, *sourceLimit;
michael@0 461 uint8_t *target, *targetLimit;
michael@0 462 int32_t *offsets;
michael@0 463
michael@0 464 int32_t length, targetCapacity, sourceIndex;
michael@0 465 UChar c;
michael@0 466
michael@0 467 /* UTF-7 state */
michael@0 468 const UBool *encodeDirectly;
michael@0 469 uint8_t bits;
michael@0 470 int8_t base64Counter;
michael@0 471 UBool inDirectMode;
michael@0 472
michael@0 473 /* set up the local pointers */
michael@0 474 cnv=pArgs->converter;
michael@0 475
michael@0 476 /* set up the local pointers */
michael@0 477 source=pArgs->source;
michael@0 478 sourceLimit=pArgs->sourceLimit;
michael@0 479 target=(uint8_t *)pArgs->target;
michael@0 480 targetLimit=(uint8_t *)pArgs->targetLimit;
michael@0 481 offsets=pArgs->offsets;
michael@0 482
michael@0 483 /* get the state machine state */
michael@0 484 {
michael@0 485 uint32_t status=cnv->fromUnicodeStatus;
michael@0 486 encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
michael@0 487 inDirectMode=(UBool)((status>>24)&1);
michael@0 488 base64Counter=(int8_t)(status>>16);
michael@0 489 bits=(uint8_t)status;
michael@0 490 U_ASSERT(bits<=sizeof(toBase64)/sizeof(toBase64[0]));
michael@0 491 }
michael@0 492
michael@0 493 /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
michael@0 494 sourceIndex=0;
michael@0 495
michael@0 496 if(inDirectMode) {
michael@0 497 directMode:
michael@0 498 length=(int32_t)(sourceLimit-source);
michael@0 499 targetCapacity=(int32_t)(targetLimit-target);
michael@0 500 if(length>targetCapacity) {
michael@0 501 length=targetCapacity;
michael@0 502 }
michael@0 503 while(length>0) {
michael@0 504 c=*source++;
michael@0 505 /* currently always encode CR LF SP TAB directly */
michael@0 506 if(c<=127 && encodeDirectly[c]) {
michael@0 507 /* encode directly */
michael@0 508 *target++=(uint8_t)c;
michael@0 509 if(offsets!=NULL) {
michael@0 510 *offsets++=sourceIndex++;
michael@0 511 }
michael@0 512 } else if(c==PLUS) {
michael@0 513 /* output +- for + */
michael@0 514 *target++=PLUS;
michael@0 515 if(target<targetLimit) {
michael@0 516 *target++=MINUS;
michael@0 517 if(offsets!=NULL) {
michael@0 518 *offsets++=sourceIndex;
michael@0 519 *offsets++=sourceIndex++;
michael@0 520 }
michael@0 521 /* realign length and targetCapacity */
michael@0 522 goto directMode;
michael@0 523 } else {
michael@0 524 if(offsets!=NULL) {
michael@0 525 *offsets++=sourceIndex++;
michael@0 526 }
michael@0 527 cnv->charErrorBuffer[0]=MINUS;
michael@0 528 cnv->charErrorBufferLength=1;
michael@0 529 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 530 break;
michael@0 531 }
michael@0 532 } else {
michael@0 533 /* un-read this character and switch to Unicode Mode */
michael@0 534 --source;
michael@0 535 *target++=PLUS;
michael@0 536 if(offsets!=NULL) {
michael@0 537 *offsets++=sourceIndex;
michael@0 538 }
michael@0 539 inDirectMode=FALSE;
michael@0 540 base64Counter=0;
michael@0 541 goto unicodeMode;
michael@0 542 }
michael@0 543 --length;
michael@0 544 }
michael@0 545 if(source<sourceLimit && target>=targetLimit) {
michael@0 546 /* target is full */
michael@0 547 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 548 }
michael@0 549 } else {
michael@0 550 unicodeMode:
michael@0 551 while(source<sourceLimit) {
michael@0 552 if(target<targetLimit) {
michael@0 553 c=*source++;
michael@0 554 if(c<=127 && encodeDirectly[c]) {
michael@0 555 /* encode directly */
michael@0 556 inDirectMode=TRUE;
michael@0 557
michael@0 558 /* trick: back out this character to make this easier */
michael@0 559 --source;
michael@0 560
michael@0 561 /* terminate the base64 sequence */
michael@0 562 if(base64Counter!=0) {
michael@0 563 /* write remaining bits for the previous character */
michael@0 564 *target++=toBase64[bits];
michael@0 565 if(offsets!=NULL) {
michael@0 566 *offsets++=sourceIndex-1;
michael@0 567 }
michael@0 568 }
michael@0 569 if(fromBase64[c]!=-1) {
michael@0 570 /* need to terminate with a minus */
michael@0 571 if(target<targetLimit) {
michael@0 572 *target++=MINUS;
michael@0 573 if(offsets!=NULL) {
michael@0 574 *offsets++=sourceIndex-1;
michael@0 575 }
michael@0 576 } else {
michael@0 577 cnv->charErrorBuffer[0]=MINUS;
michael@0 578 cnv->charErrorBufferLength=1;
michael@0 579 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 580 break;
michael@0 581 }
michael@0 582 }
michael@0 583 goto directMode;
michael@0 584 } else {
michael@0 585 /*
michael@0 586 * base64 this character:
michael@0 587 * Output 2 or 3 base64 bytes for the remaining bits of the previous character
michael@0 588 * and the bits of this character, each implicitly in UTF-16BE.
michael@0 589 *
michael@0 590 * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
michael@0 591 * character to the next. The actual 2 or 4 bits are shifted to the left edge
michael@0 592 * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
michael@0 593 */
michael@0 594 switch(base64Counter) {
michael@0 595 case 0:
michael@0 596 *target++=toBase64[c>>10];
michael@0 597 if(target<targetLimit) {
michael@0 598 *target++=toBase64[(c>>4)&0x3f];
michael@0 599 if(offsets!=NULL) {
michael@0 600 *offsets++=sourceIndex;
michael@0 601 *offsets++=sourceIndex++;
michael@0 602 }
michael@0 603 } else {
michael@0 604 if(offsets!=NULL) {
michael@0 605 *offsets++=sourceIndex++;
michael@0 606 }
michael@0 607 cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
michael@0 608 cnv->charErrorBufferLength=1;
michael@0 609 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 610 }
michael@0 611 bits=(uint8_t)((c&15)<<2);
michael@0 612 base64Counter=1;
michael@0 613 break;
michael@0 614 case 1:
michael@0 615 *target++=toBase64[bits|(c>>14)];
michael@0 616 if(target<targetLimit) {
michael@0 617 *target++=toBase64[(c>>8)&0x3f];
michael@0 618 if(target<targetLimit) {
michael@0 619 *target++=toBase64[(c>>2)&0x3f];
michael@0 620 if(offsets!=NULL) {
michael@0 621 *offsets++=sourceIndex;
michael@0 622 *offsets++=sourceIndex;
michael@0 623 *offsets++=sourceIndex++;
michael@0 624 }
michael@0 625 } else {
michael@0 626 if(offsets!=NULL) {
michael@0 627 *offsets++=sourceIndex;
michael@0 628 *offsets++=sourceIndex++;
michael@0 629 }
michael@0 630 cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
michael@0 631 cnv->charErrorBufferLength=1;
michael@0 632 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 633 }
michael@0 634 } else {
michael@0 635 if(offsets!=NULL) {
michael@0 636 *offsets++=sourceIndex++;
michael@0 637 }
michael@0 638 cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
michael@0 639 cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
michael@0 640 cnv->charErrorBufferLength=2;
michael@0 641 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 642 }
michael@0 643 bits=(uint8_t)((c&3)<<4);
michael@0 644 base64Counter=2;
michael@0 645 break;
michael@0 646 case 2:
michael@0 647 *target++=toBase64[bits|(c>>12)];
michael@0 648 if(target<targetLimit) {
michael@0 649 *target++=toBase64[(c>>6)&0x3f];
michael@0 650 if(target<targetLimit) {
michael@0 651 *target++=toBase64[c&0x3f];
michael@0 652 if(offsets!=NULL) {
michael@0 653 *offsets++=sourceIndex;
michael@0 654 *offsets++=sourceIndex;
michael@0 655 *offsets++=sourceIndex++;
michael@0 656 }
michael@0 657 } else {
michael@0 658 if(offsets!=NULL) {
michael@0 659 *offsets++=sourceIndex;
michael@0 660 *offsets++=sourceIndex++;
michael@0 661 }
michael@0 662 cnv->charErrorBuffer[0]=toBase64[c&0x3f];
michael@0 663 cnv->charErrorBufferLength=1;
michael@0 664 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 665 }
michael@0 666 } else {
michael@0 667 if(offsets!=NULL) {
michael@0 668 *offsets++=sourceIndex++;
michael@0 669 }
michael@0 670 cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
michael@0 671 cnv->charErrorBuffer[1]=toBase64[c&0x3f];
michael@0 672 cnv->charErrorBufferLength=2;
michael@0 673 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 674 }
michael@0 675 bits=0;
michael@0 676 base64Counter=0;
michael@0 677 break;
michael@0 678 default:
michael@0 679 /* will never occur */
michael@0 680 break;
michael@0 681 }
michael@0 682 }
michael@0 683 } else {
michael@0 684 /* target is full */
michael@0 685 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 686 break;
michael@0 687 }
michael@0 688 }
michael@0 689 }
michael@0 690
michael@0 691 if(pArgs->flush && source>=sourceLimit) {
michael@0 692 /* flush remaining bits to the target */
michael@0 693 if(!inDirectMode) {
michael@0 694 if (base64Counter!=0) {
michael@0 695 if(target<targetLimit) {
michael@0 696 *target++=toBase64[bits];
michael@0 697 if(offsets!=NULL) {
michael@0 698 *offsets++=sourceIndex-1;
michael@0 699 }
michael@0 700 } else {
michael@0 701 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
michael@0 702 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 703 }
michael@0 704 }
michael@0 705 /* Add final MINUS to terminate unicodeMode */
michael@0 706 if(target<targetLimit) {
michael@0 707 *target++=MINUS;
michael@0 708 if(offsets!=NULL) {
michael@0 709 *offsets++=sourceIndex-1;
michael@0 710 }
michael@0 711 } else {
michael@0 712 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
michael@0 713 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 714 }
michael@0 715 }
michael@0 716 /* reset the state for the next conversion */
michael@0 717 cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
michael@0 718 } else {
michael@0 719 /* set the converter state back into UConverter */
michael@0 720 cnv->fromUnicodeStatus=
michael@0 721 (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
michael@0 722 ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
michael@0 723 }
michael@0 724
michael@0 725 /* write back the updated pointers */
michael@0 726 pArgs->source=source;
michael@0 727 pArgs->target=(char *)target;
michael@0 728 pArgs->offsets=offsets;
michael@0 729 return;
michael@0 730 }
michael@0 731
/*
 * Return the converter name including the version option.
 *
 * The version is read from the top four bits of cnv->fromUnicodeStatus:
 * 1 yields "UTF-7,version=1", every other value yields "UTF-7".
 * The returned strings are literals; the caller must not modify or free them.
 */
michael@0 732 static const char *
michael@0 733 _UTF7GetName(const UConverter *cnv) {
michael@0 734 switch(cnv->fromUnicodeStatus>>28) {
michael@0 735 case 1:
michael@0 736 return "UTF-7,version=1";
michael@0 737 default:
michael@0 738 return "UTF-7";
michael@0 739 }
michael@0 740 }
michael@0 741
/*
 * Function table for the UTF-7 converter.
 *
 * The initializer is positional, so the meaning of each slot is given by the
 * member order of UConverterImpl, which is declared outside this file.
 * What can be read off here: the open, reset and get-name hooks are the
 * functions defined above, the same offset-aware function fills two
 * consecutive slots for each conversion direction, and the remaining hooks
 * are left NULL.
 * NOTE(review): confirm the slot order against the UConverterImpl declaration
 * before adding or moving entries.
 */
michael@0 742 static const UConverterImpl _UTF7Impl={
michael@0 743 UCNV_UTF7,
michael@0 744
michael@0 745 NULL,
michael@0 746 NULL,
michael@0 747
michael@0 748 _UTF7Open,
michael@0 749 NULL,
michael@0 750 _UTF7Reset,
michael@0 751
michael@0 752 _UTF7ToUnicodeWithOffsets,
michael@0 753 _UTF7ToUnicodeWithOffsets,
michael@0 754 _UTF7FromUnicodeWithOffsets,
michael@0 755 _UTF7FromUnicodeWithOffsets,
michael@0 756 NULL,
michael@0 757
michael@0 758 NULL,
michael@0 759 _UTF7GetName,
michael@0 760 NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
michael@0 761 NULL,
michael@0 762 ucnv_getCompleteUnicodeSet
michael@0 763 };
michael@0 764
/*
 * Static (read-only) description of the UTF-7 converter: structure size,
 * converter name "UTF-7", platform and converter type, and the substitution
 * character bytes (0x3f, length 1).
 * The initializer is positional; the member order comes from
 * UConverterStaticData, which is declared outside this file.
 * NOTE(review): "1, 4" presumably are the minimum and maximum number of bytes
 * per character - confirm against the structure declaration.
 */
michael@0 765 static const UConverterStaticData _UTF7StaticData={
michael@0 766 sizeof(UConverterStaticData),
michael@0 767 "UTF-7",
michael@0 768 0, /* TODO CCSID for UTF-7 */
michael@0 769 UCNV_IBM, UCNV_UTF7,
michael@0 770 1, 4,
michael@0 771 { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
michael@0 772 FALSE, FALSE,
michael@0 773 0,
michael@0 774 0,
michael@0 775 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
michael@0 776 };
michael@0 777
/*
 * Shared converter data for UTF-7. Unlike the tables above this object is not
 * static, so it is visible to other translation units.
 * It ties together the static description (_UTF7StaticData) and the function
 * table (_UTF7Impl).
 * NOTE(review): the initializer is positional; the meaning of the remaining
 * values (~0 after the size, the NULLs, FALSE and the trailing 0) depends on
 * the UConverterSharedData declaration - confirm there.
 */
michael@0 778 const UConverterSharedData _UTF7Data={
michael@0 779 sizeof(UConverterSharedData), ~((uint32_t)0),
michael@0 780 NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
michael@0 781 0
michael@0 782 };
michael@0 783
michael@0 784 /* IMAP mailbox name encoding ----------------------------------------------- */
michael@0 785
michael@0 786 /*
michael@0 787 * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
michael@0 788 * http://www.ietf.org/rfc/rfc2060.txt
michael@0 789 *
michael@0 790 * 5.1.3. Mailbox International Naming Convention
michael@0 791 *
michael@0 792 * By convention, international mailbox names are specified using a
michael@0 793 * modified version of the UTF-7 encoding described in [UTF-7]. The
michael@0 794 * purpose of these modifications is to correct the following problems
michael@0 795 * with UTF-7:
michael@0 796 *
michael@0 797 * 1) UTF-7 uses the "+" character for shifting; this conflicts with
michael@0 798 * the common use of "+" in mailbox names, in particular USENET
michael@0 799 * newsgroup names.
michael@0 800 *
michael@0 801 * 2) UTF-7's encoding is BASE64 which uses the "/" character; this
michael@0 802 * conflicts with the use of "/" as a popular hierarchy delimiter.
michael@0 803 *
michael@0 804 * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
michael@0 805 * the use of "\" as a popular hierarchy delimiter.
michael@0 806 *
michael@0 807 * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
michael@0 808 * the use of "~" in some servers as a home directory indicator.
michael@0 809 *
michael@0 810 * 5) UTF-7 permits multiple alternate forms to represent the same
michael@0 811 * string; in particular, printable US-ASCII characters can be
michael@0 812 * represented in encoded form.
michael@0 813 *
michael@0 814 * In modified UTF-7, printable US-ASCII characters except for "&"
michael@0 815 * represent themselves; that is, characters with octet values 0x20-0x25
michael@0 816 * and 0x27-0x7e. The character "&" (0x26) is represented by the two-
michael@0 817 * octet sequence "&-".
michael@0 818 *
michael@0 819 * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
michael@0 820 * Unicode 16-bit octets) are represented in modified BASE64, with a
michael@0 821 * further modification from [UTF-7] that "," is used instead of "/".
michael@0 822 * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
michael@0 823 * character which can represent itself.
michael@0 824 *
michael@0 825 * "&" is used to shift to modified BASE64 and "-" to shift back to US-
michael@0 826 * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that
michael@0 827 * is, a name that ends with a Unicode 16-bit octet MUST end with a "-
michael@0 828 * ").
michael@0 829 *
michael@0 830 * For example, here is a mailbox name which mixes English, Japanese,
michael@0 831 * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
michael@0 832 */
michael@0 833
michael@0 834 /*
michael@0 835 * Tests for US-ASCII characters belonging to character classes
michael@0 836 * defined in UTF-7.
michael@0 837 *
michael@0 838 * Set D (directly encoded characters) consists of the following
michael@0 839 * characters: the upper and lower case letters A through Z
michael@0 840 * and a through z, the 10 digits 0-9, and the following nine special
michael@0 841 * characters (note that "+" and "=" are omitted):
michael@0 842 * '(),-./:?
michael@0 843 *
michael@0 844 * Set O (optional direct characters) consists of the following
michael@0 845 * characters (note that "\" and "~" are omitted):
michael@0 846 * !"#$%&*;<=>@[]^_`{|}
michael@0 847 *
michael@0 848 * According to the rules in RFC 2152, the byte values for the following
michael@0 849 * US-ASCII characters are not used in UTF-7 and are therefore illegal:
michael@0 850 * - all C0 control codes except for CR LF TAB
michael@0 851 * - BACKSLASH
michael@0 852 * - TILDE
michael@0 853 * - DEL
michael@0 854 * - all codes beyond US-ASCII, i.e. all >127
michael@0 855 */
michael@0 856
/*
 * Constants and character-class tests for the IMAP mailbox name variant:
 * '&' (not '+') starts a base64 sequence, and ',' takes the place of '/'
 * as the base64 digit for the value 63.
 */
#define AMPERSAND 0x26
#define COMMA 0x2c
#define SLASH 0x2f

/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

/*
 * direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26
 * (the parameter is parenthesized everywhere so that an expression argument
 * is compared as a whole)
 */
#define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)

/* base64 digit for the 6-bit value n: values below 63 come from toBase64[], 63 is ',' */
#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[(n)] : COMMA)
/* value of byte c in a base64 sequence: ',' is 63, '/' yields -1, all else comes from fromBase64[] */
#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[(c)])
michael@0 870
michael@0 871 /*
michael@0 872 * converter status values:
michael@0 873 *
michael@0 874 * toUnicodeStatus:
michael@0 875 * 24 inDirectMode (boolean)
michael@0 876 * 23..16 base64Counter (-1..7)
michael@0 877 * 15..0 bits (up to 14 bits incoming base64)
michael@0 878 *
michael@0 879 * fromUnicodeStatus:
michael@0 880 * 24 inDirectMode (boolean)
michael@0 881 * 23..16 base64Counter (0..2)
michael@0 882 * 7..0 bits (6 bits outgoing base64)
michael@0 883 *
michael@0 884 * ignore bits 31..25
michael@0 885 */
michael@0 886
/*
 * Converts IMAP mailbox name bytes (modified UTF-7) to UTF-16 code units.
 *
 * Input is read from pArgs->source up to pArgs->sourceLimit, output is written
 * to pArgs->target up to pArgs->targetLimit; when pArgs->offsets is not NULL,
 * one source index is recorded per UChar written. The three pointers are
 * written back to pArgs before returning.
 *
 * The state (direct mode vs. base64 "Unicode" mode, the base64 counter and the
 * pending bits) is loaded from and saved to cnv->toUnicodeStatus, and the
 * bytes of the current sequence are kept in cnv->toUBytes/cnv->toULength.
 *
 * Values stored in *pErrorCode by this function:
 * - U_ILLEGAL_CHAR_FOUND: in direct mode a byte outside 0x20..0x7e; in Unicode
 *   mode a byte above 0x7e, a byte that FROM_BASE64_IMAP maps to neither a
 *   base64 value nor the minus marker (-2), a decoded code unit in 0x20..0x7e,
 *   or a '-' that arrives while bits are left over or a UChar is incomplete
 * - U_BUFFER_OVERFLOW_ERROR: the target is full while source bytes remain
 * - U_TRUNCATED_CHAR_FOUND: pArgs->flush is set, all input is consumed, and the
 *   converter is still in Unicode mode with no pending bytes
 */
michael@0 887 static void
michael@0 888 _IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
michael@0 889 UErrorCode *pErrorCode) {
michael@0 890 UConverter *cnv;
michael@0 891 const uint8_t *source, *sourceLimit;
michael@0 892 UChar *target;
michael@0 893 const UChar *targetLimit;
michael@0 894 int32_t *offsets;
michael@0 895
michael@0 896 uint8_t *bytes;
michael@0 897 uint8_t byteIndex;
michael@0 898
michael@0 899 int32_t length, targetCapacity;
michael@0 900
michael@0 901 /* UTF-7 state */
michael@0 902 uint16_t bits;
michael@0 903 int8_t base64Counter;
michael@0 904 UBool inDirectMode;
michael@0 905
michael@0 906 int8_t base64Value;
michael@0 907
michael@0 908 int32_t sourceIndex, nextSourceIndex;
michael@0 909
michael@0 910 UChar c;
michael@0 911 uint8_t b;
michael@0 912
michael@0 913 /* set up the local pointers */
michael@0 914 cnv=pArgs->converter;
michael@0 915
michael@0 916 source=(const uint8_t *)pArgs->source;
michael@0 917 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
michael@0 918 target=pArgs->target;
michael@0 919 targetLimit=pArgs->targetLimit;
michael@0 920 offsets=pArgs->offsets;
michael@0 921 /* get the state machine state */
michael@0 922 {
michael@0 923 uint32_t status=cnv->toUnicodeStatus;
michael@0 924 inDirectMode=(UBool)((status>>24)&1);
/*
 * only bits 23..16 are wanted here: the cast to int8_t is relied upon to drop
 * the inDirectMode bit and to turn a stored 0xff back into -1
 */
michael@0 925 base64Counter=(int8_t)(status>>16);
michael@0 926 bits=(uint16_t)status;
michael@0 927 }
michael@0 928 bytes=cnv->toUBytes;
michael@0 929 byteIndex=cnv->toULength;
michael@0 930
michael@0 931 /* sourceIndex=-1 if the current character began in the previous buffer */
michael@0 932 sourceIndex=byteIndex==0 ? 0 : -1;
michael@0 933 nextSourceIndex=0;
michael@0 934
michael@0 935 if(inDirectMode) {
michael@0 936 directMode:
michael@0 937 /*
michael@0 938 * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
michael@0 939 * with their US-ASCII byte values.
michael@0 940 * An ampersand starts Unicode (or "escape") Mode.
michael@0 941 *
michael@0 942 * In Direct Mode, only the sourceIndex is used.
michael@0 943 */
michael@0 944 byteIndex=0;
michael@0 945 length=(int32_t)(sourceLimit-source);
michael@0 946 targetCapacity=(int32_t)(targetLimit-target);
/* a direct byte yields exactly one UChar, so the loop count is bounded by both buffers */
michael@0 947 if(length>targetCapacity) {
michael@0 948 length=targetCapacity;
michael@0 949 }
michael@0 950 while(length>0) {
michael@0 951 b=*source++;
michael@0 952 if(!isLegalIMAP(b)) {
michael@0 953 /* illegal */
michael@0 954 bytes[0]=b;
michael@0 955 byteIndex=1;
michael@0 956 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 957 break;
michael@0 958 } else if(b!=AMPERSAND) {
michael@0 959 /* write directly encoded character */
michael@0 960 *target++=b;
michael@0 961 if(offsets!=NULL) {
michael@0 962 *offsets++=sourceIndex++;
michael@0 963 }
michael@0 964 } else /* AMPERSAND */ {
michael@0 965 /* switch to Unicode mode */
michael@0 966 nextSourceIndex=++sourceIndex;
michael@0 967 inDirectMode=FALSE;
michael@0 968 byteIndex=0;
michael@0 969 bits=0;
michael@0 970 base64Counter=-1;
michael@0 971 goto unicodeMode;
michael@0 972 }
michael@0 973 --length;
michael@0 974 }
michael@0 975 if(source<sourceLimit && target>=targetLimit) {
michael@0 976 /* target is full */
michael@0 977 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 978 }
michael@0 979 } else {
michael@0 980 unicodeMode:
michael@0 981 /*
michael@0 982 * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
michael@0 983 * The base64 sequence ends with any character that is not in the base64 alphabet.
michael@0 984 * A terminating minus sign is consumed.
michael@0 985 * US-ASCII must not be base64-ed.
michael@0 986 *
michael@0 987 * In Unicode Mode, the sourceIndex has the index to the start of the current
michael@0 988 * base64 bytes, while nextSourceIndex is precisely parallel to source,
michael@0 989 * keeping the index to the following byte.
michael@0 990 * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
michael@0 991 */
michael@0 992 while(source<sourceLimit) {
michael@0 993 if(target<targetLimit) {
michael@0 994 bytes[byteIndex++]=b=*source++;
michael@0 995 ++nextSourceIndex;
michael@0 996 if(b>0x7e) {
michael@0 997 /* illegal - test other illegal US-ASCII values by base64Value==-3 */
michael@0 998 inDirectMode=TRUE;
michael@0 999 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 1000 break;
michael@0 1001 } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
michael@0 1002 /* collect base64 bytes into UChars */
michael@0 1003 switch(base64Counter) {
michael@0 1004 case -1: /* -1 is immediately after the & */
michael@0 1005 case 0:
michael@0 1006 bits=base64Value;
michael@0 1007 base64Counter=1;
michael@0 1008 break;
michael@0 1009 case 1:
michael@0 1010 case 3:
michael@0 1011 case 4:
michael@0 1012 case 6:
michael@0 1013 bits=(uint16_t)((bits<<6)|base64Value);
michael@0 1014 ++base64Counter;
michael@0 1015 break;
michael@0 1016 case 2:
michael@0 1017 c=(UChar)((bits<<4)|(base64Value>>2));
/* a code unit in 0x20..0x7e must appear directly, so its base64 form is rejected */
michael@0 1018 if(isLegalIMAP(c)) {
michael@0 1019 /* illegal */
michael@0 1020 inDirectMode=TRUE;
michael@0 1021 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 1022 goto endloop;
michael@0 1023 }
michael@0 1024 *target++=c;
michael@0 1025 if(offsets!=NULL) {
michael@0 1026 *offsets++=sourceIndex;
michael@0 1027 sourceIndex=nextSourceIndex-1;
michael@0 1028 }
michael@0 1029 bytes[0]=b; /* keep this byte in case an error occurs */
michael@0 1030 byteIndex=1;
michael@0 1031 bits=(uint16_t)(base64Value&3);
michael@0 1032 base64Counter=3;
michael@0 1033 break;
michael@0 1034 case 5:
michael@0 1035 c=(UChar)((bits<<2)|(base64Value>>4));
michael@0 1036 if(isLegalIMAP(c)) {
michael@0 1037 /* illegal */
michael@0 1038 inDirectMode=TRUE;
michael@0 1039 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 1040 goto endloop;
michael@0 1041 }
michael@0 1042 *target++=c;
michael@0 1043 if(offsets!=NULL) {
michael@0 1044 *offsets++=sourceIndex;
michael@0 1045 sourceIndex=nextSourceIndex-1;
michael@0 1046 }
michael@0 1047 bytes[0]=b; /* keep this byte in case an error occurs */
michael@0 1048 byteIndex=1;
michael@0 1049 bits=(uint16_t)(base64Value&15);
michael@0 1050 base64Counter=6;
michael@0 1051 break;
michael@0 1052 case 7:
michael@0 1053 c=(UChar)((bits<<6)|base64Value);
michael@0 1054 if(isLegalIMAP(c)) {
michael@0 1055 /* illegal */
michael@0 1056 inDirectMode=TRUE;
michael@0 1057 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 1058 goto endloop;
michael@0 1059 }
michael@0 1060 *target++=c;
michael@0 1061 if(offsets!=NULL) {
michael@0 1062 *offsets++=sourceIndex;
michael@0 1063 sourceIndex=nextSourceIndex;
michael@0 1064 }
michael@0 1065 byteIndex=0;
michael@0 1066 bits=0;
michael@0 1067 base64Counter=0;
michael@0 1068 break;
michael@0 1069 default:
michael@0 1070 /* will never occur */
michael@0 1071 break;
michael@0 1072 }
michael@0 1073 } else if(base64Value==-2) {
michael@0 1074 /* minus sign terminates the base64 sequence */
michael@0 1075 inDirectMode=TRUE;
michael@0 1076 if(base64Counter==-1) {
michael@0 1077 /* &- i.e. a minus immediately following an ampersand */
michael@0 1078 *target++=AMPERSAND;
michael@0 1079 if(offsets!=NULL) {
michael@0 1080 *offsets++=sourceIndex-1;
michael@0 1081 }
michael@0 1082 } else {
michael@0 1083 /* absorb the minus and leave the Unicode Mode */
michael@0 1084 if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
michael@0 1085 /* bits are illegally left over, a UChar is incomplete */
michael@0 1086 /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
michael@0 1087 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 1088 break;
michael@0 1089 }
michael@0 1090 }
michael@0 1091 sourceIndex=nextSourceIndex;
michael@0 1092 goto directMode;
michael@0 1093 } else {
michael@0 1094 if(base64Counter==-1) {
michael@0 1095 /* illegal: & immediately followed by something other than base64 or minus sign */
michael@0 1096 /* include the ampersand in the reported sequence */
michael@0 1097 --sourceIndex;
michael@0 1098 bytes[0]=AMPERSAND;
michael@0 1099 bytes[1]=b;
michael@0 1100 byteIndex=2;
michael@0 1101 }
michael@0 1102 /* base64Value==-1 for characters that are illegal only in Unicode mode */
michael@0 1103 /* base64Value==-3 for illegal characters */
michael@0 1104 /* illegal */
michael@0 1105 inDirectMode=TRUE;
michael@0 1106 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0 1107 break;
michael@0 1108 }
michael@0 1109 } else {
michael@0 1110 /* target is full */
michael@0 1111 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1112 break;
michael@0 1113 }
michael@0 1114 }
michael@0 1115 }
michael@0 1116 endloop:
michael@0 1117
michael@0 1118 /*
michael@0 1119 * the end of the input stream and detection of truncated input
michael@0 1120 * are handled by the framework, but here we must check if we are in Unicode
michael@0 1121 * mode and byteIndex==0 because we must end in direct mode
michael@0 1122 *
michael@0 1123 * conditions:
michael@0 1124 * successful
michael@0 1125 * in Unicode mode and byteIndex==0
michael@0 1126 * end of input and no truncated input
michael@0 1127 */
michael@0 1128 if( U_SUCCESS(*pErrorCode) &&
michael@0 1129 !inDirectMode && byteIndex==0 &&
michael@0 1130 pArgs->flush && source>=sourceLimit
michael@0 1131 ) {
michael@0 1132 if(base64Counter==-1) {
michael@0 1133 /* & at the very end of the input */
michael@0 1134 /* make the ampersand the reported sequence */
michael@0 1135 bytes[0]=AMPERSAND;
michael@0 1136 byteIndex=1;
michael@0 1137 }
michael@0 1138 /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
michael@0 1139
michael@0 1140 inDirectMode=TRUE; /* avoid looping */
michael@0 1141 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
michael@0 1142 }
michael@0 1143
michael@0 1144 /* set the converter state back into UConverter */
michael@0 1145 cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
michael@0 1146 cnv->toULength=byteIndex;
michael@0 1147
michael@0 1148 /* write back the updated pointers */
michael@0 1149 pArgs->source=(const char *)source;
michael@0 1150 pArgs->target=target;
michael@0 1151 pArgs->offsets=offsets;
michael@0 1152 return;
michael@0 1153 }
michael@0 1154
/*
 * Converts UTF-16 code units to IMAP mailbox name bytes (modified UTF-7).
 *
 * Input is read from pArgs->source up to pArgs->sourceLimit, output is written
 * to pArgs->target up to pArgs->targetLimit; when pArgs->offsets is not NULL,
 * one source index is recorded per byte written to the target. The three
 * pointers are written back to pArgs before returning.
 *
 * Direct mode: code units 0x20..0x7e other than '&' are copied as single bytes,
 * '&' is written as "&-", and any other code unit writes '&' and switches to
 * Unicode mode.
 * Unicode mode: each code unit is written as base64 digits (TO_BASE64_IMAP),
 * carrying 4 or 2 leftover bits in "bits" to the next code unit; a code unit in
 * 0x20..0x7e ends the sequence with the leftover digit (if any) and '-'.
 *
 * When the target fills up in the middle of a multi-byte output, the bytes that
 * did not fit are stored in cnv->charErrorBuffer and *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR. With pArgs->flush set and all input consumed, an open
 * base64 sequence is closed and the saved state is reset to direct mode;
 * otherwise the state is saved in cnv->fromUnicodeStatus. In both cases
 * bits 31..28 of the previous status value are kept.
 *
 * MINUS and toBase64[] are used here but are not defined in this part of the file.
 */
michael@0 1155 static void
michael@0 1156 _IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
michael@0 1157 UErrorCode *pErrorCode) {
michael@0 1158 UConverter *cnv;
michael@0 1159 const UChar *source, *sourceLimit;
michael@0 1160 uint8_t *target, *targetLimit;
michael@0 1161 int32_t *offsets;
michael@0 1162
michael@0 1163 int32_t length, targetCapacity, sourceIndex;
michael@0 1164 UChar c;
michael@0 1165 uint8_t b;
michael@0 1166
michael@0 1167 /* UTF-7 state */
michael@0 1168 uint8_t bits;
michael@0 1169 int8_t base64Counter;
michael@0 1170 UBool inDirectMode;
michael@0 1171
michael@0 1172 /* set up the local pointers */
michael@0 1173 cnv=pArgs->converter;
michael@0 1174
michael@0 1175 /* set up the local pointers */
michael@0 1176 source=pArgs->source;
michael@0 1177 sourceLimit=pArgs->sourceLimit;
michael@0 1178 target=(uint8_t *)pArgs->target;
michael@0 1179 targetLimit=(uint8_t *)pArgs->targetLimit;
michael@0 1180 offsets=pArgs->offsets;
michael@0 1181
michael@0 1182 /* get the state machine state */
michael@0 1183 {
michael@0 1184 uint32_t status=cnv->fromUnicodeStatus;
michael@0 1185 inDirectMode=(UBool)((status>>24)&1);
michael@0 1186 base64Counter=(int8_t)(status>>16);
michael@0 1187 bits=(uint8_t)status;
michael@0 1188 }
michael@0 1189
michael@0 1190 /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
michael@0 1191 sourceIndex=0;
michael@0 1192
michael@0 1193 if(inDirectMode) {
michael@0 1194 directMode:
michael@0 1195 length=(int32_t)(sourceLimit-source);
michael@0 1196 targetCapacity=(int32_t)(targetLimit-target);
/* the loop count is bounded by both the remaining source units and the remaining target bytes */
michael@0 1197 if(length>targetCapacity) {
michael@0 1198 length=targetCapacity;
michael@0 1199 }
michael@0 1200 while(length>0) {
michael@0 1201 c=*source++;
michael@0 1202 /* encode 0x20..0x7e except '&' directly */
michael@0 1203 if(inSetDIMAP(c)) {
michael@0 1204 /* encode directly */
michael@0 1205 *target++=(uint8_t)c;
michael@0 1206 if(offsets!=NULL) {
michael@0 1207 *offsets++=sourceIndex++;
michael@0 1208 }
michael@0 1209 } else if(c==AMPERSAND) {
michael@0 1210 /* output &- for & */
michael@0 1211 *target++=AMPERSAND;
/* '&' needs two bytes; if only the first one fit, the '-' goes into charErrorBuffer below */
michael@0 1212 if(target<targetLimit) {
michael@0 1213 *target++=MINUS;
michael@0 1214 if(offsets!=NULL) {
michael@0 1215 *offsets++=sourceIndex;
michael@0 1216 *offsets++=sourceIndex++;
michael@0 1217 }
michael@0 1218 /* realign length and targetCapacity */
michael@0 1219 goto directMode;
michael@0 1220 } else {
michael@0 1221 if(offsets!=NULL) {
michael@0 1222 *offsets++=sourceIndex++;
michael@0 1223 }
michael@0 1224 cnv->charErrorBuffer[0]=MINUS;
michael@0 1225 cnv->charErrorBufferLength=1;
michael@0 1226 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1227 break;
michael@0 1228 }
michael@0 1229 } else {
michael@0 1230 /* un-read this character and switch to Unicode Mode */
michael@0 1231 --source;
michael@0 1232 *target++=AMPERSAND;
michael@0 1233 if(offsets!=NULL) {
michael@0 1234 *offsets++=sourceIndex;
michael@0 1235 }
michael@0 1236 inDirectMode=FALSE;
michael@0 1237 base64Counter=0;
michael@0 1238 goto unicodeMode;
michael@0 1239 }
michael@0 1240 --length;
michael@0 1241 }
michael@0 1242 if(source<sourceLimit && target>=targetLimit) {
michael@0 1243 /* target is full */
michael@0 1244 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1245 }
michael@0 1246 } else {
michael@0 1247 unicodeMode:
michael@0 1248 while(source<sourceLimit) {
michael@0 1249 if(target<targetLimit) {
michael@0 1250 c=*source++;
michael@0 1251 if(isLegalIMAP(c)) {
michael@0 1252 /* encode directly */
michael@0 1253 inDirectMode=TRUE;
michael@0 1254
michael@0 1255 /* trick: back out this character to make this easier */
michael@0 1256 --source;
michael@0 1257
michael@0 1258 /* terminate the base64 sequence */
michael@0 1259 if(base64Counter!=0) {
michael@0 1260 /* write remaining bits for the previous character */
/* room for this one byte is guaranteed by the target<targetLimit test at the top of the loop body */
michael@0 1261 *target++=TO_BASE64_IMAP(bits);
michael@0 1262 if(offsets!=NULL) {
michael@0 1263 *offsets++=sourceIndex-1;
michael@0 1264 }
michael@0 1265 }
michael@0 1266 /* need to terminate with a minus */
michael@0 1267 if(target<targetLimit) {
michael@0 1268 *target++=MINUS;
michael@0 1269 if(offsets!=NULL) {
michael@0 1270 *offsets++=sourceIndex-1;
michael@0 1271 }
michael@0 1272 } else {
michael@0 1273 cnv->charErrorBuffer[0]=MINUS;
michael@0 1274 cnv->charErrorBufferLength=1;
michael@0 1275 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1276 break;
michael@0 1277 }
michael@0 1278 goto directMode;
michael@0 1279 } else {
michael@0 1280 /*
michael@0 1281 * base64 this character:
michael@0 1282 * Output 2 or 3 base64 bytes for the remaining bits of the previous character
michael@0 1283 * and the bits of this character, each implicitly in UTF-16BE.
michael@0 1284 *
michael@0 1285 * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
michael@0 1286 * character to the next. The actual 2 or 4 bits are shifted to the left edge
michael@0 1287 * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
michael@0 1288 */
michael@0 1289 switch(base64Counter) {
michael@0 1290 case 0:
michael@0 1291 b=(uint8_t)(c>>10);
michael@0 1292 *target++=TO_BASE64_IMAP(b);
michael@0 1293 if(target<targetLimit) {
michael@0 1294 b=(uint8_t)((c>>4)&0x3f);
michael@0 1295 *target++=TO_BASE64_IMAP(b);
michael@0 1296 if(offsets!=NULL) {
michael@0 1297 *offsets++=sourceIndex;
michael@0 1298 *offsets++=sourceIndex++;
michael@0 1299 }
michael@0 1300 } else {
michael@0 1301 if(offsets!=NULL) {
michael@0 1302 *offsets++=sourceIndex++;
michael@0 1303 }
michael@0 1304 b=(uint8_t)((c>>4)&0x3f);
michael@0 1305 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
michael@0 1306 cnv->charErrorBufferLength=1;
michael@0 1307 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1308 }
michael@0 1309 bits=(uint8_t)((c&15)<<2);
michael@0 1310 base64Counter=1;
michael@0 1311 break;
michael@0 1312 case 1:
michael@0 1313 b=(uint8_t)(bits|(c>>14));
michael@0 1314 *target++=TO_BASE64_IMAP(b);
michael@0 1315 if(target<targetLimit) {
michael@0 1316 b=(uint8_t)((c>>8)&0x3f);
michael@0 1317 *target++=TO_BASE64_IMAP(b);
michael@0 1318 if(target<targetLimit) {
michael@0 1319 b=(uint8_t)((c>>2)&0x3f);
michael@0 1320 *target++=TO_BASE64_IMAP(b);
michael@0 1321 if(offsets!=NULL) {
michael@0 1322 *offsets++=sourceIndex;
michael@0 1323 *offsets++=sourceIndex;
michael@0 1324 *offsets++=sourceIndex++;
michael@0 1325 }
michael@0 1326 } else {
michael@0 1327 if(offsets!=NULL) {
michael@0 1328 *offsets++=sourceIndex;
michael@0 1329 *offsets++=sourceIndex++;
michael@0 1330 }
michael@0 1331 b=(uint8_t)((c>>2)&0x3f);
michael@0 1332 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
michael@0 1333 cnv->charErrorBufferLength=1;
michael@0 1334 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1335 }
michael@0 1336 } else {
michael@0 1337 if(offsets!=NULL) {
michael@0 1338 *offsets++=sourceIndex++;
michael@0 1339 }
michael@0 1340 b=(uint8_t)((c>>8)&0x3f);
michael@0 1341 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
michael@0 1342 b=(uint8_t)((c>>2)&0x3f);
michael@0 1343 cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
michael@0 1344 cnv->charErrorBufferLength=2;
michael@0 1345 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1346 }
michael@0 1347 bits=(uint8_t)((c&3)<<4);
michael@0 1348 base64Counter=2;
michael@0 1349 break;
michael@0 1350 case 2:
michael@0 1351 b=(uint8_t)(bits|(c>>12));
michael@0 1352 *target++=TO_BASE64_IMAP(b);
michael@0 1353 if(target<targetLimit) {
michael@0 1354 b=(uint8_t)((c>>6)&0x3f);
michael@0 1355 *target++=TO_BASE64_IMAP(b);
michael@0 1356 if(target<targetLimit) {
michael@0 1357 b=(uint8_t)(c&0x3f);
michael@0 1358 *target++=TO_BASE64_IMAP(b);
michael@0 1359 if(offsets!=NULL) {
michael@0 1360 *offsets++=sourceIndex;
michael@0 1361 *offsets++=sourceIndex;
michael@0 1362 *offsets++=sourceIndex++;
michael@0 1363 }
michael@0 1364 } else {
michael@0 1365 if(offsets!=NULL) {
michael@0 1366 *offsets++=sourceIndex;
michael@0 1367 *offsets++=sourceIndex++;
michael@0 1368 }
michael@0 1369 b=(uint8_t)(c&0x3f);
michael@0 1370 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
michael@0 1371 cnv->charErrorBufferLength=1;
michael@0 1372 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1373 }
michael@0 1374 } else {
michael@0 1375 if(offsets!=NULL) {
michael@0 1376 *offsets++=sourceIndex++;
michael@0 1377 }
michael@0 1378 b=(uint8_t)((c>>6)&0x3f);
michael@0 1379 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
michael@0 1380 b=(uint8_t)(c&0x3f);
michael@0 1381 cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
michael@0 1382 cnv->charErrorBufferLength=2;
michael@0 1383 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1384 }
michael@0 1385 bits=0;
michael@0 1386 base64Counter=0;
michael@0 1387 break;
michael@0 1388 default:
michael@0 1389 /* will never occur */
michael@0 1390 break;
michael@0 1391 }
michael@0 1392 }
michael@0 1393 } else {
michael@0 1394 /* target is full */
michael@0 1395 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1396 break;
michael@0 1397 }
michael@0 1398 }
michael@0 1399 }
michael@0 1400
michael@0 1401 if(pArgs->flush && source>=sourceLimit) {
michael@0 1402 /* flush remaining bits to the target */
michael@0 1403 if(!inDirectMode) {
michael@0 1404 if(base64Counter!=0) {
michael@0 1405 if(target<targetLimit) {
michael@0 1406 *target++=TO_BASE64_IMAP(bits);
michael@0 1407 if(offsets!=NULL) {
michael@0 1408 *offsets++=sourceIndex-1;
michael@0 1409 }
michael@0 1410 } else {
/* append at charErrorBufferLength, i.e. after anything the conversion loop stored there */
michael@0 1411 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
michael@0 1412 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1413 }
michael@0 1414 }
michael@0 1415 /* need to terminate with a minus */
michael@0 1416 if(target<targetLimit) {
michael@0 1417 *target++=MINUS;
michael@0 1418 if(offsets!=NULL) {
michael@0 1419 *offsets++=sourceIndex-1;
michael@0 1420 }
michael@0 1421 } else {
michael@0 1422 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
michael@0 1423 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0 1424 }
michael@0 1425 }
michael@0 1426 /* reset the state for the next conversion */
michael@0 1427 cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
michael@0 1428 } else {
michael@0 1429 /* set the converter state back into UConverter */
michael@0 1430 cnv->fromUnicodeStatus=
michael@0 1431 (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
michael@0 1432 ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
michael@0 1433 }
michael@0 1434
michael@0 1435 /* write back the updated pointers */
michael@0 1436 pArgs->source=source;
michael@0 1437 pArgs->target=(char *)target;
michael@0 1438 pArgs->offsets=offsets;
michael@0 1439 return;
michael@0 1440 }
michael@0 1441
/*
 * Implementation table for the IMAP mailbox name converter, initialized
 * positionally. It reuses _UTF7Open and _UTF7Reset, and it installs
 * _IMAPToUnicodeWithOffsets and _IMAPFromUnicodeWithOffsets twice each.
 * NOTE(review): presumably the paired slots are the plain and the
 * with-offsets entry points, and the NULL slots are optional hooks - confirm
 * against the UConverterImpl declaration, which is not visible here.
 */
michael@0 1442 static const UConverterImpl _IMAPImpl={
michael@0 1443 UCNV_IMAP_MAILBOX,
michael@0 1444
michael@0 1445 NULL,
michael@0 1446 NULL,
michael@0 1447
michael@0 1448 _UTF7Open,
michael@0 1449 NULL,
michael@0 1450 _UTF7Reset,
michael@0 1451
michael@0 1452 _IMAPToUnicodeWithOffsets,
michael@0 1453 _IMAPToUnicodeWithOffsets,
michael@0 1454 _IMAPFromUnicodeWithOffsets,
michael@0 1455 _IMAPFromUnicodeWithOffsets,
michael@0 1456 NULL,
michael@0 1457
michael@0 1458 NULL,
michael@0 1459 NULL,
michael@0 1460 NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
michael@0 1461 NULL,
michael@0 1462 ucnv_getCompleteUnicodeSet
michael@0 1463 };
michael@0 1464
/*
 * Static description of the IMAP mailbox name converter, initialized
 * positionally: the structure size, the name string "IMAP-mailbox-name",
 * then numeric settings.
 * NOTE(review): the meaning of each positional value depends on the
 * UConverterStaticData declaration, which is not visible here - confirm
 * against its header before changing any of them.
 */
michael@0 1465 static const UConverterStaticData _IMAPStaticData={
michael@0 1466 sizeof(UConverterStaticData),
michael@0 1467 "IMAP-mailbox-name",
michael@0 1468 0, /* TODO CCSID for IMAP-mailbox-name */
michael@0 1469 UCNV_IBM, UCNV_IMAP_MAILBOX,
michael@0 1470 1, 4,
michael@0 1471 { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
michael@0 1472 FALSE, FALSE,
michael@0 1473 0,
michael@0 1474 0,
michael@0 1475 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
michael@0 1476 };
michael@0 1477
/*
 * Shared data object for the IMAP mailbox name converter. It is not static,
 * so it is visible to other translation units. It points at the static
 * description (_IMAPStaticData) and at the implementation table (_IMAPImpl).
 * NOTE(review): the remaining positional values follow the
 * UConverterSharedData declaration, which is not visible here.
 */
michael@0 1478 const UConverterSharedData _IMAPData={
michael@0 1479 sizeof(UConverterSharedData), ~((uint32_t)0),
michael@0 1480 NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
michael@0 1481 0
michael@0 1482 };
michael@0 1483
michael@0 1484 #endif

mercurial