intl/icu/source/common/ucnvscsu.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnvscsu.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2018 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2000-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  ucnvscsu.c
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2000nov18
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   This is an implementation of the Standard Compression Scheme for Unicode
    1.20 +*   as defined in http://www.unicode.org/unicode/reports/tr6/ .
    1.21 +*   Reserved commands and window settings are treated as illegal sequences and
    1.22 +*   will result in callback calls.
    1.23 +*/
    1.24 +
    1.25 +#include "unicode/utypes.h"
    1.26 +
    1.27 +#if !UCONFIG_NO_CONVERSION
    1.28 +
    1.29 +#include "unicode/ucnv.h"
    1.30 +#include "unicode/ucnv_cb.h"
    1.31 +#include "unicode/utf16.h"
    1.32 +#include "ucnv_bld.h"
    1.33 +#include "ucnv_cnv.h"
    1.34 +#include "cmemory.h"
    1.35 +
    1.36 +/* SCSU definitions --------------------------------------------------------- */
    1.37 +
    1.38 +/* SCSU command byte values: S* tags are recognized in single-byte mode, U* tags in Unicode mode */
    1.39 +enum {
    1.40 +    SQ0=0x01, /* Quote from window pair 0 */
    1.41 +    SQ7=0x08, /* Quote from window pair 7 */
    1.42 +    SDX=0x0B, /* Define a window as extended */
    1.43 +    Srs=0x0C, /* reserved */
    1.44 +    SQU=0x0E, /* Quote a single Unicode character */
    1.45 +    SCU=0x0F, /* Change to Unicode mode */
    1.46 +    SC0=0x10, /* Select window 0 */
    1.47 +    SC7=0x17, /* Select window 7 */
    1.48 +    SD0=0x18, /* Define and select window 0 */
    1.49 +    SD7=0x1F, /* Define and select window 7 */
    1.50 + /* Unicode-mode tags; the decoder treats 0xE0..0xF2 as tags, all other bytes start a two-byte code unit */
    1.51 +    UC0=0xE0, /* Select window 0 and return to single-byte mode */
    1.52 +    UC7=0xE7, /* Select window 7 and return to single-byte mode */
    1.53 +    UD0=0xE8, /* Define and select window 0, returning to single-byte mode */
    1.54 +    UD7=0xEF, /* Define and select window 7, returning to single-byte mode */
    1.55 +    UQU=0xF0, /* Quote a single Unicode character */
    1.56 +    UDX=0xF1, /* Define a Window as extended, returning to single-byte mode */
    1.57 +    Urs=0xF2  /* reserved */
    1.58 +};
    1.59 +
    1.60 +enum {
    1.61 +    /*
    1.62 +     * Unicode code points from 3400 to E000 are not addressable by
    1.63 +     * dynamic window, since in these areas no short run alphabets are
    1.64 +     * found. Therefore add gapOffset to all values from gapThreshold.
    1.65 +     */
    1.66 +    gapThreshold=0x68, /* window-define bytes below this map directly to byte<<7 */
    1.67 +    gapOffset=0xAC00, /* added to byte<<7 for bytes in gapThreshold..reservedStart-1 */
    1.68 +
    1.69 +    /* values between reservedStart and fixedThreshold are reserved */
    1.70 +    reservedStart=0xA8,
    1.71 +
    1.72 +    /* use table of predefined fixed offsets for values from fixedThreshold */
    1.73 +    fixedThreshold=0xF9
    1.74 +};
    1.75 +
    1.76 +/* constant offsets for the 8 static windows; indexed by the quote window (SQn-SQ0) for quoted bytes below 0x80 */
    1.77 +static const uint32_t staticOffsets[8]={
    1.78 +    0x0000, /* ASCII for quoted tags */
    1.79 +    0x0080, /* Latin - 1 Supplement (for access to punctuation) */
    1.80 +    0x0100, /* Latin Extended-A */
    1.81 +    0x0300, /* Combining Diacritical Marks */
    1.82 +    0x2000, /* General Punctuation */
    1.83 +    0x2080, /* Currency Symbols */
    1.84 +    0x2100, /* Letterlike Symbols and Number Forms */
    1.85 +    0x3000  /* CJK Symbols and punctuation */
    1.86 +};
    1.87 +
    1.88 +/* initial offsets for the 8 dynamic (sliding) windows; copied into SCSUData by _SCSUReset */
    1.89 +static const uint32_t initialDynamicOffsets[8]={
    1.90 +    0x0080, /* Latin-1 Supplement */
    1.91 +    0x00C0, /* Latin-1 Letters + half of Latin Extended A */
    1.92 +    0x0400, /* Cyrillic */
    1.93 +    0x0600, /* Arabic */
    1.94 +    0x0900, /* Devanagari */
    1.95 +    0x3040, /* Hiragana */
    1.96 +    0x30A0, /* Katakana */
    1.97 +    0xFF00  /* Fullwidth ASCII */
    1.98 +};
    1.99 +
   1.100 +/* Table of fixed predefined Offsets, indexed by (window-define byte - fixedThreshold) */
   1.101 +static const uint32_t fixedOffsets[]={
   1.102 +    /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
   1.103 +    /* 0xFA */ 0x0250, /* IPA extensions */
   1.104 +    /* 0xFB */ 0x0370, /* Greek */
   1.105 +    /* 0xFC */ 0x0530, /* Armenian */
   1.106 +    /* 0xFD */ 0x3040, /* Hiragana */
   1.107 +    /* 0xFE */ 0x30A0, /* Katakana */
   1.108 +    /* 0xFF */ 0xFF60  /* Halfwidth Katakana */
   1.109 +};
   1.110 +
   1.111 +/* state values of the toUnicode state machine (stored in SCSUData.toUState between calls) */
   1.112 +enum {
   1.113 +    readCommand, /* not inside a multi-byte sequence: next byte is a character byte or a tag */
   1.114 +    quotePairOne, /* after SQU/UQU: next byte is the high byte of a quoted 16-bit unit */
   1.115 +    quotePairTwo, /* next byte is the low byte; the unit (byteOne<<8)|b is written */
   1.116 +    quoteOne, /* after SQn: next byte is one character quoted from window n */
   1.117 +    definePairOne, /* after SDX/UDX: next byte holds the window index (top 3 bits) and high offset bits */
   1.118 +    definePairTwo, /* next byte completes the extended (supplementary) window offset */
   1.119 +    defineOne /* after SDn/UDn: next byte selects the new offset for dynamic window n */
   1.120 +};
   1.121 +
   1.122 +typedef struct SCSUData {
   1.123 +    /* dynamic window offsets, initialized to the default values from initialDynamicOffsets */
   1.124 +    uint32_t toUDynamicOffsets[8];
   1.125 +    uint32_t fromUDynamicOffsets[8];
   1.126 +
   1.127 +    /* state machine state - toUnicode */
   1.128 +    UBool toUIsSingleByteMode; /* TRUE in single-byte mode, FALSE in Unicode mode */
   1.129 +    uint8_t toUState; /* one of readCommand..defineOne */
   1.130 +    int8_t toUQuoteWindow, toUDynamicWindow; /* window indexes 0..7 */
   1.131 +    uint8_t toUByteOne; /* first byte of a pending two-byte sequence */
   1.132 +    uint8_t toUPadding[3];
   1.133 +
   1.134 +    /* state machine state - fromUnicode */
   1.135 +    UBool fromUIsSingleByteMode;
   1.136 +    int8_t fromUDynamicWindow;
   1.137 +
   1.138 +    /*
   1.139 +     * windowUse[] keeps track of the use of the dynamic windows:
   1.140 +     * At nextWindowUseIndex there is the least recently used window,
   1.141 +     * and the following windows (in a wrapping manner) are more and more
   1.142 +     * recently used.
   1.143 +     * At nextWindowUseIndex-1 there is the most recently used window.
   1.144 +     */
   1.145 +    uint8_t locale; /* lGeneric or l_ja; selects which initial windowUse[] order _SCSUReset copies */
   1.146 +    int8_t nextWindowUseIndex;
   1.147 +    int8_t windowUse[8];
   1.148 +} SCSUData;
   1.149 +
   1.150 +static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; /* initial windowUse[] contents for the generic locale */
   1.151 +static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; /* initial windowUse[] contents when locale is l_ja */
   1.152 +
   1.153 +enum { /* values stored in SCSUData.locale by _SCSUOpen */
   1.154 +    lGeneric, l_ja
   1.155 +};
   1.156 +
   1.157 +/* SCSU setup functions ----------------------------------------------------- */
   1.158 +/* Resets the toUnicode and/or fromUnicode half of the SCSU state in cnv->extraInfo to its initial values, as selected by choice. */
   1.159 +static void
   1.160 +_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
   1.161 +    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
   1.162 +
   1.163 +    if(choice<=UCNV_RESET_TO_UNICODE) { /* presumably covers UCNV_RESET_BOTH too via enum order - confirm against the UConverterResetChoice declaration */
   1.164 +        /* reset toUnicode */
   1.165 +        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, sizeof(scsu->toUDynamicOffsets));
   1.166 +
   1.167 +        scsu->toUIsSingleByteMode=TRUE;
   1.168 +        scsu->toUState=readCommand;
   1.169 +        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
   1.170 +        scsu->toUByteOne=0;
   1.171 +
   1.172 +        cnv->toULength=0; /* no partial input sequence is pending */
   1.173 +    }
   1.174 +    if(choice!=UCNV_RESET_TO_UNICODE) {
   1.175 +        /* reset fromUnicode */
   1.176 +        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, sizeof(scsu->fromUDynamicOffsets));
   1.177 +
   1.178 +        scsu->fromUIsSingleByteMode=TRUE;
   1.179 +        scsu->fromUDynamicWindow=0;
   1.180 +
   1.181 +        scsu->nextWindowUseIndex=0;
   1.182 +        switch(scsu->locale) { /* the initial window-use order depends on the locale chosen in _SCSUOpen */
   1.183 +        case l_ja:
   1.184 +            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, sizeof(scsu->windowUse));
   1.185 +            break;
   1.186 +        default:
   1.187 +            uprv_memcpy(scsu->windowUse, initialWindowUse, sizeof(scsu->windowUse));
   1.188 +            break;
   1.189 +        }
   1.190 +
   1.191 +        cnv->fromUChar32=0;
   1.192 +    }
   1.193 +}
   1.194 +/* Allocates and initializes the SCSUData of a newly opened converter; a locale of "ja" or starting with "ja_" selects l_ja. */
   1.195 +static void
   1.196 +_SCSUOpen(UConverter *cnv,
   1.197 +          UConverterLoadArgs *pArgs,
   1.198 +          UErrorCode *pErrorCode) { /* set to U_MEMORY_ALLOCATION_ERROR if the allocation fails */
   1.199 +    const char *locale=pArgs->locale;
   1.200 +    if(pArgs->onlyTestIsLoadable) {
   1.201 +        return; /* nothing is allocated or modified in this case */
   1.202 +    }
   1.203 +    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
   1.204 +    if(cnv->extraInfo!=NULL) {
   1.205 +        if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
   1.206 +            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
   1.207 +        } else {
   1.208 +            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
   1.209 +        }
   1.210 +        _SCSUReset(cnv, UCNV_RESET_BOTH); /* locale must be set first: _SCSUReset reads it */
   1.211 +    } else {
   1.212 +        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.213 +    }
   1.214 +
   1.215 +    /* Set the substitution character U+fffd as a Unicode string. (This also runs when the allocation above failed.) */
   1.216 +    cnv->subUChars[0]=0xfffd;
   1.217 +    cnv->subCharLen=-1;
   1.218 +}
   1.219 +/* Releases the converter's SCSUData: frees cnv->extraInfo unless cnv->isExtraLocal is set, then clears the pointer. */
   1.220 +static void
   1.221 +_SCSUClose(UConverter *cnv) {
   1.222 +    if(cnv->extraInfo!=NULL) {
   1.223 +        if(!cnv->isExtraLocal) { /* when isExtraLocal is set the memory is not freed here */
   1.224 +            uprv_free(cnv->extraInfo);
   1.225 +        }
   1.226 +        cnv->extraInfo=NULL;
   1.227 +    }
   1.228 +}
   1.229 +
   1.230 +/* SCSU-to-Unicode conversion functions ------------------------------------- */
   1.231 +/* SCSU decoder with offsets: converts bytes from pArgs->source to UTF-16 in pArgs->target and, when pArgs->offsets is non-NULL, records for every output unit the source index at which its character started. Decoder state is kept in SCSUData between calls. */
   1.232 +static void
   1.233 +_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, /* in/out: converter plus source, target and offsets cursors */
   1.234 +                          UErrorCode *pErrorCode) { /* out: set to U_BUFFER_OVERFLOW_ERROR or U_ILLEGAL_CHAR_FOUND on those conditions */
   1.235 +    UConverter *cnv;
   1.236 +    SCSUData *scsu;
   1.237 +    const uint8_t *source, *sourceLimit;
   1.238 +    UChar *target;
   1.239 +    const UChar *targetLimit;
   1.240 +    int32_t *offsets;
   1.241 +    UBool isSingleByteMode;
   1.242 +    uint8_t state, byteOne;
   1.243 +    int8_t quoteWindow, dynamicWindow;
   1.244 +
   1.245 +    int32_t sourceIndex, nextSourceIndex; /* start index of the current character / index of the next unread byte */
   1.246 +
   1.247 +    uint8_t b;
   1.248 +
   1.249 +    /* set up the local pointers */
   1.250 +    cnv=pArgs->converter;
   1.251 +    scsu=(SCSUData *)cnv->extraInfo;
   1.252 +
   1.253 +    source=(const uint8_t *)pArgs->source;
   1.254 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
   1.255 +    target=pArgs->target;
   1.256 +    targetLimit=pArgs->targetLimit;
   1.257 +    offsets=pArgs->offsets;
   1.258 +
   1.259 +    /* get the state machine state */
   1.260 +    isSingleByteMode=scsu->toUIsSingleByteMode;
   1.261 +    state=scsu->toUState;
   1.262 +    quoteWindow=scsu->toUQuoteWindow;
   1.263 +    dynamicWindow=scsu->toUDynamicWindow;
   1.264 +    byteOne=scsu->toUByteOne;
   1.265 +
   1.266 +    /* sourceIndex=-1 if the current character began in the previous buffer */
   1.267 +    sourceIndex=state==readCommand ? 0 : -1;
   1.268 +    nextSourceIndex=0;
   1.269 +
   1.270 +    /*
   1.271 +     * conversion "loop"
   1.272 +     *
   1.273 +     * For performance, this is not a normal C loop.
   1.274 +     * Instead, there are two code blocks for the two SCSU modes.
   1.275 +     * The function branches to either one, and a change of the mode is done with a goto to
   1.276 +     * the other branch.
   1.277 +     *
   1.278 +     * Each branch has two conventional loops:
   1.279 +     * - a fast-path loop for the most common codes in the mode
   1.280 +     * - a loop for all other codes in the mode
   1.281 +     * When the fast-path runs into a code that it cannot handle, its loop ends and it
   1.282 +     * runs into the following loop to handle the other codes.
   1.283 +     * The end of the input or output buffer is also handled by the slower loop.
   1.284 +     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
   1.285 +     *
   1.286 +     * The callback handling is done by returning with an error code.
   1.287 +     * The conversion framework actually calls the callback function.
   1.288 +     */
   1.289 +    if(isSingleByteMode) {
   1.290 +        /* fast path for single-byte mode */
   1.291 +        if(state==readCommand) {
   1.292 +fastSingle:
   1.293 +            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { /* bytes below 0x20 are left for the slow loop */
   1.294 +                ++source;
   1.295 +                ++nextSourceIndex;
   1.296 +                if(b<=0x7f) {
   1.297 +                    /* write US-ASCII graphic character or DEL */
   1.298 +                    *target++=(UChar)b;
   1.299 +                    if(offsets!=NULL) {
   1.300 +                        *offsets++=sourceIndex;
   1.301 +                    }
   1.302 +                } else {
   1.303 +                    /* write from dynamic window */
   1.304 +                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
   1.305 +                    if(c<=0xffff) {
   1.306 +                        *target++=(UChar)c;
   1.307 +                        if(offsets!=NULL) {
   1.308 +                            *offsets++=sourceIndex;
   1.309 +                        }
   1.310 +                    } else {
   1.311 +                        /* output surrogate pair */
   1.312 +                        *target++=(UChar)(0xd7c0+(c>>10)); /* lead surrogate: 0xd800+((c-0x10000)>>10) */
   1.313 +                        if(target<targetLimit) {
   1.314 +                            *target++=(UChar)(0xdc00|(c&0x3ff));
   1.315 +                            if(offsets!=NULL) {
   1.316 +                                *offsets++=sourceIndex;
   1.317 +                                *offsets++=sourceIndex;
   1.318 +                            }
   1.319 +                        } else {
   1.320 +                            /* target overflow */
   1.321 +                            if(offsets!=NULL) {
   1.322 +                                *offsets++=sourceIndex;
   1.323 +                            }
   1.324 +                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); /* the trail surrogate is stored in the converter's overflow buffer */
   1.325 +                            cnv->UCharErrorBufferLength=1;
   1.326 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.327 +                            goto endloop;
   1.328 +                        }
   1.329 +                    }
   1.330 +                }
   1.331 +                sourceIndex=nextSourceIndex;
   1.332 +            }
   1.333 +        }
   1.334 +
   1.335 +        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
   1.336 +singleByteMode:
   1.337 +        while(source<sourceLimit) {
   1.338 +            if(target>=targetLimit) {
   1.339 +                /* target is full */
   1.340 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.341 +                break;
   1.342 +            }
   1.343 +            b=*source++;
   1.344 +            ++nextSourceIndex;
   1.345 +            switch(state) {
   1.346 +            case readCommand:
   1.347 +                /* redundant conditions are commented out */
   1.348 +                /* here: b<0x20 because otherwise we would be in fastSingle */
   1.349 +                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
   1.350 +                    /* CR/LF/TAB/NUL */
   1.351 +                    *target++=(UChar)b;
   1.352 +                    if(offsets!=NULL) {
   1.353 +                        *offsets++=sourceIndex;
   1.354 +                    }
   1.355 +                    sourceIndex=nextSourceIndex;
   1.356 +                    goto fastSingle;
   1.357 +                } else if(SC0<=b) { /* 0x10..0x1f: SCn selects, SDn defines and selects a dynamic window */
   1.358 +                    if(b<=SC7) {
   1.359 +                        dynamicWindow=(int8_t)(b-SC0);
   1.360 +                        sourceIndex=nextSourceIndex;
   1.361 +                        goto fastSingle;
   1.362 +                    } else /* if(SD0<=b && b<=SD7) */ {
   1.363 +                        dynamicWindow=(int8_t)(b-SD0);
   1.364 +                        state=defineOne;
   1.365 +                    }
   1.366 +                } else if(/* SQ0<=b && */ b<=SQ7) {
   1.367 +                    quoteWindow=(int8_t)(b-SQ0);
   1.368 +                    state=quoteOne;
   1.369 +                } else if(b==SDX) {
   1.370 +                    state=definePairOne;
   1.371 +                } else if(b==SQU) {
   1.372 +                    state=quotePairOne;
   1.373 +                } else if(b==SCU) {
   1.374 +                    sourceIndex=nextSourceIndex;
   1.375 +                    isSingleByteMode=FALSE;
   1.376 +                    goto fastUnicode;
   1.377 +                } else /* Srs */ {
   1.378 +                    /* callback(illegal) */
   1.379 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.380 +                    cnv->toUBytes[0]=b;
   1.381 +                    cnv->toULength=1;
   1.382 +                    goto endloop;
   1.383 +                }
   1.384 +
   1.385 +                /* store the first byte of a multibyte sequence in toUBytes[] */
   1.386 +                cnv->toUBytes[0]=b;
   1.387 +                cnv->toULength=1;
   1.388 +                break;
   1.389 +            case quotePairOne:
   1.390 +                byteOne=b;
   1.391 +                cnv->toUBytes[1]=b;
   1.392 +                cnv->toULength=2;
   1.393 +                state=quotePairTwo;
   1.394 +                break;
   1.395 +            case quotePairTwo:
   1.396 +                *target++=(UChar)((byteOne<<8)|b); /* big-endian 16-bit unit */
   1.397 +                if(offsets!=NULL) {
   1.398 +                    *offsets++=sourceIndex;
   1.399 +                }
   1.400 +                sourceIndex=nextSourceIndex;
   1.401 +                state=readCommand;
   1.402 +                goto fastSingle;
   1.403 +            case quoteOne:
   1.404 +                if(b<0x80) {
   1.405 +                    /* all static offsets are in the BMP */
   1.406 +                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
   1.407 +                    if(offsets!=NULL) {
   1.408 +                        *offsets++=sourceIndex;
   1.409 +                    }
   1.410 +                } else {
   1.411 +                    /* write from dynamic window */
   1.412 +                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
   1.413 +                    if(c<=0xffff) {
   1.414 +                        *target++=(UChar)c;
   1.415 +                        if(offsets!=NULL) {
   1.416 +                            *offsets++=sourceIndex;
   1.417 +                        }
   1.418 +                    } else {
   1.419 +                        /* output surrogate pair */
   1.420 +                        *target++=(UChar)(0xd7c0+(c>>10)); /* lead surrogate: 0xd800+((c-0x10000)>>10) */
   1.421 +                        if(target<targetLimit) {
   1.422 +                            *target++=(UChar)(0xdc00|(c&0x3ff));
   1.423 +                            if(offsets!=NULL) {
   1.424 +                                *offsets++=sourceIndex;
   1.425 +                                *offsets++=sourceIndex;
   1.426 +                            }
   1.427 +                        } else {
   1.428 +                            /* target overflow */
   1.429 +                            if(offsets!=NULL) {
   1.430 +                                *offsets++=sourceIndex;
   1.431 +                            }
   1.432 +                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
   1.433 +                            cnv->UCharErrorBufferLength=1;
   1.434 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.435 +                            goto endloop;
   1.436 +                        }
   1.437 +                    }
   1.438 +                }
   1.439 +                sourceIndex=nextSourceIndex;
   1.440 +                state=readCommand;
   1.441 +                goto fastSingle;
   1.442 +            case definePairOne:
   1.443 +                dynamicWindow=(int8_t)((b>>5)&7); /* top 3 bits: window index */
   1.444 +                byteOne=(uint8_t)(b&0x1f); /* low 5 bits: high part of the offset */
   1.445 +                cnv->toUBytes[1]=b;
   1.446 +                cnv->toULength=2;
   1.447 +                state=definePairTwo;
   1.448 +                break;
   1.449 +            case definePairTwo:
   1.450 +                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); /* extended windows start at 0x10000, in steps of 0x80 */
   1.451 +                sourceIndex=nextSourceIndex;
   1.452 +                state=readCommand;
   1.453 +                goto fastSingle;
   1.454 +            case defineOne:
   1.455 +                if(b==0) {
   1.456 +                    /* callback(illegal): Reserved window offset value 0 */
   1.457 +                    cnv->toUBytes[1]=b;
   1.458 +                    cnv->toULength=2;
   1.459 +                    goto endloop; /* NOTE(review): *pErrorCode is not set on this path, so state stays defineOne and no error is returned; confirm whether U_ILLEGAL_CHAR_FOUND was intended (keep _SCSUToUnicode in sync) */
   1.460 +                } else if(b<gapThreshold) {
   1.461 +                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
   1.462 +                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
   1.463 +                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
   1.464 +                } else if(b>=fixedThreshold) {
   1.465 +                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
   1.466 +                } else {
   1.467 +                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
   1.468 +                    cnv->toUBytes[1]=b;
   1.469 +                    cnv->toULength=2;
   1.470 +                    goto endloop; /* NOTE(review): as above, *pErrorCode is not set on this path */
   1.471 +                }
   1.472 +                sourceIndex=nextSourceIndex;
   1.473 +                state=readCommand;
   1.474 +                goto fastSingle;
   1.475 +            }
   1.476 +        }
   1.477 +    } else {
   1.478 +        /* fast path for Unicode mode */
   1.479 +        if(state==readCommand) {
   1.480 +fastUnicode:
   1.481 +            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { /* b is not a tag (outside UC0..Urs) and both bytes of the pair are available */
   1.482 +                *target++=(UChar)((b<<8)|source[1]);
   1.483 +                if(offsets!=NULL) {
   1.484 +                    *offsets++=sourceIndex;
   1.485 +                }
   1.486 +                nextSourceIndex+=2; /* advance past the pair first ... */
   1.487 +                sourceIndex=nextSourceIndex; /* ... so that the next character's offset is its own start, not the previous pair's */
   1.488 +                source+=2;
   1.489 +            }
   1.490 +        }
   1.491 +
   1.492 +        /* normal state machine for Unicode mode */
   1.493 +/* unicodeByteMode: */
   1.494 +        while(source<sourceLimit) {
   1.495 +            if(target>=targetLimit) {
   1.496 +                /* target is full */
   1.497 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.498 +                break;
   1.499 +            }
   1.500 +            b=*source++;
   1.501 +            ++nextSourceIndex;
   1.502 +            switch(state) {
   1.503 +            case readCommand:
   1.504 +                if((uint8_t)(b-UC0)>(Urs-UC0)) { /* not a tag: first byte of a two-byte unit */
   1.505 +                    byteOne=b;
   1.506 +                    cnv->toUBytes[0]=b;
   1.507 +                    cnv->toULength=1;
   1.508 +                    state=quotePairTwo;
   1.509 +                } else if(/* UC0<=b && */ b<=UC7) {
   1.510 +                    dynamicWindow=(int8_t)(b-UC0);
   1.511 +                    sourceIndex=nextSourceIndex;
   1.512 +                    isSingleByteMode=TRUE;
   1.513 +                    goto fastSingle;
   1.514 +                } else if(/* UD0<=b && */ b<=UD7) {
   1.515 +                    dynamicWindow=(int8_t)(b-UD0);
   1.516 +                    isSingleByteMode=TRUE;
   1.517 +                    cnv->toUBytes[0]=b;
   1.518 +                    cnv->toULength=1;
   1.519 +                    state=defineOne;
   1.520 +                    goto singleByteMode; /* the define byte is handled by the single-byte state machine */
   1.521 +                } else if(b==UDX) {
   1.522 +                    isSingleByteMode=TRUE;
   1.523 +                    cnv->toUBytes[0]=b;
   1.524 +                    cnv->toULength=1;
   1.525 +                    state=definePairOne;
   1.526 +                    goto singleByteMode;
   1.527 +                } else if(b==UQU) {
   1.528 +                    cnv->toUBytes[0]=b;
   1.529 +                    cnv->toULength=1;
   1.530 +                    state=quotePairOne;
   1.531 +                } else /* Urs */ {
   1.532 +                    /* callback(illegal) */
   1.533 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.534 +                    cnv->toUBytes[0]=b;
   1.535 +                    cnv->toULength=1;
   1.536 +                    goto endloop;
   1.537 +                }
   1.538 +                break;
   1.539 +            case quotePairOne:
   1.540 +                byteOne=b;
   1.541 +                cnv->toUBytes[1]=b;
   1.542 +                cnv->toULength=2;
   1.543 +                state=quotePairTwo;
   1.544 +                break;
   1.545 +            case quotePairTwo:
   1.546 +                *target++=(UChar)((byteOne<<8)|b);
   1.547 +                if(offsets!=NULL) {
   1.548 +                    *offsets++=sourceIndex;
   1.549 +                }
   1.550 +                sourceIndex=nextSourceIndex;
   1.551 +                state=readCommand;
   1.552 +                goto fastUnicode;
   1.553 +            }
   1.554 +        }
   1.555 +    }
   1.556 +endloop:
   1.557 +
   1.558 +    /* set the converter state back into UConverter */
   1.559 +    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
   1.560 +        /* reset to deal with the next character */
   1.561 +        state=readCommand;
   1.562 +    } else if(state==readCommand) {
   1.563 +        /* not in a multi-byte sequence, reset toULength */
   1.564 +        cnv->toULength=0;
   1.565 +    }
   1.566 +    scsu->toUIsSingleByteMode=isSingleByteMode;
   1.567 +    scsu->toUState=state;
   1.568 +    scsu->toUQuoteWindow=quoteWindow;
   1.569 +    scsu->toUDynamicWindow=dynamicWindow;
   1.570 +    scsu->toUByteOne=byteOne;
   1.571 +
   1.572 +    /* write back the updated pointers */
   1.573 +    pArgs->source=(const char *)source;
   1.574 +    pArgs->target=target;
   1.575 +    pArgs->offsets=offsets;
   1.576 +    return;
   1.577 +}
   1.578 +
   1.579 +/*
   1.580 + * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
   1.581 + * If a change is made in the original function, then either
   1.582 + * change this function the same way or
   1.583 + * re-copy the original function and remove the variables
   1.584 + * offsets, sourceIndex, and nextSourceIndex.
   1.585 + */
   1.586 +static void
   1.587 +_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
   1.588 +               UErrorCode *pErrorCode) {
   1.589 +    UConverter *cnv;
   1.590 +    SCSUData *scsu;
   1.591 +    const uint8_t *source, *sourceLimit;
   1.592 +    UChar *target;
   1.593 +    const UChar *targetLimit;
   1.594 +    UBool isSingleByteMode;
   1.595 +    uint8_t state, byteOne;
   1.596 +    int8_t quoteWindow, dynamicWindow;
   1.597 +
   1.598 +    uint8_t b;
   1.599 +
   1.600 +    /* set up the local pointers */
   1.601 +    cnv=pArgs->converter;
   1.602 +    scsu=(SCSUData *)cnv->extraInfo;
   1.603 +
   1.604 +    source=(const uint8_t *)pArgs->source;
   1.605 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
   1.606 +    target=pArgs->target;
   1.607 +    targetLimit=pArgs->targetLimit;
   1.608 +
   1.609 +    /* get the state machine state */
   1.610 +    isSingleByteMode=scsu->toUIsSingleByteMode;
   1.611 +    state=scsu->toUState;
   1.612 +    quoteWindow=scsu->toUQuoteWindow;
   1.613 +    dynamicWindow=scsu->toUDynamicWindow;
   1.614 +    byteOne=scsu->toUByteOne;
   1.615 +
   1.616 +    /*
   1.617 +     * conversion "loop"
   1.618 +     *
   1.619 +     * For performance, this is not a normal C loop.
   1.620 +     * Instead, there are two code blocks for the two SCSU modes.
   1.621 +     * The function branches to either one, and a change of the mode is done with a goto to
   1.622 +     * the other branch.
   1.623 +     *
   1.624 +     * Each branch has two conventional loops:
   1.625 +     * - a fast-path loop for the most common codes in the mode
   1.626 +     * - a loop for all other codes in the mode
   1.627 +     * When the fast-path runs into a code that it cannot handle, its loop ends and it
   1.628 +     * runs into the following loop to handle the other codes.
   1.629 +     * The end of the input or output buffer is also handled by the slower loop.
   1.630 +     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
   1.631 +     *
   1.632 +     * The callback handling is done by returning with an error code.
   1.633 +     * The conversion framework actually calls the callback function.
   1.634 +     */
   1.635 +    if(isSingleByteMode) {
   1.636 +        /* fast path for single-byte mode */
   1.637 +        if(state==readCommand) {
   1.638 +fastSingle:
   1.639 +            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
   1.640 +                ++source;
   1.641 +                if(b<=0x7f) {
   1.642 +                    /* write US-ASCII graphic character or DEL */
   1.643 +                    *target++=(UChar)b;
   1.644 +                } else {
   1.645 +                    /* write from dynamic window */
   1.646 +                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
   1.647 +                    if(c<=0xffff) {
   1.648 +                        *target++=(UChar)c;
   1.649 +                    } else {
   1.650 +                        /* output surrogate pair */
   1.651 +                        *target++=(UChar)(0xd7c0+(c>>10));
   1.652 +                        if(target<targetLimit) {
   1.653 +                            *target++=(UChar)(0xdc00|(c&0x3ff));
   1.654 +                        } else {
   1.655 +                            /* target overflow */
   1.656 +                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
   1.657 +                            cnv->UCharErrorBufferLength=1;
   1.658 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.659 +                            goto endloop;
   1.660 +                        }
   1.661 +                    }
   1.662 +                }
   1.663 +            }
   1.664 +        }
   1.665 +
   1.666 +        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
   1.667 +singleByteMode:
   1.668 +        while(source<sourceLimit) {
   1.669 +            if(target>=targetLimit) {
   1.670 +                /* target is full */
   1.671 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.672 +                break;
   1.673 +            }
   1.674 +            b=*source++;
   1.675 +            switch(state) {
   1.676 +            case readCommand:
   1.677 +                /* redundant conditions are commented out */
   1.678 +                /* here: b<0x20 because otherwise we would be in fastSingle */
   1.679 +                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
   1.680 +                    /* CR/LF/TAB/NUL */
   1.681 +                    *target++=(UChar)b;
   1.682 +                    goto fastSingle;
   1.683 +                } else if(SC0<=b) {
   1.684 +                    if(b<=SC7) {
   1.685 +                        dynamicWindow=(int8_t)(b-SC0);
   1.686 +                        goto fastSingle;
   1.687 +                    } else /* if(SD0<=b && b<=SD7) */ {
   1.688 +                        dynamicWindow=(int8_t)(b-SD0);
   1.689 +                        state=defineOne;
   1.690 +                    }
   1.691 +                } else if(/* SQ0<=b && */ b<=SQ7) {
   1.692 +                    quoteWindow=(int8_t)(b-SQ0);
   1.693 +                    state=quoteOne;
   1.694 +                } else if(b==SDX) {
   1.695 +                    state=definePairOne;
   1.696 +                } else if(b==SQU) {
   1.697 +                    state=quotePairOne;
   1.698 +                } else if(b==SCU) {
   1.699 +                    isSingleByteMode=FALSE;
   1.700 +                    goto fastUnicode;
   1.701 +                } else /* Srs */ {
   1.702 +                    /* callback(illegal) */
   1.703 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.704 +                    cnv->toUBytes[0]=b;
   1.705 +                    cnv->toULength=1;
   1.706 +                    goto endloop;
   1.707 +                }
   1.708 +
   1.709 +                /* store the first byte of a multibyte sequence in toUBytes[] */
   1.710 +                cnv->toUBytes[0]=b;
   1.711 +                cnv->toULength=1;
   1.712 +                break;
   1.713 +            case quotePairOne:
   1.714 +                byteOne=b;
   1.715 +                cnv->toUBytes[1]=b;
   1.716 +                cnv->toULength=2;
   1.717 +                state=quotePairTwo;
   1.718 +                break;
   1.719 +            case quotePairTwo:
   1.720 +                *target++=(UChar)((byteOne<<8)|b);
   1.721 +                state=readCommand;
   1.722 +                goto fastSingle;
   1.723 +            case quoteOne:
   1.724 +                if(b<0x80) {
   1.725 +                    /* all static offsets are in the BMP */
   1.726 +                    *target++=(UChar)(staticOffsets[quoteWindow]+b);
   1.727 +                } else {
   1.728 +                    /* write from dynamic window */
   1.729 +                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
   1.730 +                    if(c<=0xffff) {
   1.731 +                        *target++=(UChar)c;
   1.732 +                    } else {
   1.733 +                        /* output surrogate pair */
   1.734 +                        *target++=(UChar)(0xd7c0+(c>>10));
   1.735 +                        if(target<targetLimit) {
   1.736 +                            *target++=(UChar)(0xdc00|(c&0x3ff));
   1.737 +                        } else {
   1.738 +                            /* target overflow */
   1.739 +                            cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
   1.740 +                            cnv->UCharErrorBufferLength=1;
   1.741 +                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.742 +                            goto endloop;
   1.743 +                        }
   1.744 +                    }
   1.745 +                }
   1.746 +                state=readCommand;
   1.747 +                goto fastSingle;
   1.748 +            case definePairOne:
   1.749 +                dynamicWindow=(int8_t)((b>>5)&7);
   1.750 +                byteOne=(uint8_t)(b&0x1f);
   1.751 +                cnv->toUBytes[1]=b;
   1.752 +                cnv->toULength=2;
   1.753 +                state=definePairTwo;
   1.754 +                break;
   1.755 +            case definePairTwo:
   1.756 +                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
   1.757 +                state=readCommand;
   1.758 +                goto fastSingle;
   1.759 +            case defineOne:
   1.760 +                if(b==0) {
   1.761 +                    /* callback(illegal): Reserved window offset value 0 */
   1.762 +                    cnv->toUBytes[1]=b;
   1.763 +                    cnv->toULength=2;
   1.764 +                    goto endloop;
   1.765 +                } else if(b<gapThreshold) {
   1.766 +                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
   1.767 +                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
   1.768 +                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
   1.769 +                } else if(b>=fixedThreshold) {
   1.770 +                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
   1.771 +                } else {
   1.772 +                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
   1.773 +                    cnv->toUBytes[1]=b;
   1.774 +                    cnv->toULength=2;
   1.775 +                    goto endloop;
   1.776 +                }
   1.777 +                state=readCommand;
   1.778 +                goto fastSingle;
   1.779 +            }
   1.780 +        }
   1.781 +    } else {
   1.782 +        /* fast path for Unicode mode */
   1.783 +        if(state==readCommand) {
   1.784 +fastUnicode:
   1.785 +            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
   1.786 +                *target++=(UChar)((b<<8)|source[1]);
   1.787 +                source+=2;
   1.788 +            }
   1.789 +        }
   1.790 +
   1.791 +        /* normal state machine for Unicode mode */
   1.792 +/* unicodeByteMode: */
   1.793 +        while(source<sourceLimit) {
   1.794 +            if(target>=targetLimit) {
   1.795 +                /* target is full */
   1.796 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1.797 +                break;
   1.798 +            }
   1.799 +            b=*source++;
   1.800 +            switch(state) {
   1.801 +            case readCommand:
   1.802 +                if((uint8_t)(b-UC0)>(Urs-UC0)) {
   1.803 +                    byteOne=b;
   1.804 +                    cnv->toUBytes[0]=b;
   1.805 +                    cnv->toULength=1;
   1.806 +                    state=quotePairTwo;
   1.807 +                } else if(/* UC0<=b && */ b<=UC7) {
   1.808 +                    dynamicWindow=(int8_t)(b-UC0);
   1.809 +                    isSingleByteMode=TRUE;
   1.810 +                    goto fastSingle;
   1.811 +                } else if(/* UD0<=b && */ b<=UD7) {
   1.812 +                    dynamicWindow=(int8_t)(b-UD0);
   1.813 +                    isSingleByteMode=TRUE;
   1.814 +                    cnv->toUBytes[0]=b;
   1.815 +                    cnv->toULength=1;
   1.816 +                    state=defineOne;
   1.817 +                    goto singleByteMode;
   1.818 +                } else if(b==UDX) {
   1.819 +                    isSingleByteMode=TRUE;
   1.820 +                    cnv->toUBytes[0]=b;
   1.821 +                    cnv->toULength=1;
   1.822 +                    state=definePairOne;
   1.823 +                    goto singleByteMode;
   1.824 +                } else if(b==UQU) {
   1.825 +                    cnv->toUBytes[0]=b;
   1.826 +                    cnv->toULength=1;
   1.827 +                    state=quotePairOne;
   1.828 +                } else /* Urs */ {
   1.829 +                    /* callback(illegal) */
   1.830 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1.831 +                    cnv->toUBytes[0]=b;
   1.832 +                    cnv->toULength=1;
   1.833 +                    goto endloop;
   1.834 +                }
   1.835 +                break;
   1.836 +            case quotePairOne:
   1.837 +                byteOne=b;
   1.838 +                cnv->toUBytes[1]=b;
   1.839 +                cnv->toULength=2;
   1.840 +                state=quotePairTwo;
   1.841 +                break;
   1.842 +            case quotePairTwo:
   1.843 +                *target++=(UChar)((byteOne<<8)|b);
   1.844 +                state=readCommand;
   1.845 +                goto fastUnicode;
   1.846 +            }
   1.847 +        }
   1.848 +    }
   1.849 +endloop:
   1.850 +
   1.851 +    /* set the converter state back into UConverter */
   1.852 +    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
   1.853 +        /* reset to deal with the next character */
   1.854 +        state=readCommand;
   1.855 +    } else if(state==readCommand) {
   1.856 +        /* not in a multi-byte sequence, reset toULength */
   1.857 +        cnv->toULength=0;
   1.858 +    }
   1.859 +    scsu->toUIsSingleByteMode=isSingleByteMode;
   1.860 +    scsu->toUState=state;
   1.861 +    scsu->toUQuoteWindow=quoteWindow;
   1.862 +    scsu->toUDynamicWindow=dynamicWindow;
   1.863 +    scsu->toUByteOne=byteOne;
   1.864 +
   1.865 +    /* write back the updated pointers */
   1.866 +    pArgs->source=(const char *)source;
   1.867 +    pArgs->target=target;
   1.868 +    return;
   1.869 +}
   1.870 +
   1.871 +/* SCSU-from-Unicode conversion functions ----------------------------------- */
   1.872 +
   1.873 +/*
   1.874 + * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
   1.875 + * reasonable results. The lookahead is minimal.
   1.876 + * Many cases are simple:
   1.877 + * A character fits directly into the current mode, a dynamic or static window,
   1.878 + * or is not compressible. These cases are tested first.
   1.879 + * Real compression heuristics are applied to the rest, in code branches for
   1.880 + * single/Unicode mode and BMP/supplementary code points.
   1.881 + * The heuristics used here are extremely simple.
   1.882 + */
   1.883 +
   /*
    * Look up the window that contains the code point c.
    * offsets[] holds the start offsets of eight windows; the result is the index
    * of the first window for which the unsigned difference c-offsets[index] is
    * at most 0x7f, or -1 if no window qualifies.
    */
   static int8_t
   getWindow(const uint32_t offsets[8], uint32_t c) {
       int8_t window=0;

       while(window<8) {
           /* the unsigned subtraction wraps around for c<offsets[window], so one comparison covers both bounds */
           uint32_t delta=c-offsets[window];
           if(delta<0x80) {
               return window;
           }
           ++window;
       }
       return -1;
   }
   1.895 +
   1.896 +/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
   1.897 +static UBool
   1.898 +isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
   1.899 +    return (UBool)(c<=offset+0x7f &&
   1.900 +          (c>=offset || (c<=0x7f &&
   1.901 +                        (c>=0x20 || (1UL<<c)&0x2601))));
   1.902 +                                /* binary 0010 0110 0000 0001,
   1.903 +                                   check for b==0xd || b==0xa || b==9 || b==0 */
   1.904 +}
   1.905 +
   1.906 +/*
   1.907 + * getNextDynamicWindow returns the next dynamic window to be redefined
   1.908 + */
   1.909 +static int8_t
   1.910 +getNextDynamicWindow(SCSUData *scsu) {
   1.911 +    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
   1.912 +    if(++scsu->nextWindowUseIndex==8) {
   1.913 +        scsu->nextWindowUseIndex=0;
   1.914 +    }
   1.915 +    return window;
   1.916 +}
   1.917 +
   1.918 +/*
   1.919 + * useDynamicWindow() adjusts
   1.920 + * windowUse[] and nextWindowUseIndex for the algorithm to choose
   1.921 + * the next dynamic window to be defined;
   1.922 + * a subclass may override it and provide its own algorithm.
   1.923 + */
   1.924 +static void
   1.925 +useDynamicWindow(SCSUData *scsu, int8_t window) {
   1.926 +    /*
   1.927 +     * move the existing window, which just became the most recently used one,
   1.928 +     * up in windowUse[] to nextWindowUseIndex-1
   1.929 +     */
   1.930 +
   1.931 +    /* first, find the index of the window - backwards to favor the more recently used windows */
   1.932 +    int i, j;
   1.933 +
   1.934 +    i=scsu->nextWindowUseIndex;
   1.935 +    do {
   1.936 +        if(--i<0) {
   1.937 +            i=7;
   1.938 +        }
   1.939 +    } while(scsu->windowUse[i]!=window);
   1.940 +
   1.941 +    /* now copy each windowUse[i+1] to [i] */
   1.942 +    j=i+1;
   1.943 +    if(j==8) {
   1.944 +        j=0;
   1.945 +    }
   1.946 +    while(j!=scsu->nextWindowUseIndex) {
   1.947 +        scsu->windowUse[i]=scsu->windowUse[j];
   1.948 +        i=j;
   1.949 +        if(++j==8) { j=0; }
   1.950 +    }
   1.951 +
   1.952 +    /* finally, set the window into the most recently used index */
   1.953 +    scsu->windowUse[i]=window;
   1.954 +}
   1.955 +
   1.956 +/*
   1.957 + * calculate the offset and the code for a dynamic window that contains the character
   1.958 + * takes fixed offsets into account
   1.959 + * the offset of the window is stored in the offset variable,
   1.960 + * the code is returned
   1.961 + *
   1.962 + * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
   1.963 + */
   1.964 +static int
   1.965 +getDynamicOffset(uint32_t c, uint32_t *pOffset) {
   1.966 +    int i;
   1.967 +
   1.968 +    for(i=0; i<7; ++i) {
   1.969 +        if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
   1.970 +            *pOffset=fixedOffsets[i];
   1.971 +            return 0xf9+i;
   1.972 +        }
   1.973 +    }
   1.974 +
   1.975 +    if(c<0x80) {
   1.976 +        /* No dynamic window for US-ASCII. */
   1.977 +        return -1;
   1.978 +    } else if(c<0x3400 ||
   1.979 +              (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
   1.980 +              (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
   1.981 +    ) {
   1.982 +        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
   1.983 +        *pOffset=c&0x7fffff80;
   1.984 +        return (int)(c>>7);
   1.985 +    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
   1.986 +        /* For these characters we need to take the gapOffset into account. */
   1.987 +        *pOffset=c&0x7fffff80;
   1.988 +        return (int)((c-gapOffset)>>7);
   1.989 +    } else {
   1.990 +        return -1;
   1.991 +    }
   1.992 +}
   1.993 +
   1.994 +/*
   1.995 + * Idea for compression:
   1.996 + *  - save SCSUData and other state before really starting work
   1.997 + *  - at endloop, see if compression could be better with just unicode mode
   1.998 + *  - don't do this if a callback has been called
   1.999 + *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
  1.1000 + *  - different buffer handling!
  1.1001 + *
  1.1002 + * Drawback or need for corrective handling:
  1.1003 + * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
  1.1004 + * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
  1.1005 + * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
  1.1006 + *
  1.1007 + * How to achieve both?
  1.1008 + *  - Only replace the result after an SDX or SCU?
  1.1009 + */
  1.1010 +
  1.1011 +static void
  1.1012 +_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  1.1013 +                            UErrorCode *pErrorCode) {
  1.1014 +    UConverter *cnv;
  1.1015 +    SCSUData *scsu;
  1.1016 +    const UChar *source, *sourceLimit;
  1.1017 +    uint8_t *target;
  1.1018 +    int32_t targetCapacity;
  1.1019 +    int32_t *offsets;
  1.1020 +
  1.1021 +    UBool isSingleByteMode;
  1.1022 +    uint8_t dynamicWindow;
  1.1023 +    uint32_t currentOffset;
  1.1024 +
  1.1025 +    uint32_t c, delta;
  1.1026 +
  1.1027 +    int32_t sourceIndex, nextSourceIndex;
  1.1028 +
  1.1029 +    int32_t length;
  1.1030 +
  1.1031 +    /* variables for compression heuristics */
  1.1032 +    uint32_t offset;
  1.1033 +    UChar lead, trail;
  1.1034 +    int code;
  1.1035 +    int8_t window;
  1.1036 +
  1.1037 +    /* set up the local pointers */
  1.1038 +    cnv=pArgs->converter;
  1.1039 +    scsu=(SCSUData *)cnv->extraInfo;
  1.1040 +
  1.1041 +    /* set up the local pointers */
  1.1042 +    source=pArgs->source;
  1.1043 +    sourceLimit=pArgs->sourceLimit;
  1.1044 +    target=(uint8_t *)pArgs->target;
  1.1045 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.1046 +    offsets=pArgs->offsets;
  1.1047 +
  1.1048 +    /* get the state machine state */
  1.1049 +    isSingleByteMode=scsu->fromUIsSingleByteMode;
  1.1050 +    dynamicWindow=scsu->fromUDynamicWindow;
  1.1051 +    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1052 +
  1.1053 +    c=cnv->fromUChar32;
  1.1054 +
  1.1055 +    /* sourceIndex=-1 if the current character began in the previous buffer */
  1.1056 +    sourceIndex= c==0 ? 0 : -1;
  1.1057 +    nextSourceIndex=0;
  1.1058 +
  1.1059 +    /* similar conversion "loop" as in toUnicode */
  1.1060 +loop:
  1.1061 +    if(isSingleByteMode) {
  1.1062 +        if(c!=0 && targetCapacity>0) {
  1.1063 +            goto getTrailSingle;
  1.1064 +        }
  1.1065 +
  1.1066 +        /* state machine for single-byte mode */
  1.1067 +/* singleByteMode: */
  1.1068 +        while(source<sourceLimit) {
  1.1069 +            if(targetCapacity<=0) {
  1.1070 +                /* target is full */
  1.1071 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1072 +                break;
  1.1073 +            }
  1.1074 +            c=*source++;
  1.1075 +            ++nextSourceIndex;
  1.1076 +
  1.1077 +            if((c-0x20)<=0x5f) {
  1.1078 +                /* pass US-ASCII graphic character through */
  1.1079 +                *target++=(uint8_t)c;
  1.1080 +                if(offsets!=NULL) {
  1.1081 +                    *offsets++=sourceIndex;
  1.1082 +                }
  1.1083 +                --targetCapacity;
  1.1084 +            } else if(c<0x20) {
  1.1085 +                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
  1.1086 +                    /* CR/LF/TAB/NUL */
  1.1087 +                    *target++=(uint8_t)c;
  1.1088 +                    if(offsets!=NULL) {
  1.1089 +                        *offsets++=sourceIndex;
  1.1090 +                    }
  1.1091 +                    --targetCapacity;
  1.1092 +                } else {
  1.1093 +                    /* quote C0 control character */
  1.1094 +                    c|=SQ0<<8;
  1.1095 +                    length=2;
  1.1096 +                    goto outputBytes;
  1.1097 +                }
  1.1098 +            } else if((delta=c-currentOffset)<=0x7f) {
  1.1099 +                /* use the current dynamic window */
  1.1100 +                *target++=(uint8_t)(delta|0x80);
  1.1101 +                if(offsets!=NULL) {
  1.1102 +                    *offsets++=sourceIndex;
  1.1103 +                }
  1.1104 +                --targetCapacity;
  1.1105 +            } else if(U16_IS_SURROGATE(c)) {
  1.1106 +                if(U16_IS_SURROGATE_LEAD(c)) {
  1.1107 +getTrailSingle:
  1.1108 +                    lead=(UChar)c;
  1.1109 +                    if(source<sourceLimit) {
  1.1110 +                        /* test the following code unit */
  1.1111 +                        trail=*source;
  1.1112 +                        if(U16_IS_TRAIL(trail)) {
  1.1113 +                            ++source;
  1.1114 +                            ++nextSourceIndex;
  1.1115 +                            c=U16_GET_SUPPLEMENTARY(c, trail);
  1.1116 +                            /* convert this surrogate code point */
  1.1117 +                            /* exit this condition tree */
  1.1118 +                        } else {
  1.1119 +                            /* this is an unmatched lead code unit (1st surrogate) */
  1.1120 +                            /* callback(illegal) */
  1.1121 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1122 +                            goto endloop;
  1.1123 +                        }
  1.1124 +                    } else {
  1.1125 +                        /* no more input */
  1.1126 +                        break;
  1.1127 +                    }
  1.1128 +                } else {
  1.1129 +                    /* this is an unmatched trail code unit (2nd surrogate) */
  1.1130 +                    /* callback(illegal) */
  1.1131 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1132 +                    goto endloop;
  1.1133 +                }
  1.1134 +
  1.1135 +                /* compress supplementary character U+10000..U+10ffff */
  1.1136 +                if((delta=c-currentOffset)<=0x7f) {
  1.1137 +                    /* use the current dynamic window */
  1.1138 +                    *target++=(uint8_t)(delta|0x80);
  1.1139 +                    if(offsets!=NULL) {
  1.1140 +                        *offsets++=sourceIndex;
  1.1141 +                    }
  1.1142 +                    --targetCapacity;
  1.1143 +                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
  1.1144 +                    /* there is a dynamic window that contains this character, change to it */
  1.1145 +                    dynamicWindow=window;
  1.1146 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1147 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1148 +                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1149 +                    length=2;
  1.1150 +                    goto outputBytes;
  1.1151 +                } else if((code=getDynamicOffset(c, &offset))>=0) {
  1.1152 +                    /* might check if there are more characters in this window to come */
  1.1153 +                    /* define an extended window with this character */
  1.1154 +                    code-=0x200;
  1.1155 +                    dynamicWindow=getNextDynamicWindow(scsu);
  1.1156 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1157 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1158 +                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1159 +                    length=4;
  1.1160 +                    goto outputBytes;
  1.1161 +                } else {
  1.1162 +                    /* change to Unicode mode and output this (lead, trail) pair */
  1.1163 +                    isSingleByteMode=FALSE;
  1.1164 +                    *target++=(uint8_t)SCU;
  1.1165 +                    if(offsets!=NULL) {
  1.1166 +                        *offsets++=sourceIndex;
  1.1167 +                    }
  1.1168 +                    --targetCapacity;
  1.1169 +                    c=((uint32_t)lead<<16)|trail;
  1.1170 +                    length=4;
  1.1171 +                    goto outputBytes;
  1.1172 +                }
  1.1173 +            } else if(c<0xa0) {
  1.1174 +                /* quote C1 control character */
  1.1175 +                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
  1.1176 +                length=2;
  1.1177 +                goto outputBytes;
  1.1178 +            } else if(c==0xfeff || c>=0xfff0) {
  1.1179 +                /* quote signature character=byte order mark and specials */
  1.1180 +                c|=SQU<<16;
  1.1181 +                length=3;
  1.1182 +                goto outputBytes;
  1.1183 +            } else {
  1.1184 +                /* compress all other BMP characters */
  1.1185 +                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
  1.1186 +                    /* there is a window defined that contains this character - switch to it or quote from it? */
  1.1187 +                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
  1.1188 +                        /* change to dynamic window */
  1.1189 +                        dynamicWindow=window;
  1.1190 +                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1191 +                        useDynamicWindow(scsu, dynamicWindow);
  1.1192 +                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1193 +                        length=2;
  1.1194 +                        goto outputBytes;
  1.1195 +                    } else {
  1.1196 +                        /* quote from dynamic window */
  1.1197 +                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
  1.1198 +                        length=2;
  1.1199 +                        goto outputBytes;
  1.1200 +                    }
  1.1201 +                } else if((window=getWindow(staticOffsets, c))>=0) {
  1.1202 +                    /* quote from static window */
  1.1203 +                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
  1.1204 +                    length=2;
  1.1205 +                    goto outputBytes;
  1.1206 +                } else if((code=getDynamicOffset(c, &offset))>=0) {
  1.1207 +                    /* define a dynamic window with this character */
  1.1208 +                    dynamicWindow=getNextDynamicWindow(scsu);
  1.1209 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1210 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1211 +                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1212 +                    length=3;
  1.1213 +                    goto outputBytes;
  1.1214 +                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
  1.1215 +                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
  1.1216 +                ) {
  1.1217 +                    /*
  1.1218 +                     * this character is not compressible (a BMP ideograph or similar);
  1.1219 +                     * switch to Unicode mode if this is the last character in the block
  1.1220 +                     * or there is at least one more ideograph following immediately
  1.1221 +                     */
  1.1222 +                    isSingleByteMode=FALSE;
  1.1223 +                    c|=SCU<<16;
  1.1224 +                    length=3;
  1.1225 +                    goto outputBytes;
  1.1226 +                } else {
  1.1227 +                    /* quote Unicode */
  1.1228 +                    c|=SQU<<16;
  1.1229 +                    length=3;
  1.1230 +                    goto outputBytes;
  1.1231 +                }
  1.1232 +            }
  1.1233 +
  1.1234 +            /* normal end of conversion: prepare for a new character */
  1.1235 +            c=0;
  1.1236 +            sourceIndex=nextSourceIndex;
  1.1237 +        }
  1.1238 +    } else {
  1.1239 +        if(c!=0 && targetCapacity>0) {
  1.1240 +            goto getTrailUnicode;
  1.1241 +        }
  1.1242 +
  1.1243 +        /* state machine for Unicode mode */
  1.1244 +/* unicodeByteMode: */
  1.1245 +        while(source<sourceLimit) {
  1.1246 +            if(targetCapacity<=0) {
  1.1247 +                /* target is full */
  1.1248 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1249 +                break;
  1.1250 +            }
  1.1251 +            c=*source++;
  1.1252 +            ++nextSourceIndex;
  1.1253 +
  1.1254 +            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
  1.1255 +                /* not compressible, write character directly */
  1.1256 +                if(targetCapacity>=2) {
  1.1257 +                    *target++=(uint8_t)(c>>8);
  1.1258 +                    *target++=(uint8_t)c;
  1.1259 +                    if(offsets!=NULL) {
  1.1260 +                        *offsets++=sourceIndex;
  1.1261 +                        *offsets++=sourceIndex;
  1.1262 +                    }
  1.1263 +                    targetCapacity-=2;
  1.1264 +                } else {
  1.1265 +                    length=2;
  1.1266 +                    goto outputBytes;
  1.1267 +                }
  1.1268 +            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
  1.1269 +                /* compress BMP character if the following one is not an uncompressible ideograph */
  1.1270 +                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
  1.1271 +                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
  1.1272 +                        /* ASCII digit or letter */
  1.1273 +                        isSingleByteMode=TRUE;
  1.1274 +                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
  1.1275 +                        length=2;
  1.1276 +                        goto outputBytes;
  1.1277 +                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
  1.1278 +                        /* there is a dynamic window that contains this character, change to it */
  1.1279 +                        isSingleByteMode=TRUE;
  1.1280 +                        dynamicWindow=window;
  1.1281 +                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1282 +                        useDynamicWindow(scsu, dynamicWindow);
  1.1283 +                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1284 +                        length=2;
  1.1285 +                        goto outputBytes;
  1.1286 +                    } else if((code=getDynamicOffset(c, &offset))>=0) {
  1.1287 +                        /* define a dynamic window with this character */
  1.1288 +                        isSingleByteMode=TRUE;
  1.1289 +                        dynamicWindow=getNextDynamicWindow(scsu);
  1.1290 +                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1291 +                        useDynamicWindow(scsu, dynamicWindow);
  1.1292 +                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1293 +                        length=3;
  1.1294 +                        goto outputBytes;
  1.1295 +                    }
  1.1296 +                }
  1.1297 +
  1.1298 +                /* don't know how to compress this character, just write it directly */
  1.1299 +                length=2;
  1.1300 +                goto outputBytes;
  1.1301 +            } else if(c<0xe000) {
  1.1302 +                /* c is a surrogate */
  1.1303 +                if(U16_IS_SURROGATE_LEAD(c)) {
  1.1304 +getTrailUnicode:
  1.1305 +                    lead=(UChar)c;
  1.1306 +                    if(source<sourceLimit) {
  1.1307 +                        /* test the following code unit */
  1.1308 +                        trail=*source;
  1.1309 +                        if(U16_IS_TRAIL(trail)) {
  1.1310 +                            ++source;
  1.1311 +                            ++nextSourceIndex;
  1.1312 +                            c=U16_GET_SUPPLEMENTARY(c, trail);
  1.1313 +                            /* convert this surrogate code point */
  1.1314 +                            /* exit this condition tree */
  1.1315 +                        } else {
  1.1316 +                            /* this is an unmatched lead code unit (1st surrogate) */
  1.1317 +                            /* callback(illegal) */
  1.1318 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1319 +                            goto endloop;
  1.1320 +                        }
  1.1321 +                    } else {
  1.1322 +                        /* no more input */
  1.1323 +                        break;
  1.1324 +                    }
  1.1325 +                } else {
  1.1326 +                    /* this is an unmatched trail code unit (2nd surrogate) */
  1.1327 +                    /* callback(illegal) */
  1.1328 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1329 +                    goto endloop;
  1.1330 +                }
  1.1331 +
  1.1332 +                /* compress supplementary character */
  1.1333 +                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
  1.1334 +                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
  1.1335 +                ) {
  1.1336 +                    /*
  1.1337 +                     * there is a dynamic window that contains this character and
  1.1338 +                     * the following character is not uncompressible,
  1.1339 +                     * change to the window
  1.1340 +                     */
  1.1341 +                    isSingleByteMode=TRUE;
  1.1342 +                    dynamicWindow=window;
  1.1343 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1344 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1345 +                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1346 +                    length=2;
  1.1347 +                    goto outputBytes;
  1.1348 +                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
  1.1349 +                          (code=getDynamicOffset(c, &offset))>=0
  1.1350 +                ) {
  1.1351 +                    /* two supplementary characters in (probably) the same window - define an extended one */
  1.1352 +                    isSingleByteMode=TRUE;
  1.1353 +                    code-=0x200;
  1.1354 +                    dynamicWindow=getNextDynamicWindow(scsu);
  1.1355 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1356 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1357 +                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1358 +                    length=4;
  1.1359 +                    goto outputBytes;
  1.1360 +                } else {
  1.1361 +                    /* don't know how to compress this character, just write it directly */
  1.1362 +                    c=((uint32_t)lead<<16)|trail;
  1.1363 +                    length=4;
  1.1364 +                    goto outputBytes;
  1.1365 +                }
  1.1366 +            } else /* 0xe000<=c<0xf300 */ {
  1.1367 +                /* quote to avoid SCSU tags */
  1.1368 +                c|=UQU<<16;
  1.1369 +                length=3;
  1.1370 +                goto outputBytes;
  1.1371 +            }
  1.1372 +
  1.1373 +            /* normal end of conversion: prepare for a new character */
  1.1374 +            c=0;
  1.1375 +            sourceIndex=nextSourceIndex;
  1.1376 +        }
  1.1377 +    }
  1.1378 +endloop:
  1.1379 +
  1.1380 +    /* set the converter state back into UConverter */
  1.1381 +    scsu->fromUIsSingleByteMode=isSingleByteMode;
  1.1382 +    scsu->fromUDynamicWindow=dynamicWindow;
  1.1383 +
  1.1384 +    cnv->fromUChar32=c;
  1.1385 +
  1.1386 +    /* write back the updated pointers */
  1.1387 +    pArgs->source=source;
  1.1388 +    pArgs->target=(char *)target;
  1.1389 +    pArgs->offsets=offsets;
  1.1390 +    return;
  1.1391 +
  1.1392 +outputBytes:
  1.1393 +    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
  1.1394 +    /* from the first if in the loop we know that targetCapacity>0 */
  1.1395 +    if(length<=targetCapacity) {
  1.1396 +        if(offsets==NULL) {
  1.1397 +            switch(length) {
  1.1398 +                /* each branch falls through to the next one */
  1.1399 +            case 4:
  1.1400 +                *target++=(uint8_t)(c>>24);
  1.1401 +            case 3: /*fall through*/
  1.1402 +                *target++=(uint8_t)(c>>16);
  1.1403 +            case 2: /*fall through*/
  1.1404 +                *target++=(uint8_t)(c>>8);
  1.1405 +            case 1: /*fall through*/
  1.1406 +                *target++=(uint8_t)c;
  1.1407 +            default:
  1.1408 +                /* will never occur */
  1.1409 +                break;
  1.1410 +            }
  1.1411 +        } else {
  1.1412 +            switch(length) {
  1.1413 +                /* each branch falls through to the next one */
  1.1414 +            case 4:
  1.1415 +                *target++=(uint8_t)(c>>24);
  1.1416 +                *offsets++=sourceIndex;
  1.1417 +            case 3: /*fall through*/
  1.1418 +                *target++=(uint8_t)(c>>16);
  1.1419 +                *offsets++=sourceIndex;
  1.1420 +            case 2: /*fall through*/
  1.1421 +                *target++=(uint8_t)(c>>8);
  1.1422 +                *offsets++=sourceIndex;
  1.1423 +            case 1: /*fall through*/
  1.1424 +                *target++=(uint8_t)c;
  1.1425 +                *offsets++=sourceIndex;
  1.1426 +            default:
  1.1427 +                /* will never occur */
  1.1428 +                break;
  1.1429 +            }
  1.1430 +        }
  1.1431 +        targetCapacity-=length;
  1.1432 +
  1.1433 +        /* normal end of conversion: prepare for a new character */
  1.1434 +        c=0;
  1.1435 +        sourceIndex=nextSourceIndex;
  1.1436 +        goto loop;
  1.1437 +    } else {
  1.1438 +        uint8_t *p;
  1.1439 +
  1.1440 +        /*
  1.1441 +         * We actually do this backwards here:
  1.1442 +         * In order to save an intermediate variable, we output
  1.1443 +         * first to the overflow buffer what does not fit into the
  1.1444 +         * regular target.
  1.1445 +         */
  1.1446 +        /* we know that 0<=targetCapacity<length<=4 */
  1.1447 +        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
  1.1448 +        length-=targetCapacity;
  1.1449 +        p=(uint8_t *)cnv->charErrorBuffer;
  1.1450 +        switch(length) {
  1.1451 +            /* each branch falls through to the next one */
  1.1452 +        case 4:
  1.1453 +            *p++=(uint8_t)(c>>24);
  1.1454 +        case 3: /*fall through*/
  1.1455 +            *p++=(uint8_t)(c>>16);
  1.1456 +        case 2: /*fall through*/
  1.1457 +            *p++=(uint8_t)(c>>8);
  1.1458 +        case 1: /*fall through*/
  1.1459 +            *p=(uint8_t)c;
  1.1460 +        default:
  1.1461 +            /* will never occur */
  1.1462 +            break;
  1.1463 +        }
  1.1464 +        cnv->charErrorBufferLength=(int8_t)length;
  1.1465 +
  1.1466 +        /* now output what fits into the regular target */
  1.1467 +        c>>=8*length; /* length was reduced by targetCapacity */
  1.1468 +        switch(targetCapacity) {
  1.1469 +            /* each branch falls through to the next one */
  1.1470 +        case 3:
  1.1471 +            *target++=(uint8_t)(c>>16);
  1.1472 +            if(offsets!=NULL) {
  1.1473 +                *offsets++=sourceIndex;
  1.1474 +            }
  1.1475 +        case 2: /*fall through*/
  1.1476 +            *target++=(uint8_t)(c>>8);
  1.1477 +            if(offsets!=NULL) {
  1.1478 +                *offsets++=sourceIndex;
  1.1479 +            }
  1.1480 +        case 1: /*fall through*/
  1.1481 +            *target++=(uint8_t)c;
  1.1482 +            if(offsets!=NULL) {
  1.1483 +                *offsets++=sourceIndex;
  1.1484 +            }
  1.1485 +        default:
  1.1486 +            break;
  1.1487 +        }
  1.1488 +
  1.1489 +        /* target overflow */
  1.1490 +        targetCapacity=0;
  1.1491 +        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1492 +        c=0;
  1.1493 +        goto endloop;
  1.1494 +    }
  1.1495 +}
  1.1496 +
  1.1497 +/*
  1.1498 + * Convert UTF-16 text from pArgs->source into SCSU bytes at pArgs->target.
  1.1499 + * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
  1.1500 + * If a change is made in the original function, then either
  1.1501 + * change this function the same way or re-copy the original function
  1.1502 + * and remove the variables offsets, sourceIndex, and nextSourceIndex.
  1.1503 + */
  1.1504 +static void
  1.1505 +_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
  1.1506 +                 UErrorCode *pErrorCode) {
  1.1507 +    UConverter *cnv;
  1.1508 +    SCSUData *scsu; /* SCSU state, taken from cnv->extraInfo */
  1.1509 +    const UChar *source, *sourceLimit;
  1.1510 +    uint8_t *target;
  1.1511 +    int32_t targetCapacity; /* bytes still free in the target buffer */
  1.1512 +
  1.1513 +    UBool isSingleByteMode; /* TRUE: single-byte mode, FALSE: Unicode mode */
  1.1514 +    uint8_t dynamicWindow; /* index of the currently selected dynamic window */
  1.1515 +    uint32_t currentOffset; /* start offset of that window */
  1.1516 +
  1.1517 +    uint32_t c, delta; /* c: input code unit/point, then the packed output bytes; delta: c-currentOffset */
  1.1518 +
  1.1519 +    int32_t length; /* number of low-order bytes of c that outputBytes writes */
  1.1520 +
  1.1521 +    /* variables for compression heuristics */
  1.1522 +    uint32_t offset;
  1.1523 +    UChar lead, trail;
  1.1524 +    int code;
  1.1525 +    int8_t window;
  1.1526 +
  1.1527 +    /* set up the local pointers */
  1.1528 +    cnv=pArgs->converter;
  1.1529 +    scsu=(SCSUData *)cnv->extraInfo;
  1.1530 +
  1.1531 +    /* set up the local source/target pointers and the free target capacity */
  1.1532 +    source=pArgs->source;
  1.1533 +    sourceLimit=pArgs->sourceLimit;
  1.1534 +    target=(uint8_t *)pArgs->target;
  1.1535 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.1536 +
  1.1537 +    /* get the state machine state */
  1.1538 +    isSingleByteMode=scsu->fromUIsSingleByteMode;
  1.1539 +    dynamicWindow=scsu->fromUDynamicWindow;
  1.1540 +    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1541 +
  1.1542 +    c=cnv->fromUChar32; /* value stored at endloop by an earlier call; nonzero is treated as a pending lead surrogate */
  1.1543 +
  1.1544 +    /* similar conversion "loop" as in toUnicode */
  1.1545 +loop:
  1.1546 +    if(isSingleByteMode) {
  1.1547 +        if(c!=0 && targetCapacity>0) {
  1.1548 +            goto getTrailSingle; /* continue with c as the lead surrogate */
  1.1549 +        }
  1.1550 +
  1.1551 +        /* state machine for single-byte mode */
  1.1552 +/* singleByteMode: */
  1.1553 +        while(source<sourceLimit) {
  1.1554 +            if(targetCapacity<=0) {
  1.1555 +                /* target is full */
  1.1556 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1557 +                break;
  1.1558 +            }
  1.1559 +            c=*source++;
  1.1560 +
  1.1561 +            if((c-0x20)<=0x5f) { /* 0x20<=c<=0x7f; c is unsigned, so c<0x20 wraps around and fails */
  1.1562 +                /* pass US-ASCII graphic character through */
  1.1563 +                *target++=(uint8_t)c;
  1.1564 +                --targetCapacity;
  1.1565 +            } else if(c<0x20) {
  1.1566 +                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
  1.1567 +                    /* CR/LF/TAB/NUL */
  1.1568 +                    *target++=(uint8_t)c;
  1.1569 +                    --targetCapacity;
  1.1570 +                } else {
  1.1571 +                    /* quote C0 control character */
  1.1572 +                    c|=SQ0<<8;
  1.1573 +                    length=2;
  1.1574 +                    goto outputBytes;
  1.1575 +                }
  1.1576 +            } else if((delta=c-currentOffset)<=0x7f) { /* currentOffset<=c<=currentOffset+0x7f */
  1.1577 +                /* use the current dynamic window */
  1.1578 +                *target++=(uint8_t)(delta|0x80);
  1.1579 +                --targetCapacity;
  1.1580 +            } else if(U16_IS_SURROGATE(c)) {
  1.1581 +                if(U16_IS_SURROGATE_LEAD(c)) {
  1.1582 +getTrailSingle:
  1.1583 +                    lead=(UChar)c;
  1.1584 +                    if(source<sourceLimit) {
  1.1585 +                        /* test the following code unit */
  1.1586 +                        trail=*source;
  1.1587 +                        if(U16_IS_TRAIL(trail)) {
  1.1588 +                            ++source;
  1.1589 +                            c=U16_GET_SUPPLEMENTARY(c, trail);
  1.1590 +                            /* convert this surrogate code point */
  1.1591 +                            /* exit this condition tree */
  1.1592 +                        } else {
  1.1593 +                            /* this is an unmatched lead code unit (1st surrogate) */
  1.1594 +                            /* callback(illegal) */
  1.1595 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1596 +                            goto endloop;
  1.1597 +                        }
  1.1598 +                    } else {
  1.1599 +                        /* no more input */
  1.1600 +                        break;
  1.1601 +                    }
  1.1602 +                } else {
  1.1603 +                    /* this is an unmatched trail code unit (2nd surrogate) */
  1.1604 +                    /* callback(illegal) */
  1.1605 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1606 +                    goto endloop;
  1.1607 +                }
  1.1608 +
  1.1609 +                /* compress supplementary character U+10000..U+10ffff */
  1.1610 +                if((delta=c-currentOffset)<=0x7f) {
  1.1611 +                    /* use the current dynamic window */
  1.1612 +                    *target++=(uint8_t)(delta|0x80);
  1.1613 +                    --targetCapacity;
  1.1614 +                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
  1.1615 +                    /* there is a dynamic window that contains this character, change to it */
  1.1616 +                    dynamicWindow=window;
  1.1617 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1618 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1619 +                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1620 +                    length=2;
  1.1621 +                    goto outputBytes;
  1.1622 +                } else if((code=getDynamicOffset(c, &offset))>=0) {
  1.1623 +                    /* might check if there are more characters in this window to come */
  1.1624 +                    /* define an extended window with this character */
  1.1625 +                    code-=0x200;
  1.1626 +                    dynamicWindow=getNextDynamicWindow(scsu);
  1.1627 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1628 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1629 +                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1630 +                    length=4;
  1.1631 +                    goto outputBytes;
  1.1632 +                } else {
  1.1633 +                    /* change to Unicode mode and output this (lead, trail) pair */
  1.1634 +                    isSingleByteMode=FALSE;
  1.1635 +                    *target++=(uint8_t)SCU; /* fits: this branch is only reached with targetCapacity>0 */
  1.1636 +                    --targetCapacity;
  1.1637 +                    c=((uint32_t)lead<<16)|trail;
  1.1638 +                    length=4;
  1.1639 +                    goto outputBytes;
  1.1640 +                }
  1.1641 +            } else if(c<0xa0) {
  1.1642 +                /* 0x80<=c<0xa0 here: quote C1 control character */
  1.1643 +                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
  1.1644 +                length=2;
  1.1645 +                goto outputBytes;
  1.1646 +            } else if(c==0xfeff || c>=0xfff0) {
  1.1647 +                /* quote signature character=byte order mark and specials */
  1.1648 +                c|=SQU<<16;
  1.1649 +                length=3;
  1.1650 +                goto outputBytes;
  1.1651 +            } else {
  1.1652 +                /* compress all other BMP characters */
  1.1653 +                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
  1.1654 +                    /* there is a window defined that contains this character - switch to it or quote from it? */
  1.1655 +                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
  1.1656 +                        /* change to dynamic window */
  1.1657 +                        dynamicWindow=window;
  1.1658 +                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1659 +                        useDynamicWindow(scsu, dynamicWindow);
  1.1660 +                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1661 +                        length=2;
  1.1662 +                        goto outputBytes;
  1.1663 +                    } else {
  1.1664 +                        /* quote from dynamic window */
  1.1665 +                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
  1.1666 +                        length=2;
  1.1667 +                        goto outputBytes;
  1.1668 +                    }
  1.1669 +                } else if((window=getWindow(staticOffsets, c))>=0) {
  1.1670 +                    /* quote from static window */
  1.1671 +                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
  1.1672 +                    length=2;
  1.1673 +                    goto outputBytes;
  1.1674 +                } else if((code=getDynamicOffset(c, &offset))>=0) {
  1.1675 +                    /* define a dynamic window with this character */
  1.1676 +                    dynamicWindow=getNextDynamicWindow(scsu);
  1.1677 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1678 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1679 +                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1680 +                    length=3;
  1.1681 +                    goto outputBytes;
  1.1682 +                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
  1.1683 +                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
  1.1684 +                ) {
  1.1685 +                    /*
  1.1686 +                     * this character is not compressible (a BMP ideograph or similar);
  1.1687 +                     * switch to Unicode mode if this is the last character in the block
  1.1688 +                     * or there is at least one more ideograph following immediately
  1.1689 +                     */
  1.1690 +                    isSingleByteMode=FALSE;
  1.1691 +                    c|=SCU<<16;
  1.1692 +                    length=3;
  1.1693 +                    goto outputBytes;
  1.1694 +                } else {
  1.1695 +                    /* quote Unicode */
  1.1696 +                    c|=SQU<<16;
  1.1697 +                    length=3;
  1.1698 +                    goto outputBytes;
  1.1699 +                }
  1.1700 +            }
  1.1701 +
  1.1702 +            /* normal end of conversion: prepare for a new character */
  1.1703 +            c=0;
  1.1704 +        }
  1.1705 +    } else {
  1.1706 +        if(c!=0 && targetCapacity>0) {
  1.1707 +            goto getTrailUnicode; /* continue with c as the lead surrogate */
  1.1708 +        }
  1.1709 +
  1.1710 +        /* state machine for Unicode mode */
  1.1711 +/* unicodeByteMode: */
  1.1712 +        while(source<sourceLimit) {
  1.1713 +            if(targetCapacity<=0) {
  1.1714 +                /* target is full */
  1.1715 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1716 +                break;
  1.1717 +            }
  1.1718 +            c=*source++;
  1.1719 +
  1.1720 +            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { /* 0x3400<=c<0xd800 */
  1.1721 +                /* not compressible, write character directly */
  1.1722 +                if(targetCapacity>=2) {
  1.1723 +                    *target++=(uint8_t)(c>>8);
  1.1724 +                    *target++=(uint8_t)c;
  1.1725 +                    targetCapacity-=2;
  1.1726 +                } else {
  1.1727 +                    length=2;
  1.1728 +                    goto outputBytes;
  1.1729 +                }
  1.1730 +            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
  1.1731 +                /* compress BMP character if the following one is not an uncompressible ideograph */
  1.1732 +                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
  1.1733 +                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
  1.1734 +                        /* ASCII digit or letter */
  1.1735 +                        isSingleByteMode=TRUE;
  1.1736 +                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; /* packs the UC0+dynamicWindow command byte before the ASCII byte already in c */
  1.1737 +                        length=2;
  1.1738 +                        goto outputBytes;
  1.1739 +                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
  1.1740 +                        /* there is a dynamic window that contains this character, change to it */
  1.1741 +                        isSingleByteMode=TRUE;
  1.1742 +                        dynamicWindow=window;
  1.1743 +                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1744 +                        useDynamicWindow(scsu, dynamicWindow);
  1.1745 +                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1746 +                        length=2;
  1.1747 +                        goto outputBytes;
  1.1748 +                    } else if((code=getDynamicOffset(c, &offset))>=0) {
  1.1749 +                        /* define a dynamic window with this character */
  1.1750 +                        isSingleByteMode=TRUE;
  1.1751 +                        dynamicWindow=getNextDynamicWindow(scsu);
  1.1752 +                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1753 +                        useDynamicWindow(scsu, dynamicWindow);
  1.1754 +                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1755 +                        length=3;
  1.1756 +                        goto outputBytes;
  1.1757 +                    }
  1.1758 +                }
  1.1759 +
  1.1760 +                /* don't know how to compress this character, just write it directly */
  1.1761 +                length=2;
  1.1762 +                goto outputBytes;
  1.1763 +            } else if(c<0xe000) {
  1.1764 +                /* c is a surrogate */
  1.1765 +                if(U16_IS_SURROGATE_LEAD(c)) {
  1.1766 +getTrailUnicode:
  1.1767 +                    lead=(UChar)c;
  1.1768 +                    if(source<sourceLimit) {
  1.1769 +                        /* test the following code unit */
  1.1770 +                        trail=*source;
  1.1771 +                        if(U16_IS_TRAIL(trail)) {
  1.1772 +                            ++source;
  1.1773 +                            c=U16_GET_SUPPLEMENTARY(c, trail);
  1.1774 +                            /* convert this surrogate code point */
  1.1775 +                            /* exit this condition tree */
  1.1776 +                        } else {
  1.1777 +                            /* this is an unmatched lead code unit (1st surrogate) */
  1.1778 +                            /* callback(illegal) */
  1.1779 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1780 +                            goto endloop;
  1.1781 +                        }
  1.1782 +                    } else {
  1.1783 +                        /* no more input */
  1.1784 +                        break;
  1.1785 +                    }
  1.1786 +                } else {
  1.1787 +                    /* this is an unmatched trail code unit (2nd surrogate) */
  1.1788 +                    /* callback(illegal) */
  1.1789 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.1790 +                    goto endloop;
  1.1791 +                }
  1.1792 +
  1.1793 +                /* compress supplementary character */
  1.1794 +                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
  1.1795 +                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
  1.1796 +                ) {
  1.1797 +                    /*
  1.1798 +                     * there is a dynamic window that contains this character and
  1.1799 +                     * the following character is not uncompressible,
  1.1800 +                     * change to the window
  1.1801 +                     */
  1.1802 +                    isSingleByteMode=TRUE;
  1.1803 +                    dynamicWindow=window;
  1.1804 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
  1.1805 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1806 +                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
  1.1807 +                    length=2;
  1.1808 +                    goto outputBytes;
  1.1809 +                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
  1.1810 +                          (code=getDynamicOffset(c, &offset))>=0
  1.1811 +                ) {
  1.1812 +                    /* two supplementary characters in (probably) the same window - define an extended one */
  1.1813 +                    isSingleByteMode=TRUE;
  1.1814 +                    code-=0x200;
  1.1815 +                    dynamicWindow=getNextDynamicWindow(scsu);
  1.1816 +                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
  1.1817 +                    useDynamicWindow(scsu, dynamicWindow);
  1.1818 +                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
  1.1819 +                    length=4;
  1.1820 +                    goto outputBytes;
  1.1821 +                } else {
  1.1822 +                    /* don't know how to compress this character, just write it directly */
  1.1823 +                    c=((uint32_t)lead<<16)|trail;
  1.1824 +                    length=4;
  1.1825 +                    goto outputBytes;
  1.1826 +                }
  1.1827 +            } else /* 0xe000<=c<0xf300 */ {
  1.1828 +                /* quote to avoid SCSU tags */
  1.1829 +                c|=UQU<<16;
  1.1830 +                length=3;
  1.1831 +                goto outputBytes;
  1.1832 +            }
  1.1833 +
  1.1834 +            /* normal end of conversion: prepare for a new character */
  1.1835 +            c=0;
  1.1836 +        }
  1.1837 +    }
  1.1838 +endloop:
  1.1839 +
  1.1840 +    /* set the converter state back into UConverter */
  1.1841 +    scsu->fromUIsSingleByteMode=isSingleByteMode;
  1.1842 +    scsu->fromUDynamicWindow=dynamicWindow;
  1.1843 +
  1.1844 +    cnv->fromUChar32=c;
  1.1845 +
  1.1846 +    /* write back the updated pointers */
  1.1847 +    pArgs->source=source;
  1.1848 +    pArgs->target=(char *)target;
  1.1849 +    return;
  1.1850 +
  1.1851 +outputBytes:
  1.1852 +    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
  1.1853 +    /* from the first if in the loop we know that targetCapacity>0 */
  1.1854 +    if(length<=targetCapacity) {
  1.1855 +        switch(length) {
  1.1856 +            /* each branch falls through to the next one */
  1.1857 +        case 4:
  1.1858 +            *target++=(uint8_t)(c>>24);
  1.1859 +        case 3: /*fall through*/
  1.1860 +            *target++=(uint8_t)(c>>16);
  1.1861 +        case 2: /*fall through*/
  1.1862 +            *target++=(uint8_t)(c>>8);
  1.1863 +        case 1: /*fall through*/
  1.1864 +            *target++=(uint8_t)c;
  1.1865 +        default:
  1.1866 +            /* will never occur */
  1.1867 +            break;
  1.1868 +        }
  1.1869 +        targetCapacity-=length;
  1.1870 +
  1.1871 +        /* normal end of conversion: prepare for a new character */
  1.1872 +        c=0;
  1.1873 +        goto loop;
  1.1874 +    } else {
  1.1875 +        uint8_t *p;
  1.1876 +
  1.1877 +        /*
  1.1878 +         * We actually do this backwards here:
  1.1879 +         * In order to save an intermediate variable, we output
  1.1880 +         * first to the overflow buffer what does not fit into the
  1.1881 +         * regular target.
  1.1882 +         */
  1.1883 +        /* we know that 0<=targetCapacity<length<=4 */
  1.1884 +        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
  1.1885 +        length-=targetCapacity;
  1.1886 +        p=(uint8_t *)cnv->charErrorBuffer;
  1.1887 +        switch(length) {
  1.1888 +            /* each branch falls through to the next one */
  1.1889 +        case 4:
  1.1890 +            *p++=(uint8_t)(c>>24);
  1.1891 +        case 3: /*fall through*/
  1.1892 +            *p++=(uint8_t)(c>>16);
  1.1893 +        case 2: /*fall through*/
  1.1894 +            *p++=(uint8_t)(c>>8);
  1.1895 +        case 1: /*fall through*/
  1.1896 +            *p=(uint8_t)c;
  1.1897 +        default:
  1.1898 +            /* will never occur */
  1.1899 +            break;
  1.1900 +        }
  1.1901 +        cnv->charErrorBufferLength=(int8_t)length;
  1.1902 +
  1.1903 +        /* now output what fits into the regular target */
  1.1904 +        c>>=8*length; /* length was reduced by targetCapacity */
  1.1905 +        switch(targetCapacity) {
  1.1906 +            /* each branch falls through to the next one */
  1.1907 +        case 3:
  1.1908 +            *target++=(uint8_t)(c>>16);
  1.1909 +        case 2: /*fall through*/
  1.1910 +            *target++=(uint8_t)(c>>8);
  1.1911 +        case 1: /*fall through*/
  1.1912 +            *target++=(uint8_t)c;
  1.1913 +        default:
  1.1914 +            break;
  1.1915 +        }
  1.1916 +
  1.1917 +        /* target overflow */
  1.1918 +        targetCapacity=0;
  1.1919 +        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.1920 +        c=0;
  1.1921 +        goto endloop;
  1.1922 +    }
  1.1923 +}
  1.1924 +
  1.1925 +/* miscellaneous ------------------------------------------------------------ */
  1.1926 +
  1.1927 +/* Report the converter name; the Japanese locale variant carries its option suffix. */
  1.1928 +static const char *
  1.1929 +_SCSUGetName(const UConverter *cnv) {
  1.1930 +    const SCSUData *state=(const SCSUData *)cnv->extraInfo;
  1.1931 +
  1.1932 +    if(state->locale==l_ja) {
  1.1933 +        return "SCSU,locale=ja";
  1.1934 +    }
  1.1935 +
  1.1936 +    return "SCSU";
  1.1937 +}
  1.1938 +
  1.1939 +/* structure for SafeClone calculations: _SCSUSafeClone() overlays it on the caller's clone buffer */
  1.1940 +struct cloneSCSUStruct
  1.1941 +{
  1.1942 +    UConverter cnv; /* the cloned converter; _SCSUSafeClone() returns its address */
  1.1943 +    SCSUData mydata; /* the clone's own copy of the SCSU state; cnv.extraInfo is pointed here */
  1.1944 +};
  1.1945 +
  1.1946 +/* SafeClone hook: copy the SCSU state into stackBuffer, or report the size needed when *pBufferSize is 0. */
  1.1947 +static UConverter *
  1.1948 +_SCSUSafeClone(const UConverter *cnv,
  1.1949 +               void *stackBuffer,
  1.1950 +               int32_t *pBufferSize,
  1.1951 +               UErrorCode *status)
  1.1952 +{
  1.1953 +    struct cloneSCSUStruct *clone;
  1.1954 +
  1.1955 +    if(U_FAILURE(*status)) {
  1.1956 +        return NULL;
  1.1957 +    }
  1.1958 +
  1.1959 +    if(*pBufferSize==0) {
  1.1960 +        /* preflighting request: only tell the caller how much storage a clone needs */
  1.1961 +        *pBufferSize=(int32_t)sizeof(struct cloneSCSUStruct);
  1.1962 +        return NULL;
  1.1963 +    }
  1.1964 +
  1.1965 +    /* ucnv.c/ucnv_safeClone() has already copied the main UConverter into the buffer */
  1.1966 +    clone=(struct cloneSCSUStruct *)stackBuffer;
  1.1967 +    uprv_memcpy(&clone->mydata, cnv->extraInfo, sizeof(SCSUData));
  1.1968 +    clone->cnv.extraInfo=&clone->mydata;
  1.1969 +    clone->cnv.isExtraLocal=TRUE;
  1.1970 +
  1.1971 +    return &clone->cnv;
  1.1972 +}
  1.1973 +
  1.1974 +
  1.1975 +static const UConverterImpl _SCSUImpl={ /* function table for the SCSU converter, referenced by _SCSUData */
  1.1976 +    UCNV_SCSU, /* NOTE(review): positional initializer; slot meanings come from the UConverterImpl declaration, not shown here */
  1.1977 +
  1.1978 +    NULL,
  1.1979 +    NULL,
  1.1980 +
  1.1981 +    _SCSUOpen, /* open, close and reset handlers for the SCSU state */
  1.1982 +    _SCSUClose,
  1.1983 +    _SCSUReset,
  1.1984 +
  1.1985 +    _SCSUToUnicode, /* conversion functions: each direction without and with offsets output */
  1.1986 +    _SCSUToUnicodeWithOffsets,
  1.1987 +    _SCSUFromUnicode,
  1.1988 +    _SCSUFromUnicodeWithOffsets,
  1.1989 +    NULL,
  1.1990 +
  1.1991 +    NULL,
  1.1992 +    _SCSUGetName, /* returns "SCSU" or "SCSU,locale=ja" */
  1.1993 +    NULL,
  1.1994 +    _SCSUSafeClone, /* copies the SCSUData into the clone buffer */
  1.1995 +    ucnv_getCompleteUnicodeSet /* NOTE(review): name suggests all of Unicode is reported as convertible - confirm */
  1.1996 +};
  1.1997 +
  1.1998 +static const UConverterStaticData _SCSUStaticData={ /* static description of the SCSU converter, referenced by _SCSUData */
  1.1999 +    sizeof(UConverterStaticData),
  1.2000 +    "SCSU", /* same base name that _SCSUGetName() returns by default */
  1.2001 +    1212, /* CCSID for SCSU */
  1.2002 +    UCNV_IBM, UCNV_SCSU,
  1.2003 +    1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
  1.2004 +    /*
  1.2005 +     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
  1.2006 +     * substitution string.
  1.2007 +     */
  1.2008 +    { 0x0e, 0xff, 0xfd, 0 }, 3, /* 0x0e is SQU, so these bytes are a quoted U+fffd; NOTE(review): 3 is presumably the byte count - confirm */
  1.2009 +    FALSE, FALSE,
  1.2010 +    0,
  1.2011 +    0,
  1.2012 +    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
  1.2013 +};
  1.2014 +
  1.2015 +const UConverterSharedData _SCSUData={ /* externally visible (non-static) entry for SCSU */
  1.2016 +    sizeof(UConverterSharedData), ~((uint32_t)0), /* NOTE(review): second field is all-ones; presumably a sentinel for built-in data - confirm */
  1.2017 +    NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl, /* ties the static description to the function table */
  1.2018 +    0
  1.2019 +};
  1.2020 +
  1.2021 +#endif

mercurial