intl/icu/source/common/ucnv_cb.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnv_cb.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,259 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2000-2006, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 + *  ucnv_cb.c:
    1.10 + *  External APIs for the ICU's codeset conversion library
    1.11 + *  Helena Shih
    1.12 + *
    1.13 + * Modification History:
    1.14 + *
    1.15 + *   Date        Name        Description
    1.16 + *   7/28/2000   srl         Implementation
    1.17 + */
    1.18 +
    1.19 +/**
    1.20 + * @name Character Conversion C API
    1.21 + *
    1.22 + */
    1.23 +
    1.24 +#include "unicode/utypes.h"
    1.25 +
    1.26 +#if !UCONFIG_NO_CONVERSION
    1.27 +
    1.28 +#include "unicode/ucnv_cb.h"
    1.29 +#include "ucnv_bld.h"
    1.30 +#include "ucnv_cnv.h"
    1.31 +#include "cmemory.h"
    1.32 +
    1.33 +/* need to update the offsets when the target moves. */
    1.34 +/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
    1.35 +if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
    1.36 +the same call stack if the complexity arises. */
    1.37 +U_CAPI void  U_EXPORT2
    1.38 +ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
    1.39 +                       const char* source,
    1.40 +                       int32_t length,
    1.41 +                       int32_t offsetIndex,
    1.42 +                       UErrorCode * err)
    1.43 +{
    1.44 +    if(U_FAILURE(*err)) {
    1.45 +        return;
    1.46 +    }
    1.47 +
    1.48 +    ucnv_fromUWriteBytes(
    1.49 +        args->converter,
    1.50 +        source, length,
    1.51 +        &args->target, args->targetLimit,
    1.52 +        &args->offsets, offsetIndex,
    1.53 +        err);
    1.54 +}
    1.55 +
    1.56 +U_CAPI void  U_EXPORT2
    1.57 +ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
    1.58 +                             const UChar** source,
    1.59 +                             const UChar*  sourceLimit,
    1.60 +                             int32_t offsetIndex,
    1.61 +                             UErrorCode * err)
    1.62 +{
    1.63 +    /*
    1.64 +    This is a fun one.  Recursion can occur - we're basically going to
    1.65 +    just retry shoving data through the same converter. Note, if you got
    1.66 +    here through some kind of invalid sequence, you maybe should emit a
    1.67 +    reset sequence of some kind and/or call ucnv_reset().  Since this
    1.68 +    IS an actual conversion, take care that you've changed the callback
    1.69 +    or the data, or you'll get an infinite loop.
    1.70 +
    1.71 +    Please set the err value to something reasonable before calling
    1.72 +    into this.
    1.73 +    */
    1.74 +
    1.75 +    char *oldTarget;
    1.76 +
    1.77 +    if(U_FAILURE(*err))
    1.78 +    {
    1.79 +        return;
    1.80 +    }
    1.81 +
    1.82 +    oldTarget = args->target;
    1.83 +
    1.84 +    ucnv_fromUnicode(args->converter,
    1.85 +        &args->target,
    1.86 +        args->targetLimit,
    1.87 +        source,
    1.88 +        sourceLimit,
    1.89 +        NULL, /* no offsets */
    1.90 +        FALSE, /* no flush */
    1.91 +        err);
    1.92 +
    1.93 +    if(args->offsets)
    1.94 +    {
    1.95 +        while (args->target != oldTarget)  /* if it moved at all.. */
    1.96 +        {
    1.97 +            *(args->offsets)++ = offsetIndex;
    1.98 +            oldTarget++;
    1.99 +        }
   1.100 +    }
   1.101 +
   1.102 +    /*
   1.103 +    Note, if you did something like used a Stop subcallback, things would get interesting.
   1.104 +    In fact, here's where we want to return the partially consumed in-source!
   1.105 +    */
   1.106 +    if(*err == U_BUFFER_OVERFLOW_ERROR)
   1.107 +    /* && (*source < sourceLimit && args->target >= args->targetLimit)
   1.108 +    -- S. Hrcek */
   1.109 +    {
   1.110 +        /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
   1.111 +        It's a fixed size. If we overflow it... Hmm */
   1.112 +        char *newTarget;
   1.113 +        const char *newTargetLimit;
   1.114 +        UErrorCode err2 = U_ZERO_ERROR;
   1.115 +
   1.116 +        int8_t errBuffLen;
   1.117 +
   1.118 +        errBuffLen  = args->converter->charErrorBufferLength;
   1.119 +
   1.120 +        /* start the new target at the first free slot in the errbuff.. */
   1.121 +        newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
   1.122 +
   1.123 +        newTargetLimit = (char *)(args->converter->charErrorBuffer +
   1.124 +            sizeof(args->converter->charErrorBuffer));
   1.125 +
   1.126 +        if(newTarget >= newTargetLimit)
   1.127 +        {
   1.128 +            *err = U_INTERNAL_PROGRAM_ERROR;
   1.129 +            return;
   1.130 +        }
   1.131 +
   1.132 +        /* We're going to tell the converter that the errbuff len is empty.
   1.133 +        This prevents the existing errbuff from being 'flushed' out onto
   1.134 +        itself.  If the errbuff is needed by the converter this time,
   1.135 +        we're hosed - we're out of space! */
   1.136 +
   1.137 +        args->converter->charErrorBufferLength = 0;
   1.138 +
   1.139 +        ucnv_fromUnicode(args->converter,
   1.140 +                         &newTarget,
   1.141 +                         newTargetLimit,
   1.142 +                         source,
   1.143 +                         sourceLimit,
   1.144 +                         NULL,
   1.145 +                         FALSE,
   1.146 +                         &err2);
   1.147 +
   1.148 +        /* We can go ahead and overwrite the  length here. We know just how
   1.149 +        to recalculate it. */
   1.150 +
   1.151 +        args->converter->charErrorBufferLength = (int8_t)(
   1.152 +            newTarget - (char*)args->converter->charErrorBuffer);
   1.153 +
   1.154 +        if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
   1.155 +        {
   1.156 +            /* now we're REALLY in trouble.
   1.157 +            Internal program error - callback shouldn't have written this much
   1.158 +            data!
   1.159 +            */
   1.160 +            *err = U_INTERNAL_PROGRAM_ERROR;
   1.161 +            return;
   1.162 +        }
   1.163 +        /*else {*/
   1.164 +            /* sub errs could be invalid/truncated/illegal chars or w/e.
   1.165 +            These might want to be passed on up.. But the problem is, we already
   1.166 +            need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
   1.167 +            other errs.. */
   1.168 +
   1.169 +            /*
   1.170 +            if(U_FAILURE(err2))
   1.171 +            ??
   1.172 +            */
   1.173 +        /*}*/
   1.174 +    }
   1.175 +}
   1.176 +
   1.177 +U_CAPI void  U_EXPORT2
   1.178 +ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
   1.179 +                           int32_t offsetIndex,
   1.180 +                           UErrorCode * err)
   1.181 +{
   1.182 +    UConverter *converter;
   1.183 +    int32_t length;
   1.184 +
   1.185 +    if(U_FAILURE(*err)) {
   1.186 +        return;
   1.187 +    }
   1.188 +    converter = args->converter;
   1.189 +    length = converter->subCharLen;
   1.190 +
   1.191 +    if(length == 0) {
   1.192 +        return;
   1.193 +    }
   1.194 +
   1.195 +    if(length < 0) {
   1.196 +        /*
   1.197 +         * Write/convert the substitution string. Its real length is -length.
   1.198 +         * Unlike the escape callback, we need not change the converter's
   1.199 +         * callback function because ucnv_setSubstString() verified that
   1.200 +         * the string can be converted, so we will not get a conversion error
   1.201 +         * and will not recurse.
   1.202 +         * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
   1.203 +         */
   1.204 +        const UChar *source = (const UChar *)converter->subChars;
   1.205 +        ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
   1.206 +        return;
   1.207 +    }
   1.208 +
   1.209 +    if(converter->sharedData->impl->writeSub!=NULL) {
   1.210 +        converter->sharedData->impl->writeSub(args, offsetIndex, err);
   1.211 +    }
   1.212 +    else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
   1.213 +        /*
   1.214 +        TODO: Is this untestable because the MBCS converter has a writeSub function to call
   1.215 +        and the other converters don't use subChar1?
   1.216 +        */
   1.217 +        ucnv_cbFromUWriteBytes(args,
   1.218 +                               (const char *)&converter->subChar1, 1,
   1.219 +                               offsetIndex, err);
   1.220 +    }
   1.221 +    else {
   1.222 +        ucnv_cbFromUWriteBytes(args,
   1.223 +                               (const char *)converter->subChars, length,
   1.224 +                               offsetIndex, err);
   1.225 +    }
   1.226 +}
   1.227 +
   1.228 +U_CAPI void  U_EXPORT2
   1.229 +ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
   1.230 +                            const UChar* source,
   1.231 +                            int32_t length,
   1.232 +                            int32_t offsetIndex,
   1.233 +                            UErrorCode * err)
   1.234 +{
   1.235 +    if(U_FAILURE(*err)) {
   1.236 +        return;
   1.237 +    }
   1.238 +
   1.239 +    ucnv_toUWriteUChars(
   1.240 +        args->converter,
   1.241 +        source, length,
   1.242 +        &args->target, args->targetLimit,
   1.243 +        &args->offsets, offsetIndex,
   1.244 +        err);
   1.245 +}
   1.246 +
   1.247 +U_CAPI void  U_EXPORT2
   1.248 +ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
   1.249 +                         int32_t offsetIndex,
   1.250 +                       UErrorCode * err)
   1.251 +{
   1.252 +    static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
   1.253 +
   1.254 +    /* could optimize this case, just one uchar */
   1.255 +    if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
   1.256 +        ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
   1.257 +    } else {
   1.258 +        ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
   1.259 +    }
   1.260 +}
   1.261 +
   1.262 +#endif

mercurial