michael@0: /*
michael@0: **********************************************************************
michael@0: *   Copyright (C) 2000-2006, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: **********************************************************************
michael@0:  *  ucnv_cb.c:
michael@0:  *  External APIs for the ICU's codeset conversion library
michael@0:  *  Helena Shih
michael@0:  *
michael@0:  * Modification History:
michael@0:  *
michael@0:  *   Date        Name        Description
michael@0:  *   7/28/2000   srl         Implementation
michael@0:  */
michael@0: 
michael@0: /**
michael@0:  * @name Character Conversion C API
michael@0:  *
michael@0:  */
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: 
michael@0: #if !UCONFIG_NO_CONVERSION
michael@0: 
michael@0: #include "unicode/ucnv_cb.h"
michael@0: #include "ucnv_bld.h"
michael@0: #include "ucnv_cnv.h"
michael@0: #include "cmemory.h"
michael@0: 
/*
 * The offsets need to be updated whenever the target moves.
 * Note: Recursion may occur in the callback functions. If you do not use the
 * ucnv_cbXXX functions, be sure to update the offsets correctly yourself.
 * Make sure you do not use the same callback again within the same call stack
 * if this kind of complexity arises.
 */
michael@0: U_CAPI void  U_EXPORT2
michael@0: ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
michael@0:                        const char* source,
michael@0:                        int32_t length,
michael@0:                        int32_t offsetIndex,
michael@0:                        UErrorCode * err)
michael@0: {
michael@0:     if(U_FAILURE(*err)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     ucnv_fromUWriteBytes(
michael@0:         args->converter,
michael@0:         source, length,
michael@0:         &args->target, args->targetLimit,
michael@0:         &args->offsets, offsetIndex,
michael@0:         err);
michael@0: }
michael@0: 
michael@0: U_CAPI void  U_EXPORT2
michael@0: ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
michael@0:                              const UChar** source,
michael@0:                              const UChar*  sourceLimit,
michael@0:                              int32_t offsetIndex,
michael@0:                              UErrorCode * err)
michael@0: {
michael@0:     /*
michael@0:     This is a fun one.  Recursion can occur - we're basically going to
michael@0:     just retry shoving data through the same converter. Note, if you got
michael@0:     here through some kind of invalid sequence, you maybe should emit a
michael@0:     reset sequence of some kind and/or call ucnv_reset().  Since this
michael@0:     IS an actual conversion, take care that you've changed the callback
michael@0:     or the data, or you'll get an infinite loop.
michael@0: 
michael@0:     Please set the err value to something reasonable before calling
michael@0:     into this.
michael@0:     */
michael@0: 
michael@0:     char *oldTarget;
michael@0: 
michael@0:     if(U_FAILURE(*err))
michael@0:     {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     oldTarget = args->target;
michael@0: 
michael@0:     ucnv_fromUnicode(args->converter,
michael@0:         &args->target,
michael@0:         args->targetLimit,
michael@0:         source,
michael@0:         sourceLimit,
michael@0:         NULL, /* no offsets */
michael@0:         FALSE, /* no flush */
michael@0:         err);
michael@0: 
michael@0:     if(args->offsets)
michael@0:     {
michael@0:         while (args->target != oldTarget)  /* if it moved at all.. */
michael@0:         {
michael@0:             *(args->offsets)++ = offsetIndex;
michael@0:             oldTarget++;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /*
michael@0:     Note, if you did something like used a Stop subcallback, things would get interesting.
michael@0:     In fact, here's where we want to return the partially consumed in-source!
michael@0:     */
michael@0:     if(*err == U_BUFFER_OVERFLOW_ERROR)
michael@0:     /* && (*source < sourceLimit && args->target >= args->targetLimit)
michael@0:     -- S. Hrcek */
michael@0:     {
michael@0:         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
michael@0:         It's a fixed size. If we overflow it... Hmm */
michael@0:         char *newTarget;
michael@0:         const char *newTargetLimit;
michael@0:         UErrorCode err2 = U_ZERO_ERROR;
michael@0: 
michael@0:         int8_t errBuffLen;
michael@0: 
michael@0:         errBuffLen  = args->converter->charErrorBufferLength;
michael@0: 
michael@0:         /* start the new target at the first free slot in the errbuff.. */
michael@0:         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
michael@0: 
michael@0:         newTargetLimit = (char *)(args->converter->charErrorBuffer +
michael@0:             sizeof(args->converter->charErrorBuffer));
michael@0: 
michael@0:         if(newTarget >= newTargetLimit)
michael@0:         {
michael@0:             *err = U_INTERNAL_PROGRAM_ERROR;
michael@0:             return;
michael@0:         }
michael@0: 
michael@0:         /* We're going to tell the converter that the errbuff len is empty.
michael@0:         This prevents the existing errbuff from being 'flushed' out onto
michael@0:         itself.  If the errbuff is needed by the converter this time,
michael@0:         we're hosed - we're out of space! */
michael@0: 
michael@0:         args->converter->charErrorBufferLength = 0;
michael@0: 
michael@0:         ucnv_fromUnicode(args->converter,
michael@0:                          &newTarget,
michael@0:                          newTargetLimit,
michael@0:                          source,
michael@0:                          sourceLimit,
michael@0:                          NULL,
michael@0:                          FALSE,
michael@0:                          &err2);
michael@0: 
michael@0:         /* We can go ahead and overwrite the  length here. We know just how
michael@0:         to recalculate it. */
michael@0: 
michael@0:         args->converter->charErrorBufferLength = (int8_t)(
michael@0:             newTarget - (char*)args->converter->charErrorBuffer);
michael@0: 
michael@0:         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
michael@0:         {
michael@0:             /* now we're REALLY in trouble.
michael@0:             Internal program error - callback shouldn't have written this much
michael@0:             data!
michael@0:             */
michael@0:             *err = U_INTERNAL_PROGRAM_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         /*else {*/
michael@0:             /* sub errs could be invalid/truncated/illegal chars or w/e.
michael@0:             These might want to be passed on up.. But the problem is, we already
michael@0:             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
michael@0:             other errs.. */
michael@0: 
michael@0:             /*
michael@0:             if(U_FAILURE(err2))
michael@0:             ??
michael@0:             */
michael@0:         /*}*/
michael@0:     }
michael@0: }
michael@0: 
michael@0: U_CAPI void  U_EXPORT2
michael@0: ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
michael@0:                            int32_t offsetIndex,
michael@0:                            UErrorCode * err)
michael@0: {
michael@0:     UConverter *converter;
michael@0:     int32_t length;
michael@0: 
michael@0:     if(U_FAILURE(*err)) {
michael@0:         return;
michael@0:     }
michael@0:     converter = args->converter;
michael@0:     length = converter->subCharLen;
michael@0: 
michael@0:     if(length == 0) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     if(length < 0) {
michael@0:         /*
michael@0:          * Write/convert the substitution string. Its real length is -length.
michael@0:          * Unlike the escape callback, we need not change the converter's
michael@0:          * callback function because ucnv_setSubstString() verified that
michael@0:          * the string can be converted, so we will not get a conversion error
michael@0:          * and will not recurse.
michael@0:          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
michael@0:          */
michael@0:         const UChar *source = (const UChar *)converter->subChars;
michael@0:         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     if(converter->sharedData->impl->writeSub!=NULL) {
michael@0:         converter->sharedData->impl->writeSub(args, offsetIndex, err);
michael@0:     }
michael@0:     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
michael@0:         /*
michael@0:         TODO: Is this untestable because the MBCS converter has a writeSub function to call
michael@0:         and the other converters don't use subChar1?
michael@0:         */
michael@0:         ucnv_cbFromUWriteBytes(args,
michael@0:                                (const char *)&converter->subChar1, 1,
michael@0:                                offsetIndex, err);
michael@0:     }
michael@0:     else {
michael@0:         ucnv_cbFromUWriteBytes(args,
michael@0:                                (const char *)converter->subChars, length,
michael@0:                                offsetIndex, err);
michael@0:     }
michael@0: }
michael@0: 
michael@0: U_CAPI void  U_EXPORT2
michael@0: ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
michael@0:                             const UChar* source,
michael@0:                             int32_t length,
michael@0:                             int32_t offsetIndex,
michael@0:                             UErrorCode * err)
michael@0: {
michael@0:     if(U_FAILURE(*err)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     ucnv_toUWriteUChars(
michael@0:         args->converter,
michael@0:         source, length,
michael@0:         &args->target, args->targetLimit,
michael@0:         &args->offsets, offsetIndex,
michael@0:         err);
michael@0: }
michael@0: 
michael@0: U_CAPI void  U_EXPORT2
michael@0: ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
michael@0:                          int32_t offsetIndex,
michael@0:                        UErrorCode * err)
michael@0: {
michael@0:     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
michael@0: 
michael@0:     /* could optimize this case, just one uchar */
michael@0:     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
michael@0:         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
michael@0:     } else {
michael@0:         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
michael@0:     }
michael@0: }
michael@0: 
michael@0: #endif