1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_cb.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,259 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2000-2006, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 + * ucnv_cb.c: 1.10 + * External APIs for the ICU's codeset conversion library 1.11 + * Helena Shih 1.12 + * 1.13 + * Modification History: 1.14 + * 1.15 + * Date Name Description 1.16 + * 7/28/2000 srl Implementation 1.17 + */ 1.18 + 1.19 +/** 1.20 + * @name Character Conversion C API 1.21 + * 1.22 + */ 1.23 + 1.24 +#include "unicode/utypes.h" 1.25 + 1.26 +#if !UCONFIG_NO_CONVERSION 1.27 + 1.28 +#include "unicode/ucnv_cb.h" 1.29 +#include "ucnv_bld.h" 1.30 +#include "ucnv_cnv.h" 1.31 +#include "cmemory.h" 1.32 + 1.33 +/* need to update the offsets when the target moves. */ 1.34 +/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly 1.35 +if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within 1.36 +the same call stack if the complexity arises. */ 1.37 +U_CAPI void U_EXPORT2 1.38 +ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, 1.39 + const char* source, 1.40 + int32_t length, 1.41 + int32_t offsetIndex, 1.42 + UErrorCode * err) 1.43 +{ 1.44 + if(U_FAILURE(*err)) { 1.45 + return; 1.46 + } 1.47 + 1.48 + ucnv_fromUWriteBytes( 1.49 + args->converter, 1.50 + source, length, 1.51 + &args->target, args->targetLimit, 1.52 + &args->offsets, offsetIndex, 1.53 + err); 1.54 +} 1.55 + 1.56 +U_CAPI void U_EXPORT2 1.57 +ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, 1.58 + const UChar** source, 1.59 + const UChar* sourceLimit, 1.60 + int32_t offsetIndex, 1.61 + UErrorCode * err) 1.62 +{ 1.63 + /* 1.64 + This is a fun one. Recursion can occur - we're basically going to 1.65 + just retry shoving data through the same converter. Note, if you got 1.66 + here through some kind of invalid sequence, you maybe should emit a 1.67 + reset sequence of some kind and/or call ucnv_reset(). Since this 1.68 + IS an actual conversion, take care that you've changed the callback 1.69 + or the data, or you'll get an infinite loop. 1.70 + 1.71 + Please set the err value to something reasonable before calling 1.72 + into this. 1.73 + */ 1.74 + 1.75 + char *oldTarget; 1.76 + 1.77 + if(U_FAILURE(*err)) 1.78 + { 1.79 + return; 1.80 + } 1.81 + 1.82 + oldTarget = args->target; 1.83 + 1.84 + ucnv_fromUnicode(args->converter, 1.85 + &args->target, 1.86 + args->targetLimit, 1.87 + source, 1.88 + sourceLimit, 1.89 + NULL, /* no offsets */ 1.90 + FALSE, /* no flush */ 1.91 + err); 1.92 + 1.93 + if(args->offsets) 1.94 + { 1.95 + while (args->target != oldTarget) /* if it moved at all.. */ 1.96 + { 1.97 + *(args->offsets)++ = offsetIndex; 1.98 + oldTarget++; 1.99 + } 1.100 + } 1.101 + 1.102 + /* 1.103 + Note, if you did something like used a Stop subcallback, things would get interesting. 1.104 + In fact, here's where we want to return the partially consumed in-source! 1.105 + */ 1.106 + if(*err == U_BUFFER_OVERFLOW_ERROR) 1.107 + /* && (*source < sourceLimit && args->target >= args->targetLimit) 1.108 + -- S. Hrcek */ 1.109 + { 1.110 + /* Overflowed the target. Now, we'll write into the charErrorBuffer. 1.111 + It's a fixed size. If we overflow it... Hmm */ 1.112 + char *newTarget; 1.113 + const char *newTargetLimit; 1.114 + UErrorCode err2 = U_ZERO_ERROR; 1.115 + 1.116 + int8_t errBuffLen; 1.117 + 1.118 + errBuffLen = args->converter->charErrorBufferLength; 1.119 + 1.120 + /* start the new target at the first free slot in the errbuff.. */ 1.121 + newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen); 1.122 + 1.123 + newTargetLimit = (char *)(args->converter->charErrorBuffer + 1.124 + sizeof(args->converter->charErrorBuffer)); 1.125 + 1.126 + if(newTarget >= newTargetLimit) 1.127 + { 1.128 + *err = U_INTERNAL_PROGRAM_ERROR; 1.129 + return; 1.130 + } 1.131 + 1.132 + /* We're going to tell the converter that the errbuff len is empty. 1.133 + This prevents the existing errbuff from being 'flushed' out onto 1.134 + itself. If the errbuff is needed by the converter this time, 1.135 + we're hosed - we're out of space! */ 1.136 + 1.137 + args->converter->charErrorBufferLength = 0; 1.138 + 1.139 + ucnv_fromUnicode(args->converter, 1.140 + &newTarget, 1.141 + newTargetLimit, 1.142 + source, 1.143 + sourceLimit, 1.144 + NULL, 1.145 + FALSE, 1.146 + &err2); 1.147 + 1.148 + /* We can go ahead and overwrite the length here. We know just how 1.149 + to recalculate it. */ 1.150 + 1.151 + args->converter->charErrorBufferLength = (int8_t)( 1.152 + newTarget - (char*)args->converter->charErrorBuffer); 1.153 + 1.154 + if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR)) 1.155 + { 1.156 + /* now we're REALLY in trouble. 1.157 + Internal program error - callback shouldn't have written this much 1.158 + data! 1.159 + */ 1.160 + *err = U_INTERNAL_PROGRAM_ERROR; 1.161 + return; 1.162 + } 1.163 + /*else {*/ 1.164 + /* sub errs could be invalid/truncated/illegal chars or w/e. 1.165 + These might want to be passed on up.. But the problem is, we already 1.166 + need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these 1.167 + other errs.. */ 1.168 + 1.169 + /* 1.170 + if(U_FAILURE(err2)) 1.171 + ?? 1.172 + */ 1.173 + /*}*/ 1.174 + } 1.175 +} 1.176 + 1.177 +U_CAPI void U_EXPORT2 1.178 +ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, 1.179 + int32_t offsetIndex, 1.180 + UErrorCode * err) 1.181 +{ 1.182 + UConverter *converter; 1.183 + int32_t length; 1.184 + 1.185 + if(U_FAILURE(*err)) { 1.186 + return; 1.187 + } 1.188 + converter = args->converter; 1.189 + length = converter->subCharLen; 1.190 + 1.191 + if(length == 0) { 1.192 + return; 1.193 + } 1.194 + 1.195 + if(length < 0) { 1.196 + /* 1.197 + * Write/convert the substitution string. Its real length is -length. 1.198 + * Unlike the escape callback, we need not change the converter's 1.199 + * callback function because ucnv_setSubstString() verified that 1.200 + * the string can be converted, so we will not get a conversion error 1.201 + * and will not recurse. 1.202 + * At worst we should get a U_BUFFER_OVERFLOW_ERROR. 1.203 + */ 1.204 + const UChar *source = (const UChar *)converter->subChars; 1.205 + ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err); 1.206 + return; 1.207 + } 1.208 + 1.209 + if(converter->sharedData->impl->writeSub!=NULL) { 1.210 + converter->sharedData->impl->writeSub(args, offsetIndex, err); 1.211 + } 1.212 + else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) { 1.213 + /* 1.214 + TODO: Is this untestable because the MBCS converter has a writeSub function to call 1.215 + and the other converters don't use subChar1? 1.216 + */ 1.217 + ucnv_cbFromUWriteBytes(args, 1.218 + (const char *)&converter->subChar1, 1, 1.219 + offsetIndex, err); 1.220 + } 1.221 + else { 1.222 + ucnv_cbFromUWriteBytes(args, 1.223 + (const char *)converter->subChars, length, 1.224 + offsetIndex, err); 1.225 + } 1.226 +} 1.227 + 1.228 +U_CAPI void U_EXPORT2 1.229 +ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, 1.230 + const UChar* source, 1.231 + int32_t length, 1.232 + int32_t offsetIndex, 1.233 + UErrorCode * err) 1.234 +{ 1.235 + if(U_FAILURE(*err)) { 1.236 + return; 1.237 + } 1.238 + 1.239 + ucnv_toUWriteUChars( 1.240 + args->converter, 1.241 + source, length, 1.242 + &args->target, args->targetLimit, 1.243 + &args->offsets, offsetIndex, 1.244 + err); 1.245 +} 1.246 + 1.247 +U_CAPI void U_EXPORT2 1.248 +ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, 1.249 + int32_t offsetIndex, 1.250 + UErrorCode * err) 1.251 +{ 1.252 + static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD; 1.253 + 1.254 + /* could optimize this case, just one uchar */ 1.255 + if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) { 1.256 + ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err); 1.257 + } else { 1.258 + ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err); 1.259 + } 1.260 +} 1.261 + 1.262 +#endif