intl/icu/source/common/ucnv_cb.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2000-2006, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * ucnv_cb.c:
michael@0 7 * External APIs for the ICU's codeset conversion library
michael@0 8 * Helena Shih
michael@0 9 *
michael@0 10 * Modification History:
michael@0 11 *
michael@0 12 * Date Name Description
michael@0 13 * 7/28/2000 srl Implementation
michael@0 14 */
michael@0 15
michael@0 16 /**
michael@0 17 * @name Character Conversion C API
michael@0 18 *
michael@0 19 */
michael@0 20
michael@0 21 #include "unicode/utypes.h"
michael@0 22
michael@0 23 #if !UCONFIG_NO_CONVERSION
michael@0 24
michael@0 25 #include "unicode/ucnv_cb.h"
michael@0 26 #include "ucnv_bld.h"
michael@0 27 #include "ucnv_cnv.h"
michael@0 28 #include "cmemory.h"
michael@0 29
michael@0 30 /* need to update the offsets when the target moves. */
michael@0 31 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
michael@0 32 if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within
michael@0 33 the same call stack if the complexity arises. */
michael@0 34 U_CAPI void U_EXPORT2
michael@0 35 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
michael@0 36 const char* source,
michael@0 37 int32_t length,
michael@0 38 int32_t offsetIndex,
michael@0 39 UErrorCode * err)
michael@0 40 {
michael@0 41 if(U_FAILURE(*err)) {
michael@0 42 return;
michael@0 43 }
michael@0 44
michael@0 45 ucnv_fromUWriteBytes(
michael@0 46 args->converter,
michael@0 47 source, length,
michael@0 48 &args->target, args->targetLimit,
michael@0 49 &args->offsets, offsetIndex,
michael@0 50 err);
michael@0 51 }
michael@0 52
michael@0 53 U_CAPI void U_EXPORT2
michael@0 54 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
michael@0 55 const UChar** source,
michael@0 56 const UChar* sourceLimit,
michael@0 57 int32_t offsetIndex,
michael@0 58 UErrorCode * err)
michael@0 59 {
michael@0 60 /*
michael@0 61 This is a fun one. Recursion can occur - we're basically going to
michael@0 62 just retry shoving data through the same converter. Note, if you got
michael@0 63 here through some kind of invalid sequence, you maybe should emit a
michael@0 64 reset sequence of some kind and/or call ucnv_reset(). Since this
michael@0 65 IS an actual conversion, take care that you've changed the callback
michael@0 66 or the data, or you'll get an infinite loop.
michael@0 67
michael@0 68 Please set the err value to something reasonable before calling
michael@0 69 into this.
michael@0 70 */
michael@0 71
michael@0 72 char *oldTarget;
michael@0 73
michael@0 74 if(U_FAILURE(*err))
michael@0 75 {
michael@0 76 return;
michael@0 77 }
michael@0 78
michael@0 79 oldTarget = args->target;
michael@0 80
michael@0 81 ucnv_fromUnicode(args->converter,
michael@0 82 &args->target,
michael@0 83 args->targetLimit,
michael@0 84 source,
michael@0 85 sourceLimit,
michael@0 86 NULL, /* no offsets */
michael@0 87 FALSE, /* no flush */
michael@0 88 err);
michael@0 89
michael@0 90 if(args->offsets)
michael@0 91 {
michael@0 92 while (args->target != oldTarget) /* if it moved at all.. */
michael@0 93 {
michael@0 94 *(args->offsets)++ = offsetIndex;
michael@0 95 oldTarget++;
michael@0 96 }
michael@0 97 }
michael@0 98
michael@0 99 /*
michael@0 100 Note, if you did something like used a Stop subcallback, things would get interesting.
michael@0 101 In fact, here's where we want to return the partially consumed in-source!
michael@0 102 */
michael@0 103 if(*err == U_BUFFER_OVERFLOW_ERROR)
michael@0 104 /* && (*source < sourceLimit && args->target >= args->targetLimit)
michael@0 105 -- S. Hrcek */
michael@0 106 {
michael@0 107 /* Overflowed the target. Now, we'll write into the charErrorBuffer.
michael@0 108 It's a fixed size. If we overflow it... Hmm */
michael@0 109 char *newTarget;
michael@0 110 const char *newTargetLimit;
michael@0 111 UErrorCode err2 = U_ZERO_ERROR;
michael@0 112
michael@0 113 int8_t errBuffLen;
michael@0 114
michael@0 115 errBuffLen = args->converter->charErrorBufferLength;
michael@0 116
michael@0 117 /* start the new target at the first free slot in the errbuff.. */
michael@0 118 newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
michael@0 119
michael@0 120 newTargetLimit = (char *)(args->converter->charErrorBuffer +
michael@0 121 sizeof(args->converter->charErrorBuffer));
michael@0 122
michael@0 123 if(newTarget >= newTargetLimit)
michael@0 124 {
michael@0 125 *err = U_INTERNAL_PROGRAM_ERROR;
michael@0 126 return;
michael@0 127 }
michael@0 128
michael@0 129 /* We're going to tell the converter that the errbuff len is empty.
michael@0 130 This prevents the existing errbuff from being 'flushed' out onto
michael@0 131 itself. If the errbuff is needed by the converter this time,
michael@0 132 we're hosed - we're out of space! */
michael@0 133
michael@0 134 args->converter->charErrorBufferLength = 0;
michael@0 135
michael@0 136 ucnv_fromUnicode(args->converter,
michael@0 137 &newTarget,
michael@0 138 newTargetLimit,
michael@0 139 source,
michael@0 140 sourceLimit,
michael@0 141 NULL,
michael@0 142 FALSE,
michael@0 143 &err2);
michael@0 144
michael@0 145 /* We can go ahead and overwrite the length here. We know just how
michael@0 146 to recalculate it. */
michael@0 147
michael@0 148 args->converter->charErrorBufferLength = (int8_t)(
michael@0 149 newTarget - (char*)args->converter->charErrorBuffer);
michael@0 150
michael@0 151 if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
michael@0 152 {
michael@0 153 /* now we're REALLY in trouble.
michael@0 154 Internal program error - callback shouldn't have written this much
michael@0 155 data!
michael@0 156 */
michael@0 157 *err = U_INTERNAL_PROGRAM_ERROR;
michael@0 158 return;
michael@0 159 }
michael@0 160 /*else {*/
michael@0 161 /* sub errs could be invalid/truncated/illegal chars or w/e.
michael@0 162 These might want to be passed on up.. But the problem is, we already
michael@0 163 need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
michael@0 164 other errs.. */
michael@0 165
michael@0 166 /*
michael@0 167 if(U_FAILURE(err2))
michael@0 168 ??
michael@0 169 */
michael@0 170 /*}*/
michael@0 171 }
michael@0 172 }
michael@0 173
michael@0 174 U_CAPI void U_EXPORT2
michael@0 175 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
michael@0 176 int32_t offsetIndex,
michael@0 177 UErrorCode * err)
michael@0 178 {
michael@0 179 UConverter *converter;
michael@0 180 int32_t length;
michael@0 181
michael@0 182 if(U_FAILURE(*err)) {
michael@0 183 return;
michael@0 184 }
michael@0 185 converter = args->converter;
michael@0 186 length = converter->subCharLen;
michael@0 187
michael@0 188 if(length == 0) {
michael@0 189 return;
michael@0 190 }
michael@0 191
michael@0 192 if(length < 0) {
michael@0 193 /*
michael@0 194 * Write/convert the substitution string. Its real length is -length.
michael@0 195 * Unlike the escape callback, we need not change the converter's
michael@0 196 * callback function because ucnv_setSubstString() verified that
michael@0 197 * the string can be converted, so we will not get a conversion error
michael@0 198 * and will not recurse.
michael@0 199 * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
michael@0 200 */
michael@0 201 const UChar *source = (const UChar *)converter->subChars;
michael@0 202 ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
michael@0 203 return;
michael@0 204 }
michael@0 205
michael@0 206 if(converter->sharedData->impl->writeSub!=NULL) {
michael@0 207 converter->sharedData->impl->writeSub(args, offsetIndex, err);
michael@0 208 }
michael@0 209 else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
michael@0 210 /*
michael@0 211 TODO: Is this untestable because the MBCS converter has a writeSub function to call
michael@0 212 and the other converters don't use subChar1?
michael@0 213 */
michael@0 214 ucnv_cbFromUWriteBytes(args,
michael@0 215 (const char *)&converter->subChar1, 1,
michael@0 216 offsetIndex, err);
michael@0 217 }
michael@0 218 else {
michael@0 219 ucnv_cbFromUWriteBytes(args,
michael@0 220 (const char *)converter->subChars, length,
michael@0 221 offsetIndex, err);
michael@0 222 }
michael@0 223 }
michael@0 224
michael@0 225 U_CAPI void U_EXPORT2
michael@0 226 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
michael@0 227 const UChar* source,
michael@0 228 int32_t length,
michael@0 229 int32_t offsetIndex,
michael@0 230 UErrorCode * err)
michael@0 231 {
michael@0 232 if(U_FAILURE(*err)) {
michael@0 233 return;
michael@0 234 }
michael@0 235
michael@0 236 ucnv_toUWriteUChars(
michael@0 237 args->converter,
michael@0 238 source, length,
michael@0 239 &args->target, args->targetLimit,
michael@0 240 &args->offsets, offsetIndex,
michael@0 241 err);
michael@0 242 }
michael@0 243
michael@0 244 U_CAPI void U_EXPORT2
michael@0 245 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
michael@0 246 int32_t offsetIndex,
michael@0 247 UErrorCode * err)
michael@0 248 {
michael@0 249 static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
michael@0 250
michael@0 251 /* could optimize this case, just one uchar */
michael@0 252 if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
michael@0 253 ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
michael@0 254 } else {
michael@0 255 ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
michael@0 256 }
michael@0 257 }
michael@0 258
michael@0 259 #endif

mercurial