intl/icu/source/common/ucnv_err.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnv_err.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,406 @@
     1.4 +/*
     1.5 + *****************************************************************************
     1.6 + *
     1.7 + *   Copyright (C) 1998-2007, International Business Machines
     1.8 + *   Corporation and others.  All Rights Reserved.
     1.9 + *
    1.10 + *****************************************************************************
    1.11 + *
    1.12 + *  ucnv_err.c
    1.13 + *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
    1.14 + *
    1.15 + *
    1.16 +*   Change history:
    1.17 +*
    1.18 +*   06/29/2000  helena      Major rewrite of the callback APIs.
    1.19 +*/
    1.20 +
    1.21 +#include "unicode/utypes.h"
    1.22 +
    1.23 +#if !UCONFIG_NO_CONVERSION
    1.24 +
    1.25 +#include "unicode/ucnv_err.h"
    1.26 +#include "unicode/ucnv_cb.h"
    1.27 +#include "ucnv_cnv.h"
    1.28 +#include "cmemory.h"
    1.29 +#include "unicode/ucnv.h"
    1.30 +#include "ustrfmt.h"
    1.31 +
/* Capacity, in UChars, of the on-stack escape buffers that are sized with this macro. */
#define VALUE_STRING_LENGTH 32
/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */

/* Code points of the ASCII characters used to assemble the escape sequences below. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
#define UNICODE_U_CODEPOINT             0x0055
#define UNICODE_X_CODEPOINT             0x0058
#define UNICODE_RS_CODEPOINT            0x005C
#define UNICODE_U_LOW_CODEPOINT         0x0075
#define UNICODE_X_LOW_CODEPOINT         0x0078
#define UNICODE_AMP_CODEPOINT           0x0026
#define UNICODE_HASH_CODEPOINT          0x0023
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
#define UNICODE_PLUS_CODEPOINT          0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D
#define UNICODE_SPACE_CODEPOINT         0x0020

/*
 * Option selectors: the callbacks in this file compare the first char that
 * their `context` pointer refers to against these values.
 * The UCNV_PRV_ESCAPE_* values pick the escape notation used by the ESCAPE
 * callbacks; UCNV_PRV_STOP_ON_ILLEGAL is tested by the SKIP and SUBSTITUTE
 * callbacks.
 */
#define UCNV_PRV_ESCAPE_ICU         0
#define UCNV_PRV_ESCAPE_C           'C'
#define UCNV_PRV_ESCAPE_XML_DEC     'D'
#define UCNV_PRV_ESCAPE_XML_HEX     'X'
#define UCNV_PRV_ESCAPE_JAVA        'J'
#define UCNV_PRV_ESCAPE_UNICODE     'U'
#define UCNV_PRV_ESCAPE_CSS2        'S'
#define UCNV_PRV_STOP_ON_ILLEGAL    'i'
    1.55 +
    1.56 +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
    1.57 +U_CAPI void    U_EXPORT2
    1.58 +UCNV_FROM_U_CALLBACK_STOP (
    1.59 +                  const void *context,
    1.60 +                  UConverterFromUnicodeArgs *fromUArgs,
    1.61 +                  const UChar* codeUnits,
    1.62 +                  int32_t length,
    1.63 +                  UChar32 codePoint,
    1.64 +                  UConverterCallbackReason reason,
    1.65 +                  UErrorCode * err)
    1.66 +{
    1.67 +    /* the caller must have set the error code accordingly */
    1.68 +    return;
    1.69 +}
    1.70 +
    1.71 +
    1.72 +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
    1.73 +U_CAPI void    U_EXPORT2
    1.74 +UCNV_TO_U_CALLBACK_STOP (
    1.75 +                   const void *context,
    1.76 +                   UConverterToUnicodeArgs *toUArgs,
    1.77 +                   const char* codePoints,
    1.78 +                   int32_t length,
    1.79 +                   UConverterCallbackReason reason,
    1.80 +                   UErrorCode * err)
    1.81 +{
    1.82 +    /* the caller must have set the error code accordingly */
    1.83 +    return;
    1.84 +}
    1.85 +
    1.86 +U_CAPI void    U_EXPORT2
    1.87 +UCNV_FROM_U_CALLBACK_SKIP (                  
    1.88 +                  const void *context,
    1.89 +                  UConverterFromUnicodeArgs *fromUArgs,
    1.90 +                  const UChar* codeUnits,
    1.91 +                  int32_t length,
    1.92 +                  UChar32 codePoint,
    1.93 +                  UConverterCallbackReason reason,
    1.94 +                  UErrorCode * err)
    1.95 +{
    1.96 +    if (reason <= UCNV_IRREGULAR)
    1.97 +    {
    1.98 +        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
    1.99 +        {
   1.100 +            *err = U_ZERO_ERROR;
   1.101 +        }
   1.102 +        /* else the caller must have set the error code accordingly. */
   1.103 +    }
   1.104 +    /* else ignore the reset, close and clone calls. */
   1.105 +}
   1.106 +
   1.107 +U_CAPI void    U_EXPORT2
   1.108 +UCNV_FROM_U_CALLBACK_SUBSTITUTE (
   1.109 +                  const void *context,
   1.110 +                  UConverterFromUnicodeArgs *fromArgs,
   1.111 +                  const UChar* codeUnits,
   1.112 +                  int32_t length,
   1.113 +                  UChar32 codePoint,
   1.114 +                  UConverterCallbackReason reason,
   1.115 +                  UErrorCode * err)
   1.116 +{
   1.117 +    if (reason <= UCNV_IRREGULAR)
   1.118 +    {
   1.119 +        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
   1.120 +        {
   1.121 +            *err = U_ZERO_ERROR;
   1.122 +            ucnv_cbFromUWriteSub(fromArgs, 0, err);
   1.123 +        }
   1.124 +        /* else the caller must have set the error code accordingly. */
   1.125 +    }
   1.126 +    /* else ignore the reset, close and clone calls. */
   1.127 +}
   1.128 +
   1.129 +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
   1.130 + *uses a clean copy (resetted) of the converter, to convert that unicode
   1.131 + *escape sequence to the target codepage (if conversion failure happens then
   1.132 + *we revert to substituting with subchar)
   1.133 + */
   1.134 +U_CAPI void    U_EXPORT2
   1.135 +UCNV_FROM_U_CALLBACK_ESCAPE (
   1.136 +                         const void *context,
   1.137 +                         UConverterFromUnicodeArgs *fromArgs,
   1.138 +                         const UChar *codeUnits,
   1.139 +                         int32_t length,
   1.140 +                         UChar32 codePoint,
   1.141 +                         UConverterCallbackReason reason,
   1.142 +                         UErrorCode * err)
   1.143 +{
   1.144 +
   1.145 +  UChar valueString[VALUE_STRING_LENGTH];
   1.146 +  int32_t valueStringLength = 0;
   1.147 +  int32_t i = 0;
   1.148 +
   1.149 +  const UChar *myValueSource = NULL;
   1.150 +  UErrorCode err2 = U_ZERO_ERROR;
   1.151 +  UConverterFromUCallback original = NULL;
   1.152 +  const void *originalContext;
   1.153 +
   1.154 +  UConverterFromUCallback ignoredCallback = NULL;
   1.155 +  const void *ignoredContext;
   1.156 +  
   1.157 +  if (reason > UCNV_IRREGULAR)
   1.158 +  {
   1.159 +      return;
   1.160 +  }
   1.161 +
   1.162 +  ucnv_setFromUCallBack (fromArgs->converter,
   1.163 +                     (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
   1.164 +                     NULL,
   1.165 +                     &original,
   1.166 +                     &originalContext,
   1.167 +                     &err2);
   1.168 +  
   1.169 +  if (U_FAILURE (err2))
   1.170 +  {
   1.171 +    *err = err2;
   1.172 +    return;
   1.173 +  } 
   1.174 +  if(context==NULL)
   1.175 +  { 
   1.176 +      while (i < length)
   1.177 +      {
   1.178 +        valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
   1.179 +        valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
   1.180 +        valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
   1.181 +      }
   1.182 +  }
   1.183 +  else
   1.184 +  {
   1.185 +      switch(*((char*)context))
   1.186 +      {
   1.187 +      case UCNV_PRV_ESCAPE_JAVA:
   1.188 +          while (i < length)
   1.189 +          {
   1.190 +              valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
   1.191 +              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
   1.192 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
   1.193 +          }
   1.194 +          break;
   1.195 +
   1.196 +      case UCNV_PRV_ESCAPE_C:
   1.197 +          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
   1.198 +
   1.199 +          if(length==2){
   1.200 +              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
   1.201 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
   1.202 +
   1.203 +          }
   1.204 +          else{
   1.205 +              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
   1.206 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
   1.207 +          }
   1.208 +          break;
   1.209 +
   1.210 +      case UCNV_PRV_ESCAPE_XML_DEC:
   1.211 +
   1.212 +          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
   1.213 +          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
   1.214 +          if(length==2){
   1.215 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
   1.216 +          }
   1.217 +          else{
   1.218 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
   1.219 +          }
   1.220 +          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
   1.221 +          break;
   1.222 +
   1.223 +      case UCNV_PRV_ESCAPE_XML_HEX:
   1.224 +
   1.225 +          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
   1.226 +          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
   1.227 +          valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
   1.228 +          if(length==2){
   1.229 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
   1.230 +          }
   1.231 +          else{
   1.232 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
   1.233 +          }
   1.234 +          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
   1.235 +          break;
   1.236 +
   1.237 +      case UCNV_PRV_ESCAPE_UNICODE:
   1.238 +          valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
   1.239 +          valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
   1.240 +          valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
   1.241 +          if (length == 2) {
   1.242 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
   1.243 +          } else {
   1.244 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
   1.245 +          }
   1.246 +          valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
   1.247 +          break;
   1.248 +
   1.249 +      case UCNV_PRV_ESCAPE_CSS2:
   1.250 +          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
   1.251 +          valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
   1.252 +          /* Always add space character, becase the next character might be whitespace,
   1.253 +             which would erroneously be considered the termination of the escape sequence. */
   1.254 +          valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
   1.255 +          break;
   1.256 +
   1.257 +      default:
   1.258 +          while (i < length)
   1.259 +          {
   1.260 +              valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
   1.261 +              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
   1.262 +              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
   1.263 +          }
   1.264 +      }
   1.265 +  }  
   1.266 +  myValueSource = valueString;
   1.267 +
   1.268 +  /* reset the error */
   1.269 +  *err = U_ZERO_ERROR;
   1.270 +
   1.271 +  ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
   1.272 +
   1.273 +  ucnv_setFromUCallBack (fromArgs->converter,
   1.274 +                         original,
   1.275 +                         originalContext,
   1.276 +                         &ignoredCallback,
   1.277 +                         &ignoredContext,
   1.278 +                         &err2);
   1.279 +  if (U_FAILURE (err2))
   1.280 +  {
   1.281 +      *err = err2;
   1.282 +      return;
   1.283 +  }
   1.284 +
   1.285 +  return;
   1.286 +}
   1.287 +
   1.288 +
   1.289 +
   1.290 +U_CAPI void  U_EXPORT2
   1.291 +UCNV_TO_U_CALLBACK_SKIP (
   1.292 +                 const void *context,
   1.293 +                 UConverterToUnicodeArgs *toArgs,
   1.294 +                 const char* codeUnits,
   1.295 +                 int32_t length,
   1.296 +                 UConverterCallbackReason reason,
   1.297 +                 UErrorCode * err)
   1.298 +{
   1.299 +    if (reason <= UCNV_IRREGULAR)
   1.300 +    {
   1.301 +        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
   1.302 +        {
   1.303 +            *err = U_ZERO_ERROR;
   1.304 +        }
   1.305 +        /* else the caller must have set the error code accordingly. */
   1.306 +    }
   1.307 +    /* else ignore the reset, close and clone calls. */
   1.308 +}
   1.309 +
   1.310 +U_CAPI void    U_EXPORT2
   1.311 +UCNV_TO_U_CALLBACK_SUBSTITUTE (
   1.312 +                 const void *context,
   1.313 +                 UConverterToUnicodeArgs *toArgs,
   1.314 +                 const char* codeUnits,
   1.315 +                 int32_t length,
   1.316 +                 UConverterCallbackReason reason,
   1.317 +                 UErrorCode * err)
   1.318 +{
   1.319 +    if (reason <= UCNV_IRREGULAR)
   1.320 +    {
   1.321 +        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
   1.322 +        {
   1.323 +            *err = U_ZERO_ERROR;
   1.324 +            ucnv_cbToUWriteSub(toArgs,0,err);
   1.325 +        }
   1.326 +        /* else the caller must have set the error code accordingly. */
   1.327 +    }
   1.328 +    /* else ignore the reset, close and clone calls. */
   1.329 +}
   1.330 +
   1.331 +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
   1.332 + *and uses that as the substitution sequence
   1.333 + */
   1.334 +U_CAPI void   U_EXPORT2
   1.335 +UCNV_TO_U_CALLBACK_ESCAPE (
   1.336 +                 const void *context,
   1.337 +                 UConverterToUnicodeArgs *toArgs,
   1.338 +                 const char* codeUnits,
   1.339 +                 int32_t length,
   1.340 +                 UConverterCallbackReason reason,
   1.341 +                 UErrorCode * err)
   1.342 +{
   1.343 +    UChar uniValueString[VALUE_STRING_LENGTH];
   1.344 +    int32_t valueStringLength = 0;
   1.345 +    int32_t i = 0;
   1.346 +
   1.347 +    if (reason > UCNV_IRREGULAR)
   1.348 +    {
   1.349 +        return;
   1.350 +    }
   1.351 +
   1.352 +    if(context==NULL)
   1.353 +    {    
   1.354 +        while (i < length)
   1.355 +        {
   1.356 +            uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
   1.357 +            uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
   1.358 +            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
   1.359 +        }
   1.360 +    }
   1.361 +    else
   1.362 +    {
   1.363 +        switch(*((char*)context))
   1.364 +        {
   1.365 +        case UCNV_PRV_ESCAPE_XML_DEC:
   1.366 +            while (i < length)
   1.367 +            {
   1.368 +                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
   1.369 +                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
   1.370 +                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
   1.371 +                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
   1.372 +            }
   1.373 +            break;
   1.374 +
   1.375 +        case UCNV_PRV_ESCAPE_XML_HEX:
   1.376 +            while (i < length)
   1.377 +            {
   1.378 +                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
   1.379 +                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
   1.380 +                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
   1.381 +                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
   1.382 +                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
   1.383 +            }
   1.384 +            break;
   1.385 +        case UCNV_PRV_ESCAPE_C:
   1.386 +            while (i < length)
   1.387 +            {
   1.388 +                uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
   1.389 +                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
   1.390 +                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
   1.391 +            }
   1.392 +            break;
   1.393 +        default:
   1.394 +            while (i < length)
   1.395 +            {
   1.396 +                uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
   1.397 +                uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
   1.398 +                uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
   1.399 +                valueStringLength += 2;
   1.400 +            }
   1.401 +        }
   1.402 +    }
   1.403 +    /* reset the error */
   1.404 +    *err = U_ZERO_ERROR;
   1.405 +
   1.406 +    ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
   1.407 +}
   1.408 +
   1.409 +#endif

mercurial