1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnvisci.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1631 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2000-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* file name: ucnvisci.c 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2001JUN26 1.15 +* created by: Ram Viswanadha 1.16 +* 1.17 +* Date Name Description 1.18 +* 24/7/2001 Ram Added support for EXT character handling 1.19 +*/ 1.20 + 1.21 +#include "unicode/utypes.h" 1.22 + 1.23 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 1.24 + 1.25 +#include "unicode/ucnv.h" 1.26 +#include "unicode/ucnv_cb.h" 1.27 +#include "unicode/utf16.h" 1.28 +#include "cmemory.h" 1.29 +#include "ucnv_bld.h" 1.30 +#include "ucnv_cnv.h" 1.31 +#include "cstring.h" 1.32 +#include "uassert.h" 1.33 + 1.34 +#define UCNV_OPTIONS_VERSION_MASK 0xf 1.35 +#define NUKTA 0x093c 1.36 +#define HALANT 0x094d 1.37 +#define ZWNJ 0x200c /* Zero Width Non Joiner */ 1.38 +#define ZWJ 0x200d /* Zero width Joiner */ 1.39 +#define INVALID_CHAR 0xffff 1.40 +#define ATR 0xEF /* Attribute code */ 1.41 +#define EXT 0xF0 /* Extension code */ 1.42 +#define DANDA 0x0964 1.43 +#define DOUBLE_DANDA 0x0965 1.44 +#define ISCII_NUKTA 0xE9 1.45 +#define ISCII_HALANT 0xE8 1.46 +#define ISCII_DANDA 0xEA 1.47 +#define ISCII_INV 0xD9 1.48 +#define ISCII_VOWEL_SIGN_E 0xE0 1.49 +#define INDIC_BLOCK_BEGIN 0x0900 1.50 +#define INDIC_BLOCK_END 0x0D7F 1.51 +#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) 1.52 +#define VOCALLIC_RR 0x0931 1.53 +#define LF 0x0A 1.54 +#define ASCII_END 0xA0 1.55 +#define NO_CHAR_MARKER 0xFFFE 1.56 +#define TELUGU_DELTA DELTA * TELUGU 1.57 +#define DEV_ABBR_SIGN 0x0970 1.58 +#define DEV_ANUDATTA 0x0952 1.59 +#define EXT_RANGE_BEGIN 0xA1 1.60 +#define EXT_RANGE_END 0xEE 1.61 + 1.62 +#define PNJ_DELTA 0x0100 1.63 +#define PNJ_BINDI 0x0A02 1.64 +#define PNJ_TIPPI 0x0A70 1.65 +#define PNJ_SIGN_VIRAMA 0x0A4D 1.66 +#define PNJ_ADHAK 0x0A71 1.67 +#define PNJ_HA 0x0A39 1.68 +#define PNJ_RRA 0x0A5C 1.69 + 1.70 +typedef enum { 1.71 + DEVANAGARI =0, 1.72 + BENGALI, 1.73 + GURMUKHI, 1.74 + GUJARATI, 1.75 + ORIYA, 1.76 + TAMIL, 1.77 + TELUGU, 1.78 + KANNADA, 1.79 + MALAYALAM, 1.80 + DELTA=0x80 1.81 +}UniLang; 1.82 + 1.83 +/** 1.84 + * Enumeration for switching code pages if <ATR>+<one of below values> 1.85 + * is encountered 1.86 + */ 1.87 +typedef enum { 1.88 + DEF = 0x40, 1.89 + RMN = 0x41, 1.90 + DEV = 0x42, 1.91 + BNG = 0x43, 1.92 + TML = 0x44, 1.93 + TLG = 0x45, 1.94 + ASM = 0x46, 1.95 + ORI = 0x47, 1.96 + KND = 0x48, 1.97 + MLM = 0x49, 1.98 + GJR = 0x4A, 1.99 + PNJ = 0x4B, 1.100 + ARB = 0x71, 1.101 + PES = 0x72, 1.102 + URD = 0x73, 1.103 + SND = 0x74, 1.104 + KSM = 0x75, 1.105 + PST = 0x76 1.106 +}ISCIILang; 1.107 + 1.108 +typedef enum { 1.109 + DEV_MASK =0x80, 1.110 + PNJ_MASK =0x40, 1.111 + GJR_MASK =0x20, 1.112 + ORI_MASK =0x10, 1.113 + BNG_MASK =0x08, 1.114 + KND_MASK =0x04, 1.115 + MLM_MASK =0x02, 1.116 + TML_MASK =0x01, 1.117 + ZERO =0x00 1.118 +}MaskEnum; 1.119 + 1.120 +#define ISCII_CNV_PREFIX "ISCII,version=" 1.121 + 1.122 +typedef struct { 1.123 + UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ 1.124 + UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ 1.125 + uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ 1.126 + uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ 1.127 + uint16_t currentDeltaToUnicode; /* current delta in Indic block */ 1.128 + MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ 1.129 + MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ 1.130 + MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ 1.131 + UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ 1.132 + UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ 1.133 + char name[sizeof(ISCII_CNV_PREFIX) + 1]; 1.134 + UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */ 1.135 +} UConverterDataISCII; 1.136 + 1.137 +typedef struct LookupDataStruct { 1.138 + UniLang uniLang; 1.139 + MaskEnum maskEnum; 1.140 + ISCIILang isciiLang; 1.141 +} LookupDataStruct; 1.142 + 1.143 +static const LookupDataStruct lookupInitialData[]={ 1.144 + { DEVANAGARI, DEV_MASK, DEV }, 1.145 + { BENGALI, BNG_MASK, BNG }, 1.146 + { GURMUKHI, PNJ_MASK, PNJ }, 1.147 + { GUJARATI, GJR_MASK, GJR }, 1.148 + { ORIYA, ORI_MASK, ORI }, 1.149 + { TAMIL, TML_MASK, TML }, 1.150 + { TELUGU, KND_MASK, TLG }, 1.151 + { KANNADA, KND_MASK, KND }, 1.152 + { MALAYALAM, MLM_MASK, MLM } 1.153 +}; 1.154 + 1.155 +/* 1.156 + * For special handling of certain Gurmukhi characters. 1.157 + * Bit 0 (value 1): PNJ consonant 1.158 + * Bit 1 (value 2): PNJ Bindi Tippi 1.159 + */ 1.160 +static const uint8_t pnjMap[80] = { 1.161 + /* 0A00..0A0F */ 1.162 + 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1.163 + /* 0A10..0A1F */ 1.164 + 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1.165 + /* 0A20..0A2F */ 1.166 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 1.167 + /* 0A30..0A3F */ 1.168 + 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, 1.169 + /* 0A40..0A4F */ 1.170 + 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 1.171 +}; 1.172 + 1.173 +static UBool 1.174 +isPNJConsonant(UChar32 c) { 1.175 + if (c < 0xa00 || 0xa50 <= c) { 1.176 + return FALSE; 1.177 + } else { 1.178 + return (UBool)(pnjMap[c - 0xa00] & 1); 1.179 + } 1.180 +} 1.181 + 1.182 +static UBool 1.183 +isPNJBindiTippi(UChar32 c) { 1.184 + if (c < 0xa00 || 0xa50 <= c) { 1.185 + return FALSE; 1.186 + } else { 1.187 + return (UBool)(pnjMap[c - 0xa00] >> 1); 1.188 + } 1.189 +} 1.190 + 1.191 +static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { 1.192 + if(pArgs->onlyTestIsLoadable) { 1.193 + return; 1.194 + } 1.195 + 1.196 + cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); 1.197 + 1.198 + if (cnv->extraInfo != NULL) { 1.199 + int32_t len=0; 1.200 + UConverterDataISCII *converterData= 1.201 + (UConverterDataISCII *) cnv->extraInfo; 1.202 + converterData->contextCharToUnicode=NO_CHAR_MARKER; 1.203 + cnv->toUnicodeStatus = missingCharMarker; 1.204 + converterData->contextCharFromUnicode=0x0000; 1.205 + converterData->resetToDefaultToUnicode=FALSE; 1.206 + /* check if the version requested is supported */ 1.207 + if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { 1.208 + /* initialize state variables */ 1.209 + converterData->currentDeltaFromUnicode 1.210 + = converterData->currentDeltaToUnicode 1.211 + = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); 1.212 + 1.213 + converterData->currentMaskFromUnicode 1.214 + = converterData->currentMaskToUnicode 1.215 + = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; 1.216 + 1.217 + converterData->isFirstBuffer=TRUE; 1.218 + (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); 1.219 + len = (int32_t)uprv_strlen(converterData->name); 1.220 + converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); 1.221 + converterData->name[len+1]=0; 1.222 + 1.223 + converterData->prevToUnicodeStatus = 0x0000; 1.224 + } else { 1.225 + uprv_free(cnv->extraInfo); 1.226 + cnv->extraInfo = NULL; 1.227 + *errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.228 + } 1.229 + 1.230 + } else { 1.231 + *errorCode =U_MEMORY_ALLOCATION_ERROR; 1.232 + } 1.233 +} 1.234 + 1.235 +static void _ISCIIClose(UConverter *cnv) { 1.236 + if (cnv->extraInfo!=NULL) { 1.237 + if (!cnv->isExtraLocal) { 1.238 + uprv_free(cnv->extraInfo); 1.239 + } 1.240 + cnv->extraInfo=NULL; 1.241 + } 1.242 +} 1.243 + 1.244 +static const char* _ISCIIgetName(const UConverter* cnv) { 1.245 + if (cnv->extraInfo) { 1.246 + UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; 1.247 + return myData->name; 1.248 + } 1.249 + return NULL; 1.250 +} 1.251 + 1.252 +static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { 1.253 + UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); 1.254 + if (choice<=UCNV_RESET_TO_UNICODE) { 1.255 + cnv->toUnicodeStatus = missingCharMarker; 1.256 + cnv->mode=0; 1.257 + data->currentDeltaToUnicode=data->defDeltaToUnicode; 1.258 + data->currentMaskToUnicode = data->defMaskToUnicode; 1.259 + data->contextCharToUnicode=NO_CHAR_MARKER; 1.260 + data->prevToUnicodeStatus = 0x0000; 1.261 + } 1.262 + if (choice!=UCNV_RESET_TO_UNICODE) { 1.263 + cnv->fromUChar32=0x0000; 1.264 + data->contextCharFromUnicode=0x00; 1.265 + data->currentMaskFromUnicode=data->defMaskToUnicode; 1.266 + data->currentDeltaFromUnicode=data->defDeltaToUnicode; 1.267 + data->isFirstBuffer=TRUE; 1.268 + data->resetToDefaultToUnicode=FALSE; 1.269 + } 1.270 +} 1.271 + 1.272 +/** 1.273 + * The values in validity table are indexed by the lower bits of Unicode 1.274 + * range 0x0900 - 0x09ff. The values have a structure like: 1.275 + * --------------------------------------------------------------- 1.276 + * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | 1.277 + * | | | | | ASM | KND | | | 1.278 + * --------------------------------------------------------------- 1.279 + * If a code point is valid in a particular script 1.280 + * then that bit is turned on 1.281 + * 1.282 + * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for 1.283 + * to represent these languages 1.284 + * 1.285 + * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case 1.286 + * and combine and use 1 bit to represent these languages. 1.287 + * 1.288 + * TODO: It is probably easier to understand and maintain to change this 1.289 + * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. 1.290 + */ 1.291 + 1.292 +static const uint8_t validityTable[128] = { 1.293 +/* This state table is tool generated please do not edit unless you know exactly what you are doing */ 1.294 +/* Note: This table was edited to mirror the Windows XP implementation */ 1.295 +/*ISCII:Valid:Unicode */ 1.296 +/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.297 +/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , 1.298 +/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.299 +/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.300 +/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.301 +/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.302 +/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.303 +/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.304 +/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.305 +/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.306 +/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.307 +/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.308 +/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.309 +/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , 1.310 +/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , 1.311 +/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.312 +/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.313 +/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , 1.314 +/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , 1.315 +/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.316 +/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.317 +/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.318 +/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.319 +/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.320 +/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.321 +/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.322 +/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.323 +/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.324 +/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.325 +/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.326 +/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.327 +/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.328 +/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.329 +/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.330 +/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.331 +/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.332 +/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.333 +/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.334 +/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.335 +/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.336 +/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.337 +/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , 1.338 +/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.339 +/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.340 +/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.341 +/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.342 +/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.343 +/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.344 +/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.345 +/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , 1.346 +/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.347 +/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , 1.348 +/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , 1.349 +/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , 1.350 +/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.351 +/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.352 +/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.353 +/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.354 +/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.355 +/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.356 +/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , 1.357 +/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.358 +/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.359 +/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.360 +/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.361 +/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.362 +/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.363 +/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.364 +/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , 1.365 +/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , 1.366 +/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , 1.367 +/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.368 +/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.369 +/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , 1.370 +/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , 1.371 +/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.372 +/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.373 +/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.374 +/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.375 +/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.376 +/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , 1.377 +/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.378 +/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.379 +/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.380 +/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.381 +/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , 1.382 +/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , 1.383 +/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , 1.384 +/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.385 +/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.386 +/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.387 +/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.388 +/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , 1.389 +/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , 1.390 +/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.391 +/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , 1.392 +/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.393 +/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , 1.394 +/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , 1.395 +/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , 1.396 +/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.397 +/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.398 +/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.399 +/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.400 +/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.401 +/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.402 +/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.403 +/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.404 +/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.405 +/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.406 +/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.407 +/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 1.408 +/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , 1.409 +/* 1.410 + * The length of the array is 128 to provide values for 0x900..0x97f. 1.411 + * The last 15 entries for 0x971..0x97f of the validity table are all zero 1.412 + * because no Indic script uses such Unicode code points. 1.413 + */ 1.414 +/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO 1.415 +}; 1.416 + 1.417 +static const uint16_t fromUnicodeTable[128]={ 1.418 + 0x00a0 ,/* 0x0900 */ 1.419 + 0x00a1 ,/* 0x0901 */ 1.420 + 0x00a2 ,/* 0x0902 */ 1.421 + 0x00a3 ,/* 0x0903 */ 1.422 + 0xa4e0 ,/* 0x0904 */ 1.423 + 0x00a4 ,/* 0x0905 */ 1.424 + 0x00a5 ,/* 0x0906 */ 1.425 + 0x00a6 ,/* 0x0907 */ 1.426 + 0x00a7 ,/* 0x0908 */ 1.427 + 0x00a8 ,/* 0x0909 */ 1.428 + 0x00a9 ,/* 0x090a */ 1.429 + 0x00aa ,/* 0x090b */ 1.430 + 0xA6E9 ,/* 0x090c */ 1.431 + 0x00ae ,/* 0x090d */ 1.432 + 0x00ab ,/* 0x090e */ 1.433 + 0x00ac ,/* 0x090f */ 1.434 + 0x00ad ,/* 0x0910 */ 1.435 + 0x00b2 ,/* 0x0911 */ 1.436 + 0x00af ,/* 0x0912 */ 1.437 + 0x00b0 ,/* 0x0913 */ 1.438 + 0x00b1 ,/* 0x0914 */ 1.439 + 0x00b3 ,/* 0x0915 */ 1.440 + 0x00b4 ,/* 0x0916 */ 1.441 + 0x00b5 ,/* 0x0917 */ 1.442 + 0x00b6 ,/* 0x0918 */ 1.443 + 0x00b7 ,/* 0x0919 */ 1.444 + 0x00b8 ,/* 0x091a */ 1.445 + 0x00b9 ,/* 0x091b */ 1.446 + 0x00ba ,/* 0x091c */ 1.447 + 0x00bb ,/* 0x091d */ 1.448 + 0x00bc ,/* 0x091e */ 1.449 + 0x00bd ,/* 0x091f */ 1.450 + 0x00be ,/* 0x0920 */ 1.451 + 0x00bf ,/* 0x0921 */ 1.452 + 0x00c0 ,/* 0x0922 */ 1.453 + 0x00c1 ,/* 0x0923 */ 1.454 + 0x00c2 ,/* 0x0924 */ 1.455 + 0x00c3 ,/* 0x0925 */ 1.456 + 0x00c4 ,/* 0x0926 */ 1.457 + 0x00c5 ,/* 0x0927 */ 1.458 + 0x00c6 ,/* 0x0928 */ 1.459 + 0x00c7 ,/* 0x0929 */ 1.460 + 0x00c8 ,/* 0x092a */ 1.461 + 0x00c9 ,/* 0x092b */ 1.462 + 0x00ca ,/* 0x092c */ 1.463 + 0x00cb ,/* 0x092d */ 1.464 + 0x00cc ,/* 0x092e */ 1.465 + 0x00cd ,/* 0x092f */ 1.466 + 0x00cf ,/* 0x0930 */ 1.467 + 0x00d0 ,/* 0x0931 */ 1.468 + 0x00d1 ,/* 0x0932 */ 1.469 + 0x00d2 ,/* 0x0933 */ 1.470 + 0x00d3 ,/* 0x0934 */ 1.471 + 0x00d4 ,/* 0x0935 */ 1.472 + 0x00d5 ,/* 0x0936 */ 1.473 + 0x00d6 ,/* 0x0937 */ 1.474 + 0x00d7 ,/* 0x0938 */ 1.475 + 0x00d8 ,/* 0x0939 */ 1.476 + 0xFFFF ,/* 0x093A */ 1.477 + 0xFFFF ,/* 0x093B */ 1.478 + 0x00e9 ,/* 0x093c */ 1.479 + 0xEAE9 ,/* 0x093d */ 1.480 + 0x00da ,/* 0x093e */ 1.481 + 0x00db ,/* 0x093f */ 1.482 + 0x00dc ,/* 0x0940 */ 1.483 + 0x00dd ,/* 0x0941 */ 1.484 + 0x00de ,/* 0x0942 */ 1.485 + 0x00df ,/* 0x0943 */ 1.486 + 0xDFE9 ,/* 0x0944 */ 1.487 + 0x00e3 ,/* 0x0945 */ 1.488 + 0x00e0 ,/* 0x0946 */ 1.489 + 0x00e1 ,/* 0x0947 */ 1.490 + 0x00e2 ,/* 0x0948 */ 1.491 + 0x00e7 ,/* 0x0949 */ 1.492 + 0x00e4 ,/* 0x094a */ 1.493 + 0x00e5 ,/* 0x094b */ 1.494 + 0x00e6 ,/* 0x094c */ 1.495 + 0x00e8 ,/* 0x094d */ 1.496 + 0x00ec ,/* 0x094e */ 1.497 + 0x00ed ,/* 0x094f */ 1.498 + 0xA1E9 ,/* 0x0950 */ /* OM Symbol */ 1.499 + 0xFFFF ,/* 0x0951 */ 1.500 + 0xF0B8 ,/* 0x0952 */ 1.501 + 0xFFFF ,/* 0x0953 */ 1.502 + 0xFFFF ,/* 0x0954 */ 1.503 + 0xFFFF ,/* 0x0955 */ 1.504 + 0xFFFF ,/* 0x0956 */ 1.505 + 0xFFFF ,/* 0x0957 */ 1.506 + 0xb3e9 ,/* 0x0958 */ 1.507 + 0xb4e9 ,/* 0x0959 */ 1.508 + 0xb5e9 ,/* 0x095a */ 1.509 + 0xbae9 ,/* 0x095b */ 1.510 + 0xbfe9 ,/* 0x095c */ 1.511 + 0xC0E9 ,/* 0x095d */ 1.512 + 0xc9e9 ,/* 0x095e */ 1.513 + 0x00ce ,/* 0x095f */ 1.514 + 0xAAe9 ,/* 0x0960 */ 1.515 + 0xA7E9 ,/* 0x0961 */ 1.516 + 0xDBE9 ,/* 0x0962 */ 1.517 + 0xDCE9 ,/* 0x0963 */ 1.518 + 0x00ea ,/* 0x0964 */ 1.519 + 0xeaea ,/* 0x0965 */ 1.520 + 0x00f1 ,/* 0x0966 */ 1.521 + 0x00f2 ,/* 0x0967 */ 1.522 + 0x00f3 ,/* 0x0968 */ 1.523 + 0x00f4 ,/* 0x0969 */ 1.524 + 0x00f5 ,/* 0x096a */ 1.525 + 0x00f6 ,/* 0x096b */ 1.526 + 0x00f7 ,/* 0x096c */ 1.527 + 0x00f8 ,/* 0x096d */ 1.528 + 0x00f9 ,/* 0x096e */ 1.529 + 0x00fa ,/* 0x096f */ 1.530 + 0xF0BF ,/* 0x0970 */ 1.531 + 0xFFFF ,/* 0x0971 */ 1.532 + 0xFFFF ,/* 0x0972 */ 1.533 + 0xFFFF ,/* 0x0973 */ 1.534 + 0xFFFF ,/* 0x0974 */ 1.535 + 0xFFFF ,/* 0x0975 */ 1.536 + 0xFFFF ,/* 0x0976 */ 1.537 + 0xFFFF ,/* 0x0977 */ 1.538 + 0xFFFF ,/* 0x0978 */ 1.539 + 0xFFFF ,/* 0x0979 */ 1.540 + 0xFFFF ,/* 0x097a */ 1.541 + 0xFFFF ,/* 0x097b */ 1.542 + 0xFFFF ,/* 0x097c */ 1.543 + 0xFFFF ,/* 0x097d */ 1.544 + 0xFFFF ,/* 0x097e */ 1.545 + 0xFFFF ,/* 0x097f */ 1.546 +}; 1.547 +static const uint16_t toUnicodeTable[256]={ 1.548 + 0x0000,/* 0x00 */ 1.549 + 0x0001,/* 0x01 */ 1.550 + 0x0002,/* 0x02 */ 1.551 + 0x0003,/* 0x03 */ 1.552 + 0x0004,/* 0x04 */ 1.553 + 0x0005,/* 0x05 */ 1.554 + 0x0006,/* 0x06 */ 1.555 + 0x0007,/* 0x07 */ 1.556 + 0x0008,/* 0x08 */ 1.557 + 0x0009,/* 0x09 */ 1.558 + 0x000a,/* 0x0a */ 1.559 + 0x000b,/* 0x0b */ 1.560 + 0x000c,/* 0x0c */ 1.561 + 0x000d,/* 0x0d */ 1.562 + 0x000e,/* 0x0e */ 1.563 + 0x000f,/* 0x0f */ 1.564 + 0x0010,/* 0x10 */ 1.565 + 0x0011,/* 0x11 */ 1.566 + 0x0012,/* 0x12 */ 1.567 + 0x0013,/* 0x13 */ 1.568 + 0x0014,/* 0x14 */ 1.569 + 0x0015,/* 0x15 */ 1.570 + 0x0016,/* 0x16 */ 1.571 + 0x0017,/* 0x17 */ 1.572 + 0x0018,/* 0x18 */ 1.573 + 0x0019,/* 0x19 */ 1.574 + 0x001a,/* 0x1a */ 1.575 + 0x001b,/* 0x1b */ 1.576 + 0x001c,/* 0x1c */ 1.577 + 0x001d,/* 0x1d */ 1.578 + 0x001e,/* 0x1e */ 1.579 + 0x001f,/* 0x1f */ 1.580 + 0x0020,/* 0x20 */ 1.581 + 0x0021,/* 0x21 */ 1.582 + 0x0022,/* 0x22 */ 1.583 + 0x0023,/* 0x23 */ 1.584 + 0x0024,/* 0x24 */ 1.585 + 0x0025,/* 0x25 */ 1.586 + 0x0026,/* 0x26 */ 1.587 + 0x0027,/* 0x27 */ 1.588 + 0x0028,/* 0x28 */ 1.589 + 0x0029,/* 0x29 */ 1.590 + 0x002a,/* 0x2a */ 1.591 + 0x002b,/* 0x2b */ 1.592 + 0x002c,/* 0x2c */ 1.593 + 0x002d,/* 0x2d */ 1.594 + 0x002e,/* 0x2e */ 1.595 + 0x002f,/* 0x2f */ 1.596 + 0x0030,/* 0x30 */ 1.597 + 0x0031,/* 0x31 */ 1.598 + 0x0032,/* 0x32 */ 1.599 + 0x0033,/* 0x33 */ 1.600 + 0x0034,/* 0x34 */ 1.601 + 0x0035,/* 0x35 */ 1.602 + 0x0036,/* 0x36 */ 1.603 + 0x0037,/* 0x37 */ 1.604 + 0x0038,/* 0x38 */ 1.605 + 0x0039,/* 0x39 */ 1.606 + 0x003A,/* 0x3A */ 1.607 + 0x003B,/* 0x3B */ 1.608 + 0x003c,/* 0x3c */ 1.609 + 0x003d,/* 0x3d */ 1.610 + 0x003e,/* 0x3e */ 1.611 + 0x003f,/* 0x3f */ 1.612 + 0x0040,/* 0x40 */ 1.613 + 0x0041,/* 0x41 */ 1.614 + 0x0042,/* 0x42 */ 1.615 + 0x0043,/* 0x43 */ 1.616 + 0x0044,/* 0x44 */ 1.617 + 0x0045,/* 0x45 */ 1.618 + 0x0046,/* 0x46 */ 1.619 + 0x0047,/* 0x47 */ 1.620 + 0x0048,/* 0x48 */ 1.621 + 0x0049,/* 0x49 */ 1.622 + 0x004a,/* 0x4a */ 1.623 + 0x004b,/* 0x4b */ 1.624 + 0x004c,/* 0x4c */ 1.625 + 0x004d,/* 0x4d */ 1.626 + 0x004e,/* 0x4e */ 1.627 + 0x004f,/* 0x4f */ 1.628 + 0x0050,/* 0x50 */ 1.629 + 0x0051,/* 0x51 */ 1.630 + 0x0052,/* 0x52 */ 1.631 + 0x0053,/* 0x53 */ 1.632 + 0x0054,/* 0x54 */ 1.633 + 0x0055,/* 0x55 */ 1.634 + 0x0056,/* 0x56 */ 1.635 + 0x0057,/* 0x57 */ 1.636 + 0x0058,/* 0x58 */ 1.637 + 0x0059,/* 0x59 */ 1.638 + 0x005a,/* 0x5a */ 1.639 + 0x005b,/* 0x5b */ 1.640 + 0x005c,/* 0x5c */ 1.641 + 0x005d,/* 0x5d */ 1.642 + 0x005e,/* 0x5e */ 1.643 + 0x005f,/* 0x5f */ 1.644 + 0x0060,/* 0x60 */ 1.645 + 0x0061,/* 0x61 */ 1.646 + 0x0062,/* 0x62 */ 1.647 + 0x0063,/* 0x63 */ 1.648 + 0x0064,/* 0x64 */ 1.649 + 0x0065,/* 0x65 */ 1.650 + 0x0066,/* 0x66 */ 1.651 + 0x0067,/* 0x67 */ 1.652 + 0x0068,/* 0x68 */ 1.653 + 0x0069,/* 0x69 */ 1.654 + 0x006a,/* 0x6a */ 1.655 + 0x006b,/* 0x6b */ 1.656 + 0x006c,/* 0x6c */ 1.657 + 0x006d,/* 0x6d */ 1.658 + 0x006e,/* 0x6e */ 1.659 + 0x006f,/* 0x6f */ 1.660 + 0x0070,/* 0x70 */ 1.661 + 0x0071,/* 0x71 */ 1.662 + 0x0072,/* 0x72 */ 1.663 + 0x0073,/* 0x73 */ 1.664 + 0x0074,/* 0x74 */ 1.665 + 0x0075,/* 0x75 */ 1.666 + 0x0076,/* 0x76 */ 1.667 + 0x0077,/* 0x77 */ 1.668 + 0x0078,/* 0x78 */ 1.669 + 0x0079,/* 0x79 */ 1.670 + 0x007a,/* 0x7a */ 1.671 + 0x007b,/* 0x7b */ 1.672 + 0x007c,/* 0x7c */ 1.673 + 0x007d,/* 0x7d */ 1.674 + 0x007e,/* 0x7e */ 1.675 + 0x007f,/* 0x7f */ 1.676 + 0x0080,/* 0x80 */ 1.677 + 0x0081,/* 0x81 */ 1.678 + 0x0082,/* 0x82 */ 1.679 + 0x0083,/* 0x83 */ 1.680 + 0x0084,/* 0x84 */ 1.681 + 0x0085,/* 0x85 */ 1.682 + 0x0086,/* 0x86 */ 1.683 + 0x0087,/* 0x87 */ 1.684 + 0x0088,/* 0x88 */ 1.685 + 0x0089,/* 0x89 */ 1.686 + 0x008a,/* 0x8a */ 1.687 + 0x008b,/* 0x8b */ 1.688 + 0x008c,/* 0x8c */ 1.689 + 0x008d,/* 0x8d */ 1.690 + 0x008e,/* 0x8e */ 1.691 + 0x008f,/* 0x8f */ 1.692 + 0x0090,/* 0x90 */ 1.693 + 0x0091,/* 0x91 */ 1.694 + 0x0092,/* 0x92 */ 1.695 + 0x0093,/* 0x93 */ 1.696 + 0x0094,/* 0x94 */ 1.697 + 0x0095,/* 0x95 */ 1.698 + 0x0096,/* 0x96 */ 1.699 + 0x0097,/* 0x97 */ 1.700 + 0x0098,/* 0x98 */ 1.701 + 0x0099,/* 0x99 */ 1.702 + 0x009a,/* 0x9a */ 1.703 + 0x009b,/* 0x9b */ 1.704 + 0x009c,/* 0x9c */ 1.705 + 0x009d,/* 0x9d */ 1.706 + 0x009e,/* 0x9e */ 1.707 + 0x009f,/* 0x9f */ 1.708 + 0x00A0,/* 0xa0 */ 1.709 + 0x0901,/* 0xa1 */ 1.710 + 0x0902,/* 0xa2 */ 1.711 + 0x0903,/* 0xa3 */ 1.712 + 0x0905,/* 0xa4 */ 1.713 + 0x0906,/* 0xa5 */ 1.714 + 0x0907,/* 0xa6 */ 1.715 + 0x0908,/* 0xa7 */ 1.716 + 0x0909,/* 0xa8 */ 1.717 + 0x090a,/* 0xa9 */ 1.718 + 0x090b,/* 0xaa */ 1.719 + 0x090e,/* 0xab */ 1.720 + 0x090f,/* 0xac */ 1.721 + 0x0910,/* 0xad */ 1.722 + 0x090d,/* 0xae */ 1.723 + 0x0912,/* 0xaf */ 1.724 + 0x0913,/* 0xb0 */ 1.725 + 0x0914,/* 0xb1 */ 1.726 + 0x0911,/* 0xb2 */ 1.727 + 0x0915,/* 0xb3 */ 1.728 + 0x0916,/* 0xb4 */ 1.729 + 0x0917,/* 0xb5 */ 1.730 + 0x0918,/* 0xb6 */ 1.731 + 0x0919,/* 0xb7 */ 1.732 + 0x091a,/* 0xb8 */ 1.733 + 0x091b,/* 0xb9 */ 1.734 + 0x091c,/* 0xba */ 1.735 + 0x091d,/* 0xbb */ 1.736 + 0x091e,/* 0xbc */ 1.737 + 0x091f,/* 0xbd */ 1.738 + 0x0920,/* 0xbe */ 1.739 + 0x0921,/* 0xbf */ 1.740 + 0x0922,/* 0xc0 */ 1.741 + 0x0923,/* 0xc1 */ 1.742 + 0x0924,/* 0xc2 */ 1.743 + 0x0925,/* 0xc3 */ 1.744 + 0x0926,/* 0xc4 */ 1.745 + 0x0927,/* 0xc5 */ 1.746 + 0x0928,/* 0xc6 */ 1.747 + 0x0929,/* 0xc7 */ 1.748 + 0x092a,/* 0xc8 */ 1.749 + 0x092b,/* 0xc9 */ 1.750 + 0x092c,/* 0xca */ 1.751 + 0x092d,/* 0xcb */ 1.752 + 0x092e,/* 0xcc */ 1.753 + 0x092f,/* 0xcd */ 1.754 + 0x095f,/* 0xce */ 1.755 + 0x0930,/* 0xcf */ 1.756 + 0x0931,/* 0xd0 */ 1.757 + 0x0932,/* 0xd1 */ 1.758 + 0x0933,/* 0xd2 */ 1.759 + 0x0934,/* 0xd3 */ 1.760 + 0x0935,/* 0xd4 */ 1.761 + 0x0936,/* 0xd5 */ 1.762 + 0x0937,/* 0xd6 */ 1.763 + 0x0938,/* 0xd7 */ 1.764 + 0x0939,/* 0xd8 */ 1.765 + 0x200D,/* 0xd9 */ 1.766 + 0x093e,/* 0xda */ 1.767 + 0x093f,/* 0xdb */ 1.768 + 0x0940,/* 0xdc */ 1.769 + 0x0941,/* 0xdd */ 1.770 + 0x0942,/* 0xde */ 1.771 + 0x0943,/* 0xdf */ 1.772 + 0x0946,/* 0xe0 */ 1.773 + 0x0947,/* 0xe1 */ 1.774 + 0x0948,/* 0xe2 */ 1.775 + 0x0945,/* 0xe3 */ 1.776 + 0x094a,/* 0xe4 */ 1.777 + 0x094b,/* 0xe5 */ 1.778 + 0x094c,/* 0xe6 */ 1.779 + 0x0949,/* 0xe7 */ 1.780 + 0x094d,/* 0xe8 */ 1.781 + 0x093c,/* 0xe9 */ 1.782 + 0x0964,/* 0xea */ 1.783 + 0xFFFF,/* 0xeb */ 1.784 + 0xFFFF,/* 0xec */ 1.785 + 0xFFFF,/* 0xed */ 1.786 + 0xFFFF,/* 0xee */ 1.787 + 0xFFFF,/* 0xef */ 1.788 + 0xFFFF,/* 0xf0 */ 1.789 + 0x0966,/* 0xf1 */ 1.790 + 0x0967,/* 0xf2 */ 1.791 + 0x0968,/* 0xf3 */ 1.792 + 0x0969,/* 0xf4 */ 1.793 + 0x096a,/* 0xf5 */ 1.794 + 0x096b,/* 0xf6 */ 1.795 + 0x096c,/* 0xf7 */ 1.796 + 0x096d,/* 0xf8 */ 1.797 + 0x096e,/* 0xf9 */ 1.798 + 0x096f,/* 0xfa */ 1.799 + 0xFFFF,/* 0xfb */ 1.800 + 0xFFFF,/* 0xfc */ 1.801 + 0xFFFF,/* 0xfd */ 1.802 + 0xFFFF,/* 0xfe */ 1.803 + 0xFFFF /* 0xff */ 1.804 +}; 1.805 + 1.806 +static const uint16_t vowelSignESpecialCases[][2]={ 1.807 + { 2 /*length of array*/ , 0 }, 1.808 + { 0xA4 , 0x0904 }, 1.809 +}; 1.810 + 1.811 +static const uint16_t nuktaSpecialCases[][2]={ 1.812 + { 16 /*length of array*/ , 0 }, 1.813 + { 0xA6 , 0x090c }, 1.814 + { 0xEA , 0x093D }, 1.815 + { 0xDF , 0x0944 }, 1.816 + { 0xA1 , 0x0950 }, 1.817 + { 0xb3 , 0x0958 }, 1.818 + { 0xb4 , 0x0959 }, 1.819 + { 0xb5 , 0x095a }, 1.820 + { 0xba , 0x095b }, 1.821 + { 0xbf , 0x095c }, 1.822 + { 0xC0 , 0x095d }, 1.823 + { 0xc9 , 0x095e }, 1.824 + { 0xAA , 0x0960 }, 1.825 + { 0xA7 , 0x0961 }, 1.826 + { 0xDB , 0x0962 }, 1.827 + { 0xDC , 0x0963 }, 1.828 +}; 1.829 + 1.830 + 1.831 +#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \ 1.832 + int32_t offset = (int32_t)(source - args->source-1); \ 1.833 + /* write the targetUniChar to target */ \ 1.834 + if(target < targetLimit){ \ 1.835 + if(targetByteUnit <= 0xFF){ \ 1.836 + *(target)++ = (uint8_t)(targetByteUnit); \ 1.837 + if(offsets){ \ 1.838 + *(offsets++) = offset; \ 1.839 + } \ 1.840 + }else{ \ 1.841 + if (targetByteUnit > 0xFFFF) { \ 1.842 + *(target)++ = (uint8_t)(targetByteUnit>>16); \ 1.843 + if (offsets) { \ 1.844 + --offset; \ 1.845 + *(offsets++) = offset; \ 1.846 + } \ 1.847 + } \ 1.848 + if (!(target < targetLimit)) { \ 1.849 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ 1.850 + (uint8_t)(targetByteUnit >> 8); \ 1.851 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ 1.852 + (uint8_t)targetByteUnit; \ 1.853 + *err = U_BUFFER_OVERFLOW_ERROR; \ 1.854 + } else { \ 1.855 + *(target)++ = (uint8_t)(targetByteUnit>>8); \ 1.856 + if(offsets){ \ 1.857 + *(offsets++) = offset; \ 1.858 + } \ 1.859 + if(target < targetLimit){ \ 1.860 + *(target)++ = (uint8_t) targetByteUnit; \ 1.861 + if(offsets){ \ 1.862 + *(offsets++) = offset ; \ 1.863 + } \ 1.864 + }else{ \ 1.865 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ 1.866 + (uint8_t) (targetByteUnit); \ 1.867 + *err = U_BUFFER_OVERFLOW_ERROR; \ 1.868 + } \ 1.869 + } \ 1.870 + } \ 1.871 + }else{ \ 1.872 + if (targetByteUnit & 0xFF0000) { \ 1.873 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ 1.874 + (uint8_t) (targetByteUnit >>16); \ 1.875 + } \ 1.876 + if(targetByteUnit & 0xFF00){ \ 1.877 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ 1.878 + (uint8_t) (targetByteUnit >>8); \ 1.879 + } \ 1.880 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ 1.881 + (uint8_t) (targetByteUnit); \ 1.882 + *err = U_BUFFER_OVERFLOW_ERROR; \ 1.883 + } \ 1.884 +} 1.885 + 1.886 +/* Rules: 1.887 + * Explicit Halant : 1.888 + * <HALANT> + <ZWNJ> 1.889 + * Soft Halant : 1.890 + * <HALANT> + <ZWJ> 1.891 + */ 1.892 + 1.893 +static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( 1.894 + UConverterFromUnicodeArgs * args, UErrorCode * err) { 1.895 + const UChar *source = args->source; 1.896 + const UChar *sourceLimit = args->sourceLimit; 1.897 + unsigned char *target = (unsigned char *) args->target; 1.898 + unsigned char *targetLimit = (unsigned char *) args->targetLimit; 1.899 + int32_t* offsets = args->offsets; 1.900 + uint32_t targetByteUnit = 0x0000; 1.901 + UChar32 sourceChar = 0x0000; 1.902 + UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ 1.903 + UConverterDataISCII *converterData; 1.904 + uint16_t newDelta=0; 1.905 + uint16_t range = 0; 1.906 + UBool deltaChanged = FALSE; 1.907 + 1.908 + if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { 1.909 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.910 + return; 1.911 + } 1.912 + /* initialize data */ 1.913 + converterData=(UConverterDataISCII*)args->converter->extraInfo; 1.914 + newDelta=converterData->currentDeltaFromUnicode; 1.915 + range = (uint16_t)(newDelta/DELTA); 1.916 + 1.917 + if ((sourceChar = args->converter->fromUChar32)!=0) { 1.918 + goto getTrail; 1.919 + } 1.920 + 1.921 + /*writing the char to the output stream */ 1.922 + while (source < sourceLimit) { 1.923 + /* Write the language code following LF only if LF is not the last character. */ 1.924 + if (args->converter->fromUnicodeStatus == LF) { 1.925 + targetByteUnit = ATR<<8; 1.926 + targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; 1.927 + args->converter->fromUnicodeStatus = 0x0000; 1.928 + /* now append ATR and language code */ 1.929 + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); 1.930 + if (U_FAILURE(*err)) { 1.931 + break; 1.932 + } 1.933 + } 1.934 + 1.935 + sourceChar = *source++; 1.936 + tempContextFromUnicode = converterData->contextCharFromUnicode; 1.937 + 1.938 + targetByteUnit = missingCharMarker; 1.939 + 1.940 + /*check if input is in ASCII and C0 control codes range*/ 1.941 + if (sourceChar <= ASCII_END) { 1.942 + args->converter->fromUnicodeStatus = sourceChar; 1.943 + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); 1.944 + if (U_FAILURE(*err)) { 1.945 + break; 1.946 + } 1.947 + continue; 1.948 + } 1.949 + switch (sourceChar) { 1.950 + case ZWNJ: 1.951 + /* contextChar has HALANT */ 1.952 + if (converterData->contextCharFromUnicode) { 1.953 + converterData->contextCharFromUnicode = 0x00; 1.954 + targetByteUnit = ISCII_HALANT; 1.955 + } else { 1.956 + /* consume ZWNJ and continue */ 1.957 + converterData->contextCharFromUnicode = 0x00; 1.958 + continue; 1.959 + } 1.960 + break; 1.961 + case ZWJ: 1.962 + /* contextChar has HALANT */ 1.963 + if (converterData->contextCharFromUnicode) { 1.964 + targetByteUnit = ISCII_NUKTA; 1.965 + } else { 1.966 + targetByteUnit =ISCII_INV; 1.967 + } 1.968 + converterData->contextCharFromUnicode = 0x00; 1.969 + break; 1.970 + default: 1.971 + /* is the sourceChar in the INDIC_RANGE? */ 1.972 + if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { 1.973 + /* Danda and Double Danda are valid in Northern scripts.. since Unicode 1.974 + * does not include these codepoints in all Northern scrips we need to 1.975 + * filter them out 1.976 + */ 1.977 + if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { 1.978 + /* find out to which block the souceChar belongs*/ 1.979 + range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); 1.980 + newDelta =(uint16_t)(range*DELTA); 1.981 + 1.982 + /* Now are we in the same block as the previous? */ 1.983 + if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { 1.984 + converterData->currentDeltaFromUnicode = newDelta; 1.985 + converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; 1.986 + deltaChanged =TRUE; 1.987 + converterData->isFirstBuffer=FALSE; 1.988 + } 1.989 + 1.990 + if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { 1.991 + if (sourceChar == PNJ_TIPPI) { 1.992 + /* Make sure Tippi is converterd to Bindi. */ 1.993 + sourceChar = PNJ_BINDI; 1.994 + } else if (sourceChar == PNJ_ADHAK) { 1.995 + /* This is for consonant cluster handling. */ 1.996 + converterData->contextCharFromUnicode = PNJ_ADHAK; 1.997 + } 1.998 + 1.999 + } 1.1000 + /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ 1.1001 + /* now subtract the new delta from sourceChar*/ 1.1002 + sourceChar -= converterData->currentDeltaFromUnicode; 1.1003 + } 1.1004 + 1.1005 + /* get the target byte unit */ 1.1006 + targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; 1.1007 + 1.1008 + /* is the code point valid in current script? */ 1.1009 + if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { 1.1010 + /* Vocallic RR is assigned in ISCII Telugu and Unicode */ 1.1011 + if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { 1.1012 + targetByteUnit=missingCharMarker; 1.1013 + } 1.1014 + } 1.1015 + 1.1016 + if (deltaChanged) { 1.1017 + /* we are in a script block which is different than 1.1018 + * previous sourceChar's script block write ATR and language codes 1.1019 + */ 1.1020 + uint32_t temp=0; 1.1021 + temp =(uint16_t)(ATR<<8); 1.1022 + temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); 1.1023 + /* reset */ 1.1024 + deltaChanged=FALSE; 1.1025 + /* now append ATR and language code */ 1.1026 + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); 1.1027 + if (U_FAILURE(*err)) { 1.1028 + break; 1.1029 + } 1.1030 + } 1.1031 + 1.1032 + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { 1.1033 + continue; 1.1034 + } 1.1035 + } 1.1036 + /* reset context char */ 1.1037 + converterData->contextCharFromUnicode = 0x00; 1.1038 + break; 1.1039 + } 1.1040 + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { 1.1041 + /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ 1.1042 + /* reset context char */ 1.1043 + converterData->contextCharFromUnicode = 0x0000; 1.1044 + targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; 1.1045 + /* write targetByteUnit to target */ 1.1046 + WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); 1.1047 + if (U_FAILURE(*err)) { 1.1048 + break; 1.1049 + } 1.1050 + } else if (targetByteUnit != missingCharMarker) { 1.1051 + if (targetByteUnit==ISCII_HALANT) { 1.1052 + converterData->contextCharFromUnicode = (UChar)targetByteUnit; 1.1053 + } 1.1054 + /* write targetByteUnit to target*/ 1.1055 + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); 1.1056 + if (U_FAILURE(*err)) { 1.1057 + break; 1.1058 + } 1.1059 + } else { 1.1060 + /* oops.. the code point is unassigned */ 1.1061 + /*check if the char is a First surrogate*/ 1.1062 + if (U16_IS_SURROGATE(sourceChar)) { 1.1063 + if (U16_IS_SURROGATE_LEAD(sourceChar)) { 1.1064 +getTrail: 1.1065 + /*look ahead to find the trail surrogate*/ 1.1066 + if (source < sourceLimit) { 1.1067 + /* test the following code unit */ 1.1068 + UChar trail= (*source); 1.1069 + if (U16_IS_TRAIL(trail)) { 1.1070 + source++; 1.1071 + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); 1.1072 + *err =U_INVALID_CHAR_FOUND; 1.1073 + /* convert this surrogate code point */ 1.1074 + /* exit this condition tree */ 1.1075 + } else { 1.1076 + /* this is an unmatched lead code unit (1st surrogate) */ 1.1077 + /* callback(illegal) */ 1.1078 + *err=U_ILLEGAL_CHAR_FOUND; 1.1079 + } 1.1080 + } else { 1.1081 + /* no more input */ 1.1082 + *err = U_ZERO_ERROR; 1.1083 + } 1.1084 + } else { 1.1085 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.1086 + /* callback(illegal) */ 1.1087 + *err=U_ILLEGAL_CHAR_FOUND; 1.1088 + } 1.1089 + } else { 1.1090 + /* callback(unassigned) for a BMP code point */ 1.1091 + *err = U_INVALID_CHAR_FOUND; 1.1092 + } 1.1093 + 1.1094 + args->converter->fromUChar32=sourceChar; 1.1095 + break; 1.1096 + } 1.1097 + }/* end while(mySourceIndex<mySourceLength) */ 1.1098 + 1.1099 + /*save the state and return */ 1.1100 + args->source = source; 1.1101 + args->target = (char*)target; 1.1102 +} 1.1103 + 1.1104 +static const uint16_t lookupTable[][2]={ 1.1105 + { ZERO, ZERO }, /*DEFALT*/ 1.1106 + { ZERO, ZERO }, /*ROMAN*/ 1.1107 + { DEVANAGARI, DEV_MASK }, 1.1108 + { BENGALI, BNG_MASK }, 1.1109 + { TAMIL, TML_MASK }, 1.1110 + { TELUGU, KND_MASK }, 1.1111 + { BENGALI, BNG_MASK }, 1.1112 + { ORIYA, ORI_MASK }, 1.1113 + { KANNADA, KND_MASK }, 1.1114 + { MALAYALAM, MLM_MASK }, 1.1115 + { GUJARATI, GJR_MASK }, 1.1116 + { GURMUKHI, PNJ_MASK } 1.1117 +}; 1.1118 + 1.1119 +#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\ 1.1120 + /* add offset to current Indic Block */ \ 1.1121 + if(targetUniChar>ASCII_END && \ 1.1122 + targetUniChar != ZWJ && \ 1.1123 + targetUniChar != ZWNJ && \ 1.1124 + targetUniChar != DANDA && \ 1.1125 + targetUniChar != DOUBLE_DANDA){ \ 1.1126 + \ 1.1127 + targetUniChar+=(uint16_t)(delta); \ 1.1128 + } \ 1.1129 + /* now write the targetUniChar */ \ 1.1130 + if(target<args->targetLimit){ \ 1.1131 + *(target)++ = (UChar)targetUniChar; \ 1.1132 + if(offsets){ \ 1.1133 + *(offsets)++ = (int32_t)(offset); \ 1.1134 + } \ 1.1135 + }else{ \ 1.1136 + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ 1.1137 + (UChar)targetUniChar; \ 1.1138 + *err = U_BUFFER_OVERFLOW_ERROR; \ 1.1139 + } \ 1.1140 +} 1.1141 + 1.1142 +#define GET_MAPPING(sourceChar,targetUniChar,data){ \ 1.1143 + targetUniChar = toUnicodeTable[(sourceChar)] ; \ 1.1144 + /* is the code point valid in current script? */ \ 1.1145 + if(sourceChar> ASCII_END && \ 1.1146 + (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ 1.1147 + /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ 1.1148 + if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ 1.1149 + targetUniChar!=VOCALLIC_RR){ \ 1.1150 + targetUniChar=missingCharMarker; \ 1.1151 + } \ 1.1152 + } \ 1.1153 +} 1.1154 + 1.1155 +/*********** 1.1156 + * Rules for ISCII to Unicode converter 1.1157 + * ISCII is stateful encoding. To convert ISCII bytes to Unicode, 1.1158 + * which has both precomposed and decomposed forms characters 1.1159 + * pre-context and post-context need to be considered. 1.1160 + * 1.1161 + * Post context 1.1162 + * i) ATR : Attribute code is used to declare the font and script switching. 1.1163 + * Currently we only switch scripts and font codes consumed without generating an error 1.1164 + * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, 1.1165 + * obsolete characters 1.1166 + * Pre context 1.1167 + * i) Halant: if preceeded by a halant then it is a explicit halant 1.1168 + * ii) Nukta : 1.1169 + * a) if preceeded by a halant then it is a soft halant 1.1170 + * b) if preceeded by specific consonants and the ligatures have pre-composed 1.1171 + * characters in Unicode then convert to pre-composed characters 1.1172 + * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda 1.1173 + * 1.1174 + */ 1.1175 + 1.1176 +static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { 1.1177 + const char *source = ( char *) args->source; 1.1178 + UChar *target = args->target; 1.1179 + const char *sourceLimit = args->sourceLimit; 1.1180 + const UChar* targetLimit = args->targetLimit; 1.1181 + uint32_t targetUniChar = 0x0000; 1.1182 + uint8_t sourceChar = 0x0000; 1.1183 + UConverterDataISCII* data; 1.1184 + UChar32* toUnicodeStatus=NULL; 1.1185 + UChar32 tempTargetUniChar = 0x0000; 1.1186 + UChar* contextCharToUnicode= NULL; 1.1187 + UBool found; 1.1188 + int i; 1.1189 + int offset = 0; 1.1190 + 1.1191 + if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { 1.1192 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.1193 + return; 1.1194 + } 1.1195 + 1.1196 + data = (UConverterDataISCII*)(args->converter->extraInfo); 1.1197 + contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ 1.1198 + toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ 1.1199 + 1.1200 + while (U_SUCCESS(*err) && source<sourceLimit) { 1.1201 + 1.1202 + targetUniChar = missingCharMarker; 1.1203 + 1.1204 + if (target < targetLimit) { 1.1205 + sourceChar = (unsigned char)*(source)++; 1.1206 + 1.1207 + /* look at the post-context preform special processing */ 1.1208 + if (*contextCharToUnicode==ATR) { 1.1209 + 1.1210 + /* If we have ATR in *contextCharToUnicode then we need to change our 1.1211 + * state to the Indic Script specified by sourceChar 1.1212 + */ 1.1213 + 1.1214 + /* check if the sourceChar is supported script range*/ 1.1215 + if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) { 1.1216 + data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); 1.1217 + data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; 1.1218 + } else if (sourceChar==DEF) { 1.1219 + /* switch back to default */ 1.1220 + data->currentDeltaToUnicode = data->defDeltaToUnicode; 1.1221 + data->currentMaskToUnicode = data->defMaskToUnicode; 1.1222 + } else { 1.1223 + if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { 1.1224 + /* these are display codes consume and continue */ 1.1225 + } else { 1.1226 + *err =U_ILLEGAL_CHAR_FOUND; 1.1227 + /* reset */ 1.1228 + *contextCharToUnicode=NO_CHAR_MARKER; 1.1229 + goto CALLBACK; 1.1230 + } 1.1231 + } 1.1232 + 1.1233 + /* reset */ 1.1234 + *contextCharToUnicode=NO_CHAR_MARKER; 1.1235 + 1.1236 + continue; 1.1237 + 1.1238 + } else if (*contextCharToUnicode==EXT) { 1.1239 + /* check if sourceChar is in 0xA1-0xEE range */ 1.1240 + if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { 1.1241 + /* We currently support only Anudatta and Devanagari abbreviation sign */ 1.1242 + if (sourceChar==0xBF || sourceChar == 0xB8) { 1.1243 + targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; 1.1244 + 1.1245 + /* find out if the mapping is valid in this state */ 1.1246 + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { 1.1247 + *contextCharToUnicode= NO_CHAR_MARKER; 1.1248 + 1.1249 + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1.1250 + if (data->prevToUnicodeStatus) { 1.1251 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); 1.1252 + data->prevToUnicodeStatus = 0x0000; 1.1253 + } 1.1254 + /* write to target */ 1.1255 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); 1.1256 + 1.1257 + continue; 1.1258 + } 1.1259 + } 1.1260 + /* byte unit is unassigned */ 1.1261 + targetUniChar = missingCharMarker; 1.1262 + *err= U_INVALID_CHAR_FOUND; 1.1263 + } else { 1.1264 + /* only 0xA1 - 0xEE are legal after EXT char */ 1.1265 + *contextCharToUnicode= NO_CHAR_MARKER; 1.1266 + *err = U_ILLEGAL_CHAR_FOUND; 1.1267 + } 1.1268 + goto CALLBACK; 1.1269 + } else if (*contextCharToUnicode==ISCII_INV) { 1.1270 + if (sourceChar==ISCII_HALANT) { 1.1271 + targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ 1.1272 + } else { 1.1273 + targetUniChar = ZWJ; 1.1274 + } 1.1275 + 1.1276 + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1.1277 + if (data->prevToUnicodeStatus) { 1.1278 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); 1.1279 + data->prevToUnicodeStatus = 0x0000; 1.1280 + } 1.1281 + /* write to target */ 1.1282 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); 1.1283 + /* reset */ 1.1284 + *contextCharToUnicode=NO_CHAR_MARKER; 1.1285 + } 1.1286 + 1.1287 + /* look at the pre-context and perform special processing */ 1.1288 + switch (sourceChar) { 1.1289 + case ISCII_INV: 1.1290 + case EXT: /*falls through*/ 1.1291 + case ATR: 1.1292 + *contextCharToUnicode = (UChar)sourceChar; 1.1293 + 1.1294 + if (*toUnicodeStatus != missingCharMarker) { 1.1295 + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1.1296 + if (data->prevToUnicodeStatus) { 1.1297 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); 1.1298 + data->prevToUnicodeStatus = 0x0000; 1.1299 + } 1.1300 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); 1.1301 + *toUnicodeStatus = missingCharMarker; 1.1302 + } 1.1303 + continue; 1.1304 + case ISCII_DANDA: 1.1305 + /* handle double danda*/ 1.1306 + if (*contextCharToUnicode== ISCII_DANDA) { 1.1307 + targetUniChar = DOUBLE_DANDA; 1.1308 + /* clear the context */ 1.1309 + *contextCharToUnicode = NO_CHAR_MARKER; 1.1310 + *toUnicodeStatus = missingCharMarker; 1.1311 + } else { 1.1312 + GET_MAPPING(sourceChar,targetUniChar,data); 1.1313 + *contextCharToUnicode = sourceChar; 1.1314 + } 1.1315 + break; 1.1316 + case ISCII_HALANT: 1.1317 + /* handle explicit halant */ 1.1318 + if (*contextCharToUnicode == ISCII_HALANT) { 1.1319 + targetUniChar = ZWNJ; 1.1320 + /* clear the context */ 1.1321 + *contextCharToUnicode = NO_CHAR_MARKER; 1.1322 + } else { 1.1323 + GET_MAPPING(sourceChar,targetUniChar,data); 1.1324 + *contextCharToUnicode = sourceChar; 1.1325 + } 1.1326 + break; 1.1327 + case 0x0A: 1.1328 + /* fall through */ 1.1329 + case 0x0D: 1.1330 + data->resetToDefaultToUnicode = TRUE; 1.1331 + GET_MAPPING(sourceChar,targetUniChar,data) 1.1332 + ; 1.1333 + *contextCharToUnicode = sourceChar; 1.1334 + break; 1.1335 + 1.1336 + case ISCII_VOWEL_SIGN_E: 1.1337 + i=1; 1.1338 + found=FALSE; 1.1339 + for (; i<vowelSignESpecialCases[0][0]; i++) { 1.1340 + U_ASSERT(i<sizeof(vowelSignESpecialCases)/sizeof(vowelSignESpecialCases[0])); 1.1341 + if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) { 1.1342 + targetUniChar=vowelSignESpecialCases[i][1]; 1.1343 + found=TRUE; 1.1344 + break; 1.1345 + } 1.1346 + } 1.1347 + if (found) { 1.1348 + /* find out if the mapping is valid in this state */ 1.1349 + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { 1.1350 + /*targetUniChar += data->currentDeltaToUnicode ;*/ 1.1351 + *contextCharToUnicode= NO_CHAR_MARKER; 1.1352 + *toUnicodeStatus = missingCharMarker; 1.1353 + break; 1.1354 + } 1.1355 + } 1.1356 + GET_MAPPING(sourceChar,targetUniChar,data); 1.1357 + *contextCharToUnicode = sourceChar; 1.1358 + break; 1.1359 + 1.1360 + case ISCII_NUKTA: 1.1361 + /* handle soft halant */ 1.1362 + if (*contextCharToUnicode == ISCII_HALANT) { 1.1363 + targetUniChar = ZWJ; 1.1364 + /* clear the context */ 1.1365 + *contextCharToUnicode = NO_CHAR_MARKER; 1.1366 + break; 1.1367 + } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { 1.1368 + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1.1369 + if (data->prevToUnicodeStatus) { 1.1370 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); 1.1371 + data->prevToUnicodeStatus = 0x0000; 1.1372 + } 1.1373 + /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. 1.1374 + * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). 1.1375 + */ 1.1376 + targetUniChar = PNJ_RRA; 1.1377 + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); 1.1378 + if (U_SUCCESS(*err)) { 1.1379 + targetUniChar = PNJ_SIGN_VIRAMA; 1.1380 + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); 1.1381 + if (U_SUCCESS(*err)) { 1.1382 + targetUniChar = PNJ_HA; 1.1383 + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); 1.1384 + } else { 1.1385 + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; 1.1386 + } 1.1387 + } else { 1.1388 + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; 1.1389 + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; 1.1390 + } 1.1391 + *toUnicodeStatus = missingCharMarker; 1.1392 + data->contextCharToUnicode = NO_CHAR_MARKER; 1.1393 + continue; 1.1394 + } else { 1.1395 + /* try to handle <CHAR> + ISCII_NUKTA special mappings */ 1.1396 + i=1; 1.1397 + found =FALSE; 1.1398 + for (; i<nuktaSpecialCases[0][0]; i++) { 1.1399 + if (nuktaSpecialCases[i][0]==(uint8_t) 1.1400 + *contextCharToUnicode) { 1.1401 + targetUniChar=nuktaSpecialCases[i][1]; 1.1402 + found =TRUE; 1.1403 + break; 1.1404 + } 1.1405 + } 1.1406 + if (found) { 1.1407 + /* find out if the mapping is valid in this state */ 1.1408 + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { 1.1409 + /*targetUniChar += data->currentDeltaToUnicode ;*/ 1.1410 + *contextCharToUnicode= NO_CHAR_MARKER; 1.1411 + *toUnicodeStatus = missingCharMarker; 1.1412 + if (data->currentDeltaToUnicode == PNJ_DELTA) { 1.1413 + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1.1414 + if (data->prevToUnicodeStatus) { 1.1415 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); 1.1416 + data->prevToUnicodeStatus = 0x0000; 1.1417 + } 1.1418 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); 1.1419 + continue; 1.1420 + } 1.1421 + break; 1.1422 + } 1.1423 + /* else fall through to default */ 1.1424 + } 1.1425 + /* else fall through to default */ 1.1426 + } 1.1427 + default:GET_MAPPING(sourceChar,targetUniChar,data) 1.1428 + ; 1.1429 + *contextCharToUnicode = sourceChar; 1.1430 + break; 1.1431 + } 1.1432 + 1.1433 + if (*toUnicodeStatus != missingCharMarker) { 1.1434 + /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */ 1.1435 + if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && 1.1436 + (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) { 1.1437 + /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ 1.1438 + offset = (int)(source-args->source - 3); 1.1439 + tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ 1.1440 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); 1.1441 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); 1.1442 + data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ 1.1443 + *toUnicodeStatus = missingCharMarker; 1.1444 + continue; 1.1445 + } else { 1.1446 + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1.1447 + if (data->prevToUnicodeStatus) { 1.1448 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); 1.1449 + data->prevToUnicodeStatus = 0x0000; 1.1450 + } 1.1451 + /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 1.1452 + * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. 1.1453 + */ 1.1454 + if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { 1.1455 + targetUniChar = PNJ_TIPPI - PNJ_DELTA; 1.1456 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); 1.1457 + } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { 1.1458 + /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ 1.1459 + data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; 1.1460 + } else { 1.1461 + /* write the previously mapped codepoint */ 1.1462 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); 1.1463 + } 1.1464 + } 1.1465 + *toUnicodeStatus = missingCharMarker; 1.1466 + } 1.1467 + 1.1468 + if (targetUniChar != missingCharMarker) { 1.1469 + /* now save the targetUniChar for delayed write */ 1.1470 + *toUnicodeStatus = (UChar) targetUniChar; 1.1471 + if (data->resetToDefaultToUnicode==TRUE) { 1.1472 + data->currentDeltaToUnicode = data->defDeltaToUnicode; 1.1473 + data->currentMaskToUnicode = data->defMaskToUnicode; 1.1474 + data->resetToDefaultToUnicode=FALSE; 1.1475 + } 1.1476 + } else { 1.1477 + 1.1478 + /* we reach here only if targetUniChar == missingCharMarker 1.1479 + * so assign codes to reason and err 1.1480 + */ 1.1481 + *err = U_INVALID_CHAR_FOUND; 1.1482 +CALLBACK: 1.1483 + args->converter->toUBytes[0] = (uint8_t) sourceChar; 1.1484 + args->converter->toULength = 1; 1.1485 + break; 1.1486 + } 1.1487 + 1.1488 + } else { 1.1489 + *err =U_BUFFER_OVERFLOW_ERROR; 1.1490 + break; 1.1491 + } 1.1492 + } 1.1493 + 1.1494 + if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { 1.1495 + /* end of the input stream */ 1.1496 + UConverter *cnv = args->converter; 1.1497 + 1.1498 + if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { 1.1499 + /* set toUBytes[] */ 1.1500 + cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; 1.1501 + cnv->toULength = 1; 1.1502 + 1.1503 + /* avoid looping on truncated sequences */ 1.1504 + *contextCharToUnicode = NO_CHAR_MARKER; 1.1505 + } else { 1.1506 + cnv->toULength = 0; 1.1507 + } 1.1508 + 1.1509 + if (*toUnicodeStatus != missingCharMarker) { 1.1510 + /* output a remaining target character */ 1.1511 + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); 1.1512 + *toUnicodeStatus = missingCharMarker; 1.1513 + } 1.1514 + } 1.1515 + 1.1516 + args->target = target; 1.1517 + args->source = source; 1.1518 +} 1.1519 + 1.1520 +/* structure for SafeClone calculations */ 1.1521 +struct cloneISCIIStruct { 1.1522 + UConverter cnv; 1.1523 + UConverterDataISCII mydata; 1.1524 +}; 1.1525 + 1.1526 +static UConverter * 1.1527 +_ISCII_SafeClone(const UConverter *cnv, 1.1528 + void *stackBuffer, 1.1529 + int32_t *pBufferSize, 1.1530 + UErrorCode *status) 1.1531 +{ 1.1532 + struct cloneISCIIStruct * localClone; 1.1533 + int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); 1.1534 + 1.1535 + if (U_FAILURE(*status)) { 1.1536 + return 0; 1.1537 + } 1.1538 + 1.1539 + if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ 1.1540 + *pBufferSize = bufferSizeNeeded; 1.1541 + return 0; 1.1542 + } 1.1543 + 1.1544 + localClone = (struct cloneISCIIStruct *)stackBuffer; 1.1545 + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 1.1546 + 1.1547 + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); 1.1548 + localClone->cnv.extraInfo = &localClone->mydata; 1.1549 + localClone->cnv.isExtraLocal = TRUE; 1.1550 + 1.1551 + return &localClone->cnv; 1.1552 +} 1.1553 + 1.1554 +static void 1.1555 +_ISCIIGetUnicodeSet(const UConverter *cnv, 1.1556 + const USetAdder *sa, 1.1557 + UConverterUnicodeSet which, 1.1558 + UErrorCode *pErrorCode) 1.1559 +{ 1.1560 + int32_t idx, script; 1.1561 + uint8_t mask; 1.1562 + 1.1563 + /* Since all ISCII versions allow switching to other ISCII 1.1564 + scripts, we add all roundtrippable characters to this set. */ 1.1565 + sa->addRange(sa->set, 0, ASCII_END); 1.1566 + for (script = DEVANAGARI; script <= MALAYALAM; script++) { 1.1567 + mask = (uint8_t)(lookupInitialData[script].maskEnum); 1.1568 + for (idx = 0; idx < DELTA; idx++) { 1.1569 + /* added check for TELUGU character */ 1.1570 + if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { 1.1571 + sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); 1.1572 + } 1.1573 + } 1.1574 + } 1.1575 + sa->add(sa->set, DANDA); 1.1576 + sa->add(sa->set, DOUBLE_DANDA); 1.1577 + sa->add(sa->set, ZWNJ); 1.1578 + sa->add(sa->set, ZWJ); 1.1579 +} 1.1580 + 1.1581 +static const UConverterImpl _ISCIIImpl={ 1.1582 + 1.1583 + UCNV_ISCII, 1.1584 + 1.1585 + NULL, 1.1586 + NULL, 1.1587 + 1.1588 + _ISCIIOpen, 1.1589 + _ISCIIClose, 1.1590 + _ISCIIReset, 1.1591 + 1.1592 + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, 1.1593 + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, 1.1594 + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, 1.1595 + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, 1.1596 + NULL, 1.1597 + 1.1598 + NULL, 1.1599 + _ISCIIgetName, 1.1600 + NULL, 1.1601 + _ISCII_SafeClone, 1.1602 + _ISCIIGetUnicodeSet 1.1603 +}; 1.1604 + 1.1605 +static const UConverterStaticData _ISCIIStaticData={ 1.1606 + sizeof(UConverterStaticData), 1.1607 + "ISCII", 1.1608 + 0, 1.1609 + UCNV_IBM, 1.1610 + UCNV_ISCII, 1.1611 + 1, 1.1612 + 4, 1.1613 + { 0x1a, 0, 0, 0 }, 1.1614 + 0x1, 1.1615 + FALSE, 1.1616 + FALSE, 1.1617 + 0x0, 1.1618 + 0x0, 1.1619 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ 1.1620 + 1.1621 +}; 1.1622 + 1.1623 +const UConverterSharedData _ISCIIData={ 1.1624 + sizeof(UConverterSharedData), 1.1625 + ~((uint32_t) 0), 1.1626 + NULL, 1.1627 + NULL, 1.1628 + &_ISCIIStaticData, 1.1629 + FALSE, 1.1630 + &_ISCIIImpl, 1.1631 + 0 1.1632 +}; 1.1633 + 1.1634 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */