intl/icu/source/common/ucnvisci.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2000-2012, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * file name: ucnvisci.c
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2001JUN26
michael@0 12 * created by: Ram Viswanadha
michael@0 13 *
michael@0 14 * Date Name Description
michael@0 15 * 24/7/2001 Ram Added support for EXT character handling
michael@0 16 */
michael@0 17
michael@0 18 #include "unicode/utypes.h"
michael@0 19
michael@0 20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
michael@0 21
michael@0 22 #include "unicode/ucnv.h"
michael@0 23 #include "unicode/ucnv_cb.h"
michael@0 24 #include "unicode/utf16.h"
michael@0 25 #include "cmemory.h"
michael@0 26 #include "ucnv_bld.h"
michael@0 27 #include "ucnv_cnv.h"
michael@0 28 #include "cstring.h"
michael@0 29 #include "uassert.h"
michael@0 30
/*
 * Constants used by the ISCII converter.
 *
 * Every macro whose replacement is an expression is fully parenthesized so
 * that it expands safely next to any neighbouring operator.
 */

/* Mask extracting the ISCII version number from the converter options. */
#define UCNV_OPTIONS_VERSION_MASK 0xf

/* Unicode code points that need special handling. */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965

/* ISCII byte values that need special handling. */
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0

/* Bounds of the Unicode range treated as the "Indic block". */
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)

#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE

/*
 * DELTA and TELUGU are UniLang enumerators declared further down in this file.
 * The expansion is parenthesized so that uses such as x / TELUGU_DELTA or
 * (uint16_t)TELUGU_DELTA apply to the whole product, not just to DELTA.
 */
#define TELUGU_DELTA        (DELTA * TELUGU)

#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) specific code points and delta. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
michael@0 66
/**
 * Index of each supported script. _ISCIIOpen() multiplies the index by DELTA
 * to obtain the script's initial delta.
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    DELTA      = 0x80   /* multiplier applied to a script index, not a script itself */
} UniLang;
michael@0 79
/**
 * Code-page selectors: when <ATR> followed by one of these values is
 * encountered, the converter switches code pages.
 *
 * The script noted next to a selector is the one it is paired with in
 * lookupInitialData; selectors without a note have no entry in that table.
 */
typedef enum {
    DEF = 0x40,     /* switch back to the default state */
    RMN = 0x41,
    DEV = 0x42,     /* Devanagari */
    BNG = 0x43,     /* Bengali */
    TML = 0x44,     /* Tamil */
    TLG = 0x45,     /* Telugu */
    ASM = 0x46,     /* shares a validity bit with BNG */
    ORI = 0x47,     /* Oriya */
    KND = 0x48,     /* Kannada */
    MLM = 0x49,     /* Malayalam */
    GJR = 0x4A,     /* Gujarati */
    PNJ = 0x4B,     /* Gurmukhi */

    ARB = 0x71,
    PES = 0x72,
    URD = 0x73,
    SND = 0x74,
    KSM = 0x75,
    PST = 0x76
} ISCIILang;
michael@0 104
/**
 * One bit per script (or script pair) as used in validityTable.
 * ZERO is the empty mask.
 */
typedef enum {
    ZERO     = 0,
    TML_MASK = 1 << 0,
    MLM_MASK = 1 << 1,
    KND_MASK = 1 << 2,
    BNG_MASK = 1 << 3,
    ORI_MASK = 1 << 4,
    GJR_MASK = 1 << 5,
    PNJ_MASK = 1 << 6,
    DEV_MASK = 1 << 7
} MaskEnum;
michael@0 116
michael@0 117 #define ISCII_CNV_PREFIX "ISCII,version="
michael@0 118
michael@0 119 typedef struct {
michael@0 120 UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */
michael@0 121 UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */
michael@0 122 uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
michael@0 123 uint16_t currentDeltaFromUnicode; /* current delta in Indic block */
michael@0 124 uint16_t currentDeltaToUnicode; /* current delta in Indic block */
michael@0 125 MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */
michael@0 126 MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */
michael@0 127 MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */
michael@0 128 UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
michael@0 129 UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/
michael@0 130 char name[sizeof(ISCII_CNV_PREFIX) + 1];
michael@0 131 UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
michael@0 132 } UConverterDataISCII;
michael@0 133
michael@0 134 typedef struct LookupDataStruct {
michael@0 135 UniLang uniLang;
michael@0 136 MaskEnum maskEnum;
michael@0 137 ISCIILang isciiLang;
michael@0 138 } LookupDataStruct;
michael@0 139
michael@0 140 static const LookupDataStruct lookupInitialData[]={
michael@0 141 { DEVANAGARI, DEV_MASK, DEV },
michael@0 142 { BENGALI, BNG_MASK, BNG },
michael@0 143 { GURMUKHI, PNJ_MASK, PNJ },
michael@0 144 { GUJARATI, GJR_MASK, GJR },
michael@0 145 { ORIYA, ORI_MASK, ORI },
michael@0 146 { TAMIL, TML_MASK, TML },
michael@0 147 { TELUGU, KND_MASK, TLG },
michael@0 148 { KANNADA, KND_MASK, KND },
michael@0 149 { MALAYALAM, MLM_MASK, MLM }
michael@0 150 };
michael@0 151
/*
 * Flags for the special handling of certain Gurmukhi characters, one entry
 * per code point in U+0A00..U+0A4F (index = code point - 0x0A00).
 *   bit 0 (value 1): PNJ consonant
 *   bit 1 (value 2): PNJ Bindi Tippi
 */
static const uint8_t pnjMap[80] = {
    0, 0, 0, 0, 0, 2, 0, 2,     /* 0A00..0A07 */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0A08..0A0F */
    0, 0, 0, 0, 0, 3, 3, 3,     /* 0A10..0A17 */
    3, 3, 3, 3, 3, 3, 3, 3,     /* 0A18..0A1F */
    3, 3, 3, 3, 3, 3, 3, 3,     /* 0A20..0A27 */
    3, 0, 3, 3, 3, 3, 3, 3,     /* 0A28..0A2F */
    3, 0, 0, 0, 0, 3, 3, 0,     /* 0A30..0A37 */
    3, 3, 0, 0, 0, 0, 0, 2,     /* 0A38..0A3F */
    0, 2, 2, 0, 0, 0, 0, 0,     /* 0A40..0A47 */
    0, 0, 0, 0, 0, 0, 0, 0      /* 0A48..0A4F */
};
michael@0 169
michael@0 170 static UBool
michael@0 171 isPNJConsonant(UChar32 c) {
michael@0 172 if (c < 0xa00 || 0xa50 <= c) {
michael@0 173 return FALSE;
michael@0 174 } else {
michael@0 175 return (UBool)(pnjMap[c - 0xa00] & 1);
michael@0 176 }
michael@0 177 }
michael@0 178
michael@0 179 static UBool
michael@0 180 isPNJBindiTippi(UChar32 c) {
michael@0 181 if (c < 0xa00 || 0xa50 <= c) {
michael@0 182 return FALSE;
michael@0 183 } else {
michael@0 184 return (UBool)(pnjMap[c - 0xa00] >> 1);
michael@0 185 }
michael@0 186 }
michael@0 187
michael@0 188 static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
michael@0 189 if(pArgs->onlyTestIsLoadable) {
michael@0 190 return;
michael@0 191 }
michael@0 192
michael@0 193 cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
michael@0 194
michael@0 195 if (cnv->extraInfo != NULL) {
michael@0 196 int32_t len=0;
michael@0 197 UConverterDataISCII *converterData=
michael@0 198 (UConverterDataISCII *) cnv->extraInfo;
michael@0 199 converterData->contextCharToUnicode=NO_CHAR_MARKER;
michael@0 200 cnv->toUnicodeStatus = missingCharMarker;
michael@0 201 converterData->contextCharFromUnicode=0x0000;
michael@0 202 converterData->resetToDefaultToUnicode=FALSE;
michael@0 203 /* check if the version requested is supported */
michael@0 204 if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
michael@0 205 /* initialize state variables */
michael@0 206 converterData->currentDeltaFromUnicode
michael@0 207 = converterData->currentDeltaToUnicode
michael@0 208 = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
michael@0 209
michael@0 210 converterData->currentMaskFromUnicode
michael@0 211 = converterData->currentMaskToUnicode
michael@0 212 = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
michael@0 213
michael@0 214 converterData->isFirstBuffer=TRUE;
michael@0 215 (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
michael@0 216 len = (int32_t)uprv_strlen(converterData->name);
michael@0 217 converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
michael@0 218 converterData->name[len+1]=0;
michael@0 219
michael@0 220 converterData->prevToUnicodeStatus = 0x0000;
michael@0 221 } else {
michael@0 222 uprv_free(cnv->extraInfo);
michael@0 223 cnv->extraInfo = NULL;
michael@0 224 *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 225 }
michael@0 226
michael@0 227 } else {
michael@0 228 *errorCode =U_MEMORY_ALLOCATION_ERROR;
michael@0 229 }
michael@0 230 }
michael@0 231
michael@0 232 static void _ISCIIClose(UConverter *cnv) {
michael@0 233 if (cnv->extraInfo!=NULL) {
michael@0 234 if (!cnv->isExtraLocal) {
michael@0 235 uprv_free(cnv->extraInfo);
michael@0 236 }
michael@0 237 cnv->extraInfo=NULL;
michael@0 238 }
michael@0 239 }
michael@0 240
michael@0 241 static const char* _ISCIIgetName(const UConverter* cnv) {
michael@0 242 if (cnv->extraInfo) {
michael@0 243 UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
michael@0 244 return myData->name;
michael@0 245 }
michael@0 246 return NULL;
michael@0 247 }
michael@0 248
michael@0 249 static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
michael@0 250 UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
michael@0 251 if (choice<=UCNV_RESET_TO_UNICODE) {
michael@0 252 cnv->toUnicodeStatus = missingCharMarker;
michael@0 253 cnv->mode=0;
michael@0 254 data->currentDeltaToUnicode=data->defDeltaToUnicode;
michael@0 255 data->currentMaskToUnicode = data->defMaskToUnicode;
michael@0 256 data->contextCharToUnicode=NO_CHAR_MARKER;
michael@0 257 data->prevToUnicodeStatus = 0x0000;
michael@0 258 }
michael@0 259 if (choice!=UCNV_RESET_TO_UNICODE) {
michael@0 260 cnv->fromUChar32=0x0000;
michael@0 261 data->contextCharFromUnicode=0x00;
michael@0 262 data->currentMaskFromUnicode=data->defMaskToUnicode;
michael@0 263 data->currentDeltaFromUnicode=data->defDeltaToUnicode;
michael@0 264 data->isFirstBuffer=TRUE;
michael@0 265 data->resetToDefaultToUnicode=FALSE;
michael@0 266 }
michael@0 267 }
michael@0 268
michael@0 269 /**
michael@0 270 * The values in validity table are indexed by the lower bits of Unicode
michael@0 271 * range 0x0900 - 0x09ff. The values have a structure like:
michael@0 272 * ---------------------------------------------------------------
michael@0 273 * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
michael@0 274 * | | | | | ASM | KND | | |
michael@0 275 * ---------------------------------------------------------------
michael@0 276 * If a code point is valid in a particular script
michael@0 277 * then that bit is turned on
michael@0 278 *
michael@0 279 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
michael@0 280 * to represent these languages
michael@0 281 *
michael@0 282 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
michael@0 283 * and combine and use 1 bit to represent these languages.
michael@0 284 *
michael@0 285 * TODO: It is probably easier to understand and maintain to change this
michael@0 286 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
michael@0 287 */
michael@0 288
michael@0 289 static const uint8_t validityTable[128] = {
michael@0 290 /* This state table is tool generated please do not edit unless you know exactly what you are doing */
michael@0 291 /* Note: This table was edited to mirror the Windows XP implementation */
michael@0 292 /*ISCII:Valid:Unicode */
michael@0 293 /*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 294 /*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 295 /*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 296 /*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 297 /*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 298 /*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 299 /*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 300 /*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 301 /*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 302 /*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 303 /*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 304 /*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 305 /*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 306 /*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 307 /*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 308 /*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 309 /*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 310 /*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 311 /*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 312 /*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 313 /*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 314 /*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 315 /*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 316 /*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 317 /*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 318 /*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 319 /*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 320 /*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 321 /*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 322 /*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 323 /*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 324 /*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 325 /*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 326 /*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 327 /*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 328 /*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 329 /*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 330 /*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 331 /*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 332 /*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 333 /*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 334 /*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK ,
michael@0 335 /*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 336 /*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 337 /*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 338 /*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 339 /*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 340 /*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 341 /*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 342 /*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
michael@0 343 /*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 344 /*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 345 /*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK ,
michael@0 346 /*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 347 /*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 348 /*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 349 /*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 350 /*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 351 /*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 352 /*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 353 /*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 354 /*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 355 /*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 356 /*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 357 /*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 358 /*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 359 /*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 360 /*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 361 /*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO ,
michael@0 362 /*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 363 /*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 364 /*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 365 /*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 366 /*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 367 /*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 368 /*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 369 /*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 370 /*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 371 /*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 372 /*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 373 /*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 374 /*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 375 /*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 376 /*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 377 /*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 378 /*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO ,
michael@0 379 /*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO ,
michael@0 380 /*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO ,
michael@0 381 /*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 382 /*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 383 /*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 384 /*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 385 /*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 386 /*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 387 /*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 388 /*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 389 /*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 390 /*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO ,
michael@0 391 /*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 392 /*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO ,
michael@0 393 /*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 394 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 395 /*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 396 /*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 397 /*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 398 /*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 399 /*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 400 /*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 401 /*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 402 /*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 403 /*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 404 /*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
michael@0 405 /*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO ,
michael@0 406 /*
michael@0 407 * The length of the array is 128 to provide values for 0x900..0x97f.
michael@0 408 * The last 15 entries for 0x971..0x97f of the validity table are all zero
michael@0 409 * because no Indic script uses such Unicode code points.
michael@0 410 */
michael@0 411 /*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO
michael@0 412 };
michael@0 413
/*
 * Unicode -> ISCII mapping, one entry per code point offset 0x00..0x7f,
 * listed here for 0x0900..0x097f (eight entries per row; the row comment
 * gives the code points covered).
 *
 * Entries up to 0x00ff hold a single ISCII byte. Larger entries appear to
 * pack two bytes, e.g. a base byte followed by 0xE9 (the ISCII_NUKTA value)
 * or the 0xF0 EXT byte followed by a second byte -- NOTE(review): confirm
 * against the code that consumes this table. 0xFFFF (the INVALID_CHAR value)
 * marks code points without a mapping.
 */
static const uint16_t fromUnicodeTable[128] = {
    /* 0x0900..0x0907 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0xa4e0, 0x00a4, 0x00a5, 0x00a6,
    /* 0x0908..0x090f */ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0xA6E9, 0x00ae, 0x00ab, 0x00ac,
    /* 0x0910..0x0917 */ 0x00ad, 0x00b2, 0x00af, 0x00b0, 0x00b1, 0x00b3, 0x00b4, 0x00b5,
    /* 0x0918..0x091f */ 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd,
    /* 0x0920..0x0927 */ 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5,
    /* 0x0928..0x092f */ 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd,
    /* 0x0930..0x0937 */ 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6,
    /* 0x0938..0x093f */ 0x00d7, 0x00d8, 0xFFFF, 0xFFFF, 0x00e9, 0xEAE9, 0x00da, 0x00db,
    /* 0x0940..0x0947 */ 0x00dc, 0x00dd, 0x00de, 0x00df, 0xDFE9, 0x00e3, 0x00e0, 0x00e1,
    /* 0x0948..0x094f */ 0x00e2, 0x00e7, 0x00e4, 0x00e5, 0x00e6, 0x00e8, 0x00ec, 0x00ed,
    /* 0x0950..0x0957 */ 0xA1E9 /* OM Symbol */, 0xFFFF, 0xF0B8, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0958..0x095f */ 0xb3e9, 0xb4e9, 0xb5e9, 0xbae9, 0xbfe9, 0xC0E9, 0xc9e9, 0x00ce,
    /* 0x0960..0x0967 */ 0xAAe9, 0xA7E9, 0xDBE9, 0xDCE9, 0x00ea, 0xeaea, 0x00f1, 0x00f2,
    /* 0x0968..0x096f */ 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa,
    /* 0x0970..0x0977 */ 0xF0BF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
    /* 0x0978..0x097f */ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
/*
 * ISCII-to-Unicode lookup, indexed by the ISCII byte value (0x00-0xff; the
 * index is repeated in the trailing comment of each row).
 *  - 0x00-0xa0 map to the same numeric value.
 *  - 0xa1-0xea and 0xf1-0xfa map to values in the 0x09xx range, except 0xd9,
 *    which maps to 0x200D (the value of ZWJ).
 *  - 0xFFFF (the value of INVALID_CHAR) fills the remaining slots: 0xeb-0xf0,
 *    which include ATR (0xEF) and EXT (0xF0), and 0xfb-0xff.
 * GET_MAPPING reads this table; WRITE_TO_TARGET_TO_U later adds the delta it
 * is passed to most non-ASCII values, so the 0x09xx entries act as a base that
 * is shifted to the current script block.
 */
michael@0 544 static const uint16_t toUnicodeTable[256]={
michael@0 545 0x0000,/* 0x00 */
michael@0 546 0x0001,/* 0x01 */
michael@0 547 0x0002,/* 0x02 */
michael@0 548 0x0003,/* 0x03 */
michael@0 549 0x0004,/* 0x04 */
michael@0 550 0x0005,/* 0x05 */
michael@0 551 0x0006,/* 0x06 */
michael@0 552 0x0007,/* 0x07 */
michael@0 553 0x0008,/* 0x08 */
michael@0 554 0x0009,/* 0x09 */
michael@0 555 0x000a,/* 0x0a */
michael@0 556 0x000b,/* 0x0b */
michael@0 557 0x000c,/* 0x0c */
michael@0 558 0x000d,/* 0x0d */
michael@0 559 0x000e,/* 0x0e */
michael@0 560 0x000f,/* 0x0f */
michael@0 561 0x0010,/* 0x10 */
michael@0 562 0x0011,/* 0x11 */
michael@0 563 0x0012,/* 0x12 */
michael@0 564 0x0013,/* 0x13 */
michael@0 565 0x0014,/* 0x14 */
michael@0 566 0x0015,/* 0x15 */
michael@0 567 0x0016,/* 0x16 */
michael@0 568 0x0017,/* 0x17 */
michael@0 569 0x0018,/* 0x18 */
michael@0 570 0x0019,/* 0x19 */
michael@0 571 0x001a,/* 0x1a */
michael@0 572 0x001b,/* 0x1b */
michael@0 573 0x001c,/* 0x1c */
michael@0 574 0x001d,/* 0x1d */
michael@0 575 0x001e,/* 0x1e */
michael@0 576 0x001f,/* 0x1f */
michael@0 577 0x0020,/* 0x20 */
michael@0 578 0x0021,/* 0x21 */
michael@0 579 0x0022,/* 0x22 */
michael@0 580 0x0023,/* 0x23 */
michael@0 581 0x0024,/* 0x24 */
michael@0 582 0x0025,/* 0x25 */
michael@0 583 0x0026,/* 0x26 */
michael@0 584 0x0027,/* 0x27 */
michael@0 585 0x0028,/* 0x28 */
michael@0 586 0x0029,/* 0x29 */
michael@0 587 0x002a,/* 0x2a */
michael@0 588 0x002b,/* 0x2b */
michael@0 589 0x002c,/* 0x2c */
michael@0 590 0x002d,/* 0x2d */
michael@0 591 0x002e,/* 0x2e */
michael@0 592 0x002f,/* 0x2f */
michael@0 593 0x0030,/* 0x30 */
michael@0 594 0x0031,/* 0x31 */
michael@0 595 0x0032,/* 0x32 */
michael@0 596 0x0033,/* 0x33 */
michael@0 597 0x0034,/* 0x34 */
michael@0 598 0x0035,/* 0x35 */
michael@0 599 0x0036,/* 0x36 */
michael@0 600 0x0037,/* 0x37 */
michael@0 601 0x0038,/* 0x38 */
michael@0 602 0x0039,/* 0x39 */
michael@0 603 0x003A,/* 0x3A */
michael@0 604 0x003B,/* 0x3B */
michael@0 605 0x003c,/* 0x3c */
michael@0 606 0x003d,/* 0x3d */
michael@0 607 0x003e,/* 0x3e */
michael@0 608 0x003f,/* 0x3f */
michael@0 609 0x0040,/* 0x40 */
michael@0 610 0x0041,/* 0x41 */
michael@0 611 0x0042,/* 0x42 */
michael@0 612 0x0043,/* 0x43 */
michael@0 613 0x0044,/* 0x44 */
michael@0 614 0x0045,/* 0x45 */
michael@0 615 0x0046,/* 0x46 */
michael@0 616 0x0047,/* 0x47 */
michael@0 617 0x0048,/* 0x48 */
michael@0 618 0x0049,/* 0x49 */
michael@0 619 0x004a,/* 0x4a */
michael@0 620 0x004b,/* 0x4b */
michael@0 621 0x004c,/* 0x4c */
michael@0 622 0x004d,/* 0x4d */
michael@0 623 0x004e,/* 0x4e */
michael@0 624 0x004f,/* 0x4f */
michael@0 625 0x0050,/* 0x50 */
michael@0 626 0x0051,/* 0x51 */
michael@0 627 0x0052,/* 0x52 */
michael@0 628 0x0053,/* 0x53 */
michael@0 629 0x0054,/* 0x54 */
michael@0 630 0x0055,/* 0x55 */
michael@0 631 0x0056,/* 0x56 */
michael@0 632 0x0057,/* 0x57 */
michael@0 633 0x0058,/* 0x58 */
michael@0 634 0x0059,/* 0x59 */
michael@0 635 0x005a,/* 0x5a */
michael@0 636 0x005b,/* 0x5b */
michael@0 637 0x005c,/* 0x5c */
michael@0 638 0x005d,/* 0x5d */
michael@0 639 0x005e,/* 0x5e */
michael@0 640 0x005f,/* 0x5f */
michael@0 641 0x0060,/* 0x60 */
michael@0 642 0x0061,/* 0x61 */
michael@0 643 0x0062,/* 0x62 */
michael@0 644 0x0063,/* 0x63 */
michael@0 645 0x0064,/* 0x64 */
michael@0 646 0x0065,/* 0x65 */
michael@0 647 0x0066,/* 0x66 */
michael@0 648 0x0067,/* 0x67 */
michael@0 649 0x0068,/* 0x68 */
michael@0 650 0x0069,/* 0x69 */
michael@0 651 0x006a,/* 0x6a */
michael@0 652 0x006b,/* 0x6b */
michael@0 653 0x006c,/* 0x6c */
michael@0 654 0x006d,/* 0x6d */
michael@0 655 0x006e,/* 0x6e */
michael@0 656 0x006f,/* 0x6f */
michael@0 657 0x0070,/* 0x70 */
michael@0 658 0x0071,/* 0x71 */
michael@0 659 0x0072,/* 0x72 */
michael@0 660 0x0073,/* 0x73 */
michael@0 661 0x0074,/* 0x74 */
michael@0 662 0x0075,/* 0x75 */
michael@0 663 0x0076,/* 0x76 */
michael@0 664 0x0077,/* 0x77 */
michael@0 665 0x0078,/* 0x78 */
michael@0 666 0x0079,/* 0x79 */
michael@0 667 0x007a,/* 0x7a */
michael@0 668 0x007b,/* 0x7b */
michael@0 669 0x007c,/* 0x7c */
michael@0 670 0x007d,/* 0x7d */
michael@0 671 0x007e,/* 0x7e */
michael@0 672 0x007f,/* 0x7f */
michael@0 673 0x0080,/* 0x80 */
michael@0 674 0x0081,/* 0x81 */
michael@0 675 0x0082,/* 0x82 */
michael@0 676 0x0083,/* 0x83 */
michael@0 677 0x0084,/* 0x84 */
michael@0 678 0x0085,/* 0x85 */
michael@0 679 0x0086,/* 0x86 */
michael@0 680 0x0087,/* 0x87 */
michael@0 681 0x0088,/* 0x88 */
michael@0 682 0x0089,/* 0x89 */
michael@0 683 0x008a,/* 0x8a */
michael@0 684 0x008b,/* 0x8b */
michael@0 685 0x008c,/* 0x8c */
michael@0 686 0x008d,/* 0x8d */
michael@0 687 0x008e,/* 0x8e */
michael@0 688 0x008f,/* 0x8f */
michael@0 689 0x0090,/* 0x90 */
michael@0 690 0x0091,/* 0x91 */
michael@0 691 0x0092,/* 0x92 */
michael@0 692 0x0093,/* 0x93 */
michael@0 693 0x0094,/* 0x94 */
michael@0 694 0x0095,/* 0x95 */
michael@0 695 0x0096,/* 0x96 */
michael@0 696 0x0097,/* 0x97 */
michael@0 697 0x0098,/* 0x98 */
michael@0 698 0x0099,/* 0x99 */
michael@0 699 0x009a,/* 0x9a */
michael@0 700 0x009b,/* 0x9b */
michael@0 701 0x009c,/* 0x9c */
michael@0 702 0x009d,/* 0x9d */
michael@0 703 0x009e,/* 0x9e */
michael@0 704 0x009f,/* 0x9f */
michael@0 705 0x00A0,/* 0xa0 */
michael@0 706 0x0901,/* 0xa1 */
michael@0 707 0x0902,/* 0xa2 */
michael@0 708 0x0903,/* 0xa3 */
michael@0 709 0x0905,/* 0xa4 */
michael@0 710 0x0906,/* 0xa5 */
michael@0 711 0x0907,/* 0xa6 */
michael@0 712 0x0908,/* 0xa7 */
michael@0 713 0x0909,/* 0xa8 */
michael@0 714 0x090a,/* 0xa9 */
michael@0 715 0x090b,/* 0xaa */
michael@0 716 0x090e,/* 0xab */
michael@0 717 0x090f,/* 0xac */
michael@0 718 0x0910,/* 0xad */
michael@0 719 0x090d,/* 0xae */
michael@0 720 0x0912,/* 0xaf */
michael@0 721 0x0913,/* 0xb0 */
michael@0 722 0x0914,/* 0xb1 */
michael@0 723 0x0911,/* 0xb2 */
michael@0 724 0x0915,/* 0xb3 */
michael@0 725 0x0916,/* 0xb4 */
michael@0 726 0x0917,/* 0xb5 */
michael@0 727 0x0918,/* 0xb6 */
michael@0 728 0x0919,/* 0xb7 */
michael@0 729 0x091a,/* 0xb8 */
michael@0 730 0x091b,/* 0xb9 */
michael@0 731 0x091c,/* 0xba */
michael@0 732 0x091d,/* 0xbb */
michael@0 733 0x091e,/* 0xbc */
michael@0 734 0x091f,/* 0xbd */
michael@0 735 0x0920,/* 0xbe */
michael@0 736 0x0921,/* 0xbf */
michael@0 737 0x0922,/* 0xc0 */
michael@0 738 0x0923,/* 0xc1 */
michael@0 739 0x0924,/* 0xc2 */
michael@0 740 0x0925,/* 0xc3 */
michael@0 741 0x0926,/* 0xc4 */
michael@0 742 0x0927,/* 0xc5 */
michael@0 743 0x0928,/* 0xc6 */
michael@0 744 0x0929,/* 0xc7 */
michael@0 745 0x092a,/* 0xc8 */
michael@0 746 0x092b,/* 0xc9 */
michael@0 747 0x092c,/* 0xca */
michael@0 748 0x092d,/* 0xcb */
michael@0 749 0x092e,/* 0xcc */
michael@0 750 0x092f,/* 0xcd */
michael@0 751 0x095f,/* 0xce */
michael@0 752 0x0930,/* 0xcf */
michael@0 753 0x0931,/* 0xd0 */
michael@0 754 0x0932,/* 0xd1 */
michael@0 755 0x0933,/* 0xd2 */
michael@0 756 0x0934,/* 0xd3 */
michael@0 757 0x0935,/* 0xd4 */
michael@0 758 0x0936,/* 0xd5 */
michael@0 759 0x0937,/* 0xd6 */
michael@0 760 0x0938,/* 0xd7 */
michael@0 761 0x0939,/* 0xd8 */
michael@0 762 0x200D,/* 0xd9 */
michael@0 763 0x093e,/* 0xda */
michael@0 764 0x093f,/* 0xdb */
michael@0 765 0x0940,/* 0xdc */
michael@0 766 0x0941,/* 0xdd */
michael@0 767 0x0942,/* 0xde */
michael@0 768 0x0943,/* 0xdf */
michael@0 769 0x0946,/* 0xe0 */
michael@0 770 0x0947,/* 0xe1 */
michael@0 771 0x0948,/* 0xe2 */
michael@0 772 0x0945,/* 0xe3 */
michael@0 773 0x094a,/* 0xe4 */
michael@0 774 0x094b,/* 0xe5 */
michael@0 775 0x094c,/* 0xe6 */
michael@0 776 0x0949,/* 0xe7 */
michael@0 777 0x094d,/* 0xe8 */
michael@0 778 0x093c,/* 0xe9 */
michael@0 779 0x0964,/* 0xea */
michael@0 780 0xFFFF,/* 0xeb */
michael@0 781 0xFFFF,/* 0xec */
michael@0 782 0xFFFF,/* 0xed */
michael@0 783 0xFFFF,/* 0xee */
michael@0 784 0xFFFF,/* 0xef */
michael@0 785 0xFFFF,/* 0xf0 */
michael@0 786 0x0966,/* 0xf1 */
michael@0 787 0x0967,/* 0xf2 */
michael@0 788 0x0968,/* 0xf3 */
michael@0 789 0x0969,/* 0xf4 */
michael@0 790 0x096a,/* 0xf5 */
michael@0 791 0x096b,/* 0xf6 */
michael@0 792 0x096c,/* 0xf7 */
michael@0 793 0x096d,/* 0xf8 */
michael@0 794 0x096e,/* 0xf9 */
michael@0 795 0x096f,/* 0xfa */
michael@0 796 0xFFFF,/* 0xfb */
michael@0 797 0xFFFF,/* 0xfc */
michael@0 798 0xFFFF,/* 0xfd */
michael@0 799 0xFFFF,/* 0xfe */
michael@0 800 0xFFFF /* 0xff */
michael@0 801 };
michael@0 802
/*
 * Special cases for ISCII_VOWEL_SIGN_E in the ISCII-to-Unicode direction.
 * Row 0 is a header: its first element is the total number of rows (header
 * included) and bounds the scan, which starts at row 1.
 * In each data row, element 0 is the ISCII byte that must be the preceding
 * context character and element 1 is the Unicode value used for the pair
 * (here: 0xA4 followed by the vowel sign gives 0x0904). The caller still
 * checks the result against validityTable before accepting it.
 */
michael@0 803 static const uint16_t vowelSignESpecialCases[][2]={
michael@0 804 { 2 /*length of array*/ , 0 },
michael@0 805 { 0xA4 , 0x0904 },
michael@0 806 };
michael@0 807
/*
 * Special cases for ISCII_NUKTA. Same layout as vowelSignESpecialCases:
 * row 0 is a header whose first element is the total row count (16 = header
 * plus 15 data rows); each data row is { ISCII byte, Unicode value }.
 * NOTE(review): the code that consults this table is outside this view;
 * presumably the ISCII byte is the character preceding the nukta and the
 * Unicode value is the pre-composed replacement -- confirm against the
 * ISCII_NUKTA handling in the ISCII-to-Unicode converter.
 */
michael@0 808 static const uint16_t nuktaSpecialCases[][2]={
michael@0 809 { 16 /*length of array*/ , 0 },
michael@0 810 { 0xA6 , 0x090c },
michael@0 811 { 0xEA , 0x093D },
michael@0 812 { 0xDF , 0x0944 },
michael@0 813 { 0xA1 , 0x0950 },
michael@0 814 { 0xb3 , 0x0958 },
michael@0 815 { 0xb4 , 0x0959 },
michael@0 816 { 0xb5 , 0x095a },
michael@0 817 { 0xba , 0x095b },
michael@0 818 { 0xbf , 0x095c },
michael@0 819 { 0xC0 , 0x095d },
michael@0 820 { 0xc9 , 0x095e },
michael@0 821 { 0xAA , 0x0960 },
michael@0 822 { 0xA7 , 0x0961 },
michael@0 823 { 0xDB , 0x0962 },
michael@0 824 { 0xDC , 0x0963 },
michael@0 825 };
michael@0 826
michael@0 827
/*
 * WRITE_TO_TARGET_FROM_U: emit the ISCII byte sequence packed in
 * targetByteUnit (one, two or three bytes, most significant byte first).
 *
 *  - Values <= 0xFF are written as one byte; values > 0xFFFF write bits 16-23
 *    first; bits 8-15 and then bits 0-7 follow.
 *  - Bytes that no longer fit before targetLimit are appended to
 *    args->converter->charErrorBuffer and *err is set to
 *    U_BUFFER_OVERFLOW_ERROR.
 *  - When nothing fits at all, the high and middle bytes are only queued if
 *    they are non-zero (tested with 0xFF0000 and 0xFF00).
 *  - If offsets is non-NULL, one entry is stored per byte written to target.
 *    The entry is (source - args->source - 1), i.e. the index of the code unit
 *    just consumed; for three-byte values it is decremented by one first.
 *
 * The expansion is a bare { } block that advances target and offsets and
 * evaluates its arguments several times without parenthesising them, so
 * callers should pass plain variables.
 */
michael@0 828 #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \
michael@0 829 int32_t offset = (int32_t)(source - args->source-1); \
michael@0 830 /* write the targetByteUnit to target */ \
michael@0 831 if(target < targetLimit){ \
michael@0 832 if(targetByteUnit <= 0xFF){ \
michael@0 833 *(target)++ = (uint8_t)(targetByteUnit); \
michael@0 834 if(offsets){ \
michael@0 835 *(offsets++) = offset; \
michael@0 836 } \
michael@0 837 }else{ \
michael@0 838 if (targetByteUnit > 0xFFFF) { \
michael@0 839 *(target)++ = (uint8_t)(targetByteUnit>>16); \
michael@0 840 if (offsets) { \
michael@0 841 --offset; \
michael@0 842 *(offsets++) = offset; \
michael@0 843 } \
michael@0 844 } \
michael@0 845 if (!(target < targetLimit)) { \
michael@0 846 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
michael@0 847 (uint8_t)(targetByteUnit >> 8); \
michael@0 848 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
michael@0 849 (uint8_t)targetByteUnit; \
michael@0 850 *err = U_BUFFER_OVERFLOW_ERROR; \
michael@0 851 } else { \
michael@0 852 *(target)++ = (uint8_t)(targetByteUnit>>8); \
michael@0 853 if(offsets){ \
michael@0 854 *(offsets++) = offset; \
michael@0 855 } \
michael@0 856 if(target < targetLimit){ \
michael@0 857 *(target)++ = (uint8_t) targetByteUnit; \
michael@0 858 if(offsets){ \
michael@0 859 *(offsets++) = offset ; \
michael@0 860 } \
michael@0 861 }else{ \
michael@0 862 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
michael@0 863 (uint8_t) (targetByteUnit); \
michael@0 864 *err = U_BUFFER_OVERFLOW_ERROR; \
michael@0 865 } \
michael@0 866 } \
michael@0 867 } \
michael@0 868 }else{ \
michael@0 869 if (targetByteUnit & 0xFF0000) { \
michael@0 870 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
michael@0 871 (uint8_t) (targetByteUnit >>16); \
michael@0 872 } \
michael@0 873 if(targetByteUnit & 0xFF00){ \
michael@0 874 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
michael@0 875 (uint8_t) (targetByteUnit >>8); \
michael@0 876 } \
michael@0 877 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \
michael@0 878 (uint8_t) (targetByteUnit); \
michael@0 879 *err = U_BUFFER_OVERFLOW_ERROR; \
michael@0 880 } \
michael@0 881 }
michael@0 882
michael@0 883 /* Rules:
michael@0 884 * Explicit Halant :
michael@0 885 * <HALANT> + <ZWNJ>
michael@0 886 * Soft Halant :
michael@0 887 * <HALANT> + <ZWJ>
michael@0 888 */
michael@0 889
/*
 * Converts UTF-16 code units from args->source into ISCII bytes in
 * args->target, optionally recording offsets in args->offsets.
 *
 * args  conversion arguments. If args->converter is NULL or either limit
 *       pointer precedes its start pointer, *err is set to
 *       U_ILLEGAL_ARGUMENT_ERROR and nothing else happens.
 * err   in/out error code. The write macro sets U_BUFFER_OVERFLOW_ERROR;
 *       unconvertible input sets U_INVALID_CHAR_FOUND or U_ILLEGAL_CHAR_FOUND
 *       and stores the offending code point in args->converter->fromUChar32.
 *
 * Per-character behaviour, as implemented below:
 *  - Values <= ASCII_END are written unchanged and remembered in
 *    fromUnicodeStatus. When that status equals LF and more input follows,
 *    ATR plus lookupInitialData[range].isciiLang is emitted first.
 *  - ZWNJ emits ISCII_HALANT when contextCharFromUnicode is set (normally by a
 *    preceding halant); otherwise it is consumed silently.
 *  - ZWJ emits ISCII_NUKTA when contextCharFromUnicode is set, else ISCII_INV.
 *  - Other characters in the Indic range are shifted by the delta of their
 *    script block (DANDA and DOUBLE_DANDA are left as they are), looked up in
 *    fromUnicodeTable and checked against validityTable. A change of script
 *    block first emits ATR plus the new language code.
 *  - For Gurmukhi (PNJ_DELTA), TIPPI is replaced by BINDI, and ADHAK followed
 *    by a consonant is written as consonant + ISCII_HALANT + consonant.
 *
 * The loop stops at the first error; args->source and args->target are then
 * updated to the positions reached.
 */
michael@0 890 static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
michael@0 891 UConverterFromUnicodeArgs * args, UErrorCode * err) {
michael@0 892 const UChar *source = args->source;
michael@0 893 const UChar *sourceLimit = args->sourceLimit;
michael@0 894 unsigned char *target = (unsigned char *) args->target;
michael@0 895 unsigned char *targetLimit = (unsigned char *) args->targetLimit;
michael@0 896 int32_t* offsets = args->offsets;
michael@0 897 uint32_t targetByteUnit = 0x0000;
michael@0 898 UChar32 sourceChar = 0x0000;
michael@0 899 UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */
michael@0 900 UConverterDataISCII *converterData;
michael@0 901 uint16_t newDelta=0;
michael@0 902 uint16_t range = 0;
michael@0 903 UBool deltaChanged = FALSE;
michael@0 904
michael@0 905 if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
michael@0 906 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 907 return;
michael@0 908 }
michael@0 909 /* initialize data */
michael@0 910 converterData=(UConverterDataISCII*)args->converter->extraInfo;
michael@0 911 newDelta=converterData->currentDeltaFromUnicode;
michael@0 912 range = (uint16_t)(newDelta/DELTA);
michael@0 913
michael@0 914 if ((sourceChar = args->converter->fromUChar32)!=0) {
michael@0 915 goto getTrail; /* a code point saved in fromUChar32 by an earlier call: resume at the trail-surrogate lookup */
michael@0 916 }
michael@0 917
michael@0 918 /*writing the char to the output stream */
michael@0 919 while (source < sourceLimit) {
michael@0 920 /* Write the language code following LF only if LF is not the last character. */
michael@0 921 if (args->converter->fromUnicodeStatus == LF) {
michael@0 922 targetByteUnit = ATR<<8;
michael@0 923 targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
michael@0 924 args->converter->fromUnicodeStatus = 0x0000;
michael@0 925 /* now append ATR and language code */
michael@0 926 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
michael@0 927 if (U_FAILURE(*err)) {
michael@0 928 break;
michael@0 929 }
michael@0 930 }
michael@0 931
michael@0 932 sourceChar = *source++;
michael@0 933 tempContextFromUnicode = converterData->contextCharFromUnicode; /* keep the context as it was before this character; checked for PNJ_ADHAK after the switch */
michael@0 934
michael@0 935 targetByteUnit = missingCharMarker;
michael@0 936
michael@0 937 /*check if input is in ASCII and C0 control codes range*/
michael@0 938 if (sourceChar <= ASCII_END) {
michael@0 939 args->converter->fromUnicodeStatus = sourceChar;
michael@0 940 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
michael@0 941 if (U_FAILURE(*err)) {
michael@0 942 break;
michael@0 943 }
michael@0 944 continue;
michael@0 945 }
michael@0 946 switch (sourceChar) {
michael@0 947 case ZWNJ:
michael@0 948 /* contextChar has HALANT */
michael@0 949 if (converterData->contextCharFromUnicode) {
michael@0 950 converterData->contextCharFromUnicode = 0x00;
michael@0 951 targetByteUnit = ISCII_HALANT;
michael@0 952 } else {
michael@0 953 /* consume ZWNJ and continue */
michael@0 954 converterData->contextCharFromUnicode = 0x00;
michael@0 955 continue;
michael@0 956 }
michael@0 957 break;
michael@0 958 case ZWJ:
michael@0 959 /* contextChar has HALANT */
michael@0 960 if (converterData->contextCharFromUnicode) {
michael@0 961 targetByteUnit = ISCII_NUKTA;
michael@0 962 } else {
michael@0 963 targetByteUnit =ISCII_INV;
michael@0 964 }
michael@0 965 converterData->contextCharFromUnicode = 0x00;
michael@0 966 break;
michael@0 967 default:
michael@0 968 /* is the sourceChar in the INDIC_RANGE? */
michael@0 969 if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
michael@0 970 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
michael@0 971 * does not include these codepoints in all Northern scripts we need to
michael@0 972 * filter them out
michael@0 973 */
michael@0 974 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
michael@0 975 /* find out to which block the sourceChar belongs*/
michael@0 976 range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
michael@0 977 newDelta =(uint16_t)(range*DELTA);
michael@0 978
michael@0 979 /* Now are we in the same block as the previous? */
michael@0 980 if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
michael@0 981 converterData->currentDeltaFromUnicode = newDelta;
michael@0 982 converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
michael@0 983 deltaChanged =TRUE;
michael@0 984 converterData->isFirstBuffer=FALSE;
michael@0 985 }
michael@0 986
michael@0 987 if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
michael@0 988 if (sourceChar == PNJ_TIPPI) {
michael@0 989 /* Make sure Tippi is converted to Bindi. */
michael@0 990 sourceChar = PNJ_BINDI;
michael@0 991 } else if (sourceChar == PNJ_ADHAK) {
michael@0 992 /* This is for consonant cluster handling. */
michael@0 993 converterData->contextCharFromUnicode = PNJ_ADHAK;
michael@0 994 }
michael@0 995
michael@0 996 }
michael@0 997 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
michael@0 998 /* now subtract the new delta from sourceChar*/
michael@0 999 sourceChar -= converterData->currentDeltaFromUnicode;
michael@0 1000 }
michael@0 1001
michael@0 1002 /* get the target byte unit */
michael@0 1003 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
michael@0 1004
michael@0 1005 /* is the code point valid in current script? */
michael@0 1006 if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
michael@0 1007 /* Vocallic RR is assigned in ISCII Telugu and Unicode */
michael@0 1008 if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
michael@0 1009 targetByteUnit=missingCharMarker;
michael@0 1010 }
michael@0 1011 }
michael@0 1012
michael@0 1013 if (deltaChanged) {
michael@0 1014 /* we are in a script block which is different than
michael@0 1015 * previous sourceChar's script block write ATR and language codes
michael@0 1016 */
michael@0 1017 uint32_t temp=0;
michael@0 1018 temp =(uint16_t)(ATR<<8);
michael@0 1019 temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
michael@0 1020 /* reset */
michael@0 1021 deltaChanged=FALSE;
michael@0 1022 /* now append ATR and language code */
michael@0 1023 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
michael@0 1024 if (U_FAILURE(*err)) {
michael@0 1025 break;
michael@0 1026 }
michael@0 1027 }
michael@0 1028
michael@0 1029 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
michael@0 1030 continue; /* Adhak itself writes nothing; it stays in contextCharFromUnicode for the next character */
michael@0 1031 }
michael@0 1032 }
michael@0 1033 /* reset context char */
michael@0 1034 converterData->contextCharFromUnicode = 0x00;
michael@0 1035 break;
michael@0 1036 }
michael@0 1037 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
michael@0 1038 /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
michael@0 1039 /* reset context char */
michael@0 1040 converterData->contextCharFromUnicode = 0x0000;
michael@0 1041 targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
michael@0 1042 /* write targetByteUnit to target */
michael@0 1043 WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
michael@0 1044 if (U_FAILURE(*err)) {
michael@0 1045 break;
michael@0 1046 }
michael@0 1047 } else if (targetByteUnit != missingCharMarker) {
michael@0 1048 if (targetByteUnit==ISCII_HALANT) {
michael@0 1049 converterData->contextCharFromUnicode = (UChar)targetByteUnit;
michael@0 1050 }
michael@0 1051 /* write targetByteUnit to target*/
michael@0 1052 WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
michael@0 1053 if (U_FAILURE(*err)) {
michael@0 1054 break;
michael@0 1055 }
michael@0 1056 } else {
michael@0 1057 /* oops.. the code point is unassigned */
michael@0 1058 /*check if the char is a First surrogate*/
michael@0 1059 if (U16_IS_SURROGATE(sourceChar)) {
michael@0 1060 if (U16_IS_SURROGATE_LEAD(sourceChar)) {
michael@0 1061 getTrail:
michael@0 1062 /*look ahead to find the trail surrogate*/
michael@0 1063 if (source < sourceLimit) {
michael@0 1064 /* test the following code unit */
michael@0 1065 UChar trail= (*source);
michael@0 1066 if (U16_IS_TRAIL(trail)) {
michael@0 1067 source++;
michael@0 1068 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
michael@0 1069 *err =U_INVALID_CHAR_FOUND;
michael@0 1070 /* the combined supplementary code point is reported as */
michael@0 1071 /* invalid; no conversion is attempted for it here */
michael@0 1072 } else {
michael@0 1073 /* this is an unmatched lead code unit (1st surrogate) */
michael@0 1074 /* callback(illegal) */
michael@0 1075 *err=U_ILLEGAL_CHAR_FOUND;
michael@0 1076 }
michael@0 1077 } else {
michael@0 1078 /* no more input */
michael@0 1079 *err = U_ZERO_ERROR; /* not an error: the lead surrogate is stored in fromUChar32 below so a later call can resume at getTrail */
michael@0 1080 }
michael@0 1081 } else {
michael@0 1082 /* this is an unmatched trail code unit (2nd surrogate) */
michael@0 1083 /* callback(illegal) */
michael@0 1084 *err=U_ILLEGAL_CHAR_FOUND;
michael@0 1085 }
michael@0 1086 } else {
michael@0 1087 /* callback(unassigned) for a BMP code point */
michael@0 1088 *err = U_INVALID_CHAR_FOUND;
michael@0 1089 }
michael@0 1090
michael@0 1091 args->converter->fromUChar32=sourceChar;
michael@0 1092 break;
michael@0 1093 }
michael@0 1094 }/* end while (source < sourceLimit) */
michael@0 1095
michael@0 1096 /*save the state and return */
michael@0 1097 args->source = source;
michael@0 1098 args->target = (char*)target;
michael@0 1099 }
michael@0 1100
/*
 * Script selection table for the ISCII-to-Unicode direction, indexed by the
 * low nibble of the byte that follows ATR (lookupTable[sourceChar & 0x0F]).
 * Column 0 is multiplied by DELTA to give currentDeltaToUnicode; column 1 is
 * stored as currentMaskToUnicode.
 * The converter only indexes this table when
 * (uint8_t)(PNJ-sourceChar) <= PNJ-DEV, so rows 0 and 1 look like placeholders
 * for the default and Roman codes -- TODO confirm against the ISCII code values.
 * NOTE(review): row 6 repeats the BENGALI entry and the TELUGU row uses
 * KND_MASK; both look deliberate but are not explained here -- confirm
 * before changing.
 */
michael@0 1101 static const uint16_t lookupTable[][2]={
michael@0 1102 { ZERO, ZERO }, /*DEFAULT*/
michael@0 1103 { ZERO, ZERO }, /*ROMAN*/
michael@0 1104 { DEVANAGARI, DEV_MASK },
michael@0 1105 { BENGALI, BNG_MASK },
michael@0 1106 { TAMIL, TML_MASK },
michael@0 1107 { TELUGU, KND_MASK },
michael@0 1108 { BENGALI, BNG_MASK },
michael@0 1109 { ORIYA, ORI_MASK },
michael@0 1110 { KANNADA, KND_MASK },
michael@0 1111 { MALAYALAM, MLM_MASK },
michael@0 1112 { GUJARATI, GJR_MASK },
michael@0 1113 { GURMUKHI, PNJ_MASK }
michael@0 1114 };
michael@0 1115
/*
 * WRITE_TO_TARGET_TO_U: emit one UTF-16 code unit in the ISCII-to-Unicode
 * direction.
 *
 *  - delta is added to targetUniChar unless the value is <= ASCII_END or is
 *    one of ZWJ, ZWNJ, DANDA, DOUBLE_DANDA. The addition is done in place, so
 *    targetUniChar must be a modifiable lvalue and is changed for the caller.
 *  - If target is below args->targetLimit the value is stored and, when
 *    offsets is non-NULL, offset is recorded for it.
 *  - Otherwise the value is appended to args->converter->UCharErrorBuffer and
 *    *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * The source parameter is accepted but not referenced by the expansion.
 */
michael@0 1116 #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
michael@0 1117 /* add offset to current Indic Block */ \
michael@0 1118 if(targetUniChar>ASCII_END && \
michael@0 1119 targetUniChar != ZWJ && \
michael@0 1120 targetUniChar != ZWNJ && \
michael@0 1121 targetUniChar != DANDA && \
michael@0 1122 targetUniChar != DOUBLE_DANDA){ \
michael@0 1123 \
michael@0 1124 targetUniChar+=(uint16_t)(delta); \
michael@0 1125 } \
michael@0 1126 /* now write the targetUniChar */ \
michael@0 1127 if(target<args->targetLimit){ \
michael@0 1128 *(target)++ = (UChar)targetUniChar; \
michael@0 1129 if(offsets){ \
michael@0 1130 *(offsets)++ = (int32_t)(offset); \
michael@0 1131 } \
michael@0 1132 }else{ \
michael@0 1133 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \
michael@0 1134 (UChar)targetUniChar; \
michael@0 1135 *err = U_BUFFER_OVERFLOW_ERROR; \
michael@0 1136 } \
michael@0 1137 }
michael@0 1138
/*
 * GET_MAPPING: look up the Unicode value for one ISCII byte.
 *
 * Sets targetUniChar to toUnicodeTable[sourceChar]. For bytes above ASCII_END
 * the result is then checked against the current script: if
 * validityTable[targetUniChar & 0x7F] has no bit in common with
 * data->currentMaskToUnicode, targetUniChar becomes missingCharMarker.
 * The one exception is VOCALLIC_RR while data->currentDeltaToUnicode equals
 * TELUGU_DELTA, which is kept.
 */
michael@0 1139 #define GET_MAPPING(sourceChar,targetUniChar,data){ \
michael@0 1140 targetUniChar = toUnicodeTable[(sourceChar)] ; \
michael@0 1141 /* is the code point valid in current script? */ \
michael@0 1142 if(sourceChar> ASCII_END && \
michael@0 1143 (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
michael@0 1144 /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
michael@0 1145 if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
michael@0 1146 targetUniChar!=VOCALLIC_RR){ \
michael@0 1147 targetUniChar=missingCharMarker; \
michael@0 1148 } \
michael@0 1149 } \
michael@0 1150 }
michael@0 1151
michael@0 1152 /***********
michael@0 1153 * Rules for ISCII to Unicode converter
michael@0 1154 * ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
michael@0 1155 * which has both precomposed and decomposed forms of characters,
michael@0 1156 * pre-context and post-context need to be considered.
michael@0 1157 *
michael@0 1158 * Post context
michael@0 1159 * i) ATR : Attribute code is used to declare the font and script switching.
michael@0 1160 * Currently we only switch scripts; font codes are consumed without generating an error
michael@0 1161 * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
michael@0 1162 * obsolete characters
michael@0 1163 * Pre context
michael@0 1164 * i) Halant: if preceded by a halant then it is an explicit halant
michael@0 1165 * ii) Nukta :
michael@0 1166 * a) if preceded by a halant then it is a soft halant
michael@0 1167 * b) if preceded by specific consonants and the ligatures have pre-composed
michael@0 1168 * characters in Unicode then convert to pre-composed characters
michael@0 1169 * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
michael@0 1170 *
michael@0 1171 */
michael@0 1172
michael@0 1173 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
michael@0 1174 const char *source = ( char *) args->source;
michael@0 1175 UChar *target = args->target;
michael@0 1176 const char *sourceLimit = args->sourceLimit;
michael@0 1177 const UChar* targetLimit = args->targetLimit;
michael@0 1178 uint32_t targetUniChar = 0x0000;
michael@0 1179 uint8_t sourceChar = 0x0000;
michael@0 1180 UConverterDataISCII* data;
michael@0 1181 UChar32* toUnicodeStatus=NULL;
michael@0 1182 UChar32 tempTargetUniChar = 0x0000;
michael@0 1183 UChar* contextCharToUnicode= NULL;
michael@0 1184 UBool found;
michael@0 1185 int i;
michael@0 1186 int offset = 0;
michael@0 1187
michael@0 1188 if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
michael@0 1189 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1190 return;
michael@0 1191 }
michael@0 1192
michael@0 1193 data = (UConverterDataISCII*)(args->converter->extraInfo);
michael@0 1194 contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
michael@0 1195 toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
michael@0 1196
michael@0 1197 while (U_SUCCESS(*err) && source<sourceLimit) {
michael@0 1198
michael@0 1199 targetUniChar = missingCharMarker;
michael@0 1200
michael@0 1201 if (target < targetLimit) {
michael@0 1202 sourceChar = (unsigned char)*(source)++;
michael@0 1203
michael@0 1204 /* look at the post-context preform special processing */
michael@0 1205 if (*contextCharToUnicode==ATR) {
michael@0 1206
michael@0 1207 /* If we have ATR in *contextCharToUnicode then we need to change our
michael@0 1208 * state to the Indic Script specified by sourceChar
michael@0 1209 */
michael@0 1210
michael@0 1211 /* check if the sourceChar is supported script range*/
michael@0 1212 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
michael@0 1213 data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
michael@0 1214 data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
michael@0 1215 } else if (sourceChar==DEF) {
michael@0 1216 /* switch back to default */
michael@0 1217 data->currentDeltaToUnicode = data->defDeltaToUnicode;
michael@0 1218 data->currentMaskToUnicode = data->defMaskToUnicode;
michael@0 1219 } else {
michael@0 1220 if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
michael@0 1221 /* these are display codes consume and continue */
michael@0 1222 } else {
michael@0 1223 *err =U_ILLEGAL_CHAR_FOUND;
michael@0 1224 /* reset */
michael@0 1225 *contextCharToUnicode=NO_CHAR_MARKER;
michael@0 1226 goto CALLBACK;
michael@0 1227 }
michael@0 1228 }
michael@0 1229
michael@0 1230 /* reset */
michael@0 1231 *contextCharToUnicode=NO_CHAR_MARKER;
michael@0 1232
michael@0 1233 continue;
michael@0 1234
michael@0 1235 } else if (*contextCharToUnicode==EXT) {
michael@0 1236 /* check if sourceChar is in 0xA1-0xEE range */
michael@0 1237 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
michael@0 1238 /* We currently support only Anudatta and Devanagari abbreviation sign */
michael@0 1239 if (sourceChar==0xBF || sourceChar == 0xB8) {
michael@0 1240 targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
michael@0 1241
michael@0 1242 /* find out if the mapping is valid in this state */
michael@0 1243 if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
michael@0 1244 *contextCharToUnicode= NO_CHAR_MARKER;
michael@0 1245
michael@0 1246 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
michael@0 1247 if (data->prevToUnicodeStatus) {
michael@0 1248 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
michael@0 1249 data->prevToUnicodeStatus = 0x0000;
michael@0 1250 }
michael@0 1251 /* write to target */
michael@0 1252 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
michael@0 1253
michael@0 1254 continue;
michael@0 1255 }
michael@0 1256 }
michael@0 1257 /* byte unit is unassigned */
michael@0 1258 targetUniChar = missingCharMarker;
michael@0 1259 *err= U_INVALID_CHAR_FOUND;
michael@0 1260 } else {
michael@0 1261 /* only 0xA1 - 0xEE are legal after EXT char */
michael@0 1262 *contextCharToUnicode= NO_CHAR_MARKER;
michael@0 1263 *err = U_ILLEGAL_CHAR_FOUND;
michael@0 1264 }
michael@0 1265 goto CALLBACK;
michael@0 1266 } else if (*contextCharToUnicode==ISCII_INV) {
michael@0 1267 if (sourceChar==ISCII_HALANT) {
michael@0 1268 targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
michael@0 1269 } else {
michael@0 1270 targetUniChar = ZWJ;
michael@0 1271 }
michael@0 1272
michael@0 1273 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
michael@0 1274 if (data->prevToUnicodeStatus) {
michael@0 1275 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
michael@0 1276 data->prevToUnicodeStatus = 0x0000;
michael@0 1277 }
michael@0 1278 /* write to target */
michael@0 1279 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
michael@0 1280 /* reset */
michael@0 1281 *contextCharToUnicode=NO_CHAR_MARKER;
michael@0 1282 }
michael@0 1283
michael@0 1284 /* look at the pre-context and perform special processing */
michael@0 1285 switch (sourceChar) {
michael@0 1286 case ISCII_INV:
michael@0 1287 case EXT: /*falls through*/
michael@0 1288 case ATR:
michael@0 1289 *contextCharToUnicode = (UChar)sourceChar;
michael@0 1290
michael@0 1291 if (*toUnicodeStatus != missingCharMarker) {
michael@0 1292 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
michael@0 1293 if (data->prevToUnicodeStatus) {
michael@0 1294 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
michael@0 1295 data->prevToUnicodeStatus = 0x0000;
michael@0 1296 }
michael@0 1297 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
michael@0 1298 *toUnicodeStatus = missingCharMarker;
michael@0 1299 }
michael@0 1300 continue;
michael@0 1301 case ISCII_DANDA:
michael@0 1302 /* handle double danda*/
michael@0 1303 if (*contextCharToUnicode== ISCII_DANDA) {
michael@0 1304 targetUniChar = DOUBLE_DANDA;
michael@0 1305 /* clear the context */
michael@0 1306 *contextCharToUnicode = NO_CHAR_MARKER;
michael@0 1307 *toUnicodeStatus = missingCharMarker;
michael@0 1308 } else {
michael@0 1309 GET_MAPPING(sourceChar,targetUniChar,data);
michael@0 1310 *contextCharToUnicode = sourceChar;
michael@0 1311 }
michael@0 1312 break;
michael@0 1313 case ISCII_HALANT:
michael@0 1314 /* handle explicit halant */
michael@0 1315 if (*contextCharToUnicode == ISCII_HALANT) {
michael@0 1316 targetUniChar = ZWNJ;
michael@0 1317 /* clear the context */
michael@0 1318 *contextCharToUnicode = NO_CHAR_MARKER;
michael@0 1319 } else {
michael@0 1320 GET_MAPPING(sourceChar,targetUniChar,data);
michael@0 1321 *contextCharToUnicode = sourceChar;
michael@0 1322 }
michael@0 1323 break;
michael@0 1324 case 0x0A:
michael@0 1325 /* fall through */
michael@0 1326 case 0x0D:
michael@0 1327 data->resetToDefaultToUnicode = TRUE;
michael@0 1328 GET_MAPPING(sourceChar,targetUniChar,data)
michael@0 1329 ;
michael@0 1330 *contextCharToUnicode = sourceChar;
michael@0 1331 break;
michael@0 1332
michael@0 1333 case ISCII_VOWEL_SIGN_E:
michael@0 1334 i=1;
michael@0 1335 found=FALSE;
michael@0 1336 for (; i<vowelSignESpecialCases[0][0]; i++) {
michael@0 1337 U_ASSERT(i<sizeof(vowelSignESpecialCases)/sizeof(vowelSignESpecialCases[0]));
michael@0 1338 if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
michael@0 1339 targetUniChar=vowelSignESpecialCases[i][1];
michael@0 1340 found=TRUE;
michael@0 1341 break;
michael@0 1342 }
michael@0 1343 }
michael@0 1344 if (found) {
michael@0 1345 /* find out if the mapping is valid in this state */
michael@0 1346 if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
michael@0 1347 /*targetUniChar += data->currentDeltaToUnicode ;*/
michael@0 1348 *contextCharToUnicode= NO_CHAR_MARKER;
michael@0 1349 *toUnicodeStatus = missingCharMarker;
michael@0 1350 break;
michael@0 1351 }
michael@0 1352 }
michael@0 1353 GET_MAPPING(sourceChar,targetUniChar,data);
michael@0 1354 *contextCharToUnicode = sourceChar;
michael@0 1355 break;
michael@0 1356
michael@0 1357 case ISCII_NUKTA:
michael@0 1358 /* handle soft halant */
michael@0 1359 if (*contextCharToUnicode == ISCII_HALANT) {
michael@0 1360 targetUniChar = ZWJ;
michael@0 1361 /* clear the context */
michael@0 1362 *contextCharToUnicode = NO_CHAR_MARKER;
michael@0 1363 break;
michael@0 1364 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
michael@0 1365 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
michael@0 1366 if (data->prevToUnicodeStatus) {
michael@0 1367 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
michael@0 1368 data->prevToUnicodeStatus = 0x0000;
michael@0 1369 }
michael@0 1370 /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
michael@0 1371 * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
michael@0 1372 */
michael@0 1373 targetUniChar = PNJ_RRA;
michael@0 1374 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
michael@0 1375 if (U_SUCCESS(*err)) {
michael@0 1376 targetUniChar = PNJ_SIGN_VIRAMA;
michael@0 1377 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
michael@0 1378 if (U_SUCCESS(*err)) {
michael@0 1379 targetUniChar = PNJ_HA;
michael@0 1380 WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
michael@0 1381 } else {
michael@0 1382 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
michael@0 1383 }
michael@0 1384 } else {
michael@0 1385 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
michael@0 1386 args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
michael@0 1387 }
michael@0 1388 *toUnicodeStatus = missingCharMarker;
michael@0 1389 data->contextCharToUnicode = NO_CHAR_MARKER;
michael@0 1390 continue;
michael@0 1391 } else {
michael@0 1392 /* try to handle <CHAR> + ISCII_NUKTA special mappings */
michael@0 1393 i=1;
michael@0 1394 found =FALSE;
michael@0 1395 for (; i<nuktaSpecialCases[0][0]; i++) {
michael@0 1396 if (nuktaSpecialCases[i][0]==(uint8_t)
michael@0 1397 *contextCharToUnicode) {
michael@0 1398 targetUniChar=nuktaSpecialCases[i][1];
michael@0 1399 found =TRUE;
michael@0 1400 break;
michael@0 1401 }
michael@0 1402 }
michael@0 1403 if (found) {
michael@0 1404 /* find out if the mapping is valid in this state */
michael@0 1405 if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
michael@0 1406 /*targetUniChar += data->currentDeltaToUnicode ;*/
michael@0 1407 *contextCharToUnicode= NO_CHAR_MARKER;
michael@0 1408 *toUnicodeStatus = missingCharMarker;
michael@0 1409 if (data->currentDeltaToUnicode == PNJ_DELTA) {
michael@0 1410 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
michael@0 1411 if (data->prevToUnicodeStatus) {
michael@0 1412 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
michael@0 1413 data->prevToUnicodeStatus = 0x0000;
michael@0 1414 }
michael@0 1415 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
michael@0 1416 continue;
michael@0 1417 }
michael@0 1418 break;
michael@0 1419 }
michael@0 1420 /* else fall through to default */
michael@0 1421 }
michael@0 1422 /* else fall through to default */
michael@0 1423 }
michael@0 1424 default:GET_MAPPING(sourceChar,targetUniChar,data)
michael@0 1425 ;
michael@0 1426 *contextCharToUnicode = sourceChar;
michael@0 1427 break;
michael@0 1428 }
michael@0 1429
michael@0 1430 if (*toUnicodeStatus != missingCharMarker) {
michael@0 1431 /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
michael@0 1432 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
michael@0 1433 (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
michael@0 1434 /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
michael@0 1435 offset = (int)(source-args->source - 3);
michael@0 1436 tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
michael@0 1437 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
michael@0 1438 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
michael@0 1439 data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
michael@0 1440 *toUnicodeStatus = missingCharMarker;
michael@0 1441 continue;
michael@0 1442 } else {
michael@0 1443 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
michael@0 1444 if (data->prevToUnicodeStatus) {
michael@0 1445 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
michael@0 1446 data->prevToUnicodeStatus = 0x0000;
michael@0 1447 }
michael@0 1448 /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
michael@0 1449 * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
michael@0 1450 */
michael@0 1451 if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
michael@0 1452 targetUniChar = PNJ_TIPPI - PNJ_DELTA;
michael@0 1453 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
michael@0 1454 } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
michael@0 1455 /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
michael@0 1456 data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
michael@0 1457 } else {
michael@0 1458 /* write the previously mapped codepoint */
michael@0 1459 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
michael@0 1460 }
michael@0 1461 }
michael@0 1462 *toUnicodeStatus = missingCharMarker;
michael@0 1463 }
michael@0 1464
michael@0 1465 if (targetUniChar != missingCharMarker) {
michael@0 1466 /* now save the targetUniChar for delayed write */
michael@0 1467 *toUnicodeStatus = (UChar) targetUniChar;
michael@0 1468 if (data->resetToDefaultToUnicode==TRUE) {
michael@0 1469 data->currentDeltaToUnicode = data->defDeltaToUnicode;
michael@0 1470 data->currentMaskToUnicode = data->defMaskToUnicode;
michael@0 1471 data->resetToDefaultToUnicode=FALSE;
michael@0 1472 }
michael@0 1473 } else {
michael@0 1474
michael@0 1475 /* we reach here only if targetUniChar == missingCharMarker
michael@0 1476 * so assign codes to reason and err
michael@0 1477 */
michael@0 1478 *err = U_INVALID_CHAR_FOUND;
michael@0 1479 CALLBACK:
michael@0 1480 args->converter->toUBytes[0] = (uint8_t) sourceChar;
michael@0 1481 args->converter->toULength = 1;
michael@0 1482 break;
michael@0 1483 }
michael@0 1484
michael@0 1485 } else {
michael@0 1486 *err =U_BUFFER_OVERFLOW_ERROR;
michael@0 1487 break;
michael@0 1488 }
michael@0 1489 }
michael@0 1490
michael@0 1491 if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
michael@0 1492 /* end of the input stream */
michael@0 1493 UConverter *cnv = args->converter;
michael@0 1494
michael@0 1495 if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
michael@0 1496 /* set toUBytes[] */
michael@0 1497 cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
michael@0 1498 cnv->toULength = 1;
michael@0 1499
michael@0 1500 /* avoid looping on truncated sequences */
michael@0 1501 *contextCharToUnicode = NO_CHAR_MARKER;
michael@0 1502 } else {
michael@0 1503 cnv->toULength = 0;
michael@0 1504 }
michael@0 1505
michael@0 1506 if (*toUnicodeStatus != missingCharMarker) {
michael@0 1507 /* output a remaining target character */
michael@0 1508 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
michael@0 1509 *toUnicodeStatus = missingCharMarker;
michael@0 1510 }
michael@0 1511 }
michael@0 1512
michael@0 1513 args->target = target;
michael@0 1514 args->source = source;
michael@0 1515 }
michael@0 1516
michael@0 1517 /* structure for SafeClone calculations:
 * _ISCII_SafeClone() treats the caller's stackBuffer as one of these and
 * uses sizeof(struct cloneISCIIStruct) as the buffer size it needs. */
michael@0 1518 struct cloneISCIIStruct {
michael@0 1519 UConverter cnv; /* the clone; _ISCII_SafeClone() returns its address */
michael@0 1520 UConverterDataISCII mydata; /* receives a copy of the source converter's extraInfo */
michael@0 1521 };
michael@0 1522
/*
 * Safe-clone routine of the ISCII converter (listed in _ISCIIImpl).
 *
 * cnv         - converter whose ISCII state (cnv->extraInfo) is duplicated
 * stackBuffer - caller-provided storage, used as a struct cloneISCIIStruct
 * pBufferSize - a value of 0 is a preflighting request: the needed size,
 *               sizeof(struct cloneISCIIStruct), is stored back through
 *               the pointer and nothing else is done
 * status      - if it already holds a failure on entry, nothing is done
 *
 * Returns 0 when status holds a failure or when preflighting; otherwise
 * the address of the UConverter that is the first member of the struct
 * laid over stackBuffer.
 *
 * NOTE(review): apart from the zero test, the caller's buffer size is not
 * compared with the needed size here, and stackBuffer is not checked for
 * NULL - presumably ucnv_safeClone() guarantees both; confirm in ucnv.c.
 */
michael@0 1523 static UConverter *
michael@0 1524 _ISCII_SafeClone(const UConverter *cnv,
michael@0 1525 void *stackBuffer,
michael@0 1526 int32_t *pBufferSize,
michael@0 1527 UErrorCode *status)
michael@0 1528 {
michael@0 1529 struct cloneISCIIStruct * localClone;
michael@0 1530 int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
michael@0 1531
michael@0 1532 if (U_FAILURE(*status)) {
michael@0 1533 return 0;
michael@0 1534 }
michael@0 1535
michael@0 1536 if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
michael@0 1537 *pBufferSize = bufferSizeNeeded;
michael@0 1538 return 0;
michael@0 1539 }
michael@0 1540
michael@0 1541 localClone = (struct cloneISCIIStruct *)stackBuffer;
michael@0 1542 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
michael@0 1543
michael@0 1544 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); /* duplicate the ISCII-specific state into the clone's buffer */
michael@0 1545 localClone->cnv.extraInfo = &localClone->mydata; /* the clone now refers to its own copy, not to the source converter's */
michael@0 1546 localClone->cnv.isExtraLocal = TRUE; /* NOTE(review): presumably tells the framework that extraInfo lives inside the clone's buffer - confirm */
michael@0 1547
michael@0 1548 return &localClone->cnv;
michael@0 1549 }
michael@0 1550
/*
 * Unicode-set routine of the ISCII converter (listed in _ISCIIImpl).
 * Through the sa->addRange and sa->add callbacks it adds to sa->set:
 *   - the range 0..ASCII_END;
 *   - for every script value from DEVANAGARI through MALAYALAM, each
 *     code point idx + (script * DELTA) + INDIC_BLOCK_BEGIN, with
 *     0 <= idx < DELTA, whose validityTable[idx] entry shares a bit with
 *     lookupInitialData[script].maskEnum (cast to uint8_t), and in
 *     addition idx 0x31 for TELUGU whatever the table says;
 *   - DANDA, DOUBLE_DANDA, ZWNJ and ZWJ.
 * The parameters cnv, which and pErrorCode are not used in this body.
 */
michael@0 1551 static void
michael@0 1552 _ISCIIGetUnicodeSet(const UConverter *cnv,
michael@0 1553 const USetAdder *sa,
michael@0 1554 UConverterUnicodeSet which,
michael@0 1555 UErrorCode *pErrorCode)
michael@0 1556 {
michael@0 1557 int32_t idx, script;
michael@0 1558 uint8_t mask;
michael@0 1559
michael@0 1560 /* Since all ISCII versions allow switching to other ISCII
michael@0 1561 scripts, we add all roundtrippable characters to this set. */
michael@0 1562 sa->addRange(sa->set, 0, ASCII_END);
michael@0 1563 for (script = DEVANAGARI; script <= MALAYALAM; script++) {
michael@0 1564 mask = (uint8_t)(lookupInitialData[script].maskEnum);
michael@0 1565 for (idx = 0; idx < DELTA; idx++) {
michael@0 1566 /* idx 0x31 is always added for TELUGU, even if validityTable does not flag it for that script */
michael@0 1567 if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
michael@0 1568 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
michael@0 1569 }
michael@0 1570 }
michael@0 1571 }
michael@0 1572 sa->add(sa->set, DANDA);
michael@0 1573 sa->add(sa->set, DOUBLE_DANDA);
michael@0 1574 sa->add(sa->set, ZWNJ);
michael@0 1575 sa->add(sa->set, ZWJ);
michael@0 1576 }
michael@0 1577
/*
 * Implementation table of the ISCII converter; _ISCIIData holds its address.
 * The initializer is positional, so the order of the entries is what binds
 * each routine to its slot in UConverterImpl; NULL leaves a slot empty.
 * NOTE(review): UConverterImpl is declared outside this file, so the slot
 * names in the notes below are recalled from ucnv_cnv.h - TODO confirm.
 */
michael@0 1578 static const UConverterImpl _ISCIIImpl={
michael@0 1579
michael@0 1580 UCNV_ISCII, /* presumably the converter type */
michael@0 1581
michael@0 1582 NULL, /* presumably load */
michael@0 1583 NULL, /* presumably unload */
michael@0 1584
michael@0 1585 _ISCIIOpen, /* presumably open */
michael@0 1586 _ISCIIClose, /* presumably close */
michael@0 1587 _ISCIIReset, /* presumably reset */
michael@0 1588
michael@0 1589 UConverter_toUnicode_ISCII_OFFSETS_LOGIC, /* presumably toUnicode */
michael@0 1590 UConverter_toUnicode_ISCII_OFFSETS_LOGIC, /* same routine again; presumably toUnicodeWithOffsets */
michael@0 1591 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, /* presumably fromUnicode */
michael@0 1592 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, /* same routine again; presumably fromUnicodeWithOffsets */
michael@0 1593 NULL, /* presumably getNextUChar */
michael@0 1594
michael@0 1595 NULL, /* presumably getStarters */
michael@0 1596 _ISCIIgetName, /* presumably getName */
michael@0 1597 NULL, /* presumably writeSub */
michael@0 1598 _ISCII_SafeClone, /* presumably safeClone */
michael@0 1599 _ISCIIGetUnicodeSet /* presumably getUnicodeSet */
michael@0 1600 };
michael@0 1601
/*
 * Static description of the ISCII converter; _ISCIIData holds its address.
 * The initializer is positional and must follow the member order of
 * UConverterStaticData.
 * NOTE(review): that struct is declared outside this file, so the member
 * names in the notes below are recalled from ucnv_bld.h - TODO confirm.
 */
michael@0 1602 static const UConverterStaticData _ISCIIStaticData={
michael@0 1603 sizeof(UConverterStaticData), /* presumably structSize */
michael@0 1604 "ISCII", /* presumably the converter name */
michael@0 1605 0, /* presumably codepage */
michael@0 1606 UCNV_IBM, /* presumably platform */
michael@0 1607 UCNV_ISCII, /* presumably conversionType */
michael@0 1608 1, /* presumably minBytesPerChar */
michael@0 1609 4, /* presumably maxBytesPerChar */
michael@0 1610 { 0x1a, 0, 0, 0 }, /* presumably subChar (substitution bytes) */
michael@0 1611 0x1, /* presumably subCharLen */
michael@0 1612 FALSE, /* presumably hasToUnicodeFallback */
michael@0 1613 FALSE, /* presumably hasFromUnicodeFallback */
michael@0 1614 0x0, /* presumably unicodeMask */
michael@0 1615 0x0, /* presumably subChar1 */
michael@0 1616 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
michael@0 1617
michael@0 1618 };
michael@0 1619
/*
 * Shared-data record of the ISCII converter. Unlike the two tables it
 * points to (_ISCIIStaticData and _ISCIIImpl) it is not declared static,
 * so it has external linkage. The initializer is positional.
 * NOTE(review): UConverterSharedData is declared outside this file, so the
 * member names in the notes below are recalled from ucnv_bld.h - TODO confirm.
 */
michael@0 1620 const UConverterSharedData _ISCIIData={
michael@0 1621 sizeof(UConverterSharedData), /* presumably structSize */
michael@0 1622 ~((uint32_t) 0), /* all bits set; presumably referenceCounter */
michael@0 1623 NULL, /* presumably dataMemory */
michael@0 1624 NULL, /* presumably table */
michael@0 1625 &_ISCIIStaticData, /* presumably staticData */
michael@0 1626 FALSE, /* presumably sharedDataCached */
michael@0 1627 &_ISCIIImpl, /* presumably impl */
michael@0 1628 0 /* presumably toUnicodeStatus */
michael@0 1629 };
michael@0 1630
michael@0 1631 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION */

mercurial