intl/icu/source/common/ucnvisci.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnvisci.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1631 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2000-2012, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   file name:  ucnvisci.c
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2001JUN26
    1.15 +*   created by: Ram Viswanadha
    1.16 +*
    1.17 +*   Date        Name        Description
    1.18 +*   24/7/2001   Ram         Added support for EXT character handling
    1.19 +*/
    1.20 +
    1.21 +#include "unicode/utypes.h"
    1.22 +
    1.23 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
    1.24 +
    1.25 +#include "unicode/ucnv.h"
    1.26 +#include "unicode/ucnv_cb.h"
    1.27 +#include "unicode/utf16.h"
    1.28 +#include "cmemory.h"
    1.29 +#include "ucnv_bld.h"
    1.30 +#include "ucnv_cnv.h"
    1.31 +#include "cstring.h"
    1.32 +#include "uassert.h"
    1.33 +
    1.34 +#define UCNV_OPTIONS_VERSION_MASK 0xf
    1.35 +#define NUKTA               0x093c
    1.36 +#define HALANT              0x094d
    1.37 +#define ZWNJ                0x200c /* Zero Width Non Joiner */
    1.38 +#define ZWJ                 0x200d /* Zero width Joiner */
    1.39 +#define INVALID_CHAR        0xffff
    1.40 +#define ATR                 0xEF   /* Attribute code */
    1.41 +#define EXT                 0xF0   /* Extension code */
    1.42 +#define DANDA               0x0964
    1.43 +#define DOUBLE_DANDA        0x0965
    1.44 +#define ISCII_NUKTA         0xE9
    1.45 +#define ISCII_HALANT        0xE8
    1.46 +#define ISCII_DANDA         0xEA
    1.47 +#define ISCII_INV           0xD9
    1.48 +#define ISCII_VOWEL_SIGN_E  0xE0
    1.49 +#define INDIC_BLOCK_BEGIN   0x0900
    1.50 +#define INDIC_BLOCK_END     0x0D7F
    1.51 +#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
    1.52 +#define VOCALLIC_RR         0x0931
    1.53 +#define LF                  0x0A
    1.54 +#define ASCII_END           0xA0
    1.55 +#define NO_CHAR_MARKER      0xFFFE
    1.56 +#define TELUGU_DELTA        DELTA * TELUGU
    1.57 +#define DEV_ABBR_SIGN       0x0970
    1.58 +#define DEV_ANUDATTA        0x0952
    1.59 +#define EXT_RANGE_BEGIN     0xA1
    1.60 +#define EXT_RANGE_END       0xEE
    1.61 +
    1.62 +#define PNJ_DELTA           0x0100
    1.63 +#define PNJ_BINDI           0x0A02
    1.64 +#define PNJ_TIPPI           0x0A70
    1.65 +#define PNJ_SIGN_VIRAMA     0x0A4D
    1.66 +#define PNJ_ADHAK           0x0A71
    1.67 +#define PNJ_HA              0x0A39
    1.68 +#define PNJ_RRA             0x0A5C
    1.69 +
    1.70 +typedef enum {
    1.71 +    DEVANAGARI =0,
    1.72 +    BENGALI,
    1.73 +    GURMUKHI,
    1.74 +    GUJARATI,
    1.75 +    ORIYA,
    1.76 +    TAMIL,
    1.77 +    TELUGU,
    1.78 +    KANNADA,
    1.79 +    MALAYALAM,
    1.80 +    DELTA=0x80
    1.81 +}UniLang;
    1.82 +
    1.83 +/**
    1.84 + * Enumeration for switching code pages if <ATR>+<one of below values>
    1.85 + * is encountered
    1.86 + */
    1.87 +typedef enum {
    1.88 +    DEF = 0x40,
    1.89 +    RMN = 0x41,
    1.90 +    DEV = 0x42,
    1.91 +    BNG = 0x43,
    1.92 +    TML = 0x44,
    1.93 +    TLG = 0x45,
    1.94 +    ASM = 0x46,
    1.95 +    ORI = 0x47,
    1.96 +    KND = 0x48,
    1.97 +    MLM = 0x49,
    1.98 +    GJR = 0x4A,
    1.99 +    PNJ = 0x4B,
   1.100 +    ARB = 0x71,
   1.101 +    PES = 0x72,
   1.102 +    URD = 0x73,
   1.103 +    SND = 0x74,
   1.104 +    KSM = 0x75,
   1.105 +    PST = 0x76
   1.106 +}ISCIILang;
   1.107 +
   1.108 +typedef enum {
   1.109 +    DEV_MASK =0x80,
   1.110 +    PNJ_MASK =0x40,
   1.111 +    GJR_MASK =0x20,
   1.112 +    ORI_MASK =0x10,
   1.113 +    BNG_MASK =0x08,
   1.114 +    KND_MASK =0x04,
   1.115 +    MLM_MASK =0x02,
   1.116 +    TML_MASK =0x01,
   1.117 +    ZERO =0x00
   1.118 +}MaskEnum;
   1.119 +
/* Converter name prefix; _ISCIIOpen appends a single version digit to it. */
#define ISCII_CNV_PREFIX "ISCII,version="

/*
 * Per-converter ISCII state, allocated in _ISCIIOpen and stored in
 * UConverter.extraInfo. Fields suffixed "ToUnicode" belong to the
 * ISCII->Unicode direction, fields suffixed "FromUnicode" to the
 * Unicode->ISCII direction.
 */
typedef struct {
    UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis (toUnicode direction) */
    UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis (fromUnicode direction) */
    uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
    uint16_t currentDeltaFromUnicode;   /* current delta in Indic block (fromUnicode direction) */
    uint16_t currentDeltaToUnicode;     /* current delta in Indic block (toUnicode direction) */
    MaskEnum currentMaskFromUnicode;    /* mask for current state in fromUnicode */
    MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
    MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
    UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
    UBool resetToDefaultToUnicode;      /* boolean for resetting to default delta and mask when a newline is encountered */
    /* prefix + one version digit + NUL: sizeof() already counts the prefix's NUL, +1 is for the digit */
    char name[sizeof(ISCII_CNV_PREFIX) + 1];
    UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
} UConverterDataISCII;
   1.136 +
/* One row of the version -> initial-state lookup table below. */
typedef struct LookupDataStruct {
    UniLang uniLang;        /* script index; multiplied by DELTA to get the block delta */
    MaskEnum maskEnum;      /* validity-table bit used for this script */
    ISCIILang isciiLang;    /* ISCII script-selection code for this script */
} LookupDataStruct;

/*
 * Indexed by the converter version (options & UCNV_OPTIONS_VERSION_MASK,
 * values 0..8 are accepted by _ISCIIOpen), so the row order must not change.
 * Telugu and Kannada intentionally share KND_MASK: the validity table uses a
 * single bit for both scripts.
 */
static const LookupDataStruct lookupInitialData[]={
    { DEVANAGARI, DEV_MASK,  DEV },
    { BENGALI,    BNG_MASK,  BNG },
    { GURMUKHI,   PNJ_MASK,  PNJ },
    { GUJARATI,   GJR_MASK,  GJR },
    { ORIYA,      ORI_MASK,  ORI },
    { TAMIL,      TML_MASK,  TML },
    { TELUGU,     KND_MASK,  TLG },
    { KANNADA,    KND_MASK,  KND },
    { MALAYALAM,  MLM_MASK,  MLM }
};
   1.154 +
/*
 * For special handling of certain Gurmukhi characters.
 * Indexed by (code point - 0x0A00); covers 0x0A00..0x0A4F (80 entries).
 * Bit 0 (value 1): PNJ consonant
 * Bit 1 (value 2): PNJ Bindi Tippi
 * A value of 3 means both bits are set.
 */
static const uint8_t pnjMap[80] = {
    /* 0A00..0A0F */
    0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
    /* 0A10..0A1F */
    0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A20..0A2F */
    3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
    /* 0A30..0A3F */
    3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
    /* 0A40..0A4F */
    0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
};
   1.172 +
   1.173 +static UBool
   1.174 +isPNJConsonant(UChar32 c) {
   1.175 +    if (c < 0xa00 || 0xa50 <= c) {
   1.176 +        return FALSE;
   1.177 +    } else {
   1.178 +        return (UBool)(pnjMap[c - 0xa00] & 1);
   1.179 +    }
   1.180 +}
   1.181 +
   1.182 +static UBool
   1.183 +isPNJBindiTippi(UChar32 c) {
   1.184 +    if (c < 0xa00 || 0xa50 <= c) {
   1.185 +        return FALSE;
   1.186 +    } else {
   1.187 +        return (UBool)(pnjMap[c - 0xa00] >> 1);
   1.188 +    }
   1.189 +}
   1.190 +
   1.191 +static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
   1.192 +    if(pArgs->onlyTestIsLoadable) {
   1.193 +        return;
   1.194 +    }
   1.195 +
   1.196 +    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
   1.197 +
   1.198 +    if (cnv->extraInfo != NULL) {
   1.199 +        int32_t len=0;
   1.200 +        UConverterDataISCII *converterData=
   1.201 +                (UConverterDataISCII *) cnv->extraInfo;
   1.202 +        converterData->contextCharToUnicode=NO_CHAR_MARKER;
   1.203 +        cnv->toUnicodeStatus = missingCharMarker;
   1.204 +        converterData->contextCharFromUnicode=0x0000;
   1.205 +        converterData->resetToDefaultToUnicode=FALSE;
   1.206 +        /* check if the version requested is supported */
   1.207 +        if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
   1.208 +            /* initialize state variables */
   1.209 +            converterData->currentDeltaFromUnicode
   1.210 +                    = converterData->currentDeltaToUnicode
   1.211 +                            = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
   1.212 +
   1.213 +            converterData->currentMaskFromUnicode
   1.214 +                    = converterData->currentMaskToUnicode
   1.215 +                            = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
   1.216 +            
   1.217 +            converterData->isFirstBuffer=TRUE;
   1.218 +            (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
   1.219 +            len = (int32_t)uprv_strlen(converterData->name);
   1.220 +            converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
   1.221 +            converterData->name[len+1]=0;
   1.222 +            
   1.223 +            converterData->prevToUnicodeStatus = 0x0000;
   1.224 +        } else {
   1.225 +            uprv_free(cnv->extraInfo);
   1.226 +            cnv->extraInfo = NULL;
   1.227 +            *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1.228 +        }
   1.229 +
   1.230 +    } else {
   1.231 +        *errorCode =U_MEMORY_ALLOCATION_ERROR;
   1.232 +    }
   1.233 +}
   1.234 +
   1.235 +static void _ISCIIClose(UConverter *cnv) {
   1.236 +    if (cnv->extraInfo!=NULL) {
   1.237 +        if (!cnv->isExtraLocal) {
   1.238 +            uprv_free(cnv->extraInfo);
   1.239 +        }
   1.240 +        cnv->extraInfo=NULL;
   1.241 +    }
   1.242 +}
   1.243 +
   1.244 +static const char* _ISCIIgetName(const UConverter* cnv) {
   1.245 +    if (cnv->extraInfo) {
   1.246 +        UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
   1.247 +        return myData->name;
   1.248 +    }
   1.249 +    return NULL;
   1.250 +}
   1.251 +
   1.252 +static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
   1.253 +    UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
   1.254 +    if (choice<=UCNV_RESET_TO_UNICODE) {
   1.255 +        cnv->toUnicodeStatus = missingCharMarker;
   1.256 +        cnv->mode=0;
   1.257 +        data->currentDeltaToUnicode=data->defDeltaToUnicode;
   1.258 +        data->currentMaskToUnicode = data->defMaskToUnicode;
   1.259 +        data->contextCharToUnicode=NO_CHAR_MARKER;
   1.260 +        data->prevToUnicodeStatus = 0x0000;
   1.261 +    }
   1.262 +    if (choice!=UCNV_RESET_TO_UNICODE) {
   1.263 +        cnv->fromUChar32=0x0000;
   1.264 +        data->contextCharFromUnicode=0x00;
   1.265 +        data->currentMaskFromUnicode=data->defMaskToUnicode;
   1.266 +        data->currentDeltaFromUnicode=data->defDeltaToUnicode;
   1.267 +        data->isFirstBuffer=TRUE;
   1.268 +        data->resetToDefaultToUnicode=FALSE;
   1.269 +    }
   1.270 +}
   1.271 +
/**
 * The values in validity table are indexed by the lower bits of Unicode
 * range 0x0900 - 0x097f (128 entries). The values have a structure like:
 *       ---------------------------------------------------------------
 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
 *      |       |       |       |       | ASM   | KND   |       |       |
 *       ---------------------------------------------------------------
 * If a code point is valid in a particular script
 * then that bit is turned on
 *
 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit
 * to represent these languages
 *
 * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
 * and combine and use 1 bit to represent these languages.
 *
 * Each row comment reads "ISCII byte : valid mask : Unicode code point".
 * NOTE(review): the "valid mask" column of the row comments is not kept in
 * sync with the actual mask sums everywhere (e.g. the 0x902 row says 0xfe but
 * its masks add up to 0xff); the initializer expression is authoritative.
 *
 * TODO: It is probably easier to understand and maintain to change this
 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
 */

static const uint8_t validityTable[128] = {
/* This state table is tool generated please do not edit unless you know exactly what you are doing */
/* Note: This table was edited to mirror the Windows XP implementation */
/*ISCII:Valid:Unicode */
/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*
 * The length of the array is 128 to provide values for 0x900..0x97f.
 * The last 15 entries for 0x971..0x97f of the validity table are all zero
 * because no Indic script uses such Unicode code points.
 * Only the entry for 0x971 is spelled out below; the remaining 14 elements
 * have no initializer and are therefore zero-initialized by the compiler.
 */
/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
};
   1.416 +
/*
 * Unicode -> ISCII mapping for one 128-code-point script block. The trailing
 * comment on each entry gives the Devanagari-layout code point (0x0900 + index)
 * that the entry belongs to.
 *
 * Entry forms found in the table:
 *   0x00XX - a single ISCII byte XX
 *   0xXXYY - two bytes; many of these end in 0xE9 (== ISCII_NUKTA) and two
 *            start with 0xF0 (== EXT). Presumably emitted as the byte pair
 *            XX YY by the conversion code outside this view - TODO confirm.
 *   0xFFFF - same value as INVALID_CHAR; presumably "no ISCII mapping".
 */
static const uint16_t fromUnicodeTable[128]={
    0x00a0 ,/* 0x0900 */
    0x00a1 ,/* 0x0901 */
    0x00a2 ,/* 0x0902 */
    0x00a3 ,/* 0x0903 */
    0xa4e0 ,/* 0x0904 */
    0x00a4 ,/* 0x0905 */
    0x00a5 ,/* 0x0906 */
    0x00a6 ,/* 0x0907 */
    0x00a7 ,/* 0x0908 */
    0x00a8 ,/* 0x0909 */
    0x00a9 ,/* 0x090a */
    0x00aa ,/* 0x090b */
    0xA6E9 ,/* 0x090c */
    0x00ae ,/* 0x090d */
    0x00ab ,/* 0x090e */
    0x00ac ,/* 0x090f */
    0x00ad ,/* 0x0910 */
    0x00b2 ,/* 0x0911 */
    0x00af ,/* 0x0912 */
    0x00b0 ,/* 0x0913 */
    0x00b1 ,/* 0x0914 */
    0x00b3 ,/* 0x0915 */
    0x00b4 ,/* 0x0916 */
    0x00b5 ,/* 0x0917 */
    0x00b6 ,/* 0x0918 */
    0x00b7 ,/* 0x0919 */
    0x00b8 ,/* 0x091a */
    0x00b9 ,/* 0x091b */
    0x00ba ,/* 0x091c */
    0x00bb ,/* 0x091d */
    0x00bc ,/* 0x091e */
    0x00bd ,/* 0x091f */
    0x00be ,/* 0x0920 */
    0x00bf ,/* 0x0921 */
    0x00c0 ,/* 0x0922 */
    0x00c1 ,/* 0x0923 */
    0x00c2 ,/* 0x0924 */
    0x00c3 ,/* 0x0925 */
    0x00c4 ,/* 0x0926 */
    0x00c5 ,/* 0x0927 */
    0x00c6 ,/* 0x0928 */
    0x00c7 ,/* 0x0929 */
    0x00c8 ,/* 0x092a */
    0x00c9 ,/* 0x092b */
    0x00ca ,/* 0x092c */
    0x00cb ,/* 0x092d */
    0x00cc ,/* 0x092e */
    0x00cd ,/* 0x092f */
    0x00cf ,/* 0x0930 */
    0x00d0 ,/* 0x0931 */
    0x00d1 ,/* 0x0932 */
    0x00d2 ,/* 0x0933 */
    0x00d3 ,/* 0x0934 */
    0x00d4 ,/* 0x0935 */
    0x00d5 ,/* 0x0936 */
    0x00d6 ,/* 0x0937 */
    0x00d7 ,/* 0x0938 */
    0x00d8 ,/* 0x0939 */
    0xFFFF ,/* 0x093A */
    0xFFFF ,/* 0x093B */
    0x00e9 ,/* 0x093c */
    0xEAE9 ,/* 0x093d */
    0x00da ,/* 0x093e */
    0x00db ,/* 0x093f */
    0x00dc ,/* 0x0940 */
    0x00dd ,/* 0x0941 */
    0x00de ,/* 0x0942 */
    0x00df ,/* 0x0943 */
    0xDFE9 ,/* 0x0944 */
    0x00e3 ,/* 0x0945 */
    0x00e0 ,/* 0x0946 */
    0x00e1 ,/* 0x0947 */
    0x00e2 ,/* 0x0948 */
    0x00e7 ,/* 0x0949 */
    0x00e4 ,/* 0x094a */
    0x00e5 ,/* 0x094b */
    0x00e6 ,/* 0x094c */
    0x00e8 ,/* 0x094d */
    0x00ec ,/* 0x094e */
    0x00ed ,/* 0x094f */
    0xA1E9 ,/* 0x0950 */ /* OM Symbol */
    0xFFFF ,/* 0x0951 */
    0xF0B8 ,/* 0x0952 */
    0xFFFF ,/* 0x0953 */
    0xFFFF ,/* 0x0954 */
    0xFFFF ,/* 0x0955 */
    0xFFFF ,/* 0x0956 */
    0xFFFF ,/* 0x0957 */
    0xb3e9 ,/* 0x0958 */
    0xb4e9 ,/* 0x0959 */
    0xb5e9 ,/* 0x095a */
    0xbae9 ,/* 0x095b */
    0xbfe9 ,/* 0x095c */
    0xC0E9 ,/* 0x095d */
    0xc9e9 ,/* 0x095e */
    0x00ce ,/* 0x095f */
    0xAAe9 ,/* 0x0960 */
    0xA7E9 ,/* 0x0961 */
    0xDBE9 ,/* 0x0962 */
    0xDCE9 ,/* 0x0963 */
    0x00ea ,/* 0x0964 */
    0xeaea ,/* 0x0965 */
    0x00f1 ,/* 0x0966 */
    0x00f2 ,/* 0x0967 */
    0x00f3 ,/* 0x0968 */
    0x00f4 ,/* 0x0969 */
    0x00f5 ,/* 0x096a */
    0x00f6 ,/* 0x096b */
    0x00f7 ,/* 0x096c */
    0x00f8 ,/* 0x096d */
    0x00f9 ,/* 0x096e */
    0x00fa ,/* 0x096f */
    0xF0BF ,/* 0x0970 */
    0xFFFF ,/* 0x0971 */
    0xFFFF ,/* 0x0972 */
    0xFFFF ,/* 0x0973 */
    0xFFFF ,/* 0x0974 */
    0xFFFF ,/* 0x0975 */
    0xFFFF ,/* 0x0976 */
    0xFFFF ,/* 0x0977 */
    0xFFFF ,/* 0x0978 */
    0xFFFF ,/* 0x0979 */
    0xFFFF ,/* 0x097a */
    0xFFFF ,/* 0x097b */
    0xFFFF ,/* 0x097c */
    0xFFFF ,/* 0x097d */
    0xFFFF ,/* 0x097e */
    0xFFFF ,/* 0x097f */
};
   /*
    * ISCII byte -> Unicode lookup, indexed directly by the ISCII byte value.
    *
    *   0x00..0xA0  map to the identical code point.
    *   0xA1..0xFA  map into the Devanagari block (U+09xx), except 0xD9 which
    *               maps to U+200D.
    *   0xFFFF      marks a byte that has no direct single-code-point mapping.
    *
    * Each row below holds eight consecutive entries; the trailing comment gives
    * the ISCII byte range covered by that row.
    */
   static const uint16_t toUnicodeTable[256] = {
       0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, /* 0x00 - 0x07 */
       0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x08 - 0x0f */
       0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, /* 0x10 - 0x17 */
       0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x18 - 0x1f */
       0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, /* 0x20 - 0x27 */
       0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x28 - 0x2f */
       0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x30 - 0x37 */
       0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x38 - 0x3f */
       0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x40 - 0x47 */
       0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x48 - 0x4f */
       0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x50 - 0x57 */
       0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x58 - 0x5f */
       0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x60 - 0x67 */
       0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x68 - 0x6f */
       0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x70 - 0x77 */
       0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x78 - 0x7f */
       0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80 - 0x87 */
       0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, /* 0x88 - 0x8f */
       0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90 - 0x97 */
       0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, /* 0x98 - 0x9f */
       0x00a0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, /* 0xa0 - 0xa7 */
       0x0909, 0x090a, 0x090b, 0x090e, 0x090f, 0x0910, 0x090d, 0x0912, /* 0xa8 - 0xaf */
       0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, /* 0xb0 - 0xb7 */
       0x091a, 0x091b, 0x091c, 0x091d, 0x091e, 0x091f, 0x0920, 0x0921, /* 0xb8 - 0xbf */
       0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, /* 0xc0 - 0xc7 */
       0x092a, 0x092b, 0x092c, 0x092d, 0x092e, 0x092f, 0x095f, 0x0930, /* 0xc8 - 0xcf */
       0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, /* 0xd0 - 0xd7 */
       0x0939, 0x200d, 0x093e, 0x093f, 0x0940, 0x0941, 0x0942, 0x0943, /* 0xd8 - 0xdf */
       0x0946, 0x0947, 0x0948, 0x0945, 0x094a, 0x094b, 0x094c, 0x0949, /* 0xe0 - 0xe7 */
       0x094d, 0x093c, 0x0964, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, /* 0xe8 - 0xef */
       0xffff, 0x0966, 0x0967, 0x0968, 0x0969, 0x096a, 0x096b, 0x096c, /* 0xf0 - 0xf7 */
       0x096d, 0x096e, 0x096f, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff  /* 0xf8 - 0xff */
   };
   1.805 +
   /*
    * Special-case pairs { ISCII byte, Unicode code point }.
    * Row 0 is a header: its first element is the total number of rows in the
    * table (the header row included); its second element is unused.
    * NOTE(review): judging by the name, the pair is presumably applied when a
    * "vowel sign E" follows the listed ISCII byte - confirm against the
    * toUnicode code that consumes this table.
    */
   static const uint16_t vowelSignESpecialCases[][2] = {
       { 2,    0x0000 },   /* header: row count */
       { 0xA4, 0x0904 },
   };
   1.810 +
   /*
    * Special-case pairs { ISCII byte, Unicode code point }.
    * Row 0 is a header: its first element is the total number of rows in the
    * table (the header row included); its second element is unused.
    * NOTE(review): presumably each pair gives the precomposed Unicode character
    * for "ISCII byte followed by NUKTA" - confirm against the toUnicode code
    * that consumes this table.
    */
   static const uint16_t nuktaSpecialCases[][2] = {
       { 16,   0x0000 },   /* header: row count */
       { 0xA6, 0x090C },
       { 0xEA, 0x093D },
       { 0xDF, 0x0944 },
       { 0xA1, 0x0950 },
       { 0xB3, 0x0958 },
       { 0xB4, 0x0959 },
       { 0xB5, 0x095A },
       { 0xBA, 0x095B },
       { 0xBF, 0x095C },
       { 0xC0, 0x095D },
       { 0xC9, 0x095E },
       { 0xAA, 0x0960 },
       { 0xA7, 0x0961 },
       { 0xDB, 0x0962 },
       { 0xDC, 0x0963 },
   };
   1.829 +
   1.830 +
   /*
    * WRITE_TO_TARGET_FROM_U - emit the ISCII bytes packed in targetByteUnit.
    *
    * targetByteUnit holds 1, 2 or 3 bytes, most significant byte first
    * (<= 0xFF: one byte, <= 0xFFFF: two bytes, otherwise three bytes).
    * Bytes are written to *target while target < targetLimit; any bytes that
    * do not fit are appended to args->converter->charErrorBuffer and *err is
    * set to U_BUFFER_OVERFLOW_ERROR.
    *
    * When offsets is non-NULL one offset is stored per byte written to target.
    * The offset is (sourcePtr - args->source - 1); for a three-byte unit it is
    * decremented once more before being stored.
    *
    * Parameters (target and offsets must be modifiable lvalues; both are
    * advanced):
    *   args         pointer to the conversion args (uses ->source, ->converter)
    *   offsets      int32_t* offsets cursor, may be NULL
    *   sourcePtr    current source position (one past the unit being written)
    *   target       output cursor
    *   targetLimit  end of the output buffer
    *   targetByteUnit  packed bytes; evaluated exactly once
    *   err          UErrorCode*
    *
    * Hygiene notes: every parameter is parenthesized, targetByteUnit is copied
    * into a local so it is evaluated once, and the position parameter is not
    * called "source" so that it can no longer capture the member name in
    * (args)->source.  The macro still expands to a brace block (not
    * do/while(0)) so that existing call sites keep compiling unchanged.
    */
   #define WRITE_TO_TARGET_FROM_U(args,offsets,sourcePtr,target,targetLimit,targetByteUnit,err){  \
       const uint32_t isciiBytes_ = (uint32_t)(targetByteUnit);                                    \
       int32_t srcOffset_ = (int32_t)((sourcePtr) - (args)->source - 1);                           \
       if((target) < (targetLimit)){                                                               \
           if(isciiBytes_ <= 0xFF){                                                                \
               /* single byte */                                                                   \
               *(target)++ = (uint8_t)isciiBytes_;                                                 \
               if(offsets){                                                                        \
                   *(offsets)++ = srcOffset_;                                                      \
               }                                                                                   \
           }else{                                                                                  \
               if(isciiBytes_ > 0xFFFF){                                                           \
                   /* three bytes: write the leading byte first */                                 \
                   *(target)++ = (uint8_t)(isciiBytes_ >> 16);                                     \
                   if(offsets){                                                                    \
                       --srcOffset_;                                                               \
                       *(offsets)++ = srcOffset_;                                                  \
                   }                                                                               \
               }                                                                                   \
               if(!((target) < (targetLimit))){                                                    \
                   /* target filled up: spill the remaining two bytes */                           \
                   (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                                   (uint8_t)(isciiBytes_ >> 8);                                    \
                   (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                                   (uint8_t)isciiBytes_;                                           \
                   *(err) = U_BUFFER_OVERFLOW_ERROR;                                               \
               }else{                                                                              \
                   *(target)++ = (uint8_t)(isciiBytes_ >> 8);                                      \
                   if(offsets){                                                                    \
                       *(offsets)++ = srcOffset_;                                                  \
                   }                                                                               \
                   if((target) < (targetLimit)){                                                   \
                       *(target)++ = (uint8_t)isciiBytes_;                                         \
                       if(offsets){                                                                \
                           *(offsets)++ = srcOffset_;                                              \
                       }                                                                           \
                   }else{                                                                          \
                       /* only the last byte did not fit */                                        \
                       (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =\
                                   (uint8_t)isciiBytes_;                                           \
                       *(err) = U_BUFFER_OVERFLOW_ERROR;                                           \
                   }                                                                               \
               }                                                                                   \
           }                                                                                       \
       }else{                                                                                      \
           /* no room at all: everything goes to the error buffer */                               \
           if(isciiBytes_ & 0xFF0000){                                                             \
               (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                           (uint8_t)(isciiBytes_ >> 16);                                           \
           }                                                                                       \
           if(isciiBytes_ & 0xFF00){                                                               \
               (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =    \
                           (uint8_t)(isciiBytes_ >> 8);                                            \
           }                                                                                       \
           (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =        \
                           (uint8_t)isciiBytes_;                                                   \
           *(err) = U_BUFFER_OVERFLOW_ERROR;                                                       \
       }                                                                                           \
   }
   1.885 +
   1.886 +/* Rules:
   1.887 + *    Explicit Halant :
   1.888 + *                      <HALANT> + <ZWNJ>
   1.889 + *    Soft Halant :
   1.890 + *                      <HALANT> + <ZWJ>
   1.891 + */
   1.892 +
   /*
    * Converts the UTF-16 input in [args->source, args->sourceLimit) to ISCII
    * bytes written at args->target, stopping at the first error.
    *
    * Code points in the Indic range are shifted down to the Devanagari block
    * by subtracting the current script delta and looked up in fromUnicodeTable;
    * an ATR + language-code pair is emitted whenever the script block changes.
    *
    * State kept between calls: converterData->currentDeltaFromUnicode,
    * currentMaskFromUnicode, contextCharFromUnicode and isFirstBuffer, plus
    * args->converter->fromUnicodeStatus and args->converter->fromUChar32.
    *
    * On exit args->source and args->target are advanced; args->offsets itself
    * is not written back (only the entries it points to are filled in).
    *
    * *err is set to:
    *   U_ILLEGAL_ARGUMENT_ERROR - NULL converter, or a limit that lies before
    *                              its start pointer (returns without converting)
    *   U_BUFFER_OVERFLOW_ERROR  - by WRITE_TO_TARGET_FROM_U when target is full
    *   U_INVALID_CHAR_FOUND     - code point with no mapping, including any
    *                              well-formed surrogate pair
    *   U_ILLEGAL_CHAR_FOUND     - unpaired surrogate
    */
   1.893 +static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
   1.894 +        UConverterFromUnicodeArgs * args, UErrorCode * err) {
   1.895 +    const UChar *source = args->source;
   1.896 +    const UChar *sourceLimit = args->sourceLimit;
   1.897 +    unsigned char *target = (unsigned char *) args->target;
   1.898 +    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   1.899 +    int32_t* offsets = args->offsets;
   1.900 +    uint32_t targetByteUnit = 0x0000;
   1.901 +    UChar32 sourceChar = 0x0000;
   1.902 +    UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
   1.903 +    UConverterDataISCII *converterData;
   1.904 +    uint16_t newDelta=0;
   1.905 +    uint16_t range = 0;
   1.906 +    UBool deltaChanged = FALSE;
   1.907 +
   1.908 +    if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
   1.909 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
   1.910 +        return;
   1.911 +    }
   1.912 +    /* initialize data */
   1.913 +    converterData=(UConverterDataISCII*)args->converter->extraInfo;
   1.914 +    newDelta=converterData->currentDeltaFromUnicode;
   1.915 +    range = (uint16_t)(newDelta/DELTA);
   1.916 +
              /* A code point left in fromUChar32 by an earlier call is resumed at the
               * trail-surrogate lookup inside the loop body below. */
   1.917 +    if ((sourceChar = args->converter->fromUChar32)!=0) {
   1.918 +        goto getTrail;
   1.919 +    }
   1.920 +
   1.921 +    /*writing the char to the output stream */
   1.922 +    while (source < sourceLimit) {
   1.923 +        /* Write the language code following LF only if LF is not the last character. */
   1.924 +        if (args->converter->fromUnicodeStatus == LF) {
   1.925 +            targetByteUnit = ATR<<8;
   1.926 +            targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
   1.927 +            args->converter->fromUnicodeStatus = 0x0000;
   1.928 +            /* now append ATR and language code */
   1.929 +            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
   1.930 +            if (U_FAILURE(*err)) {
   1.931 +                break;
   1.932 +            }
   1.933 +        }
   1.934 +        
   1.935 +        sourceChar = *source++;
                  /* snapshot the context char before this iteration modifies it;
                   * used by the Adhak check after the switch */
   1.936 +        tempContextFromUnicode = converterData->contextCharFromUnicode;
   1.937 +        
   1.938 +        targetByteUnit = missingCharMarker;
   1.939 +        
   1.940 +        /*check if input is in ASCII and C0 control codes range*/
   1.941 +        if (sourceChar <= ASCII_END) {
                      /* remembered so that the next iteration can detect a preceding LF */
   1.942 +            args->converter->fromUnicodeStatus = sourceChar;
   1.943 +            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
   1.944 +            if (U_FAILURE(*err)) {
   1.945 +                break;
   1.946 +            }
   1.947 +            continue;
   1.948 +        }
   1.949 +        switch (sourceChar) {
   1.950 +        case ZWNJ:
   1.951 +            /* contextChar has HALANT */
   1.952 +            if (converterData->contextCharFromUnicode) {
   1.953 +                converterData->contextCharFromUnicode = 0x00;
   1.954 +                targetByteUnit = ISCII_HALANT;
   1.955 +            } else {
   1.956 +                /* consume ZWNJ and continue */
   1.957 +                converterData->contextCharFromUnicode = 0x00;
   1.958 +                continue;
   1.959 +            }
   1.960 +            break;
   1.961 +        case ZWJ:
   1.962 +            /* contextChar has HALANT */
   1.963 +            if (converterData->contextCharFromUnicode) {
   1.964 +                targetByteUnit = ISCII_NUKTA;
   1.965 +            } else {
   1.966 +                targetByteUnit =ISCII_INV;
   1.967 +            }
   1.968 +            converterData->contextCharFromUnicode = 0x00;
   1.969 +            break;
   1.970 +        default:
   1.971 +            /* is the sourceChar in the INDIC_RANGE? */
                      /* NOTE(review): the (uint16_t) cast makes code points above
                       * INDIC_BLOCK_END wrap to a large value, so this looks like a
                       * single-compare range test - confirm against the definitions of
                       * INDIC_BLOCK_END and INDIC_RANGE. */
   1.972 +            if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
   1.973 +                /* Danda and Double Danda are valid in Northern scripts.. since Unicode
   1.974 +                 * does not include these codepoints in all Northern scripts we need to
   1.975 +                 * filter them out
   1.976 +                 */
   1.977 +                if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
   1.978 +                    /* find out to which block the sourceChar belongs*/
   1.979 +                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
   1.980 +                    newDelta =(uint16_t)(range*DELTA);
   1.981 +
   1.982 +                    /* Now are we in the same block as the previous? */
   1.983 +                    if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
   1.984 +                        converterData->currentDeltaFromUnicode = newDelta;
   1.985 +                        converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
   1.986 +                        deltaChanged =TRUE;
   1.987 +                        converterData->isFirstBuffer=FALSE;
   1.988 +                    }
   1.989 +                    
   1.990 +                    if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { 
   1.991 +                        if (sourceChar == PNJ_TIPPI) {
   1.992 +                            /* Make sure Tippi is converted to Bindi. */
   1.993 +                            sourceChar = PNJ_BINDI;
   1.994 +                        } else if (sourceChar == PNJ_ADHAK) {
   1.995 +                            /* This is for consonant cluster handling. */
   1.996 +                            converterData->contextCharFromUnicode = PNJ_ADHAK;
   1.997 +                        }
   1.998 +                        
   1.999 +                    }
  1.1000 +                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
  1.1001 +                    /* now subtract the new delta from sourceChar*/
  1.1002 +                    sourceChar -= converterData->currentDeltaFromUnicode;
  1.1003 +                }
  1.1004 +
  1.1005 +                /* get the target byte unit */
                          /* the (uint8_t) cast keeps only the low byte of sourceChar as the index */
  1.1006 +                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
  1.1007 +
  1.1008 +                /* is the code point valid in current script? */
  1.1009 +                if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
  1.1010 +                    /* Vocallic RR is assigned in ISCII Telugu and Unicode */
  1.1011 +                    if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
  1.1012 +                        targetByteUnit=missingCharMarker;
  1.1013 +                    }
  1.1014 +                }
  1.1015 +
  1.1016 +                if (deltaChanged) {
  1.1017 +                    /* we are in a script block which is different than
  1.1018 +                     * previous sourceChar's script block write ATR and language codes
  1.1019 +                     */
  1.1020 +                    uint32_t temp=0;
  1.1021 +                    temp =(uint16_t)(ATR<<8);
  1.1022 +                    temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
  1.1023 +                    /* reset */
  1.1024 +                    deltaChanged=FALSE;
  1.1025 +                    /* now append ATR and language code */
  1.1026 +                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
                              /* NOTE: this break leaves the enclosing switch, not the while
                               * loop.  The code after the switch still runs for the current
                               * sourceChar; only the Adhak check and the context reset just
                               * below are skipped. */
  1.1027 +                    if (U_FAILURE(*err)) {
  1.1028 +                        break;
  1.1029 +                    }
  1.1030 +                }
  1.1031 +                
                          /* Adhak itself produces no output here; it was recorded in
                           * contextCharFromUnicode above and is applied to the following
                           * consonant after the switch. */
  1.1032 +                if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
  1.1033 +                    continue;
  1.1034 +                }
  1.1035 +            }
  1.1036 +            /* reset context char */
  1.1037 +            converterData->contextCharFromUnicode = 0x00;
  1.1038 +            break;
  1.1039 +        }
  1.1040 +        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
  1.1041 +            /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
  1.1042 +            /* reset context char */
  1.1043 +            converterData->contextCharFromUnicode = 0x0000;
                      /* pack three bytes: consonant, ISCII_HALANT, consonant */
  1.1044 +            targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
  1.1045 +            /* write targetByteUnit to target */
  1.1046 +            WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
  1.1047 +            if (U_FAILURE(*err)) {
  1.1048 +                break;
  1.1049 +            }
  1.1050 +        } else if (targetByteUnit != missingCharMarker) {
                      /* remember a HALANT so a following ZWNJ/ZWJ can be interpreted */
  1.1051 +            if (targetByteUnit==ISCII_HALANT) {
  1.1052 +                converterData->contextCharFromUnicode = (UChar)targetByteUnit;
  1.1053 +            }
  1.1054 +            /* write targetByteUnit to target*/
  1.1055 +            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
  1.1056 +            if (U_FAILURE(*err)) {
  1.1057 +                break;
  1.1058 +            }
  1.1059 +        } else {
  1.1060 +            /* oops.. the code point is unassigned */
  1.1061 +            /*check if the char is a First surrogate*/
  1.1062 +            if (U16_IS_SURROGATE(sourceChar)) {
  1.1063 +                if (U16_IS_SURROGATE_LEAD(sourceChar)) {
                          /* also entered directly via the goto at the top of the function */
  1.1064 +getTrail:
  1.1065 +                    /*look ahead to find the trail surrogate*/
  1.1066 +                    if (source < sourceLimit) {
  1.1067 +                        /* test the following code unit */
  1.1068 +                        UChar trail= (*source);
  1.1069 +                        if (U16_IS_TRAIL(trail)) {
  1.1070 +                            source++;
  1.1071 +                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                                      /* a complete supplementary code point is always reported as unmappable */
  1.1072 +                            *err =U_INVALID_CHAR_FOUND;
  1.1073 +                            /* convert this surrogate code point */
  1.1074 +                            /* exit this condition tree */
  1.1075 +                        } else {
  1.1076 +                            /* this is an unmatched lead code unit (1st surrogate) */
  1.1077 +                            /* callback(illegal) */
  1.1078 +                            *err=U_ILLEGAL_CHAR_FOUND;
  1.1079 +                        }
  1.1080 +                    } else {
  1.1081 +                        /* no more input */
  1.1082 +                        *err = U_ZERO_ERROR;
  1.1083 +                    }
  1.1084 +                } else {
  1.1085 +                    /* this is an unmatched trail code unit (2nd surrogate) */
  1.1086 +                    /* callback(illegal) */
  1.1087 +                    *err=U_ILLEGAL_CHAR_FOUND;
  1.1088 +                }
  1.1089 +            } else {
  1.1090 +                /* callback(unassigned) for a BMP code point */
  1.1091 +                *err = U_INVALID_CHAR_FOUND;
  1.1092 +            }
  1.1093 +
                      /* save the pending/offending code point; a non-zero value makes the
                       * next call resume at getTrail */
  1.1094 +            args->converter->fromUChar32=sourceChar;
  1.1095 +            break;
  1.1096 +        }
  1.1097 +    }/* end while(mySourceIndex<mySourceLength) */
  1.1098 +
  1.1099 +    /*save the state and return */
  1.1100 +    args->source = source;
  1.1101 +    args->target = (char*)target;
  1.1102 +}
  1.1103 +
  1.1104 +static const uint16_t lookupTable[][2]={
  1.1105 +    { ZERO,       ZERO     },     /*DEFALT*/
  1.1106 +    { ZERO,       ZERO     },     /*ROMAN*/
  1.1107 +    { DEVANAGARI, DEV_MASK },
  1.1108 +    { BENGALI,    BNG_MASK },
  1.1109 +    { TAMIL,      TML_MASK },
  1.1110 +    { TELUGU,     KND_MASK },
  1.1111 +    { BENGALI,    BNG_MASK },
  1.1112 +    { ORIYA,      ORI_MASK },
  1.1113 +    { KANNADA,    KND_MASK },
  1.1114 +    { MALAYALAM,  MLM_MASK },
  1.1115 +    { GUJARATI,   GJR_MASK },
  1.1116 +    { GURMUKHI,   PNJ_MASK }
  1.1117 +};
  1.1118 +
  /*
   * WRITE_TO_TARGET_TO_U - write one UTF-16 code unit to the output.
   *
   * If targetUniChar is above ASCII_END and is not ZWJ, ZWNJ, DANDA or
   * DOUBLE_DANDA, delta (truncated to uint16_t) is first added to it IN PLACE,
   * so targetUniChar must be a modifiable lvalue and keeps the shifted value
   * after the macro has run.
   *
   * The unit is stored at *target (and offset at *offsets when offsets is
   * non-NULL) while target < args->targetLimit; otherwise it is appended to
   * args->converter->UCharErrorBuffer and *err becomes
   * U_BUFFER_OVERFLOW_ERROR.
   *
   * target and offsets must be modifiable lvalues (both are advanced).  The
   * source parameter is accepted but not used.  Every parameter is
   * parenthesized so that expressions such as &argsStruct or *statusPtr are
   * safe arguments.  The macro expands to a brace block, as before.
   */
  #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
      /* add offset to current Indic Block */                                              \
      if((targetUniChar) > ASCII_END &&                                                    \
             (targetUniChar) != ZWJ &&                                                     \
             (targetUniChar) != ZWNJ &&                                                    \
             (targetUniChar) != DANDA &&                                                   \
             (targetUniChar) != DOUBLE_DANDA){                                             \
                                                                                           \
             (targetUniChar) += (uint16_t)(delta);                                         \
      }                                                                                    \
      /* now write the targetUniChar */                                                    \
      if((target) < (args)->targetLimit){                                                  \
          *(target)++ = (UChar)(targetUniChar);                                            \
          if(offsets){                                                                     \
              *(offsets)++ = (int32_t)(offset);                                            \
          }                                                                                \
      }else{                                                                               \
          (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] = \
              (UChar)(targetUniChar);                                                      \
          *(err) = U_BUFFER_OVERFLOW_ERROR;                                                \
      }                                                                                    \
  }
  1.1141 +
  /*
   * GET_MAPPING - look up the Unicode mapping of one ISCII byte.
   *
   * Stores toUnicodeTable[sourceChar] in targetUniChar (which must be a
   * modifiable lvalue).  For bytes above ASCII_END the result is then checked
   * against validityTable, indexed by the low 7 bits of the mapped value and
   * masked with data->currentMaskToUnicode; when the masked value is zero the
   * result is replaced by missingCharMarker, unless the current delta is
   * TELUGU_DELTA and the mapped value is VOCALLIC_RR.
   *
   * data is a pointer to the converter state (currentMaskToUnicode and
   * currentDeltaToUnicode are read).  All parameters are parenthesized so
   * that expressions such as &state are safe arguments.  The macro expands to
   * a brace block, as before.
   */
  #define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
      (targetUniChar) = toUnicodeTable[(sourceChar)];                                      \
      /* is the code point valid in current script? */                                     \
      if((sourceChar) > ASCII_END &&                                                       \
              (validityTable[((targetUniChar) & 0x7F)] & (data)->currentMaskToUnicode)==0){\
          /* Vocallic RR is assigned in ISCII Telugu and Unicode */                        \
          if((data)->currentDeltaToUnicode != (TELUGU_DELTA) ||                            \
                      (targetUniChar) != VOCALLIC_RR){                                     \
              (targetUniChar) = missingCharMarker;                                         \
          }                                                                                \
      }                                                                                    \
  }
  1.1154 +
  1.1155 +/***********
  1.1156 + *  Rules for ISCII to Unicode converter
  1.1157 + *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
  1.1158 + *  which has both precomposed and decomposed forms of characters,
  1.1159 + *  pre-context and post-context need to be considered.
  1.1160 + *
  1.1161 + *  Post context
  1.1162 + *  i)  ATR : Attribute code is used to declare the font and script switching.
  1.1163 + *      Currently we only switch scripts; font codes are consumed without generating an error
  1.1164 + *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
  1.1165 + *      obsolete characters
  1.1166 + *  Pre context
  1.1167 + *  i)  Halant: if preceded by a halant then it is an explicit halant
  1.1168 + *  ii) Nukta :
  1.1169 + *       a) if preceded by a halant then it is a soft halant
  1.1170 + *       b) if preceded by specific consonants and the ligatures have pre-composed
  1.1171 + *          characters in Unicode then convert to pre-composed characters
  1.1172 + *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
  1.1173 + *
  1.1174 + */
  1.1175 +
  1.1176 +static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
  1.1177 +    const char *source = ( char *) args->source;
  1.1178 +    UChar *target = args->target;
  1.1179 +    const char *sourceLimit = args->sourceLimit;
  1.1180 +    const UChar* targetLimit = args->targetLimit;
  1.1181 +    uint32_t targetUniChar = 0x0000;
  1.1182 +    uint8_t sourceChar = 0x0000;
  1.1183 +    UConverterDataISCII* data;
  1.1184 +    UChar32* toUnicodeStatus=NULL;
  1.1185 +    UChar32 tempTargetUniChar = 0x0000;
  1.1186 +    UChar* contextCharToUnicode= NULL;
  1.1187 +    UBool found;
  1.1188 +    int i; 
  1.1189 +    int offset = 0;
  1.1190 +
  1.1191 +    if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
  1.1192 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
  1.1193 +        return;
  1.1194 +    }
  1.1195 +
  1.1196 +    data = (UConverterDataISCII*)(args->converter->extraInfo);
  1.1197 +    contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
  1.1198 +    toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
  1.1199 +
  1.1200 +    while (U_SUCCESS(*err) && source<sourceLimit) {
  1.1201 +
  1.1202 +        targetUniChar = missingCharMarker;
  1.1203 +
  1.1204 +        if (target < targetLimit) {
  1.1205 +            sourceChar = (unsigned char)*(source)++;
  1.1206 +
  1.1207 +            /* look at the post-context preform special processing */
  1.1208 +            if (*contextCharToUnicode==ATR) {
  1.1209 +
  1.1210 +                /* If we have ATR in *contextCharToUnicode then we need to change our
  1.1211 +                 * state to the Indic Script specified by sourceChar
  1.1212 +                 */
  1.1213 +
  1.1214 +                /* check if the sourceChar is supported script range*/
  1.1215 +                if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
  1.1216 +                    data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
  1.1217 +                    data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
  1.1218 +                } else if (sourceChar==DEF) {
  1.1219 +                    /* switch back to default */
  1.1220 +                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
  1.1221 +                    data->currentMaskToUnicode = data->defMaskToUnicode;
  1.1222 +                } else {
  1.1223 +                    if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
  1.1224 +                        /* these are display codes consume and continue */
  1.1225 +                    } else {
  1.1226 +                        *err =U_ILLEGAL_CHAR_FOUND;
  1.1227 +                        /* reset */
  1.1228 +                        *contextCharToUnicode=NO_CHAR_MARKER;
  1.1229 +                        goto CALLBACK;
  1.1230 +                    }
  1.1231 +                }
  1.1232 +
  1.1233 +                /* reset */
  1.1234 +                *contextCharToUnicode=NO_CHAR_MARKER;
  1.1235 +
  1.1236 +                continue;
  1.1237 +
  1.1238 +            } else if (*contextCharToUnicode==EXT) {
  1.1239 +                /* check if sourceChar is in 0xA1-0xEE range */
  1.1240 +                if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
  1.1241 +                    /* We currently support only Anudatta and Devanagari abbreviation sign */
  1.1242 +                    if (sourceChar==0xBF || sourceChar == 0xB8) {
  1.1243 +                        targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
  1.1244 +                        
  1.1245 +                        /* find out if the mapping is valid in this state */
  1.1246 +                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
  1.1247 +                            *contextCharToUnicode= NO_CHAR_MARKER;
  1.1248 +
  1.1249 +                            /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1.1250 +                            if (data->prevToUnicodeStatus) {
  1.1251 +                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1.1252 +                                data->prevToUnicodeStatus = 0x0000;
  1.1253 +                            }
  1.1254 +                            /* write to target */
  1.1255 +                            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
  1.1256 +
  1.1257 +                            continue;
  1.1258 +                        }
  1.1259 +                    }
  1.1260 +                    /* byte unit is unassigned */
  1.1261 +                    targetUniChar = missingCharMarker;
  1.1262 +                    *err= U_INVALID_CHAR_FOUND;
  1.1263 +                } else {
  1.1264 +                    /* only 0xA1 - 0xEE are legal after EXT char */
  1.1265 +                    *contextCharToUnicode= NO_CHAR_MARKER;
  1.1266 +                    *err = U_ILLEGAL_CHAR_FOUND;
  1.1267 +                }
  1.1268 +                goto CALLBACK;
  1.1269 +            } else if (*contextCharToUnicode==ISCII_INV) {
  1.1270 +                if (sourceChar==ISCII_HALANT) {
  1.1271 +                    targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
  1.1272 +                } else {
  1.1273 +                    targetUniChar = ZWJ;
  1.1274 +                }
  1.1275 +
  1.1276 +                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1.1277 +                if (data->prevToUnicodeStatus) {
  1.1278 +                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1.1279 +                    data->prevToUnicodeStatus = 0x0000;
  1.1280 +                }
  1.1281 +                /* write to target */
  1.1282 +                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
  1.1283 +                /* reset */
  1.1284 +                *contextCharToUnicode=NO_CHAR_MARKER;
  1.1285 +            }
  1.1286 +
  1.1287 +            /* look at the pre-context and perform special processing */
  1.1288 +            switch (sourceChar) {
  1.1289 +            case ISCII_INV:
  1.1290 +            case EXT: /*falls through*/
  1.1291 +            case ATR:
  1.1292 +                *contextCharToUnicode = (UChar)sourceChar;
  1.1293 +
  1.1294 +                if (*toUnicodeStatus != missingCharMarker) {
  1.1295 +                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1.1296 +                    if (data->prevToUnicodeStatus) {
  1.1297 +                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1.1298 +                        data->prevToUnicodeStatus = 0x0000;
  1.1299 +                    }
  1.1300 +                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
  1.1301 +                    *toUnicodeStatus = missingCharMarker;
  1.1302 +                }
  1.1303 +                continue;
  1.1304 +            case ISCII_DANDA:
  1.1305 +                /* handle double danda*/
  1.1306 +                if (*contextCharToUnicode== ISCII_DANDA) {
  1.1307 +                    targetUniChar = DOUBLE_DANDA;
  1.1308 +                    /* clear the context */
  1.1309 +                    *contextCharToUnicode = NO_CHAR_MARKER;
  1.1310 +                    *toUnicodeStatus = missingCharMarker;
  1.1311 +                } else {
  1.1312 +                    GET_MAPPING(sourceChar,targetUniChar,data);
  1.1313 +                    *contextCharToUnicode = sourceChar;
  1.1314 +                }
  1.1315 +                break;
  1.1316 +            case ISCII_HALANT:
  1.1317 +                /* handle explicit halant */
  1.1318 +                if (*contextCharToUnicode == ISCII_HALANT) {
  1.1319 +                    targetUniChar = ZWNJ;
  1.1320 +                    /* clear the context */
  1.1321 +                    *contextCharToUnicode = NO_CHAR_MARKER;
  1.1322 +                } else {
  1.1323 +                    GET_MAPPING(sourceChar,targetUniChar,data);
  1.1324 +                    *contextCharToUnicode = sourceChar;
  1.1325 +                }
  1.1326 +                break;
  1.1327 +            case 0x0A:
  1.1328 +                /* fall through */
  1.1329 +            case 0x0D:
  1.1330 +                data->resetToDefaultToUnicode = TRUE;
  1.1331 +                GET_MAPPING(sourceChar,targetUniChar,data)
  1.1332 +                ;
  1.1333 +                *contextCharToUnicode = sourceChar;
  1.1334 +                break;
  1.1335 +
  1.1336 +            case ISCII_VOWEL_SIGN_E:
  1.1337 +                i=1;
  1.1338 +                found=FALSE;
  1.1339 +                for (; i<vowelSignESpecialCases[0][0]; i++) {
  1.1340 +                    U_ASSERT(i<sizeof(vowelSignESpecialCases)/sizeof(vowelSignESpecialCases[0]));
  1.1341 +                    if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
  1.1342 +                        targetUniChar=vowelSignESpecialCases[i][1];
  1.1343 +                        found=TRUE;
  1.1344 +                        break;
  1.1345 +                    }
  1.1346 +                }
  1.1347 +                if (found) {
  1.1348 +                    /* find out if the mapping is valid in this state */
  1.1349 +                    if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
  1.1350 +                        /*targetUniChar += data->currentDeltaToUnicode ;*/
  1.1351 +                        *contextCharToUnicode= NO_CHAR_MARKER;
  1.1352 +                        *toUnicodeStatus = missingCharMarker;
  1.1353 +                        break;
  1.1354 +                    }
  1.1355 +                }
  1.1356 +                GET_MAPPING(sourceChar,targetUniChar,data);
  1.1357 +                *contextCharToUnicode = sourceChar;
  1.1358 +                break;
  1.1359 +
  1.1360 +            case ISCII_NUKTA:
  1.1361 +                /* handle soft halant */
  1.1362 +                if (*contextCharToUnicode == ISCII_HALANT) {
  1.1363 +                    targetUniChar = ZWJ;
  1.1364 +                    /* clear the context */
  1.1365 +                    *contextCharToUnicode = NO_CHAR_MARKER;
  1.1366 +                    break;
  1.1367 +                } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
  1.1368 +                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1.1369 +                    if (data->prevToUnicodeStatus) {
  1.1370 +                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1.1371 +                        data->prevToUnicodeStatus = 0x0000;
  1.1372 +                    }
  1.1373 +                    /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
  1.1374 +                     * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
  1.1375 +                     */
  1.1376 +                    targetUniChar = PNJ_RRA;
  1.1377 +                    WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
  1.1378 +                    if (U_SUCCESS(*err)) {
  1.1379 +                        targetUniChar = PNJ_SIGN_VIRAMA;
  1.1380 +                        WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
  1.1381 +                        if (U_SUCCESS(*err)) {
  1.1382 +                            targetUniChar = PNJ_HA;
  1.1383 +                            WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
  1.1384 +                        } else {
  1.1385 +                            args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
  1.1386 +                        }
  1.1387 +                    } else {
  1.1388 +                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
  1.1389 +                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
  1.1390 +                    }
  1.1391 +                    *toUnicodeStatus = missingCharMarker;
  1.1392 +                    data->contextCharToUnicode = NO_CHAR_MARKER;
  1.1393 +                    continue;
  1.1394 +                } else {
  1.1395 +                    /* try to handle <CHAR> + ISCII_NUKTA special mappings */
  1.1396 +                    i=1;
  1.1397 +                    found =FALSE;
  1.1398 +                    for (; i<nuktaSpecialCases[0][0]; i++) {
  1.1399 +                        if (nuktaSpecialCases[i][0]==(uint8_t)
  1.1400 +                                *contextCharToUnicode) {
  1.1401 +                            targetUniChar=nuktaSpecialCases[i][1];
  1.1402 +                            found =TRUE;
  1.1403 +                            break;
  1.1404 +                        }
  1.1405 +                    }
  1.1406 +                    if (found) {
  1.1407 +                        /* find out if the mapping is valid in this state */
  1.1408 +                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
  1.1409 +                            /*targetUniChar += data->currentDeltaToUnicode ;*/
  1.1410 +                            *contextCharToUnicode= NO_CHAR_MARKER;
  1.1411 +                            *toUnicodeStatus = missingCharMarker;
  1.1412 +                            if (data->currentDeltaToUnicode == PNJ_DELTA) {
  1.1413 +                                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1.1414 +                                if (data->prevToUnicodeStatus) {
  1.1415 +                                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1.1416 +                                    data->prevToUnicodeStatus = 0x0000;
  1.1417 +                                }
  1.1418 +                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
  1.1419 +                                continue;
  1.1420 +                            }
  1.1421 +                            break;
  1.1422 +                        }
  1.1423 +                        /* else fall through to default */
  1.1424 +                    }
  1.1425 +                    /* else fall through to default */
  1.1426 +                }
  1.1427 +            default:GET_MAPPING(sourceChar,targetUniChar,data)
  1.1428 +                ;
  1.1429 +                *contextCharToUnicode = sourceChar;
  1.1430 +                break;
  1.1431 +            }
  1.1432 +
  1.1433 +            if (*toUnicodeStatus != missingCharMarker) {
  1.1434 +                /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
  1.1435 +                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
  1.1436 +                        (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
  1.1437 +                    /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
  1.1438 +                    offset = (int)(source-args->source - 3);
  1.1439 +                    tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
  1.1440 +                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
  1.1441 +                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
  1.1442 +                    data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
  1.1443 +                    *toUnicodeStatus = missingCharMarker;
  1.1444 +                    continue;
  1.1445 +                } else {
  1.1446 +                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1.1447 +                    if (data->prevToUnicodeStatus) {
  1.1448 +                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1.1449 +                        data->prevToUnicodeStatus = 0x0000;
  1.1450 +                    }
  1.1451 +                    /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 
  1.1452 +                     * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
  1.1453 +                     */
  1.1454 +                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
  1.1455 +                        targetUniChar = PNJ_TIPPI - PNJ_DELTA;
  1.1456 +                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
  1.1457 +                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
  1.1458 +                        /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
  1.1459 +                        data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
  1.1460 +                    } else {
  1.1461 +                        /* write the previously mapped codepoint */
  1.1462 +                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
  1.1463 +                    }
  1.1464 +                }
  1.1465 +                *toUnicodeStatus = missingCharMarker;
  1.1466 +            }
  1.1467 +
  1.1468 +            if (targetUniChar != missingCharMarker) {
  1.1469 +                /* now save the targetUniChar for delayed write */
  1.1470 +                *toUnicodeStatus = (UChar) targetUniChar;
  1.1471 +                if (data->resetToDefaultToUnicode==TRUE) {
  1.1472 +                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
  1.1473 +                    data->currentMaskToUnicode = data->defMaskToUnicode;
  1.1474 +                    data->resetToDefaultToUnicode=FALSE;
  1.1475 +                }
  1.1476 +            } else {
  1.1477 +
  1.1478 +                /* we reach here only if targetUniChar == missingCharMarker
  1.1479 +                 * so assign codes to reason and err
  1.1480 +                 */
  1.1481 +                *err = U_INVALID_CHAR_FOUND;
  1.1482 +CALLBACK:
  1.1483 +                args->converter->toUBytes[0] = (uint8_t) sourceChar;
  1.1484 +                args->converter->toULength = 1;
  1.1485 +                break;
  1.1486 +            }
  1.1487 +
  1.1488 +        } else {
  1.1489 +            *err =U_BUFFER_OVERFLOW_ERROR;
  1.1490 +            break;
  1.1491 +        }
  1.1492 +    }
  1.1493 +
  1.1494 +    if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
  1.1495 +        /* end of the input stream */
  1.1496 +        UConverter *cnv = args->converter;
  1.1497 +
  1.1498 +        if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
  1.1499 +            /* set toUBytes[] */
  1.1500 +            cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
  1.1501 +            cnv->toULength = 1;
  1.1502 +
  1.1503 +            /* avoid looping on truncated sequences */
  1.1504 +            *contextCharToUnicode = NO_CHAR_MARKER;
  1.1505 +        } else {
  1.1506 +            cnv->toULength = 0;
  1.1507 +        }
  1.1508 +
  1.1509 +        if (*toUnicodeStatus != missingCharMarker) {
  1.1510 +            /* output a remaining target character */
  1.1511 +            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
  1.1512 +            *toUnicodeStatus = missingCharMarker;
  1.1513 +        }
  1.1514 +    }
  1.1515 +
  1.1516 +    args->target = target;
  1.1517 +    args->source = source;
  1.1518 +}
  1.1519 +
/* structure for SafeClone calculations:
 * lays out a converter and its ISCII-specific data in one contiguous
 * buffer, so that _ISCII_SafeClone can point cnv.extraInfo at mydata.
 */
struct cloneISCIIStruct {
    UConverter cnv;             /* the cloned converter itself */
    UConverterDataISCII mydata; /* copy of the source converter's extraInfo */
};
  1.1525 +
  1.1526 +static UConverter *
  1.1527 +_ISCII_SafeClone(const UConverter *cnv,
  1.1528 +              void *stackBuffer,
  1.1529 +              int32_t *pBufferSize,
  1.1530 +              UErrorCode *status)
  1.1531 +{
  1.1532 +    struct cloneISCIIStruct * localClone;
  1.1533 +    int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
  1.1534 +
  1.1535 +    if (U_FAILURE(*status)) {
  1.1536 +        return 0;
  1.1537 +    }
  1.1538 +
  1.1539 +    if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
  1.1540 +        *pBufferSize = bufferSizeNeeded;
  1.1541 +        return 0;
  1.1542 +    }
  1.1543 +
  1.1544 +    localClone = (struct cloneISCIIStruct *)stackBuffer;
  1.1545 +    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
  1.1546 +
  1.1547 +    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
  1.1548 +    localClone->cnv.extraInfo = &localClone->mydata;
  1.1549 +    localClone->cnv.isExtraLocal = TRUE;
  1.1550 +
  1.1551 +    return &localClone->cnv;
  1.1552 +}
  1.1553 +
  1.1554 +static void
  1.1555 +_ISCIIGetUnicodeSet(const UConverter *cnv,
  1.1556 +                    const USetAdder *sa,
  1.1557 +                    UConverterUnicodeSet which,
  1.1558 +                    UErrorCode *pErrorCode)
  1.1559 +{
  1.1560 +    int32_t idx, script;
  1.1561 +    uint8_t mask;
  1.1562 +
  1.1563 +    /* Since all ISCII versions allow switching to other ISCII
  1.1564 +    scripts, we add all roundtrippable characters to this set. */
  1.1565 +    sa->addRange(sa->set, 0, ASCII_END);
  1.1566 +    for (script = DEVANAGARI; script <= MALAYALAM; script++) {
  1.1567 +        mask = (uint8_t)(lookupInitialData[script].maskEnum);
  1.1568 +        for (idx = 0; idx < DELTA; idx++) {
  1.1569 +            /* added check for TELUGU character */
  1.1570 +            if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
  1.1571 +                sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
  1.1572 +            }
  1.1573 +        }
  1.1574 +    }
  1.1575 +    sa->add(sa->set, DANDA);
  1.1576 +    sa->add(sa->set, DOUBLE_DANDA);
  1.1577 +    sa->add(sa->set, ZWNJ);
  1.1578 +    sa->add(sa->set, ZWJ);
  1.1579 +}
  1.1580 +
/*
 * Function table for the ISCII converter.
 * NOTE(review): the initializer is positional, so each slot's meaning is
 * fixed by the field order of UConverterImpl, which is declared outside this
 * file (presumably ucnv_cnv.h) -- confirm the order there before editing.
 * NULL slots leave the corresponding operation unset.
 */
static const UConverterImpl _ISCIIImpl={

    UCNV_ISCII, /* converter type */

    NULL,
    NULL,

    _ISCIIOpen,
    _ISCIIClose,
    _ISCIIReset,

    /* the same routine fills both adjacent slots in each direction; the
     * to-Unicode routine records offsets itself when args->offsets is set */
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
    NULL,

    NULL,
    _ISCIIgetName,
    NULL,
    _ISCII_SafeClone,
    _ISCIIGetUnicodeSet
};
  1.1604 +
/*
 * Static description of the ISCII converter.
 * NOTE(review): positional initializer; the field meanings come from
 * UConverterStaticData, declared outside this file (presumably ucnv_bld.h).
 * The per-line notes below are assumptions to confirm against that struct.
 */
static const UConverterStaticData _ISCIIStaticData={
    sizeof(UConverterStaticData), /* size of this structure */
        "ISCII",                  /* converter name */
         0,
         UCNV_IBM,                /* presumably the platform -- TODO confirm */
         UCNV_ISCII,              /* converter type, same constant as in _ISCIIImpl */
         1,                       /* presumably min bytes per character -- TODO confirm */
         4,                       /* presumably max bytes per character -- TODO confirm */
        { 0x1a, 0, 0, 0 },        /* presumably the substitution byte sequence -- TODO confirm */
        0x1,                      /* presumably the length of that sequence -- TODO confirm */
        FALSE,
        FALSE,
        0x0,
        0x0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
  1.1622 +
/*
 * Shared-data record for the ISCII converter; the only non-static object
 * defined in this part of the file. It ties together the static description
 * (_ISCIIStaticData) and the function table (_ISCIIImpl).
 * NOTE(review): positional initializer; the field meanings come from
 * UConverterSharedData, declared outside this file -- confirm there.
 */
const UConverterSharedData _ISCIIData={
    sizeof(UConverterSharedData), /* size of this structure */
        ~((uint32_t) 0),          /* all bits set; presumably a reference count that marks static data -- TODO confirm */
        NULL,
        NULL,
        &_ISCIIStaticData,
        FALSE,
        &_ISCIIImpl,
        0
};
  1.1633 +
  1.1634 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

mercurial