The Tor Browser: comparison intl/icu/source/common/ucnvisci.c

--1:000000000000
+:878c354ec3c1
+/*
+**********************************************************************
+*   Copyright (C) 2000-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  ucnvisci.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001JUN26
+*   created by: Ram Viswanadha
+*
+*   Date        Name        Description
+*   24/7/2001   Ram         Added support for EXT character handling
+*/
+#include "unicode/utypes.h"
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cstring.h"
+#include "uassert.h"
+#define UCNV_OPTIONS_VERSION_MASK 0xf
+#define NUKTA               0x093c
+#define HALANT              0x094d
+#define ZWNJ                0x200c /* Zero Width Non Joiner */
+#define ZWJ                 0x200d /* Zero width Joiner */
+#define INVALID_CHAR        0xffff
+#define ATR                 0xEF   /* Attribute code */
+#define EXT                 0xF0   /* Extension code */
+#define DANDA               0x0964
+#define DOUBLE_DANDA        0x0965
+#define ISCII_NUKTA         0xE9
+#define ISCII_HALANT        0xE8
+#define ISCII_DANDA         0xEA
+#define ISCII_INV           0xD9
+#define ISCII_VOWEL_SIGN_E  0xE0
+#define INDIC_BLOCK_BEGIN   0x0900
+#define INDIC_BLOCK_END     0x0D7F
+#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
+#define VOCALLIC_RR         0x0931
+#define LF                  0x0A
+#define ASCII_END           0xA0
+#define NO_CHAR_MARKER      0xFFFE
+#define TELUGU_DELTA        DELTA * TELUGU
+#define DEV_ABBR_SIGN       0x0970
+#define DEV_ANUDATTA        0x0952
+#define EXT_RANGE_BEGIN     0xA1
+#define EXT_RANGE_END       0xEE
+#define PNJ_DELTA           0x0100
+#define PNJ_BINDI           0x0A02
+#define PNJ_TIPPI           0x0A70
+#define PNJ_SIGN_VIRAMA     0x0A4D
+#define PNJ_ADHAK           0x0A71
+#define PNJ_HA              0x0A39
+#define PNJ_RRA             0x0A5C
+typedef enum {
+DEVANAGARI =0,
+BENGALI,
+GURMUKHI,
+GUJARATI,
+ORIYA,
+TAMIL,
+TELUGU,
+KANNADA,
+MALAYALAM,
+DELTA=0x80
+}UniLang;
+/**
+* Enumeration for switching code pages if <ATR>+<one of below values>
+* is encountered
+*/
+typedef enum {
+DEF = 0x40,
+RMN = 0x41,
+DEV = 0x42,
+BNG = 0x43,
+TML = 0x44,
+TLG = 0x45,
+ASM = 0x46,
+ORI = 0x47,
+KND = 0x48,
+MLM = 0x49,
+GJR = 0x4A,
+PNJ = 0x4B,
+ARB = 0x71,
+PES = 0x72,
+URD = 0x73,
+SND = 0x74,
+KSM = 0x75,
+PST = 0x76
+}ISCIILang;
+typedef enum {
+DEV_MASK =0x80,
+PNJ_MASK =0x40,
+GJR_MASK =0x20,
+ORI_MASK =0x10,
+BNG_MASK =0x08,
+KND_MASK =0x04,
+MLM_MASK =0x02,
+TML_MASK =0x01,
+ZERO =0x00
+}MaskEnum;
+#define ISCII_CNV_PREFIX "ISCII,version="
+typedef struct {
+UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis */
+UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis */
+uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
+uint16_t currentDeltaFromUnicode;   /* current delta in Indic block */
+uint16_t currentDeltaToUnicode;     /* current delta in Indic block */
+MaskEnum currentMaskFromUnicode;    /* mask for current state in toUnicode */
+MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
+MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
+UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
+UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
+char name[sizeof(ISCII_CNV_PREFIX) + 1];
+UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
+} UConverterDataISCII;
+typedef struct LookupDataStruct {
+UniLang uniLang;
+MaskEnum maskEnum;
+ISCIILang isciiLang;
+} LookupDataStruct;
+static const LookupDataStruct lookupInitialData[]={
+{ DEVANAGARI, DEV_MASK,  DEV },
+{ BENGALI,    BNG_MASK,  BNG },
+{ GURMUKHI,   PNJ_MASK,  PNJ },
+{ GUJARATI,   GJR_MASK,  GJR },
+{ ORIYA,      ORI_MASK,  ORI },
+{ TAMIL,      TML_MASK,  TML },
+{ TELUGU,     KND_MASK,  TLG },
+{ KANNADA,    KND_MASK,  KND },
+{ MALAYALAM,  MLM_MASK,  MLM }
+};
+/*
+* For special handling of certain Gurmukhi characters.
+* Bit 0 (value 1): PNJ consonant
+* Bit 1 (value 2): PNJ Bindi Tippi
+*/
+static const uint8_t pnjMap[80] = {
+/* 0A00..0A0F */
+0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
+/* 0A10..0A1F */
+0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
+/* 0A20..0A2F */
+3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
+/* 0A30..0A3F */
+3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
+/* 0A40..0A4F */
+0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
+};
+static UBool
+isPNJConsonant(UChar32 c) {
+if (c < 0xa00 || 0xa50 <= c) {
+return FALSE;
+} else {
+return (UBool)(pnjMap[c - 0xa00] & 1);
+}
+}
+static UBool
+isPNJBindiTippi(UChar32 c) {
+if (c < 0xa00 || 0xa50 <= c) {
+return FALSE;
+} else {
+return (UBool)(pnjMap[c - 0xa00] >> 1);
+}
+}
+static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
+if(pArgs->onlyTestIsLoadable) {
+return;
+}
+cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
+if (cnv->extraInfo != NULL) {
+int32_t len=0;
+UConverterDataISCII *converterData=
+(UConverterDataISCII *) cnv->extraInfo;
+converterData->contextCharToUnicode=NO_CHAR_MARKER;
+cnv->toUnicodeStatus = missingCharMarker;
+converterData->contextCharFromUnicode=0x0000;
+converterData->resetToDefaultToUnicode=FALSE;
+/* check if the version requested is supported */
+if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
+/* initialize state variables */
+converterData->currentDeltaFromUnicode
+= converterData->currentDeltaToUnicode
+= converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
+converterData->currentMaskFromUnicode
+= converterData->currentMaskToUnicode
+= converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
+converterData->isFirstBuffer=TRUE;
+(void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
+len = (int32_t)uprv_strlen(converterData->name);
+converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
+converterData->name[len+1]=0;
+converterData->prevToUnicodeStatus = 0x0000;
+} else {
+uprv_free(cnv->extraInfo);
+cnv->extraInfo = NULL;
+*errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+}
+} else {
+*errorCode =U_MEMORY_ALLOCATION_ERROR;
+}
+}
+static void _ISCIIClose(UConverter *cnv) {
+if (cnv->extraInfo!=NULL) {
+if (!cnv->isExtraLocal) {
+uprv_free(cnv->extraInfo);
+}
+cnv->extraInfo=NULL;
+}
+}
+static const char* _ISCIIgetName(const UConverter* cnv) {
+if (cnv->extraInfo) {
+UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
+return myData->name;
+}
+return NULL;
+}
+static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
+UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
+if (choice<=UCNV_RESET_TO_UNICODE) {
+cnv->toUnicodeStatus = missingCharMarker;
+cnv->mode=0;
+data->currentDeltaToUnicode=data->defDeltaToUnicode;
+data->currentMaskToUnicode = data->defMaskToUnicode;
+data->contextCharToUnicode=NO_CHAR_MARKER;
+data->prevToUnicodeStatus = 0x0000;
+}
+if (choice!=UCNV_RESET_TO_UNICODE) {
+cnv->fromUChar32=0x0000;
+data->contextCharFromUnicode=0x00;
+data->currentMaskFromUnicode=data->defMaskToUnicode;
+data->currentDeltaFromUnicode=data->defDeltaToUnicode;
+data->isFirstBuffer=TRUE;
+data->resetToDefaultToUnicode=FALSE;
+}
+}
+/**
+* The values in validity table are indexed by the lower bits of Unicode
+* range 0x0900 - 0x09ff. The values have a structure like:
+*       ---------------------------------------------------------------
+*      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
+*      |       |       |       |       | ASM   | KND   |       |       |
+*       ---------------------------------------------------------------
+* If a code point is valid in a particular script
+* then that bit is turned on
+*
+* Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
+* to represent these languages
+*
+* Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
+* and combine and use 1 bit to represent these languages.
+*
+* TODO: It is probably easier to understand and maintain to change this
+* to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
+*/
+static const uint8_t validityTable[128] = {
+/* This state table is tool generated please do not edit unless you know exactly what you are doing */
+/* Note: This table was edited to mirror the Windows XP implementation */
+/*ISCII:Valid:Unicode */
+/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
+/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
+/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
+/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
+/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
+/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
+/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
+/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
+/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
+/*
+* The length of the array is 128 to provide values for 0x900..0x97f.
+* The last 15 entries for 0x971..0x97f of the validity table are all zero
+* because no Indic script uses such Unicode code points.
+*/
+/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
+};
+static const uint16_t fromUnicodeTable[128]={
+0x00a0 ,/* 0x0900 */
+0x00a1 ,/* 0x0901 */
+0x00a2 ,/* 0x0902 */
+0x00a3 ,/* 0x0903 */
+0xa4e0 ,/* 0x0904 */
+0x00a4 ,/* 0x0905 */
+0x00a5 ,/* 0x0906 */
+0x00a6 ,/* 0x0907 */
+0x00a7 ,/* 0x0908 */
+0x00a8 ,/* 0x0909 */
+0x00a9 ,/* 0x090a */
+0x00aa ,/* 0x090b */
+0xA6E9 ,/* 0x090c */
+0x00ae ,/* 0x090d */
+0x00ab ,/* 0x090e */
+0x00ac ,/* 0x090f */
+0x00ad ,/* 0x0910 */
+0x00b2 ,/* 0x0911 */
+0x00af ,/* 0x0912 */
+0x00b0 ,/* 0x0913 */
+0x00b1 ,/* 0x0914 */
+0x00b3 ,/* 0x0915 */
+0x00b4 ,/* 0x0916 */
+0x00b5 ,/* 0x0917 */
+0x00b6 ,/* 0x0918 */
+0x00b7 ,/* 0x0919 */
+0x00b8 ,/* 0x091a */
+0x00b9 ,/* 0x091b */
+0x00ba ,/* 0x091c */
+0x00bb ,/* 0x091d */
+0x00bc ,/* 0x091e */
+0x00bd ,/* 0x091f */
+0x00be ,/* 0x0920 */
+0x00bf ,/* 0x0921 */
+0x00c0 ,/* 0x0922 */
+0x00c1 ,/* 0x0923 */
+0x00c2 ,/* 0x0924 */
+0x00c3 ,/* 0x0925 */
+0x00c4 ,/* 0x0926 */
+0x00c5 ,/* 0x0927 */
+0x00c6 ,/* 0x0928 */
+0x00c7 ,/* 0x0929 */
+0x00c8 ,/* 0x092a */
+0x00c9 ,/* 0x092b */
+0x00ca ,/* 0x092c */
+0x00cb ,/* 0x092d */
+0x00cc ,/* 0x092e */
+0x00cd ,/* 0x092f */
+0x00cf ,/* 0x0930 */
+0x00d0 ,/* 0x0931 */
+0x00d1 ,/* 0x0932 */
+0x00d2 ,/* 0x0933 */
+0x00d3 ,/* 0x0934 */
+0x00d4 ,/* 0x0935 */
+0x00d5 ,/* 0x0936 */
+0x00d6 ,/* 0x0937 */
+0x00d7 ,/* 0x0938 */
+0x00d8 ,/* 0x0939 */
+0xFFFF ,/* 0x093A */
+0xFFFF ,/* 0x093B */
+0x00e9 ,/* 0x093c */
+0xEAE9 ,/* 0x093d */
+0x00da ,/* 0x093e */
+0x00db ,/* 0x093f */
+0x00dc ,/* 0x0940 */
+0x00dd ,/* 0x0941 */
+0x00de ,/* 0x0942 */
+0x00df ,/* 0x0943 */
+0xDFE9 ,/* 0x0944 */
+0x00e3 ,/* 0x0945 */
+0x00e0 ,/* 0x0946 */
+0x00e1 ,/* 0x0947 */
+0x00e2 ,/* 0x0948 */
+0x00e7 ,/* 0x0949 */
+0x00e4 ,/* 0x094a */
+0x00e5 ,/* 0x094b */
+0x00e6 ,/* 0x094c */
+0x00e8 ,/* 0x094d */
+0x00ec ,/* 0x094e */
+0x00ed ,/* 0x094f */
+0xA1E9 ,/* 0x0950 */ /* OM Symbol */
+0xFFFF ,/* 0x0951 */
+0xF0B8 ,/* 0x0952 */
+0xFFFF ,/* 0x0953 */
+0xFFFF ,/* 0x0954 */
+0xFFFF ,/* 0x0955 */
+0xFFFF ,/* 0x0956 */
+0xFFFF ,/* 0x0957 */
+0xb3e9 ,/* 0x0958 */
+0xb4e9 ,/* 0x0959 */
+0xb5e9 ,/* 0x095a */
+0xbae9 ,/* 0x095b */
+0xbfe9 ,/* 0x095c */
+0xC0E9 ,/* 0x095d */
+0xc9e9 ,/* 0x095e */
+0x00ce ,/* 0x095f */
+0xAAe9 ,/* 0x0960 */
+0xA7E9 ,/* 0x0961 */
+0xDBE9 ,/* 0x0962 */
+0xDCE9 ,/* 0x0963 */
+0x00ea ,/* 0x0964 */
+0xeaea ,/* 0x0965 */
+0x00f1 ,/* 0x0966 */
+0x00f2 ,/* 0x0967 */
+0x00f3 ,/* 0x0968 */
+0x00f4 ,/* 0x0969 */
+0x00f5 ,/* 0x096a */
+0x00f6 ,/* 0x096b */
+0x00f7 ,/* 0x096c */
+0x00f8 ,/* 0x096d */
+0x00f9 ,/* 0x096e */
+0x00fa ,/* 0x096f */
+0xF0BF ,/* 0x0970 */
+0xFFFF ,/* 0x0971 */
+0xFFFF ,/* 0x0972 */
+0xFFFF ,/* 0x0973 */
+0xFFFF ,/* 0x0974 */
+0xFFFF ,/* 0x0975 */
+0xFFFF ,/* 0x0976 */
+0xFFFF ,/* 0x0977 */
+0xFFFF ,/* 0x0978 */
+0xFFFF ,/* 0x0979 */
+0xFFFF ,/* 0x097a */
+0xFFFF ,/* 0x097b */
+0xFFFF ,/* 0x097c */
+0xFFFF ,/* 0x097d */
+0xFFFF ,/* 0x097e */
+0xFFFF ,/* 0x097f */
+};
+static const uint16_t toUnicodeTable[256]={
+0x0000,/* 0x00 */
+0x0001,/* 0x01 */
+0x0002,/* 0x02 */
+0x0003,/* 0x03 */
+0x0004,/* 0x04 */
+0x0005,/* 0x05 */
+0x0006,/* 0x06 */
+0x0007,/* 0x07 */
+0x0008,/* 0x08 */
+0x0009,/* 0x09 */
+0x000a,/* 0x0a */
+0x000b,/* 0x0b */
+0x000c,/* 0x0c */
+0x000d,/* 0x0d */
+0x000e,/* 0x0e */
+0x000f,/* 0x0f */
+0x0010,/* 0x10 */
+0x0011,/* 0x11 */
+0x0012,/* 0x12 */
+0x0013,/* 0x13 */
+0x0014,/* 0x14 */
+0x0015,/* 0x15 */
+0x0016,/* 0x16 */
+0x0017,/* 0x17 */
+0x0018,/* 0x18 */
+0x0019,/* 0x19 */
+0x001a,/* 0x1a */
+0x001b,/* 0x1b */
+0x001c,/* 0x1c */
+0x001d,/* 0x1d */
+0x001e,/* 0x1e */
+0x001f,/* 0x1f */
+0x0020,/* 0x20 */
+0x0021,/* 0x21 */
+0x0022,/* 0x22 */
+0x0023,/* 0x23 */
+0x0024,/* 0x24 */
+0x0025,/* 0x25 */
+0x0026,/* 0x26 */
+0x0027,/* 0x27 */
+0x0028,/* 0x28 */
+0x0029,/* 0x29 */
+0x002a,/* 0x2a */
+0x002b,/* 0x2b */
+0x002c,/* 0x2c */
+0x002d,/* 0x2d */
+0x002e,/* 0x2e */
+0x002f,/* 0x2f */
+0x0030,/* 0x30 */
+0x0031,/* 0x31 */
+0x0032,/* 0x32 */
+0x0033,/* 0x33 */
+0x0034,/* 0x34 */
+0x0035,/* 0x35 */
+0x0036,/* 0x36 */
+0x0037,/* 0x37 */
+0x0038,/* 0x38 */
+0x0039,/* 0x39 */
+0x003A,/* 0x3A */
+0x003B,/* 0x3B */
+0x003c,/* 0x3c */
+0x003d,/* 0x3d */
+0x003e,/* 0x3e */
+0x003f,/* 0x3f */
+0x0040,/* 0x40 */
+0x0041,/* 0x41 */
+0x0042,/* 0x42 */
+0x0043,/* 0x43 */
+0x0044,/* 0x44 */
+0x0045,/* 0x45 */
+0x0046,/* 0x46 */
+0x0047,/* 0x47 */
+0x0048,/* 0x48 */
+0x0049,/* 0x49 */
+0x004a,/* 0x4a */
+0x004b,/* 0x4b */
+0x004c,/* 0x4c */
+0x004d,/* 0x4d */
+0x004e,/* 0x4e */
+0x004f,/* 0x4f */
+0x0050,/* 0x50 */
+0x0051,/* 0x51 */
+0x0052,/* 0x52 */
+0x0053,/* 0x53 */
+0x0054,/* 0x54 */
+0x0055,/* 0x55 */
+0x0056,/* 0x56 */
+0x0057,/* 0x57 */
+0x0058,/* 0x58 */
+0x0059,/* 0x59 */
+0x005a,/* 0x5a */
+0x005b,/* 0x5b */
+0x005c,/* 0x5c */
+0x005d,/* 0x5d */
+0x005e,/* 0x5e */
+0x005f,/* 0x5f */
+0x0060,/* 0x60 */
+0x0061,/* 0x61 */
+0x0062,/* 0x62 */
+0x0063,/* 0x63 */
+0x0064,/* 0x64 */
+0x0065,/* 0x65 */
+0x0066,/* 0x66 */
+0x0067,/* 0x67 */
+0x0068,/* 0x68 */
+0x0069,/* 0x69 */
+0x006a,/* 0x6a */
+0x006b,/* 0x6b */
+0x006c,/* 0x6c */
+0x006d,/* 0x6d */
+0x006e,/* 0x6e */
+0x006f,/* 0x6f */
+0x0070,/* 0x70 */
+0x0071,/* 0x71 */
+0x0072,/* 0x72 */
+0x0073,/* 0x73 */
+0x0074,/* 0x74 */
+0x0075,/* 0x75 */
+0x0076,/* 0x76 */
+0x0077,/* 0x77 */
+0x0078,/* 0x78 */
+0x0079,/* 0x79 */
+0x007a,/* 0x7a */
+0x007b,/* 0x7b */
+0x007c,/* 0x7c */
+0x007d,/* 0x7d */
+0x007e,/* 0x7e */
+0x007f,/* 0x7f */
+0x0080,/* 0x80 */
+0x0081,/* 0x81 */
+0x0082,/* 0x82 */
+0x0083,/* 0x83 */
+0x0084,/* 0x84 */
+0x0085,/* 0x85 */
+0x0086,/* 0x86 */
+0x0087,/* 0x87 */
+0x0088,/* 0x88 */
+0x0089,/* 0x89 */
+0x008a,/* 0x8a */
+0x008b,/* 0x8b */
+0x008c,/* 0x8c */
+0x008d,/* 0x8d */
+0x008e,/* 0x8e */
+0x008f,/* 0x8f */
+0x0090,/* 0x90 */
+0x0091,/* 0x91 */
+0x0092,/* 0x92 */
+0x0093,/* 0x93 */
+0x0094,/* 0x94 */
+0x0095,/* 0x95 */
+0x0096,/* 0x96 */
+0x0097,/* 0x97 */
+0x0098,/* 0x98 */
+0x0099,/* 0x99 */
+0x009a,/* 0x9a */
+0x009b,/* 0x9b */
+0x009c,/* 0x9c */
+0x009d,/* 0x9d */
+0x009e,/* 0x9e */
+0x009f,/* 0x9f */
+0x00A0,/* 0xa0 */
+0x0901,/* 0xa1 */
+0x0902,/* 0xa2 */
+0x0903,/* 0xa3 */
+0x0905,/* 0xa4 */
+0x0906,/* 0xa5 */
+0x0907,/* 0xa6 */
+0x0908,/* 0xa7 */
+0x0909,/* 0xa8 */
+0x090a,/* 0xa9 */
+0x090b,/* 0xaa */
+0x090e,/* 0xab */
+0x090f,/* 0xac */
+0x0910,/* 0xad */
+0x090d,/* 0xae */
+0x0912,/* 0xaf */
+0x0913,/* 0xb0 */
+0x0914,/* 0xb1 */
+0x0911,/* 0xb2 */
+0x0915,/* 0xb3 */
+0x0916,/* 0xb4 */
+0x0917,/* 0xb5 */
+0x0918,/* 0xb6 */
+0x0919,/* 0xb7 */
+0x091a,/* 0xb8 */
+0x091b,/* 0xb9 */
+0x091c,/* 0xba */
+0x091d,/* 0xbb */
+0x091e,/* 0xbc */
+0x091f,/* 0xbd */
+0x0920,/* 0xbe */
+0x0921,/* 0xbf */
+0x0922,/* 0xc0 */
+0x0923,/* 0xc1 */
+0x0924,/* 0xc2 */
+0x0925,/* 0xc3 */
+0x0926,/* 0xc4 */
+0x0927,/* 0xc5 */
+0x0928,/* 0xc6 */
+0x0929,/* 0xc7 */
+0x092a,/* 0xc8 */
+0x092b,/* 0xc9 */
+0x092c,/* 0xca */
+0x092d,/* 0xcb */
+0x092e,/* 0xcc */
+0x092f,/* 0xcd */
+0x095f,/* 0xce */
+0x0930,/* 0xcf */
+0x0931,/* 0xd0 */
+0x0932,/* 0xd1 */
+0x0933,/* 0xd2 */
+0x0934,/* 0xd3 */
+0x0935,/* 0xd4 */
+0x0936,/* 0xd5 */
+0x0937,/* 0xd6 */
+0x0938,/* 0xd7 */
+0x0939,/* 0xd8 */
+0x200D,/* 0xd9 */
+0x093e,/* 0xda */
+0x093f,/* 0xdb */
+0x0940,/* 0xdc */
+0x0941,/* 0xdd */
+0x0942,/* 0xde */
+0x0943,/* 0xdf */
+0x0946,/* 0xe0 */
+0x0947,/* 0xe1 */
+0x0948,/* 0xe2 */
+0x0945,/* 0xe3 */
+0x094a,/* 0xe4 */
+0x094b,/* 0xe5 */
+0x094c,/* 0xe6 */
+0x0949,/* 0xe7 */
+0x094d,/* 0xe8 */
+0x093c,/* 0xe9 */
+0x0964,/* 0xea */
+0xFFFF,/* 0xeb */
+0xFFFF,/* 0xec */
+0xFFFF,/* 0xed */
+0xFFFF,/* 0xee */
+0xFFFF,/* 0xef */
+0xFFFF,/* 0xf0 */
+0x0966,/* 0xf1 */
+0x0967,/* 0xf2 */
+0x0968,/* 0xf3 */
+0x0969,/* 0xf4 */
+0x096a,/* 0xf5 */
+0x096b,/* 0xf6 */
+0x096c,/* 0xf7 */
+0x096d,/* 0xf8 */
+0x096e,/* 0xf9 */
+0x096f,/* 0xfa */
+0xFFFF,/* 0xfb */
+0xFFFF,/* 0xfc */
+0xFFFF,/* 0xfd */
+0xFFFF,/* 0xfe */
+0xFFFF /* 0xff */
+};
+static const uint16_t vowelSignESpecialCases[][2]={
+	{ 2 /*length of array*/    , 0      },
+	{ 0xA4 , 0x0904 },
+};
+static const uint16_t nuktaSpecialCases[][2]={
+{ 16 /*length of array*/   , 0      },
+{ 0xA6 , 0x090c },
+{ 0xEA , 0x093D },
+{ 0xDF , 0x0944 },
+{ 0xA1 , 0x0950 },
+{ 0xb3 , 0x0958 },
+{ 0xb4 , 0x0959 },
+{ 0xb5 , 0x095a },
+{ 0xba , 0x095b },
+{ 0xbf , 0x095c },
+{ 0xC0 , 0x095d },
+{ 0xc9 , 0x095e },
+{ 0xAA , 0x0960 },
+{ 0xA7 , 0x0961 },
+{ 0xDB , 0x0962 },
+{ 0xDC , 0x0963 },
+};
+#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){      \
+int32_t offset = (int32_t)(source - args->source-1);                                        \
+/* write the targetUniChar  to target */                                                  \
+if(target < targetLimit){                                                                   \
+if(targetByteUnit <= 0xFF){                                                             \
+*(target)++ = (uint8_t)(targetByteUnit);                                            \
+if(offsets){                                                                        \
+*(offsets++) = offset;                                                          \
+}                                                                                   \
+}else{                                                                                  \
+if (targetByteUnit > 0xFFFF) {                                                      \
+*(target)++ = (uint8_t)(targetByteUnit>>16);                                    \
+if (offsets) {                                                                  \
+--offset;                                                                   \
+*(offsets++) = offset;                                                      \
+}                                                                               \
+}                                                                                   \
+if (!(target < targetLimit)) {                                                      \
+args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
+(uint8_t)(targetByteUnit >> 8);                                 \
+args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
+(uint8_t)targetByteUnit;                                        \
+*err = U_BUFFER_OVERFLOW_ERROR;                                                 \
+} else {                                                                            \
+*(target)++ = (uint8_t)(targetByteUnit>>8);                                     \
+if(offsets){                                                                    \
+*(offsets++) = offset;                                                      \
+}                                                                               \
+if(target < targetLimit){                                                       \
+*(target)++ = (uint8_t)  targetByteUnit;                                    \
+if(offsets){                                                                \
+*(offsets++) = offset                            ;                      \
+}                                                                           \
+}else{                                                                          \
+args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
+(uint8_t) (targetByteUnit);                                     \
+*err = U_BUFFER_OVERFLOW_ERROR;                                             \
+}                                                                               \
+}                                                                                   \
+}                                                                                       \
+}else{                                                                                      \
+if (targetByteUnit & 0xFF0000) {                                                        \
+args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
+(uint8_t) (targetByteUnit >>16);                                        \
+}                                                                                       \
+if(targetByteUnit & 0xFF00){                                                            \
+args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
+(uint8_t) (targetByteUnit >>8);                                         \
+}                                                                                       \
+args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
+(uint8_t) (targetByteUnit);                                             \
+*err = U_BUFFER_OVERFLOW_ERROR;                                                         \
+}                                                                                           \
+}
+/* Rules:
+*    Explicit Halant :
+*                      <HALANT> + <ZWNJ>
+*    Soft Halant :
+*                      <HALANT> + <ZWJ>
+*/
+static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
+UConverterFromUnicodeArgs * args, UErrorCode * err) {
+const UChar *source = args->source;
+const UChar *sourceLimit = args->sourceLimit;
+unsigned char *target = (unsigned char *) args->target;
+unsigned char *targetLimit = (unsigned char *) args->targetLimit;
+int32_t* offsets = args->offsets;
+uint32_t targetByteUnit = 0x0000;
+UChar32 sourceChar = 0x0000;
+UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
+UConverterDataISCII *converterData;
+uint16_t newDelta=0;
+uint16_t range = 0;
+UBool deltaChanged = FALSE;
+if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
+*err = U_ILLEGAL_ARGUMENT_ERROR;
+return;
+}
+/* initialize data */
+converterData=(UConverterDataISCII*)args->converter->extraInfo;
+newDelta=converterData->currentDeltaFromUnicode;
+range = (uint16_t)(newDelta/DELTA);
+if ((sourceChar = args->converter->fromUChar32)!=0) {
+goto getTrail;
+}
+/*writing the char to the output stream */
+while (source < sourceLimit) {
+/* Write the language code following LF only if LF is not the last character. */
+if (args->converter->fromUnicodeStatus == LF) {
+targetByteUnit = ATR<<8;
+targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
+args->converter->fromUnicodeStatus = 0x0000;
+/* now append ATR and language code */
+WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
+if (U_FAILURE(*err)) {
+break;
+}
+}
+sourceChar = *source++;
+tempContextFromUnicode = converterData->contextCharFromUnicode;
+targetByteUnit = missingCharMarker;
+/*check if input is in ASCII and C0 control codes range*/
+if (sourceChar <= ASCII_END) {
+args->converter->fromUnicodeStatus = sourceChar;
+WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
+if (U_FAILURE(*err)) {
+break;
+}
+continue;
+}
+switch (sourceChar) {
+case ZWNJ:
+/* contextChar has HALANT */
+if (converterData->contextCharFromUnicode) {
+converterData->contextCharFromUnicode = 0x00;
+targetByteUnit = ISCII_HALANT;
+} else {
+/* consume ZWNJ and continue */
+converterData->contextCharFromUnicode = 0x00;
+continue;
+}
+break;
+case ZWJ:
+/* contextChar has HALANT */
+if (converterData->contextCharFromUnicode) {
+targetByteUnit = ISCII_NUKTA;
+} else {
+targetByteUnit =ISCII_INV;
+}
+converterData->contextCharFromUnicode = 0x00;
+break;
+default:
+/* is the sourceChar in the INDIC_RANGE? */
+if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
+/* Danda and Double Danda are valid in Northern scripts.. since Unicode
+* does not include these codepoints in all Northern scrips we need to
+* filter them out
+*/
+if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
+/* find out to which block the souceChar belongs*/
+range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
+newDelta =(uint16_t)(range*DELTA);
+/* Now are we in the same block as the previous? */
+if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
+converterData->currentDeltaFromUnicode = newDelta;
+converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
+deltaChanged =TRUE;
+converterData->isFirstBuffer=FALSE;
+}
+if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
+if (sourceChar == PNJ_TIPPI) {
+/* Make sure Tippi is converterd to Bindi. */
+sourceChar = PNJ_BINDI;
+} else if (sourceChar == PNJ_ADHAK) {
+/* This is for consonant cluster handling. */
+converterData->contextCharFromUnicode = PNJ_ADHAK;
+}
+}
+/* Normalize all Indic codepoints to Devanagari and map them to ISCII */
+/* now subtract the new delta from sourceChar*/
+sourceChar -= converterData->currentDeltaFromUnicode;
+}
+/* get the target byte unit */
+targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
+/* is the code point valid in current script? */
+if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
+/* Vocallic RR is assigned in ISCII Telugu and Unicode */
+if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
+targetByteUnit=missingCharMarker;
+}
+}
+if (deltaChanged) {
+/* we are in a script block which is different than
+* previous sourceChar's script block write ATR and language codes
+*/
+uint32_t temp=0;
+temp =(uint16_t)(ATR<<8);
+temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
+/* reset */
+deltaChanged=FALSE;
+/* now append ATR and language code */
+WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
+if (U_FAILURE(*err)) {
+break;
+}
+}
+if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
+continue;
+}
+}
+/* reset context char */
+converterData->contextCharFromUnicode = 0x00;
+break;
+}
+if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
+/* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
+/* reset context char */
+converterData->contextCharFromUnicode = 0x0000;
+targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
+/* write targetByteUnit to target */
+WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
+if (U_FAILURE(*err)) {
+break;
+}
+} else if (targetByteUnit != missingCharMarker) {
+if (targetByteUnit==ISCII_HALANT) {
+converterData->contextCharFromUnicode = (UChar)targetByteUnit;
+}
+/* write targetByteUnit to target*/
+WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
+if (U_FAILURE(*err)) {
+break;
+}
+} else {
+/* oops.. the code point is unassigned */
+/*check if the char is a First surrogate*/
+if (U16_IS_SURROGATE(sourceChar)) {
+if (U16_IS_SURROGATE_LEAD(sourceChar)) {
+getTrail:
+/*look ahead to find the trail surrogate*/
+if (source < sourceLimit) {
+/* test the following code unit */
+UChar trail= (*source);
+if (U16_IS_TRAIL(trail)) {
+source++;
+sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
+*err =U_INVALID_CHAR_FOUND;
+/* convert this surrogate code point */
+/* exit this condition tree */
+} else {
+/* this is an unmatched lead code unit (1st surrogate) */
+/* callback(illegal) */
+*err=U_ILLEGAL_CHAR_FOUND;
+}
+} else {
+/* no more input */
+*err = U_ZERO_ERROR;
+}
+} else {
+/* this is an unmatched trail code unit (2nd surrogate) */
+/* callback(illegal) */
+*err=U_ILLEGAL_CHAR_FOUND;
+}
+} else {
+/* callback(unassigned) for a BMP code point */
+*err = U_INVALID_CHAR_FOUND;
+}
+args->converter->fromUChar32=sourceChar;
+break;
+}
+}/* end while(mySourceIndex<mySourceLength) */
+/*save the state and return */
+args->source = source;
+args->target = (char*)target;
+}
+static const uint16_t lookupTable[][2]={
+{ ZERO,       ZERO     },     /*DEFALT*/
+{ ZERO,       ZERO     },     /*ROMAN*/
+{ DEVANAGARI, DEV_MASK },
+{ BENGALI,    BNG_MASK },
+{ TAMIL,      TML_MASK },
+{ TELUGU,     KND_MASK },
+{ BENGALI,    BNG_MASK },
+{ ORIYA,      ORI_MASK },
+{ KANNADA,    KND_MASK },
+{ MALAYALAM,  MLM_MASK },
+{ GUJARATI,   GJR_MASK },
+{ GURMUKHI,   PNJ_MASK }
+};
+#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
+/* add offset to current Indic Block */                                              \
+if(targetUniChar>ASCII_END &&                                                        \
+targetUniChar != ZWJ &&                                                       \
+targetUniChar != ZWNJ &&                                                      \
+targetUniChar != DANDA &&                                                     \
+targetUniChar != DOUBLE_DANDA){                                               \
+\
+targetUniChar+=(uint16_t)(delta);                                             \
+}                                                                                    \
+/* now write the targetUniChar */                                                    \
+if(target<args->targetLimit){                                                        \
+*(target)++ = (UChar)targetUniChar;                                              \
+if(offsets){                                                                     \
+*(offsets)++ = (int32_t)(offset);                                            \
+}                                                                                \
+}else{                                                                               \
+args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] =   \
+(UChar)targetUniChar;                                                        \
+*err = U_BUFFER_OVERFLOW_ERROR;                                                  \
+}                                                                                    \
+}
+#define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
+targetUniChar = toUnicodeTable[(sourceChar)] ;                                       \
+/* is the code point valid in current script? */                                     \
+if(sourceChar> ASCII_END &&                                                          \
+(validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){    \
+/* Vocallic RR is assigne in ISCII Telugu and Unicode */                         \
+if(data->currentDeltaToUnicode!=(TELUGU_DELTA) ||                                \
+targetUniChar!=VOCALLIC_RR){                                         \
+targetUniChar=missingCharMarker;                                             \
+}                                                                                \
+}                                                                                    \
+}
+/***********
+*  Rules for ISCII to Unicode converter
+*  ISCII is stateful encoding. To convert ISCII bytes to Unicode,
+*  which has both precomposed and decomposed forms characters
+*  pre-context and post-context need to be considered.
+*
+*  Post context
+*  i)  ATR : Attribute code is used to declare the font and script switching.
+*      Currently we only switch scripts and font codes consumed without generating an error
+*  ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure,
+*      obsolete characters
+*  Pre context
+*  i)  Halant: if preceeded by a halant then it is a explicit halant
+*  ii) Nukta :
+*       a) if preceeded by a halant then it is a soft halant
+*       b) if preceeded by specific consonants and the ligatures have pre-composed
+*          characters in Unicode then convert to pre-composed characters
+*  iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda
+*
+*/
+static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
+const char *source = ( char *) args->source;
+UChar *target = args->target;
+const char *sourceLimit = args->sourceLimit;
+const UChar* targetLimit = args->targetLimit;
+uint32_t targetUniChar = 0x0000;
+uint8_t sourceChar = 0x0000;
+UConverterDataISCII* data;
+UChar32* toUnicodeStatus=NULL;
+UChar32 tempTargetUniChar = 0x0000;
+UChar* contextCharToUnicode= NULL;
+UBool found;
+int i;
+int offset = 0;
+if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
+*err = U_ILLEGAL_ARGUMENT_ERROR;
+return;
+}
+data = (UConverterDataISCII*)(args->converter->extraInfo);
+contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
+toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
+while (U_SUCCESS(*err) && source<sourceLimit) {
+targetUniChar = missingCharMarker;
+if (target < targetLimit) {
+sourceChar = (unsigned char)*(source)++;
+/* look at the post-context preform special processing */
+if (*contextCharToUnicode==ATR) {
+/* If we have ATR in *contextCharToUnicode then we need to change our
+* state to the Indic Script specified by sourceChar
+*/
+/* check if the sourceChar is supported script range*/
+if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
+data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
+data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
+} else if (sourceChar==DEF) {
+/* switch back to default */
+data->currentDeltaToUnicode = data->defDeltaToUnicode;
+data->currentMaskToUnicode = data->defMaskToUnicode;
+} else {
+if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
+/* these are display codes consume and continue */
+} else {
+*err =U_ILLEGAL_CHAR_FOUND;
+/* reset */
+*contextCharToUnicode=NO_CHAR_MARKER;
+goto CALLBACK;
+}
+}
+/* reset */
+*contextCharToUnicode=NO_CHAR_MARKER;
+continue;
+} else if (*contextCharToUnicode==EXT) {
+/* check if sourceChar is in 0xA1-0xEE range */
+if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
+/* We currently support only Anudatta and Devanagari abbreviation sign */
+if (sourceChar==0xBF || sourceChar == 0xB8) {
+targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
+/* find out if the mapping is valid in this state */
+if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
+*contextCharToUnicode= NO_CHAR_MARKER;
+/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+if (data->prevToUnicodeStatus) {
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000;
+}
+/* write to target */
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
+continue;
+}
+}
+/* byte unit is unassigned */
+targetUniChar = missingCharMarker;
+*err= U_INVALID_CHAR_FOUND;
+} else {
+/* only 0xA1 - 0xEE are legal after EXT char */
+*contextCharToUnicode= NO_CHAR_MARKER;
+*err = U_ILLEGAL_CHAR_FOUND;
+}
+goto CALLBACK;
+} else if (*contextCharToUnicode==ISCII_INV) {
+if (sourceChar==ISCII_HALANT) {
+targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
+} else {
+targetUniChar = ZWJ;
+}
+/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+if (data->prevToUnicodeStatus) {
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000;
+}
+/* write to target */
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
+/* reset */
+*contextCharToUnicode=NO_CHAR_MARKER;
+}
+/* look at the pre-context and perform special processing */
+switch (sourceChar) {
+case ISCII_INV:
+case EXT: /*falls through*/
+case ATR:
+*contextCharToUnicode = (UChar)sourceChar;
+if (*toUnicodeStatus != missingCharMarker) {
+/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+if (data->prevToUnicodeStatus) {
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000;
+}
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
+*toUnicodeStatus = missingCharMarker;
+}
+continue;
+case ISCII_DANDA:
+/* handle double danda*/
+if (*contextCharToUnicode== ISCII_DANDA) {
+targetUniChar = DOUBLE_DANDA;
+/* clear the context */
+*contextCharToUnicode = NO_CHAR_MARKER;
+*toUnicodeStatus = missingCharMarker;
+} else {
+GET_MAPPING(sourceChar,targetUniChar,data);
+*contextCharToUnicode = sourceChar;
+}
+break;
+case ISCII_HALANT:
+/* handle explicit halant */
+if (*contextCharToUnicode == ISCII_HALANT) {
+targetUniChar = ZWNJ;
+/* clear the context */
+*contextCharToUnicode = NO_CHAR_MARKER;
+} else {
+GET_MAPPING(sourceChar,targetUniChar,data);
+*contextCharToUnicode = sourceChar;
+}
+break;
+case 0x0A:
+/* fall through */
+case 0x0D:
+data->resetToDefaultToUnicode = TRUE;
+GET_MAPPING(sourceChar,targetUniChar,data)
+;
+*contextCharToUnicode = sourceChar;
+break;
+case ISCII_VOWEL_SIGN_E:
+i=1;
+found=FALSE;
+for (; i<vowelSignESpecialCases[0][0]; i++) {
+U_ASSERT(i<sizeof(vowelSignESpecialCases)/sizeof(vowelSignESpecialCases[0]));
+if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
+targetUniChar=vowelSignESpecialCases[i][1];
+found=TRUE;
+break;
+}
+}
+if (found) {
+/* find out if the mapping is valid in this state */
+if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
+/*targetUniChar += data->currentDeltaToUnicode ;*/
+*contextCharToUnicode= NO_CHAR_MARKER;
+*toUnicodeStatus = missingCharMarker;
+break;
+}
+}
+GET_MAPPING(sourceChar,targetUniChar,data);
+*contextCharToUnicode = sourceChar;
+break;
+case ISCII_NUKTA:
+/* handle soft halant */
+if (*contextCharToUnicode == ISCII_HALANT) {
+targetUniChar = ZWJ;
+/* clear the context */
+*contextCharToUnicode = NO_CHAR_MARKER;
+break;
+} else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
+/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+if (data->prevToUnicodeStatus) {
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000;
+}
+/* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
+* In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
+*/
+targetUniChar = PNJ_RRA;
+WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
+if (U_SUCCESS(*err)) {
+targetUniChar = PNJ_SIGN_VIRAMA;
+WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
+if (U_SUCCESS(*err)) {
+targetUniChar = PNJ_HA;
+WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
+} else {
+args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
+}
+} else {
+args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
+args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
+}
+*toUnicodeStatus = missingCharMarker;
+data->contextCharToUnicode = NO_CHAR_MARKER;
+continue;
+} else {
+/* try to handle <CHAR> + ISCII_NUKTA special mappings */
+i=1;
+found =FALSE;
+for (; i<nuktaSpecialCases[0][0]; i++) {
+if (nuktaSpecialCases[i][0]==(uint8_t)
+*contextCharToUnicode) {
+targetUniChar=nuktaSpecialCases[i][1];
+found =TRUE;
+break;
+}
+}
+if (found) {
+/* find out if the mapping is valid in this state */
+if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
+/*targetUniChar += data->currentDeltaToUnicode ;*/
+*contextCharToUnicode= NO_CHAR_MARKER;
+*toUnicodeStatus = missingCharMarker;
+if (data->currentDeltaToUnicode == PNJ_DELTA) {
+/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+if (data->prevToUnicodeStatus) {
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000;
+}
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
+continue;
+}
+break;
+}
+/* else fall through to default */
+}
+/* else fall through to default */
+}
+default:GET_MAPPING(sourceChar,targetUniChar,data)
+;
+*contextCharToUnicode = sourceChar;
+break;
+}
+if (*toUnicodeStatus != missingCharMarker) {
+/* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
+if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
+(*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
+/* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
+offset = (int)(source-args->source - 3);
+tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
+*toUnicodeStatus = missingCharMarker;
+continue;
+} else {
+/* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
+if (data->prevToUnicodeStatus) {
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
+data->prevToUnicodeStatus = 0x0000;
+}
+/* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
+* If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
+*/
+if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
+targetUniChar = PNJ_TIPPI - PNJ_DELTA;
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
+} else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
+/* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
+data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
+} else {
+/* write the previously mapped codepoint */
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
+}
+}
+*toUnicodeStatus = missingCharMarker;
+}
+if (targetUniChar != missingCharMarker) {
+/* now save the targetUniChar for delayed write */
+*toUnicodeStatus = (UChar) targetUniChar;
+if (data->resetToDefaultToUnicode==TRUE) {
+data->currentDeltaToUnicode = data->defDeltaToUnicode;
+data->currentMaskToUnicode = data->defMaskToUnicode;
+data->resetToDefaultToUnicode=FALSE;
+}
+} else {
+/* we reach here only if targetUniChar == missingCharMarker
+* so assign codes to reason and err
+*/
+*err = U_INVALID_CHAR_FOUND;
+CALLBACK:
+args->converter->toUBytes[0] = (uint8_t) sourceChar;
+args->converter->toULength = 1;
+break;
+}
+} else {
+*err =U_BUFFER_OVERFLOW_ERROR;
+break;
+}
+}
+if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
+/* end of the input stream */
+UConverter *cnv = args->converter;
+if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
+/* set toUBytes[] */
+cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
+cnv->toULength = 1;
+/* avoid looping on truncated sequences */
+*contextCharToUnicode = NO_CHAR_MARKER;
+} else {
+cnv->toULength = 0;
+}
+if (*toUnicodeStatus != missingCharMarker) {
+/* output a remaining target character */
+WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
+*toUnicodeStatus = missingCharMarker;
+}
+}
+args->target = target;
+args->source = source;
+}
+/* structure for SafeClone calculations */
+struct cloneISCIIStruct {
+UConverter cnv;
+UConverterDataISCII mydata;
+};
+static UConverter *
+_ISCII_SafeClone(const UConverter *cnv,
+void *stackBuffer,
+int32_t *pBufferSize,
+UErrorCode *status)
+{
+struct cloneISCIIStruct * localClone;
+int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
+if (U_FAILURE(*status)) {
+return 0;
+}
+if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
+*pBufferSize = bufferSizeNeeded;
+return 0;
+}
+localClone = (struct cloneISCIIStruct *)stackBuffer;
+/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
+uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
+localClone->cnv.extraInfo = &localClone->mydata;
+localClone->cnv.isExtraLocal = TRUE;
+return &localClone->cnv;
+}
+static void
+_ISCIIGetUnicodeSet(const UConverter *cnv,
+const USetAdder *sa,
+UConverterUnicodeSet which,
+UErrorCode *pErrorCode)
+{
+int32_t idx, script;
+uint8_t mask;
+/* Since all ISCII versions allow switching to other ISCII
+scripts, we add all roundtrippable characters to this set. */
+sa->addRange(sa->set, 0, ASCII_END);
+for (script = DEVANAGARI; script <= MALAYALAM; script++) {
+mask = (uint8_t)(lookupInitialData[script].maskEnum);
+for (idx = 0; idx < DELTA; idx++) {
+/* added check for TELUGU character */
+if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
+sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
+}
+}
+}
+sa->add(sa->set, DANDA);
+sa->add(sa->set, DOUBLE_DANDA);
+sa->add(sa->set, ZWNJ);
+sa->add(sa->set, ZWJ);
+}
+static const UConverterImpl _ISCIIImpl={
+UCNV_ISCII,
+NULL,
+NULL,
+_ISCIIOpen,
+_ISCIIClose,
+_ISCIIReset,
+UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
+UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
+UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
+UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
+NULL,
+NULL,
+_ISCIIgetName,
+NULL,
+_ISCII_SafeClone,
+_ISCIIGetUnicodeSet
+};
+static const UConverterStaticData _ISCIIStaticData={
+sizeof(UConverterStaticData),
+"ISCII",
+0,
+UCNV_IBM,
+UCNV_ISCII,
+1,
+4,
+{ 0x1a, 0, 0, 0 },
+0x1,
+FALSE,
+FALSE,
+0x0,
+0x0,
+{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
+};
+const UConverterSharedData _ISCIIData={
+sizeof(UConverterSharedData),
+~((uint32_t) 0),
+NULL,
+NULL,
+&_ISCIIStaticData,
+FALSE,
+&_ISCIIImpl,
+0
+};
+#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

The Tor Browser / file comparison

comparison: intl/icu/source/common/ucnvisci.c

intl/icu/source/common/ucnvisci.c