intl/icu/source/common/ucnvisci.c

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode or
the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Open reservation: how to convince FindCookie callers to test the condition
and pass a nullptr when double-key logic is disabled.

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2000-2012, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *   file name:  ucnvisci.c
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2001JUN26
    12 *   created by: Ram Viswanadha
    13 *
    14 *   Date        Name        Description
    15 *   24/7/2001   Ram         Added support for EXT character handling
    16 */
    18 #include "unicode/utypes.h"
    20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
    22 #include "unicode/ucnv.h"
    23 #include "unicode/ucnv_cb.h"
    24 #include "unicode/utf16.h"
    25 #include "cmemory.h"
    26 #include "ucnv_bld.h"
    27 #include "ucnv_cnv.h"
    28 #include "cstring.h"
    29 #include "uassert.h"
    31 #define UCNV_OPTIONS_VERSION_MASK 0xf
    32 #define NUKTA               0x093c
    33 #define HALANT              0x094d
    34 #define ZWNJ                0x200c /* Zero Width Non Joiner */
    35 #define ZWJ                 0x200d /* Zero width Joiner */
    36 #define INVALID_CHAR        0xffff
    37 #define ATR                 0xEF   /* Attribute code */
    38 #define EXT                 0xF0   /* Extension code */
    39 #define DANDA               0x0964
    40 #define DOUBLE_DANDA        0x0965
    41 #define ISCII_NUKTA         0xE9
    42 #define ISCII_HALANT        0xE8
    43 #define ISCII_DANDA         0xEA
    44 #define ISCII_INV           0xD9
    45 #define ISCII_VOWEL_SIGN_E  0xE0
    46 #define INDIC_BLOCK_BEGIN   0x0900
    47 #define INDIC_BLOCK_END     0x0D7F
    48 #define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
    49 #define VOCALLIC_RR         0x0931
    50 #define LF                  0x0A
    51 #define ASCII_END           0xA0
    52 #define NO_CHAR_MARKER      0xFFFE
    53 #define TELUGU_DELTA        DELTA * TELUGU
    54 #define DEV_ABBR_SIGN       0x0970
    55 #define DEV_ANUDATTA        0x0952
    56 #define EXT_RANGE_BEGIN     0xA1
    57 #define EXT_RANGE_END       0xEE
    59 #define PNJ_DELTA           0x0100
    60 #define PNJ_BINDI           0x0A02
    61 #define PNJ_TIPPI           0x0A70
    62 #define PNJ_SIGN_VIRAMA     0x0A4D
    63 #define PNJ_ADHAK           0x0A71
    64 #define PNJ_HA              0x0A39
    65 #define PNJ_RRA             0x0A5C
    67 typedef enum {
    68     DEVANAGARI =0,
    69     BENGALI,
    70     GURMUKHI,
    71     GUJARATI,
    72     ORIYA,
    73     TAMIL,
    74     TELUGU,
    75     KANNADA,
    76     MALAYALAM,
    77     DELTA=0x80
    78 }UniLang;
    80 /**
    81  * Enumeration for switching code pages if <ATR>+<one of below values>
    82  * is encountered
    83  */
    84 typedef enum {
    85     DEF = 0x40,
    86     RMN = 0x41,
    87     DEV = 0x42,
    88     BNG = 0x43,
    89     TML = 0x44,
    90     TLG = 0x45,
    91     ASM = 0x46,
    92     ORI = 0x47,
    93     KND = 0x48,
    94     MLM = 0x49,
    95     GJR = 0x4A,
    96     PNJ = 0x4B,
    97     ARB = 0x71,
    98     PES = 0x72,
    99     URD = 0x73,
   100     SND = 0x74,
   101     KSM = 0x75,
   102     PST = 0x76
   103 }ISCIILang;
   105 typedef enum {
   106     DEV_MASK =0x80,
   107     PNJ_MASK =0x40,
   108     GJR_MASK =0x20,
   109     ORI_MASK =0x10,
   110     BNG_MASK =0x08,
   111     KND_MASK =0x04,
   112     MLM_MASK =0x02,
   113     TML_MASK =0x01,
   114     ZERO =0x00
   115 }MaskEnum;
   117 #define ISCII_CNV_PREFIX "ISCII,version="
   119 typedef struct {
   120     UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis */
   121     UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis */
   122     uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
   123     uint16_t currentDeltaFromUnicode;   /* current delta in Indic block */
   124     uint16_t currentDeltaToUnicode;     /* current delta in Indic block */
   125     MaskEnum currentMaskFromUnicode;    /* mask for current state in toUnicode */
   126     MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
   127     MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
   128     UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
   129     UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
   130     char name[sizeof(ISCII_CNV_PREFIX) + 1];
   131     UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
   132 } UConverterDataISCII;
   134 typedef struct LookupDataStruct {
   135     UniLang uniLang;
   136     MaskEnum maskEnum;
   137     ISCIILang isciiLang;
   138 } LookupDataStruct;
   140 static const LookupDataStruct lookupInitialData[]={
   141     { DEVANAGARI, DEV_MASK,  DEV },
   142     { BENGALI,    BNG_MASK,  BNG },
   143     { GURMUKHI,   PNJ_MASK,  PNJ },
   144     { GUJARATI,   GJR_MASK,  GJR },
   145     { ORIYA,      ORI_MASK,  ORI },
   146     { TAMIL,      TML_MASK,  TML },
   147     { TELUGU,     KND_MASK,  TLG },
   148     { KANNADA,    KND_MASK,  KND },
   149     { MALAYALAM,  MLM_MASK,  MLM }
   150 };
   152 /*
   153  * For special handling of certain Gurmukhi characters.
   154  * Bit 0 (value 1): PNJ consonant
   155  * Bit 1 (value 2): PNJ Bindi Tippi
   156  */
   157 static const uint8_t pnjMap[80] = {
   158     /* 0A00..0A0F */
   159     0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
   160     /* 0A10..0A1F */
   161     0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
   162     /* 0A20..0A2F */
   163     3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
   164     /* 0A30..0A3F */
   165     3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
   166     /* 0A40..0A4F */
   167     0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
   168 };
   170 static UBool
   171 isPNJConsonant(UChar32 c) {
   172     if (c < 0xa00 || 0xa50 <= c) {
   173         return FALSE;
   174     } else {
   175         return (UBool)(pnjMap[c - 0xa00] & 1);
   176     }
   177 }
   179 static UBool
   180 isPNJBindiTippi(UChar32 c) {
   181     if (c < 0xa00 || 0xa50 <= c) {
   182         return FALSE;
   183     } else {
   184         return (UBool)(pnjMap[c - 0xa00] >> 1);
   185     }
   186 }
   188 static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
   189     if(pArgs->onlyTestIsLoadable) {
   190         return;
   191     }
   193     cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
   195     if (cnv->extraInfo != NULL) {
   196         int32_t len=0;
   197         UConverterDataISCII *converterData=
   198                 (UConverterDataISCII *) cnv->extraInfo;
   199         converterData->contextCharToUnicode=NO_CHAR_MARKER;
   200         cnv->toUnicodeStatus = missingCharMarker;
   201         converterData->contextCharFromUnicode=0x0000;
   202         converterData->resetToDefaultToUnicode=FALSE;
   203         /* check if the version requested is supported */
   204         if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
   205             /* initialize state variables */
   206             converterData->currentDeltaFromUnicode
   207                     = converterData->currentDeltaToUnicode
   208                             = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
   210             converterData->currentMaskFromUnicode
   211                     = converterData->currentMaskToUnicode
   212                             = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
   214             converterData->isFirstBuffer=TRUE;
   215             (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
   216             len = (int32_t)uprv_strlen(converterData->name);
   217             converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
   218             converterData->name[len+1]=0;
   220             converterData->prevToUnicodeStatus = 0x0000;
   221         } else {
   222             uprv_free(cnv->extraInfo);
   223             cnv->extraInfo = NULL;
   224             *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   225         }
   227     } else {
   228         *errorCode =U_MEMORY_ALLOCATION_ERROR;
   229     }
   230 }
   232 static void _ISCIIClose(UConverter *cnv) {
   233     if (cnv->extraInfo!=NULL) {
   234         if (!cnv->isExtraLocal) {
   235             uprv_free(cnv->extraInfo);
   236         }
   237         cnv->extraInfo=NULL;
   238     }
   239 }
   241 static const char* _ISCIIgetName(const UConverter* cnv) {
   242     if (cnv->extraInfo) {
   243         UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
   244         return myData->name;
   245     }
   246     return NULL;
   247 }
   249 static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
   250     UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
   251     if (choice<=UCNV_RESET_TO_UNICODE) {
   252         cnv->toUnicodeStatus = missingCharMarker;
   253         cnv->mode=0;
   254         data->currentDeltaToUnicode=data->defDeltaToUnicode;
   255         data->currentMaskToUnicode = data->defMaskToUnicode;
   256         data->contextCharToUnicode=NO_CHAR_MARKER;
   257         data->prevToUnicodeStatus = 0x0000;
   258     }
   259     if (choice!=UCNV_RESET_TO_UNICODE) {
   260         cnv->fromUChar32=0x0000;
   261         data->contextCharFromUnicode=0x00;
   262         data->currentMaskFromUnicode=data->defMaskToUnicode;
   263         data->currentDeltaFromUnicode=data->defDeltaToUnicode;
   264         data->isFirstBuffer=TRUE;
   265         data->resetToDefaultToUnicode=FALSE;
   266     }
   267 }
   269 /**
   270  * The values in validity table are indexed by the lower bits of Unicode
   271  * range 0x0900 - 0x097f (the table has 128 entries). The values have a structure like:
   272  *       ---------------------------------------------------------------
   273  *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
   274  *      |       |       |       |       | ASM   | KND   |       |       |
   275  *       ---------------------------------------------------------------
   276  * If a code point is valid in a particular script
   277  * then that bit is turned on
   278  *
   279  * Unicode does not distinguish between Bengali and Assamese so we use 1 bit
   280  * to represent these languages
   281  *
   282  * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
   283  * and combine and use 1 bit to represent these languages.
        * In the rows below that shared bit is written as KND_MASK.
   284  *
   285  * TODO: It is probably easier to understand and maintain to change this
   286  * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
   287  */
   289 static const uint8_t validityTable[128] = {
   290 /* This state table is tool generated please do not edit unless you know exactly what you are doing */
   291 /* Note: This table was edited to mirror the Windows XP implementation */
       /* NOTE(review): the middle hex value in the per-row comments does not always equal the sum of the masks */
       /* on that row (e.g. the 0x92e row); presumably stale after the edit noted above. The mask expressions are what is compiled. */
   292 /*ISCII:Valid:Unicode */
   293 /*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   294 /*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   295 /*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   296 /*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   297 /*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   298 /*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   299 /*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   300 /*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   301 /*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   302 /*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   303 /*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   304 /*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   305 /*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   306 /*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   307 /*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
   308 /*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   309 /*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   310 /*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   311 /*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
   312 /*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   313 /*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   314 /*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   315 /*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   316 /*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   317 /*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   318 /*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   319 /*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   320 /*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   321 /*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   322 /*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   323 /*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   324 /*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   325 /*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   326 /*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   327 /*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   328 /*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   329 /*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   330 /*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   331 /*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   332 /*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   333 /*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   334 /*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
   335 /*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   336 /*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   337 /*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   338 /*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   339 /*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   340 /*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   341 /*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   342 /*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
   343 /*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   344 /*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
   345 /*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
   346 /*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
   347 /*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   348 /*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   349 /*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   350 /*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   351 /*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   352 /*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   353 /*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   354 /*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   355 /*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   356 /*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   357 /*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   358 /*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   359 /*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   360 /*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   361 /*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
   362 /*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   363 /*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
   364 /*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   365 /*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   366 /*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   367 /*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
   368 /*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   369 /*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   370 /*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   371 /*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   372 /*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   373 /*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   374 /*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   375 /*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   376 /*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   377 /*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   378 /*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
   379 /*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
   380 /*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
   381 /*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   382 /*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   383 /*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   384 /*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   385 /*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   386 /*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   387 /*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   388 /*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   389 /*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   390 /*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
   391 /*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   392 /*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
   393 /*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   394 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   395 /*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   396 /*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   397 /*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   398 /*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   399 /*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   400 /*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   401 /*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   402 /*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   403 /*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   404 /*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
   405 /*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
   406 /*
   407  * The length of the array is 128 to provide values for 0x900..0x97f.
   408  * The last 15 entries for 0x971..0x97f of the validity table are all zero
   409  * because no Indic script uses such Unicode code points.
        * Only the first of these entries is written out below; the remaining
        * elements have no initializer and are therefore zero as well.
   410  */
   411 /*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
   412 };
   414 static const uint16_t fromUnicodeTable[128]={
       /*
        * One entry per code point 0x0900..0x097f; the comment on each row
        * gives the code point the entry belongs to.
        * Entries of the form 0x00NN hold a single byte value; the other
        * entries pack two byte values into 16 bits (e.g. 0xeaea for 0x0965).
        * 0xFFFF has the same value as INVALID_CHAR.
        * NOTE(review): how the two-byte entries are split and emitted is
        * decided by code outside this part of the file - confirm there.
        */
   415     0x00a0 ,/* 0x0900 */
   416     0x00a1 ,/* 0x0901 */
   417     0x00a2 ,/* 0x0902 */
   418     0x00a3 ,/* 0x0903 */
   419     0xa4e0 ,/* 0x0904 */
   420     0x00a4 ,/* 0x0905 */
   421     0x00a5 ,/* 0x0906 */
   422     0x00a6 ,/* 0x0907 */
   423     0x00a7 ,/* 0x0908 */
   424     0x00a8 ,/* 0x0909 */
   425     0x00a9 ,/* 0x090a */
   426     0x00aa ,/* 0x090b */
   427     0xA6E9 ,/* 0x090c */
   428     0x00ae ,/* 0x090d */
   429     0x00ab ,/* 0x090e */
   430     0x00ac ,/* 0x090f */
   431     0x00ad ,/* 0x0910 */
   432     0x00b2 ,/* 0x0911 */
   433     0x00af ,/* 0x0912 */
   434     0x00b0 ,/* 0x0913 */
   435     0x00b1 ,/* 0x0914 */
   436     0x00b3 ,/* 0x0915 */
   437     0x00b4 ,/* 0x0916 */
   438     0x00b5 ,/* 0x0917 */
   439     0x00b6 ,/* 0x0918 */
   440     0x00b7 ,/* 0x0919 */
   441     0x00b8 ,/* 0x091a */
   442     0x00b9 ,/* 0x091b */
   443     0x00ba ,/* 0x091c */
   444     0x00bb ,/* 0x091d */
   445     0x00bc ,/* 0x091e */
   446     0x00bd ,/* 0x091f */
   447     0x00be ,/* 0x0920 */
   448     0x00bf ,/* 0x0921 */
   449     0x00c0 ,/* 0x0922 */
   450     0x00c1 ,/* 0x0923 */
   451     0x00c2 ,/* 0x0924 */
   452     0x00c3 ,/* 0x0925 */
   453     0x00c4 ,/* 0x0926 */
   454     0x00c5 ,/* 0x0927 */
   455     0x00c6 ,/* 0x0928 */
   456     0x00c7 ,/* 0x0929 */
   457     0x00c8 ,/* 0x092a */
   458     0x00c9 ,/* 0x092b */
   459     0x00ca ,/* 0x092c */
   460     0x00cb ,/* 0x092d */
   461     0x00cc ,/* 0x092e */
   462     0x00cd ,/* 0x092f */
   463     0x00cf ,/* 0x0930 */
   464     0x00d0 ,/* 0x0931 */
   465     0x00d1 ,/* 0x0932 */
   466     0x00d2 ,/* 0x0933 */
   467     0x00d3 ,/* 0x0934 */
   468     0x00d4 ,/* 0x0935 */
   469     0x00d5 ,/* 0x0936 */
   470     0x00d6 ,/* 0x0937 */
   471     0x00d7 ,/* 0x0938 */
   472     0x00d8 ,/* 0x0939 */
   473     0xFFFF ,/* 0x093A */
   474     0xFFFF ,/* 0x093B */
   475     0x00e9 ,/* 0x093c */
   476     0xEAE9 ,/* 0x093d */
   477     0x00da ,/* 0x093e */
   478     0x00db ,/* 0x093f */
   479     0x00dc ,/* 0x0940 */
   480     0x00dd ,/* 0x0941 */
   481     0x00de ,/* 0x0942 */
   482     0x00df ,/* 0x0943 */
   483     0xDFE9 ,/* 0x0944 */
   484     0x00e3 ,/* 0x0945 */
   485     0x00e0 ,/* 0x0946 */
   486     0x00e1 ,/* 0x0947 */
   487     0x00e2 ,/* 0x0948 */
   488     0x00e7 ,/* 0x0949 */
   489     0x00e4 ,/* 0x094a */
   490     0x00e5 ,/* 0x094b */
   491     0x00e6 ,/* 0x094c */
   492     0x00e8 ,/* 0x094d */
   493     0x00ec ,/* 0x094e */
   494     0x00ed ,/* 0x094f */
   495     0xA1E9 ,/* 0x0950 */ /* OM Symbol */
   496     0xFFFF ,/* 0x0951 */
   497     0xF0B8 ,/* 0x0952 */
   498     0xFFFF ,/* 0x0953 */
   499     0xFFFF ,/* 0x0954 */
   500     0xFFFF ,/* 0x0955 */
   501     0xFFFF ,/* 0x0956 */
   502     0xFFFF ,/* 0x0957 */
   503     0xb3e9 ,/* 0x0958 */
   504     0xb4e9 ,/* 0x0959 */
   505     0xb5e9 ,/* 0x095a */
   506     0xbae9 ,/* 0x095b */
   507     0xbfe9 ,/* 0x095c */
   508     0xC0E9 ,/* 0x095d */
   509     0xc9e9 ,/* 0x095e */
   510     0x00ce ,/* 0x095f */
   511     0xAAe9 ,/* 0x0960 */
   512     0xA7E9 ,/* 0x0961 */
   513     0xDBE9 ,/* 0x0962 */
   514     0xDCE9 ,/* 0x0963 */
   515     0x00ea ,/* 0x0964 */
   516     0xeaea ,/* 0x0965 */
   517     0x00f1 ,/* 0x0966 */
   518     0x00f2 ,/* 0x0967 */
   519     0x00f3 ,/* 0x0968 */
   520     0x00f4 ,/* 0x0969 */
   521     0x00f5 ,/* 0x096a */
   522     0x00f6 ,/* 0x096b */
   523     0x00f7 ,/* 0x096c */
   524     0x00f8 ,/* 0x096d */
   525     0x00f9 ,/* 0x096e */
   526     0x00fa ,/* 0x096f */
   527     0xF0BF ,/* 0x0970 */
   528     0xFFFF ,/* 0x0971 */
   529     0xFFFF ,/* 0x0972 */
   530     0xFFFF ,/* 0x0973 */
   531     0xFFFF ,/* 0x0974 */
   532     0xFFFF ,/* 0x0975 */
   533     0xFFFF ,/* 0x0976 */
   534     0xFFFF ,/* 0x0977 */
   535     0xFFFF ,/* 0x0978 */
   536     0xFFFF ,/* 0x0979 */
   537     0xFFFF ,/* 0x097a */
   538     0xFFFF ,/* 0x097b */
   539     0xFFFF ,/* 0x097c */
   540     0xFFFF ,/* 0x097d */
   541     0xFFFF ,/* 0x097e */
   542     0xFFFF ,/* 0x097f */
   543 };
   /*
    * ISCII byte -> Unicode lookup, indexed by the raw byte value (0x00-0xFF).
    * GET_MAPPING reads it as toUnicodeTable[sourceChar].
    *
    *  - 0x00-0xA0 map to the code point with the same numeric value.
    *  - Assigned entries in 0xA1-0xFA hold Devanagari-block values (U+09xx);
    *    the one exception is 0xD9, which holds U+200D (ZWJ).
    *  - 0xFFFF (INVALID_CHAR) marks bytes that have no direct mapping in this
    *    table, including ATR (0xEF) and EXT (0xF0).
    *
    * Each row lists eight consecutive entries; the trailing comment gives the
    * index range covered by that row.
    */
   static const uint16_t toUnicodeTable[256]={
       0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, /* 0x00-0x07 */
       0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, /* 0x08-0x0F */
       0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, /* 0x10-0x17 */
       0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, /* 0x18-0x1F */
       0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, /* 0x20-0x27 */
       0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, /* 0x28-0x2F */
       0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, /* 0x30-0x37 */
       0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, /* 0x38-0x3F */
       0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, /* 0x40-0x47 */
       0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, /* 0x48-0x4F */
       0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, /* 0x50-0x57 */
       0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, /* 0x58-0x5F */
       0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, /* 0x60-0x67 */
       0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, /* 0x68-0x6F */
       0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, /* 0x70-0x77 */
       0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, /* 0x78-0x7F */
       0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, /* 0x80-0x87 */
       0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, /* 0x88-0x8F */
       0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, /* 0x90-0x97 */
       0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, /* 0x98-0x9F */
       0x00A0, 0x0901, 0x0902, 0x0903, 0x0905, 0x0906, 0x0907, 0x0908, /* 0xA0-0xA7 */
       0x0909, 0x090A, 0x090B, 0x090E, 0x090F, 0x0910, 0x090D, 0x0912, /* 0xA8-0xAF */
       0x0913, 0x0914, 0x0911, 0x0915, 0x0916, 0x0917, 0x0918, 0x0919, /* 0xB0-0xB7 */
       0x091A, 0x091B, 0x091C, 0x091D, 0x091E, 0x091F, 0x0920, 0x0921, /* 0xB8-0xBF */
       0x0922, 0x0923, 0x0924, 0x0925, 0x0926, 0x0927, 0x0928, 0x0929, /* 0xC0-0xC7 */
       0x092A, 0x092B, 0x092C, 0x092D, 0x092E, 0x092F, 0x095F, 0x0930, /* 0xC8-0xCF */
       0x0931, 0x0932, 0x0933, 0x0934, 0x0935, 0x0936, 0x0937, 0x0938, /* 0xD0-0xD7 */
       0x0939, 0x200D, 0x093E, 0x093F, 0x0940, 0x0941, 0x0942, 0x0943, /* 0xD8-0xDF */
       0x0946, 0x0947, 0x0948, 0x0945, 0x094A, 0x094B, 0x094C, 0x0949, /* 0xE0-0xE7 */
       0x094D, 0x093C, 0x0964, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, /* 0xE8-0xEF */
       0xFFFF, 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, /* 0xF0-0xF7 */
       0x096D, 0x096E, 0x096F, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF  /* 0xF8-0xFF */
   };
   /*
    * Special-case pair table with a header row.
    *
    * Row 0 is a header whose first field is the total number of rows (the
    * header itself included); its second field is unused. Every following row
    * is one pair. The first column fits in a byte and the second is a U+09xx
    * value, so the pairs are presumably { ISCII byte, Unicode code point } --
    * the code that searches this table is outside this excerpt; confirm there.
    */
   static const uint16_t vowelSignESpecialCases[][2]={
       { 2 /* number of rows, header included */, 0      },
       { 0xA4 , 0x0904 },
   };
   /*
    * Special-case pair table with a header row.
    *
    * Row 0 is a header whose first field is the total number of rows (the
    * header itself included); its second field is unused. Every following row
    * is one pair. The first column fits in a byte and the second is a U+09xx
    * value, so the pairs are presumably { ISCII byte, Unicode code point } --
    * the code that searches this table is outside this excerpt; confirm there.
    */
   static const uint16_t nuktaSpecialCases[][2]={
       { 16 /* number of rows, header included */, 0      },
       { 0xA6 , 0x090C },
       { 0xEA , 0x093D },
       { 0xDF , 0x0944 },
       { 0xA1 , 0x0950 },
       { 0xB3 , 0x0958 },
       { 0xB4 , 0x0959 },
       { 0xB5 , 0x095A },
       { 0xBA , 0x095B },
       { 0xBF , 0x095C },
       { 0xC0 , 0x095D },
       { 0xC9 , 0x095E },
       { 0xAA , 0x0960 },
       { 0xA7 , 0x0961 },
       { 0xDB , 0x0962 },
       { 0xDC , 0x0963 },
   };
   /*
    * Writes the 1-, 2- or 3-byte value `targetByteUnit` to `target`, most
    * significant byte first.
    *
    *   args           - conversion arguments; args->source is used for the offset
    *                    and args->converter owns the overflow (charError) buffer
    *   offsets        - int32_t* advanced once per byte stored in target, or NULL
    *   source         - read pointer, already advanced past the current unit
    *   target         - write pointer (lvalue, advanced by the macro)
    *   targetLimit    - one past the last writable byte
    *   targetByteUnit - value to emit: <= 0xFF is one byte, <= 0xFFFF two bytes,
    *                    anything larger three bytes
    *   err            - UErrorCode*; set to U_BUFFER_OVERFLOW_ERROR when at least
    *                    one byte did not fit
    *
    * Bytes that do not fit are appended to args->converter->charErrorBuffer.
    * The recorded offset is (source - args->source - 1); for a three-byte value
    * it is decremented once before being stored, and only when offsets != NULL.
    *
    * In the "target already full" branch the high and middle bytes are emitted
    * only if they are non-zero (tested with & 0xFF0000 / & 0xFF00).
    *
    * The macro expands to a compound statement and evaluates its arguments more
    * than once, so pass plain lvalues without side effects.
    */
   #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){          \
       int32_t offset = (int32_t)((source) - (args)->source - 1);                                       \
       /* write the targetByteUnit to target */                                                         \
       if((target) < (targetLimit)){                                                                    \
           if((targetByteUnit) <= 0xFF){                                                                \
               *(target)++ = (uint8_t)(targetByteUnit);                                                 \
               if(offsets){                                                                             \
                   *((offsets)++) = offset;                                                             \
               }                                                                                        \
           }else{                                                                                       \
               if ((targetByteUnit) > 0xFFFF) {                                                         \
                   *(target)++ = (uint8_t)((targetByteUnit)>>16);                                       \
                   if (offsets) {                                                                       \
                       --offset;                                                                        \
                       *((offsets)++) = offset;                                                         \
                   }                                                                                    \
               }                                                                                        \
               if (!((target) < (targetLimit))) {                                                       \
                   (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =     \
                                   (uint8_t)((targetByteUnit) >> 8);                                    \
                   (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =     \
                                   (uint8_t)(targetByteUnit);                                           \
                   *(err) = U_BUFFER_OVERFLOW_ERROR;                                                    \
               } else {                                                                                 \
                   *(target)++ = (uint8_t)((targetByteUnit)>>8);                                        \
                   if(offsets){                                                                         \
                       *((offsets)++) = offset;                                                         \
                   }                                                                                    \
                   if((target) < (targetLimit)){                                                        \
                       *(target)++ = (uint8_t)(targetByteUnit);                                         \
                       if(offsets){                                                                     \
                           *((offsets)++) = offset;                                                     \
                       }                                                                                \
                   }else{                                                                               \
                       (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] = \
                                   (uint8_t)(targetByteUnit);                                           \
                       *(err) = U_BUFFER_OVERFLOW_ERROR;                                                \
                   }                                                                                    \
               }                                                                                        \
           }                                                                                            \
       }else{                                                                                           \
           if ((targetByteUnit) & 0xFF0000) {                                                           \
               (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =         \
                           (uint8_t)((targetByteUnit) >>16);                                            \
           }                                                                                            \
           if((targetByteUnit) & 0xFF00){                                                               \
               (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =         \
                           (uint8_t)((targetByteUnit) >>8);                                             \
           }                                                                                            \
           (args)->converter->charErrorBuffer[(args)->converter->charErrorBufferLength++] =             \
                           (uint8_t)(targetByteUnit);                                                   \
           *(err) = U_BUFFER_OVERFLOW_ERROR;                                                            \
       }                                                                                                \
   }
   883 /* Rules:
   884  *    Explicit Halant :
   885  *                      <HALANT> + <ZWNJ>
   886  *    Soft Halant :
   887  *                      <HALANT> + <ZWJ>
   888  */
   /*
    * Converts the UTF-16 code units in [args->source, args->sourceLimit) to ISCII
    * bytes written at args->target.
    *
    * For each code unit the loop:
    *   - first emits ATR + the current script's language byte when the unit recorded
    *     in fromUnicodeStatus is LF (and more input is available);
    *   - copies units <= ASCII_END through unchanged, remembering them in fromUnicodeStatus;
    *   - maps ZWNJ / ZWJ depending on whether contextCharFromUnicode is set
    *     (it is set when a HALANT byte has just been written);
    *   - for units inside the Indic range, subtracts the script delta so the unit lands
    *     on the Devanagari block, looks it up in fromUnicodeTable, filters it through
    *     validityTable, and writes ATR + language byte first if the script block changed;
    *   - otherwise reports the unit through *err and stores it in
    *     args->converter->fromUChar32.
    *
    * All output goes through WRITE_TO_TARGET_FROM_U, so bytes that do not fit are stored in
    * the converter's charErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
    *
    * On entry a non-zero args->converter->fromUChar32 is resumed at the getTrail label.
    * Before returning, args->source and args->target are updated to the current positions.
    *
    * Sets *err to U_ILLEGAL_ARGUMENT_ERROR and returns immediately when args->converter is
    * NULL or either limit lies before its start pointer.
    *
    * NOTE(review): the embedded listing numbers below have gaps (e.g. 1000-1001, 1010-1012,
    * 1026-1028); the dropped lines look like blank lines and closing braces. The comments
    * describe only what is visible here.
    */
   890 static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
   891         UConverterFromUnicodeArgs * args, UErrorCode * err) {
   892     const UChar *source = args->source;
   893     const UChar *sourceLimit = args->sourceLimit;
   894     unsigned char *target = (unsigned char *) args->target;
   895     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   896     int32_t* offsets = args->offsets;
   897     uint32_t targetByteUnit = 0x0000;
   898     UChar32 sourceChar = 0x0000;
   899     UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
   900     UConverterDataISCII *converterData;
   901     uint16_t newDelta=0;
   902     uint16_t range = 0;
   903     UBool deltaChanged = FALSE;
   905     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
   906         *err = U_ILLEGAL_ARGUMENT_ERROR;
   907         return;
   908     }
   909     /* initialize data */
   910     converterData=(UConverterDataISCII*)args->converter->extraInfo;
   911     newDelta=converterData->currentDeltaFromUnicode;
           /* range indexes lookupInitialData; it is the script delta divided by DELTA */
   912     range = (uint16_t)(newDelta/DELTA);
           /* a code unit saved by an earlier call: resume at the trail-surrogate check */
   914     if ((sourceChar = args->converter->fromUChar32)!=0) {
   915         goto getTrail;
   916     }
   918     /*writing the char to the output stream */
   919     while (source < sourceLimit) {
   920         /* Write the language code following LF only if LF is not the last character. */
   921         if (args->converter->fromUnicodeStatus == LF) {
                   /* two-byte unit: ATR in the high byte, language code in the low byte */
   922             targetByteUnit = ATR<<8;
   923             targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
   924             args->converter->fromUnicodeStatus = 0x0000;
   925             /* now append ATR and language code */
   926             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
   927             if (U_FAILURE(*err)) {
   928                 break;
   929             }
   930         }
   932         sourceChar = *source++;
               /* remember the context as it was before this unit changes it */
   933         tempContextFromUnicode = converterData->contextCharFromUnicode;
   935         targetByteUnit = missingCharMarker;
   937         /*check if input is in ASCII and C0 control codes range*/
   938         if (sourceChar <= ASCII_END) {
                   /* recorded so the LF test at the top of the loop can see it next time round */
   939             args->converter->fromUnicodeStatus = sourceChar;
   940             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
   941             if (U_FAILURE(*err)) {
   942                 break;
   943             }
   944             continue;
   945         }
   946         switch (sourceChar) {
   947         case ZWNJ:
   948             /* contextChar has HALANT */
   949             if (converterData->contextCharFromUnicode) {
                       /* HALANT + ZWNJ: a second HALANT byte is written (explicit halant) */
   950                 converterData->contextCharFromUnicode = 0x00;
   951                 targetByteUnit = ISCII_HALANT;
   952             } else {
   953                 /* consume ZWNJ and continue */
   954                 converterData->contextCharFromUnicode = 0x00;
   955                 continue;
   956             }
   957             break;
   958         case ZWJ:
   959             /* contextChar has HALANT */
   960             if (converterData->contextCharFromUnicode) {
                       /* HALANT + ZWJ: a NUKTA byte is written after the HALANT (soft halant) */
   961                 targetByteUnit = ISCII_NUKTA;
   962             } else {
   963                 targetByteUnit =ISCII_INV;
   964             }
   965             converterData->contextCharFromUnicode = 0x00;
   966             break;
   967         default:
   968             /* is the sourceChar in the INDIC_RANGE? */
   969             if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
   970                 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
   971                  * does not include these codepoints in all Northern scripts we need to
   972                  * filter them out
   973                  */
   974                 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
   975                     /* find out to which block the sourceChar belongs*/
   976                     range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
   977                     newDelta =(uint16_t)(range*DELTA);
   979                     /* Now are we in the same block as the previous? */
   980                     if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
   981                         converterData->currentDeltaFromUnicode = newDelta;
   982                         converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
   983                         deltaChanged =TRUE;
   984                         converterData->isFirstBuffer=FALSE;
   985                     }
   987                     if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { 
   988                         if (sourceChar == PNJ_TIPPI) {
   989                             /* Make sure Tippi is converted to Bindi. */
   990                             sourceChar = PNJ_BINDI;
   991                         } else if (sourceChar == PNJ_ADHAK) {
   992                             /* This is for consonant cluster handling. */
   993                             converterData->contextCharFromUnicode = PNJ_ADHAK;
   994                         }
   996                     }
   997                     /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
   998                     /* now subtract the new delta from sourceChar*/
   999                     sourceChar -= converterData->currentDeltaFromUnicode;
  1002                 /* get the target byte unit */
                       /* both tables are indexed with the low 8 bits of the normalized code unit */
  1003                 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
  1005                 /* is the code point valid in current script? */
  1006                 if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
  1007                     /* Vocallic RR is assigned in ISCII Telugu and Unicode */
  1008                     if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
  1009                         targetByteUnit=missingCharMarker;
  1013                 if (deltaChanged) {
  1014                     /* we are in a script block which is different than
  1015                      * previous sourceChar's script block write ATR and language codes
  1016                      */
  1017                     uint32_t temp=0;
  1018                     temp =(uint16_t)(ATR<<8);
  1019                     temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
  1020                     /* reset */
  1021                     deltaChanged=FALSE;
  1022                     /* now append ATR and language code */
  1023                     WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
                           /* NOTE(review): this break is lexically inside the switch's default case,
                            * so it appears to leave the switch rather than the while loop; the code
                            * after the switch then runs with *err already set. Confirm that this is
                            * intended (e.g. for the Adhak path, which skips its `continue` here). */
  1024                     if (U_FAILURE(*err)) {
  1025                         break;
                       /* Adhak itself produces no output; it was stored in contextCharFromUnicode above */
  1029                 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
  1030                     continue;
  1033             /* reset context char */
  1034             converterData->contextCharFromUnicode = 0x00;
  1035             break;
               /* tempContextFromUnicode is the context captured before this unit was processed */
  1037         if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
  1038             /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
  1039             /* reset context char */
  1040             converterData->contextCharFromUnicode = 0x0000;
                   /* three-byte unit: consonant, ISCII_HALANT, consonant */
  1041             targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
  1042             /* write targetByteUnit to target */
  1043             WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
  1044             if (U_FAILURE(*err)) {
  1045                 break;
  1047         } else if (targetByteUnit != missingCharMarker) {
                   /* remember a HALANT so a following ZWNJ/ZWJ can be interpreted */
  1048             if (targetByteUnit==ISCII_HALANT) {
  1049                 converterData->contextCharFromUnicode = (UChar)targetByteUnit;
  1051             /* write targetByteUnit to target*/
  1052             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
  1053             if (U_FAILURE(*err)) {
  1054                 break;
  1056         } else {
  1057             /* oops.. the code point is unassigned */
  1058             /*check if the char is a First surrogate*/
  1059             if (U16_IS_SURROGATE(sourceChar)) {
  1060                 if (U16_IS_SURROGATE_LEAD(sourceChar)) {
  1061 getTrail:
  1062                     /*look ahead to find the trail surrogate*/
  1063                     if (source < sourceLimit) {
  1064                         /* test the following code unit */
  1065                         UChar trail= (*source);
  1066                         if (U16_IS_TRAIL(trail)) {
  1067                             source++;
  1068                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                                   /* a complete supplementary code point is reported as U_INVALID_CHAR_FOUND */
  1069                             *err =U_INVALID_CHAR_FOUND;
  1070                             /* convert this surrogate code point */
  1071                             /* exit this condition tree */
  1072                         } else {
  1073                             /* this is an unmatched lead code unit (1st surrogate) */
  1074                             /* callback(illegal) */
  1075                             *err=U_ILLEGAL_CHAR_FOUND;
  1077                     } else {
  1078                         /* no more input */
                               /* the lead surrogate is kept in fromUChar32 (set below) for the next call */
  1079                         *err = U_ZERO_ERROR;
  1081                 } else {
  1082                     /* this is an unmatched trail code unit (2nd surrogate) */
  1083                     /* callback(illegal) */
  1084                     *err=U_ILLEGAL_CHAR_FOUND;
  1086             } else {
  1087                 /* callback(unassigned) for a BMP code point */
  1088                 *err = U_INVALID_CHAR_FOUND;
                   /* hand the offending (or pending) code point back through the converter state */
  1091             args->converter->fromUChar32=sourceChar;
  1092             break;
  1094     }/* end while(mySourceIndex<mySourceLength) */
  1096     /*save the state and return */
  1097     args->source = source;
  1098     args->target = (char*)target;
  1101 static const uint16_t lookupTable[][2]={
  1102     { ZERO,       ZERO     },     /*DEFALT*/
  1103     { ZERO,       ZERO     },     /*ROMAN*/
  1104     { DEVANAGARI, DEV_MASK },
  1105     { BENGALI,    BNG_MASK },
  1106     { TAMIL,      TML_MASK },
  1107     { TELUGU,     KND_MASK },
  1108     { BENGALI,    BNG_MASK },
  1109     { ORIYA,      ORI_MASK },
  1110     { KANNADA,    KND_MASK },
  1111     { MALAYALAM,  MLM_MASK },
  1112     { GUJARATI,   GJR_MASK },
  1113     { GURMUKHI,   PNJ_MASK }
  1114 };
  /*
   * Stores one UTF-16 code unit produced by the ISCII -> Unicode converter.
   *
   *   args          - conversion arguments; supplies targetLimit and the
   *                   converter that owns the UChar overflow buffer
   *   source        - not used by the macro (kept for the existing call sites)
   *   target        - UChar write pointer (lvalue, advanced by the macro)
   *   offsets       - int32_t* lvalue advanced with target, or NULL
   *   offset        - value stored in *offsets for this code unit
   *   targetUniChar - lvalue holding the code unit; it is MODIFIED IN PLACE:
   *                   `delta` is added unless the value is <= ASCII_END or is
   *                   ZWJ, ZWNJ, DANDA or DOUBLE_DANDA
   *   delta         - script offset, truncated to uint16_t before the addition
   *   err           - UErrorCode*; set to U_BUFFER_OVERFLOW_ERROR when the
   *                   target is full
   *
   * If target has reached args->targetLimit the code unit is appended to
   * args->converter->UCharErrorBuffer instead, and no offset is recorded.
   *
   * The macro expands to a compound statement and evaluates its arguments more
   * than once.
   */
  #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){    \
      /* add offset to current Indic Block */                                                   \
      if((targetUniChar)>ASCII_END &&                                                           \
             (targetUniChar) != ZWJ &&                                                          \
             (targetUniChar) != ZWNJ &&                                                         \
             (targetUniChar) != DANDA &&                                                        \
             (targetUniChar) != DOUBLE_DANDA){                                                  \
             (targetUniChar)+=(uint16_t)(delta);                                                \
      }                                                                                         \
      /* now write the targetUniChar */                                                         \
      if((target)<(args)->targetLimit){                                                         \
          *(target)++ = (UChar)(targetUniChar);                                                 \
          if(offsets){                                                                          \
              *((offsets)++) = (int32_t)(offset);                                               \
          }                                                                                     \
      }else{                                                                                    \
          (args)->converter->UCharErrorBuffer[(args)->converter->UCharErrorBufferLength++] =    \
              (UChar)(targetUniChar);                                                           \
          *(err) = U_BUFFER_OVERFLOW_ERROR;                                                     \
      }                                                                                         \
  }
  /*
   * Looks up the Unicode value for one ISCII byte.
   *
   *   sourceChar    - the ISCII byte; used directly as the index into toUnicodeTable
   *   targetUniChar - lvalue that receives the result
   *   data          - pointer to the converter state; currentMaskToUnicode and
   *                   currentDeltaToUnicode are read
   *
   * For bytes above ASCII_END the mapped value is checked against validityTable
   * (indexed with the low 7 bits of the mapped value). If none of the bits in
   * data->currentMaskToUnicode are set for it, the result is replaced by
   * missingCharMarker -- except for VOCALLIC_RR while the current delta is
   * TELUGU_DELTA, which is kept.
   *
   * The macro expands to a compound statement and evaluates its arguments more
   * than once.
   */
  #define GET_MAPPING(sourceChar,targetUniChar,data){                                           \
      (targetUniChar) = toUnicodeTable[(sourceChar)] ;                                          \
      /* is the code point valid in current script? */                                          \
      if((sourceChar)> ASCII_END &&                                                             \
              (validityTable[((targetUniChar) & 0x7F)] & (data)->currentMaskToUnicode)==0){     \
          /* Vocallic RR is assigned in ISCII Telugu and Unicode */                             \
          if((data)->currentDeltaToUnicode!=(TELUGU_DELTA) ||                                   \
                      (targetUniChar)!=VOCALLIC_RR){                                            \
              (targetUniChar)=missingCharMarker;                                                \
          }                                                                                     \
      }                                                                                         \
  }
  1152 /***********
  1153  *  Rules for ISCII to Unicode converter
  1154  *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
  1155  *  which has both precomposed and decomposed forms of characters,
  1156  *  pre-context and post-context need to be considered.
  1158  *  Post context
  1159  *  i)  ATR : Attribute code is used to declare the font and script switching.
  1160  *      Currently we only switch scripts; font codes are consumed without generating an error
  1161  *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
  1162  *      obsolete characters
  1163  *  Pre context
  1164  *  i)  Halant: if preceded by a halant then it is an explicit halant
  1165  *  ii) Nukta :
  1166  *       a) if preceded by a halant then it is a soft halant
  1167  *       b) if preceded by specific consonants and the ligatures have pre-composed
  1168  *          characters in Unicode then convert to pre-composed characters
  1169  *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
  1171  */
  1173 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
  1174     const char *source = ( char *) args->source;
  1175     UChar *target = args->target;
  1176     const char *sourceLimit = args->sourceLimit;
  1177     const UChar* targetLimit = args->targetLimit;
  1178     uint32_t targetUniChar = 0x0000;
  1179     uint8_t sourceChar = 0x0000;
  1180     UConverterDataISCII* data;
  1181     UChar32* toUnicodeStatus=NULL;
  1182     UChar32 tempTargetUniChar = 0x0000;
  1183     UChar* contextCharToUnicode= NULL;
  1184     UBool found;
  1185     int i; 
  1186     int offset = 0;
  1188     if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
  1189         *err = U_ILLEGAL_ARGUMENT_ERROR;
  1190         return;
  1193     data = (UConverterDataISCII*)(args->converter->extraInfo);
  1194     contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
  1195     toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
  1197     while (U_SUCCESS(*err) && source<sourceLimit) {
  1199         targetUniChar = missingCharMarker;
  1201         if (target < targetLimit) {
  1202             sourceChar = (unsigned char)*(source)++;
  1204             /* look at the post-context preform special processing */
  1205             if (*contextCharToUnicode==ATR) {
  1207                 /* If we have ATR in *contextCharToUnicode then we need to change our
  1208                  * state to the Indic Script specified by sourceChar
  1209                  */
  1211                 /* check if the sourceChar is supported script range*/
  1212                 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
  1213                     data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
  1214                     data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
  1215                 } else if (sourceChar==DEF) {
  1216                     /* switch back to default */
  1217                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
  1218                     data->currentMaskToUnicode = data->defMaskToUnicode;
  1219                 } else {
  1220                     if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
  1221                         /* these are display codes consume and continue */
  1222                     } else {
  1223                         *err =U_ILLEGAL_CHAR_FOUND;
  1224                         /* reset */
  1225                         *contextCharToUnicode=NO_CHAR_MARKER;
  1226                         goto CALLBACK;
  1230                 /* reset */
  1231                 *contextCharToUnicode=NO_CHAR_MARKER;
  1233                 continue;
  1235             } else if (*contextCharToUnicode==EXT) {
  1236                 /* check if sourceChar is in 0xA1-0xEE range */
  1237                 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
  1238                     /* We currently support only Anudatta and Devanagari abbreviation sign */
  1239                     if (sourceChar==0xBF || sourceChar == 0xB8) {
  1240                         targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
  1242                         /* find out if the mapping is valid in this state */
  1243                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
  1244                             *contextCharToUnicode= NO_CHAR_MARKER;
  1246                             /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1247                             if (data->prevToUnicodeStatus) {
  1248                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1249                                 data->prevToUnicodeStatus = 0x0000;
  1251                             /* write to target */
  1252                             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
  1254                             continue;
  1257                     /* byte unit is unassigned */
  1258                     targetUniChar = missingCharMarker;
  1259                     *err= U_INVALID_CHAR_FOUND;
  1260                 } else {
  1261                     /* only 0xA1 - 0xEE are legal after EXT char */
  1262                     *contextCharToUnicode= NO_CHAR_MARKER;
  1263                     *err = U_ILLEGAL_CHAR_FOUND;
  1265                 goto CALLBACK;
  1266             } else if (*contextCharToUnicode==ISCII_INV) {
  1267                 if (sourceChar==ISCII_HALANT) {
  1268                     targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
  1269                 } else {
  1270                     targetUniChar = ZWJ;
  1273                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1274                 if (data->prevToUnicodeStatus) {
  1275                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1276                     data->prevToUnicodeStatus = 0x0000;
  1278                 /* write to target */
  1279                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
  1280                 /* reset */
  1281                 *contextCharToUnicode=NO_CHAR_MARKER;
  1284             /* look at the pre-context and perform special processing */
  1285             switch (sourceChar) {
  1286             case ISCII_INV:
  1287             case EXT: /*falls through*/
  1288             case ATR:
  1289                 *contextCharToUnicode = (UChar)sourceChar;
  1291                 if (*toUnicodeStatus != missingCharMarker) {
  1292                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1293                     if (data->prevToUnicodeStatus) {
  1294                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1295                         data->prevToUnicodeStatus = 0x0000;
  1297                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
  1298                     *toUnicodeStatus = missingCharMarker;
  1300                 continue;
  1301             case ISCII_DANDA:
  1302                 /* handle double danda*/
  1303                 if (*contextCharToUnicode== ISCII_DANDA) {
  1304                     targetUniChar = DOUBLE_DANDA;
  1305                     /* clear the context */
  1306                     *contextCharToUnicode = NO_CHAR_MARKER;
  1307                     *toUnicodeStatus = missingCharMarker;
  1308                 } else {
  1309                     GET_MAPPING(sourceChar,targetUniChar,data);
  1310                     *contextCharToUnicode = sourceChar;
  1312                 break;
  1313             case ISCII_HALANT:
  1314                 /* handle explicit halant */
  1315                 if (*contextCharToUnicode == ISCII_HALANT) {
  1316                     targetUniChar = ZWNJ;
  1317                     /* clear the context */
  1318                     *contextCharToUnicode = NO_CHAR_MARKER;
  1319                 } else {
  1320                     GET_MAPPING(sourceChar,targetUniChar,data);
  1321                     *contextCharToUnicode = sourceChar;
  1323                 break;
  1324             case 0x0A:
  1325                 /* fall through */
  1326             case 0x0D:
  1327                 data->resetToDefaultToUnicode = TRUE;
  1328                 GET_MAPPING(sourceChar,targetUniChar,data)
  1330                 *contextCharToUnicode = sourceChar;
  1331                 break;
  1333             case ISCII_VOWEL_SIGN_E:
  1334                 i=1;
  1335                 found=FALSE;
  1336                 for (; i<vowelSignESpecialCases[0][0]; i++) {
  1337                     U_ASSERT(i<sizeof(vowelSignESpecialCases)/sizeof(vowelSignESpecialCases[0]));
  1338                     if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
  1339                         targetUniChar=vowelSignESpecialCases[i][1];
  1340                         found=TRUE;
  1341                         break;
  1344                 if (found) {
  1345                     /* find out if the mapping is valid in this state */
  1346                     if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
  1347                         /*targetUniChar += data->currentDeltaToUnicode ;*/
  1348                         *contextCharToUnicode= NO_CHAR_MARKER;
  1349                         *toUnicodeStatus = missingCharMarker;
  1350                         break;
  1353                 GET_MAPPING(sourceChar,targetUniChar,data);
  1354                 *contextCharToUnicode = sourceChar;
  1355                 break;
  1357             case ISCII_NUKTA:
  1358                 /* handle soft halant */
  1359                 if (*contextCharToUnicode == ISCII_HALANT) {
  1360                     targetUniChar = ZWJ;
  1361                     /* clear the context */
  1362                     *contextCharToUnicode = NO_CHAR_MARKER;
  1363                     break;
  1364                 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
  1365                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1366                     if (data->prevToUnicodeStatus) {
  1367                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1368                         data->prevToUnicodeStatus = 0x0000;
  1370                     /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
  1371                      * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
  1372                      */
  1373                     targetUniChar = PNJ_RRA;
  1374                     WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
  1375                     if (U_SUCCESS(*err)) {
  1376                         targetUniChar = PNJ_SIGN_VIRAMA;
  1377                         WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
  1378                         if (U_SUCCESS(*err)) {
  1379                             targetUniChar = PNJ_HA;
  1380                             WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
  1381                         } else {
  1382                             args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
  1384                     } else {
  1385                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
  1386                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
  1388                     *toUnicodeStatus = missingCharMarker;
  1389                     data->contextCharToUnicode = NO_CHAR_MARKER;
  1390                     continue;
  1391                 } else {
  1392                     /* try to handle <CHAR> + ISCII_NUKTA special mappings */
  1393                     i=1;
  1394                     found =FALSE;
  1395                     for (; i<nuktaSpecialCases[0][0]; i++) {
  1396                         if (nuktaSpecialCases[i][0]==(uint8_t)
  1397                                 *contextCharToUnicode) {
  1398                             targetUniChar=nuktaSpecialCases[i][1];
  1399                             found =TRUE;
  1400                             break;
  1403                     if (found) {
  1404                         /* find out if the mapping is valid in this state */
  1405                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
  1406                             /*targetUniChar += data->currentDeltaToUnicode ;*/
  1407                             *contextCharToUnicode= NO_CHAR_MARKER;
  1408                             *toUnicodeStatus = missingCharMarker;
  1409                             if (data->currentDeltaToUnicode == PNJ_DELTA) {
  1410                                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1411                                 if (data->prevToUnicodeStatus) {
  1412                                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1413                                     data->prevToUnicodeStatus = 0x0000;
  1415                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
  1416                                 continue;
  1418                             break;
  1420                         /* else fall through to default */
  1422                     /* else fall through to default */
  1424             default:GET_MAPPING(sourceChar,targetUniChar,data)
  1426                 *contextCharToUnicode = sourceChar;
  1427                 break;
  1430             if (*toUnicodeStatus != missingCharMarker) {
  1431                 /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
  1432                 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
  1433                         (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
  1434                     /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
  1435                     offset = (int)(source-args->source - 3);
  1436                     tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
  1437                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
  1438                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
  1439                     data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
  1440                     *toUnicodeStatus = missingCharMarker;
  1441                     continue;
  1442                 } else {
  1443                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
  1444                     if (data->prevToUnicodeStatus) {
  1445                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
  1446                         data->prevToUnicodeStatus = 0x0000;
  1448                     /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 
  1449                      * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
  1450                      */
  1451                     if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
  1452                         targetUniChar = PNJ_TIPPI - PNJ_DELTA;
  1453                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
  1454                     } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
  1455                         /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
  1456                         data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
  1457                     } else {
  1458                         /* write the previously mapped codepoint */
  1459                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
  1462                 *toUnicodeStatus = missingCharMarker;
  1465             if (targetUniChar != missingCharMarker) {
  1466                 /* now save the targetUniChar for delayed write */
  1467                 *toUnicodeStatus = (UChar) targetUniChar;
  1468                 if (data->resetToDefaultToUnicode==TRUE) {
  1469                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
  1470                     data->currentMaskToUnicode = data->defMaskToUnicode;
  1471                     data->resetToDefaultToUnicode=FALSE;
  1473             } else {
  1475                 /* we reach here only if targetUniChar == missingCharMarker
  1476                  * so assign codes to reason and err
  1477                  */
  1478                 *err = U_INVALID_CHAR_FOUND;
  1479 CALLBACK:
  1480                 args->converter->toUBytes[0] = (uint8_t) sourceChar;
  1481                 args->converter->toULength = 1;
  1482                 break;
  1485         } else {
  1486             *err =U_BUFFER_OVERFLOW_ERROR;
  1487             break;
  1491     if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
  1492         /* end of the input stream */
  1493         UConverter *cnv = args->converter;
  1495         if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
  1496             /* set toUBytes[] */
  1497             cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
  1498             cnv->toULength = 1;
  1500             /* avoid looping on truncated sequences */
  1501             *contextCharToUnicode = NO_CHAR_MARKER;
  1502         } else {
  1503             cnv->toULength = 0;
  1506         if (*toUnicodeStatus != missingCharMarker) {
  1507             /* output a remaining target character */
  1508             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
  1509             *toUnicodeStatus = missingCharMarker;
  1513     args->target = target;
  1514     args->source = source;
  1517 /* structure for SafeClone calculations */
  /*
   * Layout of the buffer that _ISCII_SafeClone() clones into: sizeof this
   * struct is the size reported for a preflight request, and the
   * caller-supplied buffer is cast to a pointer to this struct.
   */
  1518 struct cloneISCIIStruct {
  1519     UConverter cnv; /* the clone itself; _ISCII_SafeClone() returns the address of this member */
  1520     UConverterDataISCII mydata; /* receives a copy of the source converter's extraInfo; cnv.extraInfo is pointed here */
  1521 };
  1523 static UConverter *
  1524 _ISCII_SafeClone(const UConverter *cnv,
  1525               void *stackBuffer,
  1526               int32_t *pBufferSize,
  1527               UErrorCode *status)
  1529     struct cloneISCIIStruct * localClone;
  1530     int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
  1532     if (U_FAILURE(*status)) {
  1533         return 0;
  1536     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
  1537         *pBufferSize = bufferSizeNeeded;
  1538         return 0;
  1541     localClone = (struct cloneISCIIStruct *)stackBuffer;
  1542     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
  1544     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
  1545     localClone->cnv.extraInfo = &localClone->mydata;
  1546     localClone->cnv.isExtraLocal = TRUE;
  1548     return &localClone->cnv;
  1551 static void
  1552 _ISCIIGetUnicodeSet(const UConverter *cnv,
  1553                     const USetAdder *sa,
  1554                     UConverterUnicodeSet which,
  1555                     UErrorCode *pErrorCode)
  1557     int32_t idx, script;
  1558     uint8_t mask;
  1560     /* Since all ISCII versions allow switching to other ISCII
  1561     scripts, we add all roundtrippable characters to this set. */
  1562     sa->addRange(sa->set, 0, ASCII_END);
  1563     for (script = DEVANAGARI; script <= MALAYALAM; script++) {
  1564         mask = (uint8_t)(lookupInitialData[script].maskEnum);
  1565         for (idx = 0; idx < DELTA; idx++) {
  1566             /* added check for TELUGU character */
  1567             if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
  1568                 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
  1572     sa->add(sa->set, DANDA);
  1573     sa->add(sa->set, DOUBLE_DANDA);
  1574     sa->add(sa->set, ZWNJ);
  1575     sa->add(sa->set, ZWJ);
  /*
   * Implementation table for the ISCII converter; _ISCIIData points at it.
   * The initializers are positional, so their order must match the member
   * order of UConverterImpl, which is declared outside this file.
   * NOTE(review): the slot descriptions below are inferred from the names of
   * the registered functions -- confirm against the UConverterImpl declaration.
   * NOTE(review): the embedded listing numbers have gaps (1579, 1581, ...);
   * presumably those were blank lines -- verify that no initializer was lost.
   */
  1578 static const UConverterImpl _ISCIIImpl={
  1580     UCNV_ISCII, /* converter type tag */
  1582     NULL, /* two unused slots -- presumably load/unload hooks; confirm */
  1583     NULL,
  1585     _ISCIIOpen, /* open / close / reset hooks, going by their names */
  1586     _ISCIIClose,
  1587     _ISCIIReset,
  /* the same routine is registered in two consecutive slots for each direction */
  1589     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
  1590     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
  1591     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
  1592     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
  1593     NULL, /* unused slot */
  1595     NULL, /* unused slot */
  1596     _ISCIIgetName, /* name hook */
  1597     NULL, /* unused slot */
  1598     _ISCII_SafeClone, /* clone hook defined in this file */
  1599     _ISCIIGetUnicodeSet /* Unicode-set hook defined in this file */
  1600 };
  /*
   * Static description of the ISCII converter; _ISCIIData points at it.
   * The initializers are positional and must follow the member order of
   * UConverterStaticData, which is declared outside this file.
   * NOTE(review): the per-field meanings below are assumptions based on the
   * values only -- confirm against the UConverterStaticData declaration.
   */
  1602 static const UConverterStaticData _ISCIIStaticData={
  1603     sizeof(UConverterStaticData), /* size of this structure */
  1604         "ISCII", /* converter name */
  1605          0,
  1606          UCNV_IBM,
  1607          UCNV_ISCII, /* same type tag as the first entry of _ISCIIImpl */
  1608          1, /* presumably minimum bytes per character -- confirm */
  1609          4, /* presumably maximum bytes per character -- confirm */
  1610         { 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence -- confirm */
  1611         0x1, /* presumably the length of that sequence -- confirm */
  1612         FALSE,
  1613         FALSE,
  1614         0x0,
  1615         0x0,
  1616         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
  1618 };
  /*
   * Shared-data record for the ISCII converter. It is the only definition
   * in this part of the file without internal linkage, and it ties together
   * the static description (_ISCIIStaticData) and the implementation table
   * (_ISCIIImpl).  The initializers are positional and must follow the
   * member order of UConverterSharedData, which is declared outside this file.
   * NOTE(review): the embedded listing numbers skip 1628 just before the
   * closing brace; if that line held a trailing initializer, the member is
   * now zero-initialized implicitly -- verify against the upstream file.
   */
  1620 const UConverterSharedData _ISCIIData={
  1621     sizeof(UConverterSharedData), /* size of this structure */
  1622         ~((uint32_t) 0), /* all bits set */
  1623         NULL,
  1624         NULL,
  1625         &_ISCIIStaticData, /* static converter description defined above */
  1626         FALSE,
  1627         &_ISCIIImpl, /* function table defined above */
  1629 };
  1631 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

mercurial