The Tor Browser: intl/icu/source/common/unames.cpp@fc2d59ddac77

     1 /*

     2 ******************************************************************************

3 *

     4 *   Copyright (C) 1999-2013, International Business Machines

     5 *   Corporation and others.  All Rights Reserved.

6 *

     7 ******************************************************************************

     8 *   file name:  unames.c

     9 *   encoding:   US-ASCII

    10 *   tab size:   8 (not used)

    11 *   indentation:4

    12 *

    13 *   created on: 1999oct04

    14 *   created by: Markus W. Scherer

    15 */

    17 #include "unicode/utypes.h"

    18 #include "unicode/putil.h"

    19 #include "unicode/uchar.h"

    20 #include "unicode/udata.h"

    21 #include "unicode/utf.h"

    22 #include "unicode/utf16.h"

    23 #include "uassert.h"

    24 #include "ustr_imp.h"

    25 #include "umutex.h"

    26 #include "cmemory.h"

    27 #include "cstring.h"

    28 #include "ucln_cmn.h"

    29 #include "udataswp.h"

    30 #include "uprops.h"

    32 U_NAMESPACE_BEGIN

    34 /* prototypes ------------------------------------------------------------- */

    36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

    38 static const char DATA_NAME[] = "unames";

    39 static const char DATA_TYPE[] = "icu";

    41 #define GROUP_SHIFT 5

    42 #define LINES_PER_GROUP (1L<<GROUP_SHIFT)

    43 #define GROUP_MASK (LINES_PER_GROUP-1)

    45 /*

    46  * This struct was replaced by explicitly accessing equivalent

    47  * fields from triples of uint16_t.

    48  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,

    49  * which broke the assumption that sizeof(Group)==6 and that the ++ operator

    50  * would advance by 6 bytes (3 uint16_t).

    51  *

    52  * We can't just change the data structure because it's loaded from a data file,

    53  * and we don't want to make it less compact, so we changed the access code.

    54  *

    55  * For details see ICU tickets 6331 and 6008.

    56 typedef struct {

    57     uint16_t groupMSB,

    58              offsetHigh, offsetLow; / * avoid padding * /

    59 } Group;

    60  */

    61 enum {

    62     GROUP_MSB,

    63     GROUP_OFFSET_HIGH,

    64     GROUP_OFFSET_LOW,

    65     GROUP_LENGTH

    66 };

    68 /*

    69  * Get the 32-bit group offset.

    70  * @param group (const uint16_t *) pointer to a Group triple of uint16_t

    71  * @return group offset (int32_t)

    72  */

    73 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])

    75 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)

    76 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)

    78 typedef struct {

    79     uint32_t start, end;

    80     uint8_t type, variant;

    81     uint16_t size;

    82 } AlgorithmicRange;

    84 typedef struct {

    85     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;

    86 } UCharNames;

    88 /*

    89  * Get the groups table from a UCharNames struct.

    90  * The groups table consists of one uint16_t groupCount followed by

    91  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH

    92  * and the comment for the old struct Group above.

    93  *

    94  * @param names (const UCharNames *) pointer to the UCharNames indexes

    95  * @return (const uint16_t *) pointer to the groups table

    96  */

    97 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)

    99 typedef struct {

   100     const char *otherName;

   101     UChar32 code;

   102 } FindName;

   104 #define DO_FIND_NAME NULL

   106 static UDataMemory *uCharNamesData=NULL;

   107 static UCharNames *uCharNames=NULL;

   108 static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;

   110 /*

   111  * Maximum length of character names (regular & 1.0).

   112  */

   113 static int32_t gMaxNameLength=0;

   115 /*

   116  * Set of chars used in character names (regular & 1.0).

   117  * Chars are platform-dependent (can be EBCDIC).

   118  */

   119 static uint32_t gNameSet[8]={ 0 };

   121 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT

   122 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1

   123 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2

   125 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)

   127 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {

   128     "unassigned",

   129     "uppercase letter",

   130     "lowercase letter",

   131     "titlecase letter",

   132     "modifier letter",

   133     "other letter",

   134     "non spacing mark",

   135     "enclosing mark",

   136     "combining spacing mark",

   137     "decimal digit number",

   138     "letter number",

   139     "other number",

   140     "space separator",

   141     "line separator",

   142     "paragraph separator",

   143     "control",

   144     "format",

   145     "private use area",

   146     "surrogate",

   147     "dash punctuation",

   148     "start punctuation",

   149     "end punctuation",

   150     "connector punctuation",

   151     "other punctuation",

   152     "math symbol",

   153     "currency symbol",

   154     "modifier symbol",

   155     "other symbol",

   156     "initial punctuation",

   157     "final punctuation",

   158     "noncharacter",

   159     "lead surrogate",

   160     "trail surrogate"

   161 };

   163 /* implementation ----------------------------------------------------------- */

   165 static UBool U_CALLCONV unames_cleanup(void)

   166 {

   167     if(uCharNamesData) {

   168         udata_close(uCharNamesData);

   169         uCharNamesData = NULL;

   170     }

   171     if(uCharNames) {

   172         uCharNames = NULL;

   173     }

   174     gCharNamesInitOnce.reset();

   175     gMaxNameLength=0;

   176     return TRUE;

   177 }

   179 static UBool U_CALLCONV

   180 isAcceptable(void * /*context*/,

   181              const char * /*type*/, const char * /*name*/,

   182              const UDataInfo *pInfo) {

   183     return (UBool)(

   184         pInfo->size>=20 &&

   185         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&

   186         pInfo->charsetFamily==U_CHARSET_FAMILY &&

   187         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */

   188         pInfo->dataFormat[1]==0x6e &&

   189         pInfo->dataFormat[2]==0x61 &&

   190         pInfo->dataFormat[3]==0x6d &&

   191         pInfo->formatVersion[0]==1);

   192 }

   194 static void U_CALLCONV

   195 loadCharNames(UErrorCode &status) {

   196     U_ASSERT(uCharNamesData == NULL);

   197     U_ASSERT(uCharNames == NULL);

   199     uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);

   200     if(U_FAILURE(status)) {

   201         uCharNamesData = NULL;

   202     } else {

   203         uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);

   204     }

   205     ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);

   206 }

   209 static UBool

   210 isDataLoaded(UErrorCode *pErrorCode) {

   211     umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);

   212     return U_SUCCESS(*pErrorCode);

   213 }

   215 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \

   216     if((bufferLength)>0) { \

   217         *(buffer)++=c; \

   218         --(bufferLength); \

   219     } \

   220     ++(bufferPos); \

   221 }

   223 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT

   225 /*

   226  * Important: expandName() and compareName() are almost the same -

   227  * apply fixes to both.

   228  *

   229  * UnicodeData.txt uses ';' as a field separator, so no

   230  * field can contain ';' as part of its contents.

   231  * In unames.dat, it is marked as token[';']==-1 only if the

   232  * semicolon is used in the data file - which is iff we

   233  * have Unicode 1.0 names or ISO comments or aliases.

   234  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases

   235  * although we know that it will never be part of a name.

   236  */

   237 static uint16_t

   238 expandName(UCharNames *names,

   239            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,

   240            char *buffer, uint16_t bufferLength) {

   241     uint16_t *tokens=(uint16_t *)names+8;

   242     uint16_t token, tokenCount=*tokens++, bufferPos=0;

   243     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;

   244     uint8_t c;

   246     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

   247         /*

   248          * skip the modern name if it is not requested _and_

   249          * if the semicolon byte value is a character, not a token number

   250          */

   251         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {

   252             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;

   253             do {

   254                 while(nameLength>0) {

   255                     --nameLength;

   256                     if(*name++==';') {

   257                         break;

   258                     }

   259                 }

   260             } while(--fieldIndex>0);

   261         } else {

   262             /*

   263              * the semicolon byte value is a token number, therefore

   264              * only modern names are stored in unames.dat and there is no

   265              * such requested alternate name here

   266              */

   267             nameLength=0;

   268         }

   269     }

   271     /* write each letter directly, and write a token word per token */

   272     while(nameLength>0) {

   273         --nameLength;

   274         c=*name++;

   276         if(c>=tokenCount) {

   277             if(c!=';') {

   278                 /* implicit letter */

   279                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);

   280             } else {

   281                 /* finished */

   282                 break;

   283             }

   284         } else {

   285             token=tokens[c];

   286             if(token==(uint16_t)(-2)) {

   287                 /* this is a lead byte for a double-byte token */

   288                 token=tokens[c<<8|*name++];

   289                 --nameLength;

   290             }

   291             if(token==(uint16_t)(-1)) {

   292                 if(c!=';') {

   293                     /* explicit letter */

   294                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);

   295                 } else {

   296                     /* stop, but skip the semicolon if we are seeking

   297                        extended names and there was no 2.0 name but there

   298                        is a 1.0 name. */

   299                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {

   300                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {

   301                             continue;

   302                         }

   303                     }

   304                     /* finished */

   305                     break;

   306                 }

   307             } else {

   308                 /* write token word */

   309                 uint8_t *tokenString=tokenStrings+token;

   310                 while((c=*tokenString++)!=0) {

   311                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);

   312                 }

   313             }

   314         }

   315     }

   317     /* zero-terminate */

   318     if(bufferLength>0) {

   319         *buffer=0;

   320     }

   322     return bufferPos;

   323 }

   325 /*

   326  * compareName() is almost the same as expandName() except that it compares

   327  * the currently expanded name to an input name.

   328  * It returns the match/no match result as soon as possible.

   329  */

   330 static UBool

   331 compareName(UCharNames *names,

   332             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,

   333             const char *otherName) {

   334     uint16_t *tokens=(uint16_t *)names+8;

   335     uint16_t token, tokenCount=*tokens++;

   336     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;

   337     uint8_t c;

   338     const char *origOtherName = otherName;

   340     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

   341         /*

   342          * skip the modern name if it is not requested _and_

   343          * if the semicolon byte value is a character, not a token number

   344          */

   345         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {

   346             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;

   347             do {

   348                 while(nameLength>0) {

   349                     --nameLength;

   350                     if(*name++==';') {

   351                         break;

   352                     }

   353                 }

   354             } while(--fieldIndex>0);

   355         } else {

   356             /*

   357              * the semicolon byte value is a token number, therefore

   358              * only modern names are stored in unames.dat and there is no

   359              * such requested alternate name here

   360              */

   361             nameLength=0;

   362         }

   363     }

   365     /* compare each letter directly, and compare a token word per token */

   366     while(nameLength>0) {

   367         --nameLength;

   368         c=*name++;

   370         if(c>=tokenCount) {

   371             if(c!=';') {

   372                 /* implicit letter */

   373                 if((char)c!=*otherName++) {

   374                     return FALSE;

   375                 }

   376             } else {

   377                 /* finished */

   378                 break;

   379             }

   380         } else {

   381             token=tokens[c];

   382             if(token==(uint16_t)(-2)) {

   383                 /* this is a lead byte for a double-byte token */

   384                 token=tokens[c<<8|*name++];

   385                 --nameLength;

   386             }

   387             if(token==(uint16_t)(-1)) {

   388                 if(c!=';') {

   389                     /* explicit letter */

   390                     if((char)c!=*otherName++) {

   391                         return FALSE;

   392                     }

   393                 } else {

   394                     /* stop, but skip the semicolon if we are seeking

   395                        extended names and there was no 2.0 name but there

   396                        is a 1.0 name. */

   397                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {

   398                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {

   399                             continue;

   400                         }

   401                     }

   402                     /* finished */

   403                     break;

   404                 }

   405             } else {

   406                 /* write token word */

   407                 uint8_t *tokenString=tokenStrings+token;

   408                 while((c=*tokenString++)!=0) {

   409                     if((char)c!=*otherName++) {

   410                         return FALSE;

   411                     }

   412                 }

   413             }

   414         }

   415     }

   417     /* complete match? */

   418     return (UBool)(*otherName==0);

   419 }

   421 static uint8_t getCharCat(UChar32 cp) {

   422     uint8_t cat;

   424     if (U_IS_UNICODE_NONCHAR(cp)) {

   425         return U_NONCHARACTER_CODE_POINT;

   426     }

   428     if ((cat = u_charType(cp)) == U_SURROGATE) {

   429         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;

   430     }

   432     return cat;

   433 }

   435 static const char *getCharCatName(UChar32 cp) {

   436     uint8_t cat = getCharCat(cp);

   438     /* Return unknown if the table of names above is not up to

   439        date. */

   441     if (cat >= LENGTHOF(charCatNames)) {

   442         return "unknown";

   443     } else {

   444         return charCatNames[cat];

   445     }

   446 }

   448 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {

   449     const char *catname = getCharCatName(code);

   450     uint16_t length = 0;

   452     UChar32 cp;

   453     int ndigits, i;

   455     WRITE_CHAR(buffer, bufferLength, length, '<');

   456     while (catname[length - 1]) {

   457         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);

   458     }

   459     WRITE_CHAR(buffer, bufferLength, length, '-');

   460     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)

   461         ;

   462     if (ndigits < 4)

   463         ndigits = 4;

   464     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {

   465         uint8_t v = (uint8_t)(cp & 0xf);

   466         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);

   467     }

   468     buffer += ndigits;

   469     length += ndigits;

   470     WRITE_CHAR(buffer, bufferLength, length, '>');

   472     return length;

   473 }

   475 /*

   476  * getGroup() does a binary search for the group that contains the

   477  * Unicode code point "code".

   478  * The return value is always a valid Group* that may contain "code"

   479  * or else is the highest group before "code".

   480  * If the lowest group is after "code", then that one is returned.

   481  */

   482 static const uint16_t *

   483 getGroup(UCharNames *names, uint32_t code) {

   484     const uint16_t *groups=GET_GROUPS(names);

   485     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),

   486              start=0,

   487              limit=*groups++,

   488              number;

   490     /* binary search for the group of names that contains the one for code */

   491     while(start<limit-1) {

   492         number=(uint16_t)((start+limit)/2);

   493         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {

   494             limit=number;

   495         } else {

   496             start=number;

   497         }

   498     }

   500     /* return this regardless of whether it is an exact match */

   501     return groups+start*GROUP_LENGTH;

   502 }

   504 /*

   505  * expandGroupLengths() reads a block of compressed lengths of 32 strings and

   506  * expands them into offsets and lengths for each string.

   507  * Lengths are stored with a variable-width encoding in consecutive nibbles:

   508  * If a nibble<0xc, then it is the length itself (0=empty string).

   509  * If a nibble>=0xc, then it forms a length value with the following nibble.

   510  * Calculation see below.

   511  * The offsets and lengths arrays must be at least 33 (one more) long because

   512  * there is no check here at the end if the last nibble is still used.

   513  */

   514 static const uint8_t *

   515 expandGroupLengths(const uint8_t *s,

   516                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {

   517     /* read the lengths of the 32 strings in this group and get each string's offset */

   518     uint16_t i=0, offset=0, length=0;

   519     uint8_t lengthByte;

   521     /* all 32 lengths must be read to get the offset of the first group string */

   522     while(i<LINES_PER_GROUP) {

   523         lengthByte=*s++;

   525         /* read even nibble - MSBs of lengthByte */

   526         if(length>=12) {

   527             /* double-nibble length spread across two bytes */

   528             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);

   529             lengthByte&=0xf;

   530         } else if((lengthByte /* &0xf0 */)>=0xc0) {

   531             /* double-nibble length spread across this one byte */

   532             length=(uint16_t)((lengthByte&0x3f)+12);

   533         } else {

   534             /* single-nibble length in MSBs */

   535             length=(uint16_t)(lengthByte>>4);

   536             lengthByte&=0xf;

   537         }

   539         *offsets++=offset;

   540         *lengths++=length;

   542         offset+=length;

   543         ++i;

   545         /* read odd nibble - LSBs of lengthByte */

   546         if((lengthByte&0xf0)==0) {

   547             /* this nibble was not consumed for a double-nibble length above */

   548             length=lengthByte;

   549             if(length<12) {

   550                 /* single-nibble length in LSBs */

   551                 *offsets++=offset;

   552                 *lengths++=length;

   554                 offset+=length;

   555                 ++i;

   556             }

   557         } else {

   558             length=0;   /* prevent double-nibble detection in the next iteration */

   559         }

   560     }

   562     /* now, s is at the first group string */

   563     return s;

   564 }

   566 static uint16_t

   567 expandGroupName(UCharNames *names, const uint16_t *group,

   568                 uint16_t lineNumber, UCharNameChoice nameChoice,

   569                 char *buffer, uint16_t bufferLength) {

   570     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];

   571     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);

   572     s=expandGroupLengths(s, offsets, lengths);

   573     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,

   574                       buffer, bufferLength);

   575 }

   577 static uint16_t

   578 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,

   579         char *buffer, uint16_t bufferLength) {

   580     const uint16_t *group=getGroup(names, code);

   581     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {

   582         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,

   583                                buffer, bufferLength);

   584     } else {

   585         /* group not found */

   586         /* zero-terminate */

   587         if(bufferLength>0) {

   588             *buffer=0;

   589         }

   590         return 0;

   591     }

   592 }

   594 /*

   595  * enumGroupNames() enumerates all the names in one group of 32 code points

   596  * and either calls the enumerator function or finds a given input name.

   597  */

   598 static UBool

   599 enumGroupNames(UCharNames *names, const uint16_t *group,

   600                UChar32 start, UChar32 end,

   601                UEnumCharNamesFn *fn, void *context,

   602                UCharNameChoice nameChoice) {

   603     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];

   604     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);

   606     s=expandGroupLengths(s, offsets, lengths);

   607     if(fn!=DO_FIND_NAME) {

   608         char buffer[200];

   609         uint16_t length;

   611         while(start<=end) {

   612             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));

   613             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {

   614                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;

   615             }

   616             /* here, we assume that the buffer is large enough */

   617             if(length>0) {

   618                 if(!fn(context, start, nameChoice, buffer, length)) {

   619                     return FALSE;

   620                 }

   621             }

   622             ++start;

   623         }

   624     } else {

   625         const char *otherName=((FindName *)context)->otherName;

   626         while(start<=end) {

   627             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {

   628                 ((FindName *)context)->code=start;

   629                 return FALSE;

   630             }

   631             ++start;

   632         }

   633     }

   634     return TRUE;

   635 }

   637 /*

   638  * enumExtNames() enumerates extended names.

   639  * It only needs to do it if it is called with a real function and not

   640  * with the dummy DO_FIND_NAME, because u_charFromName() does a check

   641  * for extended names by itself.

   642  */

   643 static UBool

   644 enumExtNames(UChar32 start, UChar32 end,

   645              UEnumCharNamesFn *fn, void *context)

   646 {

   647     if(fn!=DO_FIND_NAME) {

   648         char buffer[200];

   649         uint16_t length;

   651         while(start<=end) {

   652             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;

   653             /* here, we assume that the buffer is large enough */

   654             if(length>0) {

   655                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {

   656                     return FALSE;

   657                 }

   658             }

   659             ++start;

   660         }

   661     }

   663     return TRUE;

   664 }

   666 static UBool

   667 enumNames(UCharNames *names,

   668           UChar32 start, UChar32 limit,

   669           UEnumCharNamesFn *fn, void *context,

   670           UCharNameChoice nameChoice) {

   671     uint16_t startGroupMSB, endGroupMSB, groupCount;

   672     const uint16_t *group, *groupLimit;

   674     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);

   675     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);

   677     /* find the group that contains start, or the highest before it */

   678     group=getGroup(names, start);

   680     if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {

   681         /* enumerate synthetic names between start and the group start */

   682         UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);

   683         if(extLimit>limit) {

   684             extLimit=limit;

   685         }

   686         if(!enumExtNames(start, extLimit-1, fn, context)) {

   687             return FALSE;

   688         }

   689         start=extLimit;

   690     }

   692     if(startGroupMSB==endGroupMSB) {

   693         if(startGroupMSB==group[GROUP_MSB]) {

   694             /* if start and limit-1 are in the same group, then enumerate only in that one */

   695             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);

   696         }

   697     } else {

   698         const uint16_t *groups=GET_GROUPS(names);

   699         groupCount=*groups++;

   700         groupLimit=groups+groupCount*GROUP_LENGTH;

   702         if(startGroupMSB==group[GROUP_MSB]) {

   703             /* enumerate characters in the partial start group */

   704             if((start&GROUP_MASK)!=0) {

   705                 if(!enumGroupNames(names, group,

   706                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,

   707                                    fn, context, nameChoice)) {

   708                     return FALSE;

   709                 }

   710                 group=NEXT_GROUP(group); /* continue with the next group */

   711             }

   712         } else if(startGroupMSB>group[GROUP_MSB]) {

   713             /* make sure that we start enumerating with the first group after start */

   714             const uint16_t *nextGroup=NEXT_GROUP(group);

   715             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {

   716                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;

   717                 if (end > limit) {

   718                     end = limit;

   719                 }

   720                 if (!enumExtNames(start, end - 1, fn, context)) {

   721                     return FALSE;

   722                 }

   723             }

   724             group=nextGroup;

   725         }

   727         /* enumerate entire groups between the start- and end-groups */

   728         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {

   729             const uint16_t *nextGroup;

   730             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;

   731             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {

   732                 return FALSE;

   733             }

   734             nextGroup=NEXT_GROUP(group);

   735             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {

   736                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;

   737                 if (end > limit) {

   738                     end = limit;

   739                 }

   740                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {

   741                     return FALSE;

   742                 }

   743             }

   744             group=nextGroup;

   745         }

   747         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */

   748         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {

   749             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);

   750         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {

   751             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;

   752             if (next > start) {

   753                 start = next;

   754             }

   755         } else {

   756             return TRUE;

   757         }

   758     }

   760     /* we have not found a group, which means everything is made of

   761        extended names. */

   762     if (nameChoice == U_EXTENDED_CHAR_NAME) {

   763         if (limit > UCHAR_MAX_VALUE + 1) {

   764             limit = UCHAR_MAX_VALUE + 1;

   765         }

   766         return enumExtNames(start, limit - 1, fn, context);

   767     }

   769     return TRUE;

   770 }

   772 static uint16_t

   773 writeFactorSuffix(const uint16_t *factors, uint16_t count,

   774                   const char *s, /* suffix elements */

   775                   uint32_t code,

   776                   uint16_t indexes[8], /* output fields from here */

   777                   const char *elementBases[8], const char *elements[8],

   778                   char *buffer, uint16_t bufferLength) {

   779     uint16_t i, factor, bufferPos=0;

   780     char c;

   782     /* write elements according to the factors */

   784     /*

   785      * the factorized elements are determined by modulo arithmetic

   786      * with the factors of this algorithm

   787      *

   788      * note that for fewer operations, count is decremented here

   789      */

   790     --count;

   791     for(i=count; i>0; --i) {

   792         factor=factors[i];

   793         indexes[i]=(uint16_t)(code%factor);

   794         code/=factor;

   795     }

   796     /*

   797      * we don't need to calculate the last modulus because start<=code<=end

   798      * guarantees here that code<=factors[0]

   799      */

   800     indexes[0]=(uint16_t)code;

   802     /* write each element */

   803     for(;;) {

   804         if(elementBases!=NULL) {

   805             *elementBases++=s;

   806         }

   808         /* skip indexes[i] strings */

   809         factor=indexes[i];

   810         while(factor>0) {

   811             while(*s++!=0) {}

   812             --factor;

   813         }

   814         if(elements!=NULL) {

   815             *elements++=s;

   816         }

   818         /* write element */

   819         while((c=*s++)!=0) {

   820             WRITE_CHAR(buffer, bufferLength, bufferPos, c);

   821         }

   823         /* we do not need to perform the rest of this loop for i==count - break here */

   824         if(i>=count) {

   825             break;

   826         }

   828         /* skip the rest of the strings for this factors[i] */

   829         factor=(uint16_t)(factors[i]-indexes[i]-1);

   830         while(factor>0) {

   831             while(*s++!=0) {}

   832             --factor;

   833         }

   835         ++i;

   836     }

   838     /* zero-terminate */

   839     if(bufferLength>0) {

   840         *buffer=0;

   841     }

   843     return bufferPos;

   844 }

   846 /*

   847  * Important:

   848  * Parts of findAlgName() are almost the same as some of getAlgName().

   849  * Fixes must be applied to both.

   850  */

   851 static uint16_t

   852 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,

   853         char *buffer, uint16_t bufferLength) {

   854     uint16_t bufferPos=0;

   856     /* Only the normative character name can be algorithmic. */

   857     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

   858         /* zero-terminate */

   859         if(bufferLength>0) {

   860             *buffer=0;

   861         }

   862         return 0;

   863     }

   865     switch(range->type) {

   866     case 0: {

   867         /* name = prefix hex-digits */

   868         const char *s=(const char *)(range+1);

   869         char c;

   871         uint16_t i, count;

   873         /* copy prefix */

   874         while((c=*s++)!=0) {

   875             WRITE_CHAR(buffer, bufferLength, bufferPos, c);

   876         }

   878         /* write hexadecimal code point value */

   879         count=range->variant;

   881         /* zero-terminate */

   882         if(count<bufferLength) {

   883             buffer[count]=0;

   884         }

   886         for(i=count; i>0;) {

   887             if(--i<bufferLength) {

   888                 c=(char)(code&0xf);

   889                 if(c<10) {

   890                     c+='0';

   891                 } else {

   892                     c+='A'-10;

   893                 }

   894                 buffer[i]=c;

   895             }

   896             code>>=4;

   897         }

   899         bufferPos+=count;

   900         break;

   901     }

   902     case 1: {

   903         /* name = prefix factorized-elements */

   904         uint16_t indexes[8];

   905         const uint16_t *factors=(const uint16_t *)(range+1);

   906         uint16_t count=range->variant;

   907         const char *s=(const char *)(factors+count);

   908         char c;

   910         /* copy prefix */

   911         while((c=*s++)!=0) {

   912             WRITE_CHAR(buffer, bufferLength, bufferPos, c);

   913         }

   915         bufferPos+=writeFactorSuffix(factors, count,

   916                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);

   917         break;

   918     }

   919     default:

   920         /* undefined type */

   921         /* zero-terminate */

   922         if(bufferLength>0) {

   923             *buffer=0;

   924         }

   925         break;

   926     }

   928     return bufferPos;

   929 }

   931 /*

   932  * Important: enumAlgNames() and findAlgName() are almost the same.

   933  * Any fix must be applied to both.

   934  */

   935 static UBool

   936 enumAlgNames(AlgorithmicRange *range,

   937              UChar32 start, UChar32 limit,

   938              UEnumCharNamesFn *fn, void *context,

   939              UCharNameChoice nameChoice) {

   940     char buffer[200];

   941     uint16_t length;

   943     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

   944         return TRUE;

   945     }

   947     switch(range->type) {

   948     case 0: {

   949         char *s, *end;

   950         char c;

   952         /* get the full name of the start character */

   953         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));

   954         if(length<=0) {

   955             return TRUE;

   956         }

   958         /* call the enumerator function with this first character */

   959         if(!fn(context, start, nameChoice, buffer, length)) {

   960             return FALSE;

   961         }

   963         /* go to the end of the name; all these names have the same length */

   964         end=buffer;

   965         while(*end!=0) {

   966             ++end;

   967         }

   969         /* enumerate the rest of the names */

   970         while(++start<limit) {

   971             /* increment the hexadecimal number on a character-basis */

   972             s=end;

   973             for (;;) {

   974                 c=*--s;

   975                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {

   976                     *s=(char)(c+1);

   977                     break;

   978                 } else if(c=='9') {

   979                     *s='A';

   980                     break;

   981                 } else if(c=='F') {

   982                     *s='0';

   983                 }

   984             }

   986             if(!fn(context, start, nameChoice, buffer, length)) {

   987                 return FALSE;

   988             }

   989         }

   990         break;

   991     }

   992     case 1: {

   993         uint16_t indexes[8];

   994         const char *elementBases[8], *elements[8];

   995         const uint16_t *factors=(const uint16_t *)(range+1);

   996         uint16_t count=range->variant;

   997         const char *s=(const char *)(factors+count);

   998         char *suffix, *t;

   999         uint16_t prefixLength, i, idx;

  1001         char c;

  1003         /* name = prefix factorized-elements */

  1005         /* copy prefix */

  1006         suffix=buffer;

  1007         prefixLength=0;

  1008         while((c=*s++)!=0) {

  1009             *suffix++=c;

  1010             ++prefixLength;

  1011         }

  1013         /* append the suffix of the start character */

  1014         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,

  1015                                               s, (uint32_t)start-range->start,

  1016                                               indexes, elementBases, elements,

  1017                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));

  1019         /* call the enumerator function with this first character */

  1020         if(!fn(context, start, nameChoice, buffer, length)) {

  1021             return FALSE;

  1022         }

  1024         /* enumerate the rest of the names */

  1025         while(++start<limit) {

  1026             /* increment the indexes in lexical order bound by the factors */

  1027             i=count;

  1028             for (;;) {

  1029                 idx=(uint16_t)(indexes[--i]+1);

  1030                 if(idx<factors[i]) {

  1031                     /* skip one index and its element string */

  1032                     indexes[i]=idx;

  1033                     s=elements[i];

  1034                     while(*s++!=0) {

  1035                     }

  1036                     elements[i]=s;

  1037                     break;

  1038                 } else {

  1039                     /* reset this index to 0 and its element string to the first one */

  1040                     indexes[i]=0;

  1041                     elements[i]=elementBases[i];

  1042                 }

  1043             }

  1045             /* to make matters a little easier, just append all elements to the suffix */

  1046             t=suffix;

  1047             length=prefixLength;

  1048             for(i=0; i<count; ++i) {

  1049                 s=elements[i];

  1050                 while((c=*s++)!=0) {

  1051                     *t++=c;

  1052                     ++length;

  1053                 }

  1054             }

  1055             /* zero-terminate */

  1056             *t=0;

  1058             if(!fn(context, start, nameChoice, buffer, length)) {

  1059                 return FALSE;

  1060             }

  1061         }

  1062         break;

  1063     }

  1064     default:

  1065         /* undefined type */

  1066         break;

  1067     }

  1069     return TRUE;

  1070 }

  1072 /*

  1073  * findAlgName() is almost the same as enumAlgNames() except that it

  1074  * returns the code point for a name if it fits into the range.

  1075  * It returns 0xffff otherwise.

  1076  */

  1077 static UChar32

  1078 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {

  1079     UChar32 code;

  1081     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {

  1082         return 0xffff;

  1083     }

  1085     switch(range->type) {

  1086     case 0: {

  1087         /* name = prefix hex-digits */

  1088         const char *s=(const char *)(range+1);

  1089         char c;

  1091         uint16_t i, count;

  1093         /* compare prefix */

  1094         while((c=*s++)!=0) {

  1095             if((char)c!=*otherName++) {

  1096                 return 0xffff;

  1097             }

  1098         }

  1100         /* read hexadecimal code point value */

  1101         count=range->variant;

  1102         code=0;

  1103         for(i=0; i<count; ++i) {

  1104             c=*otherName++;

  1105             if('0'<=c && c<='9') {

  1106                 code=(code<<4)|(c-'0');

  1107             } else if('A'<=c && c<='F') {

  1108                 code=(code<<4)|(c-'A'+10);

  1109             } else {

  1110                 return 0xffff;

  1111             }

  1112         }

  1114         /* does it fit into the range? */

  1115         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {

  1116             return code;

  1117         }

  1118         break;

  1119     }

  1120     case 1: {

  1121         char buffer[64];

  1122         uint16_t indexes[8];

  1123         const char *elementBases[8], *elements[8];

  1124         const uint16_t *factors=(const uint16_t *)(range+1);

  1125         uint16_t count=range->variant;

  1126         const char *s=(const char *)(factors+count), *t;

  1127         UChar32 start, limit;

  1128         uint16_t i, idx;

  1130         char c;

  1132         /* name = prefix factorized-elements */

  1134         /* compare prefix */

  1135         while((c=*s++)!=0) {

  1136             if((char)c!=*otherName++) {

  1137                 return 0xffff;

  1138             }

  1139         }

  1141         start=(UChar32)range->start;

  1142         limit=(UChar32)(range->end+1);

  1144         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */

  1145         writeFactorSuffix(factors, count, s, 0,

  1146                           indexes, elementBases, elements, buffer, sizeof(buffer));

  1148         /* compare the first suffix */

  1149         if(0==uprv_strcmp(otherName, buffer)) {

  1150             return start;

  1151         }

  1153         /* enumerate and compare the rest of the suffixes */

  1154         while(++start<limit) {

  1155             /* increment the indexes in lexical order bound by the factors */

  1156             i=count;

  1157             for (;;) {

  1158                 idx=(uint16_t)(indexes[--i]+1);

  1159                 if(idx<factors[i]) {

  1160                     /* skip one index and its element string */

  1161                     indexes[i]=idx;

  1162                     s=elements[i];

  1163                     while(*s++!=0) {}

  1164                     elements[i]=s;

  1165                     break;

  1166                 } else {

  1167                     /* reset this index to 0 and its element string to the first one */

  1168                     indexes[i]=0;

  1169                     elements[i]=elementBases[i];

  1170                 }

  1171             }

  1173             /* to make matters a little easier, just compare all elements of the suffix */

  1174             t=otherName;

  1175             for(i=0; i<count; ++i) {

  1176                 s=elements[i];

  1177                 while((c=*s++)!=0) {

  1178                     if(c!=*t++) {

  1179                         s=""; /* does not match */

  1180                         i=99;

  1181                     }

  1182                 }

  1183             }

  1184             if(i<99 && *t==0) {

  1185                 return start;

  1186             }

  1187         }

  1188         break;

  1189     }

  1190     default:

  1191         /* undefined type */

  1192         break;

  1193     }

  1195     return 0xffff;

  1196 }

  1198 /* sets of name characters, maximum name lengths ---------------------------- */

  1200 #define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f)))

  1201 #define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0)

  1203 static int32_t

  1204 calcStringSetLength(uint32_t set[8], const char *s) {

  1205     int32_t length=0;

  1206     char c;

  1208     while((c=*s++)!=0) {

  1209         SET_ADD(set, c);

  1210         ++length;

  1211     }

  1212     return length;

  1213 }

  1215 static int32_t

  1216 calcAlgNameSetsLengths(int32_t maxNameLength) {

  1217     AlgorithmicRange *range;

  1218     uint32_t *p;

  1219     uint32_t rangeCount;

  1220     int32_t length;

  1222     /* enumerate algorithmic ranges */

  1223     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);

  1224     rangeCount=*p;

  1225     range=(AlgorithmicRange *)(p+1);

  1226     while(rangeCount>0) {

  1227         switch(range->type) {

  1228         case 0:

  1229             /* name = prefix + (range->variant times) hex-digits */

  1230             /* prefix */

  1231             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;

  1232             if(length>maxNameLength) {

  1233                 maxNameLength=length;

  1234             }

  1235             break;

  1236         case 1: {

  1237             /* name = prefix factorized-elements */

  1238             const uint16_t *factors=(const uint16_t *)(range+1);

  1239             const char *s;

  1240             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;

  1242             /* prefix length */

  1243             s=(const char *)(factors+count);

  1244             length=calcStringSetLength(gNameSet, s);

  1245             s+=length+1; /* start of factor suffixes */

  1247             /* get the set and maximum factor suffix length for each factor */

  1248             for(i=0; i<count; ++i) {

  1249                 maxFactorLength=0;

  1250                 for(factor=factors[i]; factor>0; --factor) {

  1251                     factorLength=calcStringSetLength(gNameSet, s);

  1252                     s+=factorLength+1;

  1253                     if(factorLength>maxFactorLength) {

  1254                         maxFactorLength=factorLength;

  1255                     }

  1256                 }

  1257                 length+=maxFactorLength;

  1258             }

  1260             if(length>maxNameLength) {

  1261                 maxNameLength=length;

  1262             }

  1263             break;

  1264         }

  1265         default:

  1266             /* unknown type */

  1267             break;

  1268         }

  1270         range=(AlgorithmicRange *)((uint8_t *)range+range->size);

  1271         --rangeCount;

  1272     }

  1273     return maxNameLength;

  1274 }

  1276 static int32_t

  1277 calcExtNameSetsLengths(int32_t maxNameLength) {

  1278     int32_t i, length;

  1280     for(i=0; i<LENGTHOF(charCatNames); ++i) {

  1281         /*

  1282          * for each category, count the length of the category name

  1283          * plus 9=

  1284          * 2 for <>

  1285          * 1 for -

  1286          * 6 for most hex digits per code point

  1287          */

  1288         length=9+calcStringSetLength(gNameSet, charCatNames[i]);

  1289         if(length>maxNameLength) {

  1290             maxNameLength=length;

  1291         }

  1292     }

  1293     return maxNameLength;

  1294 }

  1296 static int32_t

  1297 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,

  1298                   uint32_t set[8],

  1299                   const uint8_t **pLine, const uint8_t *lineLimit) {

  1300     const uint8_t *line=*pLine;

  1301     int32_t length=0, tokenLength;

  1302     uint16_t c, token;

  1304     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {

  1305         if(c>=tokenCount) {

  1306             /* implicit letter */

  1307             SET_ADD(set, c);

  1308             ++length;

  1309         } else {

  1310             token=tokens[c];

  1311             if(token==(uint16_t)(-2)) {

  1312                 /* this is a lead byte for a double-byte token */

  1313                 c=c<<8|*line++;

  1314                 token=tokens[c];

  1315             }

  1316             if(token==(uint16_t)(-1)) {

  1317                 /* explicit letter */

  1318                 SET_ADD(set, c);

  1319                 ++length;

  1320             } else {

  1321                 /* count token word */

  1322                 if(tokenLengths!=NULL) {

  1323                     /* use cached token length */

  1324                     tokenLength=tokenLengths[c];

  1325                     if(tokenLength==0) {

  1326                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);

  1327                         tokenLengths[c]=(int8_t)tokenLength;

  1328                     }

  1329                 } else {

  1330                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);

  1331                 }

  1332                 length+=tokenLength;

  1333             }

  1334         }

  1335     }

  1337     *pLine=line;

  1338     return length;

  1339 }

  1341 static void

  1342 calcGroupNameSetsLengths(int32_t maxNameLength) {

  1343     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];

  1345     uint16_t *tokens=(uint16_t *)uCharNames+8;

  1346     uint16_t tokenCount=*tokens++;

  1347     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;

  1349     int8_t *tokenLengths;

  1351     const uint16_t *group;

  1352     const uint8_t *s, *line, *lineLimit;

  1354     int32_t groupCount, lineNumber, length;

  1356     tokenLengths=(int8_t *)uprv_malloc(tokenCount);

  1357     if(tokenLengths!=NULL) {

  1358         uprv_memset(tokenLengths, 0, tokenCount);

  1359     }

  1361     group=GET_GROUPS(uCharNames);

  1362     groupCount=*group++;

  1364     /* enumerate all groups */

  1365     while(groupCount>0) {

  1366         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);

  1367         s=expandGroupLengths(s, offsets, lengths);

  1369         /* enumerate all lines in each group */

  1370         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {

  1371             line=s+offsets[lineNumber];

  1372             length=lengths[lineNumber];

  1373             if(length==0) {

  1374                 continue;

  1375             }

  1377             lineLimit=line+length;

  1379             /* read regular name */

  1380             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);

  1381             if(length>maxNameLength) {

  1382                 maxNameLength=length;

  1383             }

  1384             if(line==lineLimit) {

  1385                 continue;

  1386             }

  1388             /* read Unicode 1.0 name */

  1389             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);

  1390             if(length>maxNameLength) {

  1391                 maxNameLength=length;

  1392             }

  1393             if(line==lineLimit) {

  1394                 continue;

  1395             }

  1397             /* read ISO comment */

  1398             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/

  1399         }

  1401         group=NEXT_GROUP(group);

  1402         --groupCount;

  1403     }

  1405     if(tokenLengths!=NULL) {

  1406         uprv_free(tokenLengths);

  1407     }

  1409     /* set gMax... - name length last for threading */

  1410     gMaxNameLength=maxNameLength;

  1411 }

  1413 static UBool

  1414 calcNameSetsLengths(UErrorCode *pErrorCode) {

  1415     static const char extChars[]="0123456789ABCDEF<>-";

  1416     int32_t i, maxNameLength;

  1418     if(gMaxNameLength!=0) {

  1419         return TRUE;

  1420     }

  1422     if(!isDataLoaded(pErrorCode)) {

  1423         return FALSE;

  1424     }

  1426     /* set hex digits, used in various names, and <>-, used in extended names */

  1427     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {

  1428         SET_ADD(gNameSet, extChars[i]);

  1429     }

  1431     /* set sets and lengths from algorithmic names */

  1432     maxNameLength=calcAlgNameSetsLengths(0);

  1434     /* set sets and lengths from extended names */

  1435     maxNameLength=calcExtNameSetsLengths(maxNameLength);

  1437     /* set sets and lengths from group names, set global maximum values */

  1438     calcGroupNameSetsLengths(maxNameLength);

  1440     return TRUE;

  1441 }

  1443 /* public API --------------------------------------------------------------- */

  1445 U_CAPI int32_t U_EXPORT2

  1446 u_charName(UChar32 code, UCharNameChoice nameChoice,

  1447            char *buffer, int32_t bufferLength,

  1448            UErrorCode *pErrorCode) {

  1449     AlgorithmicRange *algRange;

  1450     uint32_t *p;

  1451     uint32_t i;

  1452     int32_t length;

  1454     /* check the argument values */

  1455     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

  1456         return 0;

  1457     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||

  1458               bufferLength<0 || (bufferLength>0 && buffer==NULL)

  1459     ) {

  1460         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

  1461         return 0;

  1462     }

  1464     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {

  1465         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);

  1466     }

  1468     length=0;

  1470     /* try algorithmic names first */

  1471     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);

  1472     i=*p;

  1473     algRange=(AlgorithmicRange *)(p+1);

  1474     while(i>0) {

  1475         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {

  1476             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);

  1477             break;

  1478         }

  1479         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);

  1480         --i;

  1481     }

  1483     if(i==0) {

  1484         if (nameChoice == U_EXTENDED_CHAR_NAME) {

  1485             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);

  1486             if (!length) {

  1487                 /* extended character name */

  1488                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);

  1489             }

  1490         } else {

  1491             /* normal character name */

  1492             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);

  1493         }

  1494     }

  1496     return u_terminateChars(buffer, bufferLength, length, pErrorCode);

  1497 }

  1499 U_CAPI int32_t U_EXPORT2

  1500 u_getISOComment(UChar32 /*c*/,

  1501                 char *dest, int32_t destCapacity,

  1502                 UErrorCode *pErrorCode) {

  1503     /* check the argument values */

  1504     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

  1505         return 0;

  1506     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {

  1507         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

  1508         return 0;

  1509     }

  1511     return u_terminateChars(dest, destCapacity, 0, pErrorCode);

  1512 }

  1514 U_CAPI UChar32 U_EXPORT2

  1515 u_charFromName(UCharNameChoice nameChoice,

  1516                const char *name,

  1517                UErrorCode *pErrorCode) {

  1518     char upper[120], lower[120];

  1519     FindName findName;

  1520     AlgorithmicRange *algRange;

  1521     uint32_t *p;

  1522     uint32_t i;

  1523     UChar32 cp = 0;

  1524     char c0;

  1525     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */

  1527     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

  1528         return error;

  1529     }

  1531     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {

  1532         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

  1533         return error;

  1534     }

  1536     if(!isDataLoaded(pErrorCode)) {

  1537         return error;

  1538     }

  1540     /* construct the uppercase and lowercase of the name first */

  1541     for(i=0; i<sizeof(upper); ++i) {

  1542         if((c0=*name++)!=0) {

  1543             upper[i]=uprv_toupper(c0);

  1544             lower[i]=uprv_tolower(c0);

  1545         } else {

  1546             upper[i]=lower[i]=0;

  1547             break;

  1548         }

  1549     }

  1550     if(i==sizeof(upper)) {

  1551         /* name too long, there is no such character */

  1552         *pErrorCode = U_ILLEGAL_CHAR_FOUND;

  1553         return error;

  1554     }

  1556     /* try extended names first */

  1557     if (lower[0] == '<') {

  1558         if (nameChoice == U_EXTENDED_CHAR_NAME) {

  1559             if (lower[--i] == '>') {

  1560                 for (--i; lower[i] && lower[i] != '-'; --i) {

  1561                 }

  1563                 if (lower[i] == '-') { /* We've got a category. */

  1564                     uint32_t cIdx;

  1566                     lower[i] = 0;

  1568                     for (++i; lower[i] != '>'; ++i) {

  1569                         if (lower[i] >= '0' && lower[i] <= '9') {

  1570                             cp = (cp << 4) + lower[i] - '0';

  1571                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {

  1572                             cp = (cp << 4) + lower[i] - 'a' + 10;

  1573                         } else {

  1574                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;

  1575                             return error;

  1576                         }

  1577                     }

  1579                     /* Now validate the category name.

  1580                        We could use a binary search, or a trie, if

  1581                        we really wanted to. */

  1583                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {

  1585                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {

  1586                             if (getCharCat(cp) == cIdx) {

  1587                                 return cp;

  1588                             }

  1589                             break;

  1590                         }

  1591                     }

  1592                 }

  1593             }

  1594         }

  1596         *pErrorCode = U_ILLEGAL_CHAR_FOUND;

  1597         return error;

  1598     }

  1600     /* try algorithmic names now */

  1601     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);

  1602     i=*p;

  1603     algRange=(AlgorithmicRange *)(p+1);

  1604     while(i>0) {

  1605         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {

  1606             return cp;

  1607         }

  1608         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);

  1609         --i;

  1610     }

  1612     /* normal character name */

  1613     findName.otherName=upper;

  1614     findName.code=error;

  1615     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);

  1616     if (findName.code == error) {

  1617          *pErrorCode = U_ILLEGAL_CHAR_FOUND;

  1618     }

  1619     return findName.code;

  1620 }

  1622 U_CAPI void U_EXPORT2

  1623 u_enumCharNames(UChar32 start, UChar32 limit,

  1624                 UEnumCharNamesFn *fn,

  1625                 void *context,

  1626                 UCharNameChoice nameChoice,

  1627                 UErrorCode *pErrorCode) {

  1628     AlgorithmicRange *algRange;

  1629     uint32_t *p;

  1630     uint32_t i;

  1632     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

  1633         return;

  1634     }

  1636     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {

  1637         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

  1638         return;

  1639     }

  1641     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {

  1642         limit = UCHAR_MAX_VALUE + 1;

  1643     }

  1644     if((uint32_t)start>=(uint32_t)limit) {

  1645         return;

  1646     }

  1648     if(!isDataLoaded(pErrorCode)) {

  1649         return;

  1650     }

  1652     /* interleave the data-driven ones with the algorithmic ones */

  1653     /* iterate over all algorithmic ranges; assume that they are in ascending order */

  1654     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);

  1655     i=*p;

  1656     algRange=(AlgorithmicRange *)(p+1);

  1657     while(i>0) {

  1658         /* enumerate the character names before the current algorithmic range */

  1659         /* here: start<limit */

  1660         if((uint32_t)start<algRange->start) {

  1661             if((uint32_t)limit<=algRange->start) {

  1662                 enumNames(uCharNames, start, limit, fn, context, nameChoice);

  1663                 return;

  1664             }

  1665             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {

  1666                 return;

  1667             }

  1668             start=(UChar32)algRange->start;

  1669         }

  1670         /* enumerate the character names in the current algorithmic range */

  1671         /* here: algRange->start<=start<limit */

  1672         if((uint32_t)start<=algRange->end) {

  1673             if((uint32_t)limit<=(algRange->end+1)) {

  1674                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);

  1675                 return;

  1676             }

  1677             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {

  1678                 return;

  1679             }

  1680             start=(UChar32)algRange->end+1;

  1681         }

  1682         /* continue to the next algorithmic range (here: start<limit) */

  1683         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);

  1684         --i;

  1685     }

  1686     /* enumerate the character names after the last algorithmic range */

  1687     enumNames(uCharNames, start, limit, fn, context, nameChoice);

  1688 }

  1690 U_CAPI int32_t U_EXPORT2

  1691 uprv_getMaxCharNameLength() {

  1692     UErrorCode errorCode=U_ZERO_ERROR;

  1693     if(calcNameSetsLengths(&errorCode)) {

  1694         return gMaxNameLength;

  1695     } else {

  1696         return 0;

  1697     }

  1698 }

  1700 /**

  1701  * Converts the char set cset into a Unicode set uset.

  1702  * @param cset Set of 256 bit flags corresponding to a set of chars.

  1703  * @param uset USet to receive characters. Existing contents are deleted.

  1704  */

  1705 static void

  1706 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {

  1707     UChar us[256];

  1708     char cs[256];

  1710     int32_t i, length;

  1711     UErrorCode errorCode;

  1713     errorCode=U_ZERO_ERROR;

  1715     if(!calcNameSetsLengths(&errorCode)) {

  1716         return;

  1717     }

  1719     /* build a char string with all chars that are used in character names */

  1720     length=0;

  1721     for(i=0; i<256; ++i) {

  1722         if(SET_CONTAINS(cset, i)) {

  1723             cs[length++]=(char)i;

  1724         }

  1725     }

  1727     /* convert the char string to a UChar string */

  1728     u_charsToUChars(cs, us, length);

  1730     /* add each UChar to the USet */

  1731     for(i=0; i<length; ++i) {

  1732         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */

  1733             sa->add(sa->set, us[i]);

  1734         }

  1735     }

  1736 }

  1738 /**

  1739  * Fills set with characters that are used in Unicode character names.

  1740  * @param set USet to receive characters.

  1741  */

  1742 U_CAPI void U_EXPORT2

  1743 uprv_getCharNameCharacters(const USetAdder *sa) {

  1744     charSetToUSet(gNameSet, sa);

  1745 }

  1747 /* data swapping ------------------------------------------------------------ */

  1749 /*

  1750  * The token table contains non-negative entries for token bytes,

  1751  * and -1 for bytes that represent themselves in the data file's charset.

  1752  * -2 entries are used for lead bytes.

  1753  *

  1754  * Direct bytes (-1 entries) must be translated from the input charset family

  1755  * to the output charset family.

  1756  * makeTokenMap() writes a permutation mapping for this.

  1757  * Use it once for single-/lead-byte tokens and once more for all trail byte

  1758  * tokens. (';' is an unused trail byte marked with -1.)

  1759  */

  1760 static void

  1761 makeTokenMap(const UDataSwapper *ds,

  1762              int16_t tokens[], uint16_t tokenCount,

  1763              uint8_t map[256],

  1764              UErrorCode *pErrorCode) {

  1765     UBool usedOutChar[256];

  1766     uint16_t i, j;

  1767     uint8_t c1, c2;

  1769     if(U_FAILURE(*pErrorCode)) {

  1770         return;

  1771     }

  1773     if(ds->inCharset==ds->outCharset) {

  1774         /* Same charset family: identity permutation */

  1775         for(i=0; i<256; ++i) {

  1776             map[i]=(uint8_t)i;

  1777         }

  1778     } else {

  1779         uprv_memset(map, 0, 256);

  1780         uprv_memset(usedOutChar, 0, 256);

  1782         if(tokenCount>256) {

  1783             tokenCount=256;

  1784         }

  1786         /* set the direct bytes (byte 0 always maps to itself) */

  1787         for(i=1; i<tokenCount; ++i) {

  1788             if(tokens[i]==-1) {

  1789                 /* convert the direct byte character */

  1790                 c1=(uint8_t)i;

  1791                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);

  1792                 if(U_FAILURE(*pErrorCode)) {

  1793                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",

  1794                                      i, ds->inCharset);

  1795                     return;

  1796                 }

  1798                 /* enter the converted character into the map and mark it used */

  1799                 map[c1]=c2;

  1800                 usedOutChar[c2]=TRUE;

  1801             }

  1802         }

  1804         /* set the mappings for the rest of the permutation */

  1805         for(i=j=1; i<tokenCount; ++i) {

  1806             /* set mappings that were not set for direct bytes */

  1807             if(map[i]==0) {

  1808                 /* set an output byte value that was not used as an output byte above */

  1809                 while(usedOutChar[j]) {

  1810                     ++j;

  1811                 }

  1812                 map[i]=(uint8_t)j++;

  1813             }

  1814         }

  1816         /*

  1817          * leave mappings at tokenCount and above unset if tokenCount<256

  1818          * because they won't be used

  1819          */

  1820     }

  1821 }

  1823 U_CAPI int32_t U_EXPORT2

  1824 uchar_swapNames(const UDataSwapper *ds,

  1825                 const void *inData, int32_t length, void *outData,

  1826                 UErrorCode *pErrorCode) {

  1827     const UDataInfo *pInfo;

  1828     int32_t headerSize;

  1830     const uint8_t *inBytes;

  1831     uint8_t *outBytes;

  1833     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,

  1834              offset, i, count, stringsCount;

  1836     const AlgorithmicRange *inRange;

  1837     AlgorithmicRange *outRange;

  1839     /* udata_swapDataHeader checks the arguments */

  1840     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

  1841     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

  1842         return 0;

  1843     }

  1845     /* check data format and format version */

  1846     pInfo=(const UDataInfo *)((const char *)inData+4);

  1847     if(!(

  1848         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */

  1849         pInfo->dataFormat[1]==0x6e &&

  1850         pInfo->dataFormat[2]==0x61 &&

  1851         pInfo->dataFormat[3]==0x6d &&

  1852         pInfo->formatVersion[0]==1

  1853     )) {

  1854         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",

  1855                          pInfo->dataFormat[0], pInfo->dataFormat[1],

  1856                          pInfo->dataFormat[2], pInfo->dataFormat[3],

  1857                          pInfo->formatVersion[0]);

  1858         *pErrorCode=U_UNSUPPORTED_ERROR;

  1859         return 0;

  1860     }

  1862     inBytes=(const uint8_t *)inData+headerSize;

  1863     outBytes=(uint8_t *)outData+headerSize;

  1864     if(length<0) {

  1865         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);

  1866     } else {

  1867         length-=headerSize;

  1868         if( length<20 ||

  1869             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))

  1870         ) {

  1871             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",

  1872                              length);

  1873             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

  1874             return 0;

  1875         }

  1876     }

  1878     if(length<0) {

  1879         /* preflighting: iterate through algorithmic ranges */

  1880         offset=algNamesOffset;

  1881         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));

  1882         offset+=4;

  1884         for(i=0; i<count; ++i) {

  1885             inRange=(const AlgorithmicRange *)(inBytes+offset);

  1886             offset+=ds->readUInt16(inRange->size);

  1887         }

  1888     } else {

  1889         /* swap data */

  1890         const uint16_t *p;

  1891         uint16_t *q, *temp;

  1893         int16_t tokens[512];

  1894         uint16_t tokenCount;

  1896         uint8_t map[256], trailMap[256];

  1898         /* copy the data for inaccessible bytes */

  1899         if(inBytes!=outBytes) {

  1900             uprv_memcpy(outBytes, inBytes, length);

  1901         }

  1903         /* the initial 4 offsets first */

  1904         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);

  1905         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);

  1906         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);

  1907         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);

  1909         /*

  1910          * now the tokens table

  1911          * it needs to be permutated along with the compressed name strings

  1912          */

  1913         p=(const uint16_t *)(inBytes+16);

  1914         q=(uint16_t *)(outBytes+16);

  1916         /* read and swap the tokenCount */

  1917         tokenCount=ds->readUInt16(*p);

  1918         ds->swapArray16(ds, p, 2, q, pErrorCode);

  1919         ++p;

  1920         ++q;

  1922         /* read the first 512 tokens and make the token maps */

  1923         if(tokenCount<=512) {

  1924             count=tokenCount;

  1925         } else {

  1926             count=512;

  1927         }

  1928         for(i=0; i<count; ++i) {

  1929             tokens[i]=udata_readInt16(ds, p[i]);

  1930         }

  1931         for(; i<512; ++i) {

  1932             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */

  1933         }

  1934         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);

  1935         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);

  1936         if(U_FAILURE(*pErrorCode)) {

  1937             return 0;

  1938         }

  1940         /*

  1941          * swap and permutate the tokens

  1942          * go through a temporary array to support in-place swapping

  1943          */

  1944         temp=(uint16_t *)uprv_malloc(tokenCount*2);

  1945         if(temp==NULL) {

  1946             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",

  1947                              tokenCount);

  1948             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

  1949             return 0;

  1950         }

  1952         /* swap and permutate single-/lead-byte tokens */

  1953         for(i=0; i<tokenCount && i<256; ++i) {

  1954             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);

  1955         }

  1957         /* swap and permutate trail-byte tokens */

  1958         for(; i<tokenCount; ++i) {

  1959             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);

  1960         }

  1962         /* copy the result into the output and free the temporary array */

  1963         uprv_memcpy(q, temp, tokenCount*2);

  1964         uprv_free(temp);

  1966         /*

  1967          * swap the token strings but not a possible padding byte after

  1968          * the terminating NUL of the last string

  1969          */

  1970         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),

  1971                                     outBytes+tokenStringOffset, pErrorCode);

  1972         if(U_FAILURE(*pErrorCode)) {

  1973             udata_printError(ds, "uchar_swapNames(token strings) failed\n");

  1974             return 0;

  1975         }

  1977         /* swap the group table */

  1978         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));

  1979         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),

  1980                            outBytes+groupsOffset, pErrorCode);

  1982         /*

  1983          * swap the group strings

  1984          * swap the string bytes but not the nibble-encoded string lengths

  1985          */

  1986         if(ds->inCharset!=ds->outCharset) {

  1987             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];

  1989             const uint8_t *inStrings, *nextInStrings;

  1990             uint8_t *outStrings;

  1992             uint8_t c;

  1994             inStrings=inBytes+groupStringOffset;

  1995             outStrings=outBytes+groupStringOffset;

  1997             stringsCount=algNamesOffset-groupStringOffset;

  1999             /* iterate through string groups until only a few padding bytes are left */

  2000             while(stringsCount>32) {

  2001                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);

  2003                 /* move past the length bytes */

  2004                 stringsCount-=(uint32_t)(nextInStrings-inStrings);

  2005                 outStrings+=nextInStrings-inStrings;

  2006                 inStrings=nextInStrings;

  2008                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */

  2009                 stringsCount-=count;

  2011                 /* swap the string bytes using map[] and trailMap[] */

  2012                 while(count>0) {

  2013                     c=*inStrings++;

  2014                     *outStrings++=map[c];

  2015                     if(tokens[c]!=-2) {

  2016                         --count;

  2017                     } else {

  2018                         /* token lead byte: swap the trail byte, too */

  2019                         *outStrings++=trailMap[*inStrings++];

  2020                         count-=2;

  2021                     }

  2022                 }

  2023             }

  2024         }

  2026         /* swap the algorithmic ranges */

  2027         offset=algNamesOffset;

  2028         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));

  2029         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);

  2030         offset+=4;

  2032         for(i=0; i<count; ++i) {

  2033             if(offset>(uint32_t)length) {

  2034                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",

  2035                                  length, i);

  2036                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

  2037                 return 0;

  2038             }

  2040             inRange=(const AlgorithmicRange *)(inBytes+offset);

  2041             outRange=(AlgorithmicRange *)(outBytes+offset);

  2042             offset+=ds->readUInt16(inRange->size);

  2044             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);

  2045             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);

  2046             switch(inRange->type) {

  2047             case 0:

  2048                 /* swap prefix string */

  2049                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),

  2050                                     outRange+1, pErrorCode);

  2051                 if(U_FAILURE(*pErrorCode)) {

  2052                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",

  2053                                      i);

  2054                     return 0;

  2055                 }

  2056                 break;

  2057             case 1:

  2058                 {

  2059                     /* swap factors and the prefix and factor strings */

  2060                     uint32_t factorsCount;

  2062                     factorsCount=inRange->variant;

  2063                     p=(const uint16_t *)(inRange+1);

  2064                     q=(uint16_t *)(outRange+1);

  2065                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);

  2067                     /* swap the strings, up to the last terminating NUL */

  2068                     p+=factorsCount;

  2069                     q+=factorsCount;

  2070                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);

  2071                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {

  2072                         --stringsCount;

  2073                     }

  2074                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);

  2075                 }

  2076                 break;

  2077             default:

  2078                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",

  2079                                  inRange->type, i);

  2080                 *pErrorCode=U_UNSUPPORTED_ERROR;

  2081                 return 0;

  2082             }

  2083         }

  2084     }

  2086     return headerSize+(int32_t)offset;

  2087 }

  2089 U_NAMESPACE_END

  2091 /*

  2092  * Hey, Emacs, please set the following:

  2093  *

  2094  * Local Variables:

  2095  * indent-tabs-mode: nil

  2096  * End:

  2097  *

  2098  */

The Tor Browser / file revision

intl/icu/source/common/unames.cpp@fc2d59ddac77

intl/icu/source/common/unames.cpp