intl/icu/source/common/unames.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: unames.c
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 1999oct04
michael@0 14 * created by: Markus W. Scherer
michael@0 15 */
michael@0 16
michael@0 17 #include "unicode/utypes.h"
michael@0 18 #include "unicode/putil.h"
michael@0 19 #include "unicode/uchar.h"
michael@0 20 #include "unicode/udata.h"
michael@0 21 #include "unicode/utf.h"
michael@0 22 #include "unicode/utf16.h"
michael@0 23 #include "uassert.h"
michael@0 24 #include "ustr_imp.h"
michael@0 25 #include "umutex.h"
michael@0 26 #include "cmemory.h"
michael@0 27 #include "cstring.h"
michael@0 28 #include "ucln_cmn.h"
michael@0 29 #include "udataswp.h"
michael@0 30 #include "uprops.h"
michael@0 31
michael@0 32 U_NAMESPACE_BEGIN
michael@0 33
michael@0 34 /* prototypes ------------------------------------------------------------- */
michael@0 35
/* Element count of a true array (not a pointer), expressed as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/* Name and type of the data item that loadCharNames() opens. */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of LINES_PER_GROUP (1<<GROUP_SHIFT) lines.
 * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line inside it.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
michael@0 44
michael@0 45 /*
michael@0 46 * This struct was replaced by explicitly accessing equivalent
michael@0 47 * fields from triples of uint16_t.
michael@0 48 * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
michael@0 49 * which broke the assumption that sizeof(Group)==6 and that the ++ operator
michael@0 50 * would advance by 6 bytes (3 uint16_t).
michael@0 51 *
michael@0 52 * We can't just change the data structure because it's loaded from a data file,
michael@0 53 * and we don't want to make it less compact, so we changed the access code.
michael@0 54 *
michael@0 55 * For details see ICU tickets 6331 and 6008.
michael@0 56 typedef struct {
michael@0 57 uint16_t groupMSB,
michael@0 58 offsetHigh, offsetLow; / * avoid padding * /
michael@0 59 } Group;
michael@0 60 */
/* Indexes of the three uint16_t fields that make up one group entry. */
enum {
    GROUP_MSB,          /* group number, i.e. code point >> GROUP_SHIFT */
    GROUP_OFFSET_HIGH,  /* upper 16 bits of the group's 32-bit offset */
    GROUP_OFFSET_LOW,   /* lower 16 bits of the group's 32-bit offset */
    GROUP_LENGTH        /* number of uint16_t per group entry */
};

/*
 * Get the 32-bit group offset.
 * The two halves are combined as unsigned values so that a high half
 * >= 0x8000 never left-shifts a signed integer into its sign bit.
 * @param group (const uint16_t *) pointer to a Group triple of uint16_t
 * @return group offset (int32_t)
 */
#define GET_GROUP_OFFSET(group) \
    ((int32_t)((uint32_t)(group)[GROUP_OFFSET_HIGH]<<16|(uint32_t)(group)[GROUP_OFFSET_LOW]))

/* Step one group entry (GROUP_LENGTH uint16_t) forward or backward. */
#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
michael@0 77
/*
 * Header of one algorithmic-name range.
 * NOTE(review): field meanings are interpreted by code outside this view;
 * start/end look like the bounds of the covered code point range - confirm.
 */
typedef struct {
    uint32_t start;
    uint32_t end;
    uint8_t type;
    uint8_t variant;
    uint16_t size;
} AlgorithmicRange;
michael@0 83
/*
 * Index header at the start of the loaded names data.
 * Each field is a byte offset counted from the start of this struct.
 */
typedef struct {
    uint32_t tokenStringOffset;  /* token strings, see expandName() */
    uint32_t groupsOffset;       /* groups table, see GET_GROUPS() */
    uint32_t groupStringOffset;  /* group strings, see expandGroupName() */
    uint32_t algNamesOffset;     /* presumably the algorithmic ranges - used outside this view */
} UCharNames;
michael@0 87
/*
 * Get the groups table from a UCharNames struct.
 * The groups table consists of one uint16_t groupCount followed by
 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH.
 *
 * The argument and the whole expansion are parenthesized so that the macro
 * can be used with any pointer expression and inside larger expressions.
 *
 * @param names (const UCharNames *) pointer to the UCharNames indexes
 * @return (const uint16_t *) pointer to the groups table
 */
#define GET_GROUPS(names) \
    ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))
michael@0 98
michael@0 99 typedef struct {
michael@0 100 const char *otherName;
michael@0 101 UChar32 code;
michael@0 102 } FindName;
michael@0 103
michael@0 104 #define DO_FIND_NAME NULL
michael@0 105
michael@0 106 static UDataMemory *uCharNamesData=NULL;
michael@0 107 static UCharNames *uCharNames=NULL;
michael@0 108 static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
michael@0 109
michael@0 110 /*
michael@0 111 * Maximum length of character names (regular & 1.0).
michael@0 112 */
michael@0 113 static int32_t gMaxNameLength=0;
michael@0 114
michael@0 115 /*
michael@0 116 * Set of chars used in character names (regular & 1.0).
michael@0 117 * Chars are platform-dependent (can be EBCDIC).
michael@0 118 */
michael@0 119 static uint32_t gNameSet[8]={ 0 };
michael@0 120
/*
 * Extra pseudo-categories, numbered directly after the regular general
 * categories. Each expansion is fully parenthesized so that the value is
 * the same in every expression context.
 */
#define U_NONCHARACTER_CODE_POINT (U_CHAR_CATEGORY_COUNT)
#define U_LEAD_SURROGATE (U_CHAR_CATEGORY_COUNT + 1)
#define U_TRAIL_SURROGATE (U_CHAR_CATEGORY_COUNT + 2)

/* Number of regular categories plus the three pseudo-categories above. */
#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
michael@0 126
michael@0 127 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
michael@0 128 "unassigned",
michael@0 129 "uppercase letter",
michael@0 130 "lowercase letter",
michael@0 131 "titlecase letter",
michael@0 132 "modifier letter",
michael@0 133 "other letter",
michael@0 134 "non spacing mark",
michael@0 135 "enclosing mark",
michael@0 136 "combining spacing mark",
michael@0 137 "decimal digit number",
michael@0 138 "letter number",
michael@0 139 "other number",
michael@0 140 "space separator",
michael@0 141 "line separator",
michael@0 142 "paragraph separator",
michael@0 143 "control",
michael@0 144 "format",
michael@0 145 "private use area",
michael@0 146 "surrogate",
michael@0 147 "dash punctuation",
michael@0 148 "start punctuation",
michael@0 149 "end punctuation",
michael@0 150 "connector punctuation",
michael@0 151 "other punctuation",
michael@0 152 "math symbol",
michael@0 153 "currency symbol",
michael@0 154 "modifier symbol",
michael@0 155 "other symbol",
michael@0 156 "initial punctuation",
michael@0 157 "final punctuation",
michael@0 158 "noncharacter",
michael@0 159 "lead surrogate",
michael@0 160 "trail surrogate"
michael@0 161 };
michael@0 162
michael@0 163 /* implementation ----------------------------------------------------------- */
michael@0 164
michael@0 165 static UBool U_CALLCONV unames_cleanup(void)
michael@0 166 {
michael@0 167 if(uCharNamesData) {
michael@0 168 udata_close(uCharNamesData);
michael@0 169 uCharNamesData = NULL;
michael@0 170 }
michael@0 171 if(uCharNames) {
michael@0 172 uCharNames = NULL;
michael@0 173 }
michael@0 174 gCharNamesInitOnce.reset();
michael@0 175 gMaxNameLength=0;
michael@0 176 return TRUE;
michael@0 177 }
michael@0 178
michael@0 179 static UBool U_CALLCONV
michael@0 180 isAcceptable(void * /*context*/,
michael@0 181 const char * /*type*/, const char * /*name*/,
michael@0 182 const UDataInfo *pInfo) {
michael@0 183 return (UBool)(
michael@0 184 pInfo->size>=20 &&
michael@0 185 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
michael@0 186 pInfo->charsetFamily==U_CHARSET_FAMILY &&
michael@0 187 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
michael@0 188 pInfo->dataFormat[1]==0x6e &&
michael@0 189 pInfo->dataFormat[2]==0x61 &&
michael@0 190 pInfo->dataFormat[3]==0x6d &&
michael@0 191 pInfo->formatVersion[0]==1);
michael@0 192 }
michael@0 193
michael@0 194 static void U_CALLCONV
michael@0 195 loadCharNames(UErrorCode &status) {
michael@0 196 U_ASSERT(uCharNamesData == NULL);
michael@0 197 U_ASSERT(uCharNames == NULL);
michael@0 198
michael@0 199 uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
michael@0 200 if(U_FAILURE(status)) {
michael@0 201 uCharNamesData = NULL;
michael@0 202 } else {
michael@0 203 uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
michael@0 204 }
michael@0 205 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
michael@0 206 }
michael@0 207
michael@0 208
michael@0 209 static UBool
michael@0 210 isDataLoaded(UErrorCode *pErrorCode) {
michael@0 211 umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
michael@0 212 return U_SUCCESS(*pErrorCode);
michael@0 213 }
michael@0 214
/*
 * Append one char to a bounded output buffer.
 * The char is stored only while bufferLength>0 (buffer advances and
 * bufferLength shrinks); bufferPos is incremented in every case, so it ends
 * up as the length that the complete output would have had.
 *
 * Wrapped in do { } while(0) so that "WRITE_CHAR(...);" is exactly one
 * statement and is safe in an unbraced if/else. c is evaluated once.
 */
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) do { \
    if((bufferLength)>0) { \
        *(buffer)++=(c); \
        --(bufferLength); \
    } \
    ++(bufferPos); \
} while(0)

/* Internal name choice value, one past the public UCharNameChoice values. */
#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
michael@0 224
michael@0 225 /*
michael@0 226 * Important: expandName() and compareName() are almost the same -
michael@0 227 * apply fixes to both.
michael@0 228 *
michael@0 229 * UnicodeData.txt uses ';' as a field separator, so no
michael@0 230 * field can contain ';' as part of its contents.
michael@0 231 * In unames.dat, it is marked as token[';']==-1 only if the
michael@0 232 * semicolon is used in the data file - which is iff we
michael@0 233 * have Unicode 1.0 names or ISO comments or aliases.
michael@0 234 * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
michael@0 235 * although we know that it will never be part of a name.
michael@0 236 */
michael@0 237 static uint16_t
michael@0 238 expandName(UCharNames *names,
michael@0 239 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
michael@0 240 char *buffer, uint16_t bufferLength) {
michael@0 241 uint16_t *tokens=(uint16_t *)names+8;
michael@0 242 uint16_t token, tokenCount=*tokens++, bufferPos=0;
michael@0 243 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
michael@0 244 uint8_t c;
michael@0 245
michael@0 246 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
michael@0 247 /*
michael@0 248 * skip the modern name if it is not requested _and_
michael@0 249 * if the semicolon byte value is a character, not a token number
michael@0 250 */
michael@0 251 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
michael@0 252 int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
michael@0 253 do {
michael@0 254 while(nameLength>0) {
michael@0 255 --nameLength;
michael@0 256 if(*name++==';') {
michael@0 257 break;
michael@0 258 }
michael@0 259 }
michael@0 260 } while(--fieldIndex>0);
michael@0 261 } else {
michael@0 262 /*
michael@0 263 * the semicolon byte value is a token number, therefore
michael@0 264 * only modern names are stored in unames.dat and there is no
michael@0 265 * such requested alternate name here
michael@0 266 */
michael@0 267 nameLength=0;
michael@0 268 }
michael@0 269 }
michael@0 270
michael@0 271 /* write each letter directly, and write a token word per token */
michael@0 272 while(nameLength>0) {
michael@0 273 --nameLength;
michael@0 274 c=*name++;
michael@0 275
michael@0 276 if(c>=tokenCount) {
michael@0 277 if(c!=';') {
michael@0 278 /* implicit letter */
michael@0 279 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
michael@0 280 } else {
michael@0 281 /* finished */
michael@0 282 break;
michael@0 283 }
michael@0 284 } else {
michael@0 285 token=tokens[c];
michael@0 286 if(token==(uint16_t)(-2)) {
michael@0 287 /* this is a lead byte for a double-byte token */
michael@0 288 token=tokens[c<<8|*name++];
michael@0 289 --nameLength;
michael@0 290 }
michael@0 291 if(token==(uint16_t)(-1)) {
michael@0 292 if(c!=';') {
michael@0 293 /* explicit letter */
michael@0 294 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
michael@0 295 } else {
michael@0 296 /* stop, but skip the semicolon if we are seeking
michael@0 297 extended names and there was no 2.0 name but there
michael@0 298 is a 1.0 name. */
michael@0 299 if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 300 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
michael@0 301 continue;
michael@0 302 }
michael@0 303 }
michael@0 304 /* finished */
michael@0 305 break;
michael@0 306 }
michael@0 307 } else {
michael@0 308 /* write token word */
michael@0 309 uint8_t *tokenString=tokenStrings+token;
michael@0 310 while((c=*tokenString++)!=0) {
michael@0 311 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
michael@0 312 }
michael@0 313 }
michael@0 314 }
michael@0 315 }
michael@0 316
michael@0 317 /* zero-terminate */
michael@0 318 if(bufferLength>0) {
michael@0 319 *buffer=0;
michael@0 320 }
michael@0 321
michael@0 322 return bufferPos;
michael@0 323 }
michael@0 324
michael@0 325 /*
michael@0 326 * compareName() is almost the same as expandName() except that it compares
michael@0 327 * the currently expanded name to an input name.
michael@0 328 * It returns the match/no match result as soon as possible.
michael@0 329 */
michael@0 330 static UBool
michael@0 331 compareName(UCharNames *names,
michael@0 332 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
michael@0 333 const char *otherName) {
michael@0 334 uint16_t *tokens=(uint16_t *)names+8;
michael@0 335 uint16_t token, tokenCount=*tokens++;
michael@0 336 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
michael@0 337 uint8_t c;
michael@0 338 const char *origOtherName = otherName;
michael@0 339
michael@0 340 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
michael@0 341 /*
michael@0 342 * skip the modern name if it is not requested _and_
michael@0 343 * if the semicolon byte value is a character, not a token number
michael@0 344 */
michael@0 345 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
michael@0 346 int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
michael@0 347 do {
michael@0 348 while(nameLength>0) {
michael@0 349 --nameLength;
michael@0 350 if(*name++==';') {
michael@0 351 break;
michael@0 352 }
michael@0 353 }
michael@0 354 } while(--fieldIndex>0);
michael@0 355 } else {
michael@0 356 /*
michael@0 357 * the semicolon byte value is a token number, therefore
michael@0 358 * only modern names are stored in unames.dat and there is no
michael@0 359 * such requested alternate name here
michael@0 360 */
michael@0 361 nameLength=0;
michael@0 362 }
michael@0 363 }
michael@0 364
michael@0 365 /* compare each letter directly, and compare a token word per token */
michael@0 366 while(nameLength>0) {
michael@0 367 --nameLength;
michael@0 368 c=*name++;
michael@0 369
michael@0 370 if(c>=tokenCount) {
michael@0 371 if(c!=';') {
michael@0 372 /* implicit letter */
michael@0 373 if((char)c!=*otherName++) {
michael@0 374 return FALSE;
michael@0 375 }
michael@0 376 } else {
michael@0 377 /* finished */
michael@0 378 break;
michael@0 379 }
michael@0 380 } else {
michael@0 381 token=tokens[c];
michael@0 382 if(token==(uint16_t)(-2)) {
michael@0 383 /* this is a lead byte for a double-byte token */
michael@0 384 token=tokens[c<<8|*name++];
michael@0 385 --nameLength;
michael@0 386 }
michael@0 387 if(token==(uint16_t)(-1)) {
michael@0 388 if(c!=';') {
michael@0 389 /* explicit letter */
michael@0 390 if((char)c!=*otherName++) {
michael@0 391 return FALSE;
michael@0 392 }
michael@0 393 } else {
michael@0 394 /* stop, but skip the semicolon if we are seeking
michael@0 395 extended names and there was no 2.0 name but there
michael@0 396 is a 1.0 name. */
michael@0 397 if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 398 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
michael@0 399 continue;
michael@0 400 }
michael@0 401 }
michael@0 402 /* finished */
michael@0 403 break;
michael@0 404 }
michael@0 405 } else {
michael@0 406 /* write token word */
michael@0 407 uint8_t *tokenString=tokenStrings+token;
michael@0 408 while((c=*tokenString++)!=0) {
michael@0 409 if((char)c!=*otherName++) {
michael@0 410 return FALSE;
michael@0 411 }
michael@0 412 }
michael@0 413 }
michael@0 414 }
michael@0 415 }
michael@0 416
michael@0 417 /* complete match? */
michael@0 418 return (UBool)(*otherName==0);
michael@0 419 }
michael@0 420
michael@0 421 static uint8_t getCharCat(UChar32 cp) {
michael@0 422 uint8_t cat;
michael@0 423
michael@0 424 if (U_IS_UNICODE_NONCHAR(cp)) {
michael@0 425 return U_NONCHARACTER_CODE_POINT;
michael@0 426 }
michael@0 427
michael@0 428 if ((cat = u_charType(cp)) == U_SURROGATE) {
michael@0 429 cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
michael@0 430 }
michael@0 431
michael@0 432 return cat;
michael@0 433 }
michael@0 434
michael@0 435 static const char *getCharCatName(UChar32 cp) {
michael@0 436 uint8_t cat = getCharCat(cp);
michael@0 437
michael@0 438 /* Return unknown if the table of names above is not up to
michael@0 439 date. */
michael@0 440
michael@0 441 if (cat >= LENGTHOF(charCatNames)) {
michael@0 442 return "unknown";
michael@0 443 } else {
michael@0 444 return charCatNames[cat];
michael@0 445 }
michael@0 446 }
michael@0 447
michael@0 448 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
michael@0 449 const char *catname = getCharCatName(code);
michael@0 450 uint16_t length = 0;
michael@0 451
michael@0 452 UChar32 cp;
michael@0 453 int ndigits, i;
michael@0 454
michael@0 455 WRITE_CHAR(buffer, bufferLength, length, '<');
michael@0 456 while (catname[length - 1]) {
michael@0 457 WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
michael@0 458 }
michael@0 459 WRITE_CHAR(buffer, bufferLength, length, '-');
michael@0 460 for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
michael@0 461 ;
michael@0 462 if (ndigits < 4)
michael@0 463 ndigits = 4;
michael@0 464 for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
michael@0 465 uint8_t v = (uint8_t)(cp & 0xf);
michael@0 466 buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
michael@0 467 }
michael@0 468 buffer += ndigits;
michael@0 469 length += ndigits;
michael@0 470 WRITE_CHAR(buffer, bufferLength, length, '>');
michael@0 471
michael@0 472 return length;
michael@0 473 }
michael@0 474
michael@0 475 /*
michael@0 476 * getGroup() does a binary search for the group that contains the
michael@0 477 * Unicode code point "code".
michael@0 478 * The return value is always a valid Group* that may contain "code"
michael@0 479 * or else is the highest group before "code".
michael@0 480 * If the lowest group is after "code", then that one is returned.
michael@0 481 */
michael@0 482 static const uint16_t *
michael@0 483 getGroup(UCharNames *names, uint32_t code) {
michael@0 484 const uint16_t *groups=GET_GROUPS(names);
michael@0 485 uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
michael@0 486 start=0,
michael@0 487 limit=*groups++,
michael@0 488 number;
michael@0 489
michael@0 490 /* binary search for the group of names that contains the one for code */
michael@0 491 while(start<limit-1) {
michael@0 492 number=(uint16_t)((start+limit)/2);
michael@0 493 if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
michael@0 494 limit=number;
michael@0 495 } else {
michael@0 496 start=number;
michael@0 497 }
michael@0 498 }
michael@0 499
michael@0 500 /* return this regardless of whether it is an exact match */
michael@0 501 return groups+start*GROUP_LENGTH;
michael@0 502 }
michael@0 503
michael@0 504 /*
michael@0 505 * expandGroupLengths() reads a block of compressed lengths of 32 strings and
michael@0 506 * expands them into offsets and lengths for each string.
michael@0 507 * Lengths are stored with a variable-width encoding in consecutive nibbles:
michael@0 508 * If a nibble<0xc, then it is the length itself (0=empty string).
michael@0 509 * If a nibble>=0xc, then it forms a length value with the following nibble.
michael@0 510 * Calculation see below.
michael@0 511 * The offsets and lengths arrays must be at least 33 (one more) long because
michael@0 512 * there is no check here at the end if the last nibble is still used.
michael@0 513 */
michael@0 514 static const uint8_t *
michael@0 515 expandGroupLengths(const uint8_t *s,
michael@0 516 uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
michael@0 517 /* read the lengths of the 32 strings in this group and get each string's offset */
michael@0 518 uint16_t i=0, offset=0, length=0;
michael@0 519 uint8_t lengthByte;
michael@0 520
michael@0 521 /* all 32 lengths must be read to get the offset of the first group string */
michael@0 522 while(i<LINES_PER_GROUP) {
michael@0 523 lengthByte=*s++;
michael@0 524
michael@0 525 /* read even nibble - MSBs of lengthByte */
michael@0 526 if(length>=12) {
michael@0 527 /* double-nibble length spread across two bytes */
michael@0 528 length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
michael@0 529 lengthByte&=0xf;
michael@0 530 } else if((lengthByte /* &0xf0 */)>=0xc0) {
michael@0 531 /* double-nibble length spread across this one byte */
michael@0 532 length=(uint16_t)((lengthByte&0x3f)+12);
michael@0 533 } else {
michael@0 534 /* single-nibble length in MSBs */
michael@0 535 length=(uint16_t)(lengthByte>>4);
michael@0 536 lengthByte&=0xf;
michael@0 537 }
michael@0 538
michael@0 539 *offsets++=offset;
michael@0 540 *lengths++=length;
michael@0 541
michael@0 542 offset+=length;
michael@0 543 ++i;
michael@0 544
michael@0 545 /* read odd nibble - LSBs of lengthByte */
michael@0 546 if((lengthByte&0xf0)==0) {
michael@0 547 /* this nibble was not consumed for a double-nibble length above */
michael@0 548 length=lengthByte;
michael@0 549 if(length<12) {
michael@0 550 /* single-nibble length in LSBs */
michael@0 551 *offsets++=offset;
michael@0 552 *lengths++=length;
michael@0 553
michael@0 554 offset+=length;
michael@0 555 ++i;
michael@0 556 }
michael@0 557 } else {
michael@0 558 length=0; /* prevent double-nibble detection in the next iteration */
michael@0 559 }
michael@0 560 }
michael@0 561
michael@0 562 /* now, s is at the first group string */
michael@0 563 return s;
michael@0 564 }
michael@0 565
michael@0 566 static uint16_t
michael@0 567 expandGroupName(UCharNames *names, const uint16_t *group,
michael@0 568 uint16_t lineNumber, UCharNameChoice nameChoice,
michael@0 569 char *buffer, uint16_t bufferLength) {
michael@0 570 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
michael@0 571 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
michael@0 572 s=expandGroupLengths(s, offsets, lengths);
michael@0 573 return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
michael@0 574 buffer, bufferLength);
michael@0 575 }
michael@0 576
michael@0 577 static uint16_t
michael@0 578 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
michael@0 579 char *buffer, uint16_t bufferLength) {
michael@0 580 const uint16_t *group=getGroup(names, code);
michael@0 581 if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
michael@0 582 return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
michael@0 583 buffer, bufferLength);
michael@0 584 } else {
michael@0 585 /* group not found */
michael@0 586 /* zero-terminate */
michael@0 587 if(bufferLength>0) {
michael@0 588 *buffer=0;
michael@0 589 }
michael@0 590 return 0;
michael@0 591 }
michael@0 592 }
michael@0 593
michael@0 594 /*
michael@0 595 * enumGroupNames() enumerates all the names in a 32-group
michael@0 596 * and either calls the enumerator function or finds a given input name.
michael@0 597 */
michael@0 598 static UBool
michael@0 599 enumGroupNames(UCharNames *names, const uint16_t *group,
michael@0 600 UChar32 start, UChar32 end,
michael@0 601 UEnumCharNamesFn *fn, void *context,
michael@0 602 UCharNameChoice nameChoice) {
michael@0 603 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
michael@0 604 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
michael@0 605
michael@0 606 s=expandGroupLengths(s, offsets, lengths);
michael@0 607 if(fn!=DO_FIND_NAME) {
michael@0 608 char buffer[200];
michael@0 609 uint16_t length;
michael@0 610
michael@0 611 while(start<=end) {
michael@0 612 length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
michael@0 613 if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 614 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
michael@0 615 }
michael@0 616 /* here, we assume that the buffer is large enough */
michael@0 617 if(length>0) {
michael@0 618 if(!fn(context, start, nameChoice, buffer, length)) {
michael@0 619 return FALSE;
michael@0 620 }
michael@0 621 }
michael@0 622 ++start;
michael@0 623 }
michael@0 624 } else {
michael@0 625 const char *otherName=((FindName *)context)->otherName;
michael@0 626 while(start<=end) {
michael@0 627 if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
michael@0 628 ((FindName *)context)->code=start;
michael@0 629 return FALSE;
michael@0 630 }
michael@0 631 ++start;
michael@0 632 }
michael@0 633 }
michael@0 634 return TRUE;
michael@0 635 }
michael@0 636
michael@0 637 /*
michael@0 638 * enumExtNames enumerate extended names.
michael@0 639 * It only needs to do it if it is called with a real function and not
michael@0 640 * with the dummy DO_FIND_NAME, because u_charFromName() does a check
michael@0 641 * for extended names by itself.
michael@0 642 */
michael@0 643 static UBool
michael@0 644 enumExtNames(UChar32 start, UChar32 end,
michael@0 645 UEnumCharNamesFn *fn, void *context)
michael@0 646 {
michael@0 647 if(fn!=DO_FIND_NAME) {
michael@0 648 char buffer[200];
michael@0 649 uint16_t length;
michael@0 650
michael@0 651 while(start<=end) {
michael@0 652 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
michael@0 653 /* here, we assume that the buffer is large enough */
michael@0 654 if(length>0) {
michael@0 655 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
michael@0 656 return FALSE;
michael@0 657 }
michael@0 658 }
michael@0 659 ++start;
michael@0 660 }
michael@0 661 }
michael@0 662
michael@0 663 return TRUE;
michael@0 664 }
michael@0 665
michael@0 666 static UBool
michael@0 667 enumNames(UCharNames *names,
michael@0 668 UChar32 start, UChar32 limit,
michael@0 669 UEnumCharNamesFn *fn, void *context,
michael@0 670 UCharNameChoice nameChoice) {
michael@0 671 uint16_t startGroupMSB, endGroupMSB, groupCount;
michael@0 672 const uint16_t *group, *groupLimit;
michael@0 673
michael@0 674 startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
michael@0 675 endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
michael@0 676
michael@0 677 /* find the group that contains start, or the highest before it */
michael@0 678 group=getGroup(names, start);
michael@0 679
michael@0 680 if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
michael@0 681 /* enumerate synthetic names between start and the group start */
michael@0 682 UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
michael@0 683 if(extLimit>limit) {
michael@0 684 extLimit=limit;
michael@0 685 }
michael@0 686 if(!enumExtNames(start, extLimit-1, fn, context)) {
michael@0 687 return FALSE;
michael@0 688 }
michael@0 689 start=extLimit;
michael@0 690 }
michael@0 691
michael@0 692 if(startGroupMSB==endGroupMSB) {
michael@0 693 if(startGroupMSB==group[GROUP_MSB]) {
michael@0 694 /* if start and limit-1 are in the same group, then enumerate only in that one */
michael@0 695 return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
michael@0 696 }
michael@0 697 } else {
michael@0 698 const uint16_t *groups=GET_GROUPS(names);
michael@0 699 groupCount=*groups++;
michael@0 700 groupLimit=groups+groupCount*GROUP_LENGTH;
michael@0 701
michael@0 702 if(startGroupMSB==group[GROUP_MSB]) {
michael@0 703 /* enumerate characters in the partial start group */
michael@0 704 if((start&GROUP_MASK)!=0) {
michael@0 705 if(!enumGroupNames(names, group,
michael@0 706 start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
michael@0 707 fn, context, nameChoice)) {
michael@0 708 return FALSE;
michael@0 709 }
michael@0 710 group=NEXT_GROUP(group); /* continue with the next group */
michael@0 711 }
michael@0 712 } else if(startGroupMSB>group[GROUP_MSB]) {
michael@0 713 /* make sure that we start enumerating with the first group after start */
michael@0 714 const uint16_t *nextGroup=NEXT_GROUP(group);
michael@0 715 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 716 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
michael@0 717 if (end > limit) {
michael@0 718 end = limit;
michael@0 719 }
michael@0 720 if (!enumExtNames(start, end - 1, fn, context)) {
michael@0 721 return FALSE;
michael@0 722 }
michael@0 723 }
michael@0 724 group=nextGroup;
michael@0 725 }
michael@0 726
michael@0 727 /* enumerate entire groups between the start- and end-groups */
michael@0 728 while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
michael@0 729 const uint16_t *nextGroup;
michael@0 730 start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
michael@0 731 if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
michael@0 732 return FALSE;
michael@0 733 }
michael@0 734 nextGroup=NEXT_GROUP(group);
michael@0 735 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 736 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
michael@0 737 if (end > limit) {
michael@0 738 end = limit;
michael@0 739 }
michael@0 740 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
michael@0 741 return FALSE;
michael@0 742 }
michael@0 743 }
michael@0 744 group=nextGroup;
michael@0 745 }
michael@0 746
michael@0 747 /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
michael@0 748 if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
michael@0 749 return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
michael@0 750 } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
michael@0 751 UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
michael@0 752 if (next > start) {
michael@0 753 start = next;
michael@0 754 }
michael@0 755 } else {
michael@0 756 return TRUE;
michael@0 757 }
michael@0 758 }
michael@0 759
michael@0 760 /* we have not found a group, which means everything is made of
michael@0 761 extended names. */
michael@0 762 if (nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 763 if (limit > UCHAR_MAX_VALUE + 1) {
michael@0 764 limit = UCHAR_MAX_VALUE + 1;
michael@0 765 }
michael@0 766 return enumExtNames(start, limit - 1, fn, context);
michael@0 767 }
michael@0 768
michael@0 769 return TRUE;
michael@0 770 }
michael@0 771
michael@0 772 static uint16_t
michael@0 773 writeFactorSuffix(const uint16_t *factors, uint16_t count,
michael@0 774 const char *s, /* suffix elements */
michael@0 775 uint32_t code,
michael@0 776 uint16_t indexes[8], /* output fields from here */
michael@0 777 const char *elementBases[8], const char *elements[8],
michael@0 778 char *buffer, uint16_t bufferLength) {
michael@0 779 uint16_t i, factor, bufferPos=0;
michael@0 780 char c;
michael@0 781
michael@0 782 /* write elements according to the factors */
michael@0 783
michael@0 784 /*
michael@0 785 * the factorized elements are determined by modulo arithmetic
michael@0 786 * with the factors of this algorithm
michael@0 787 *
michael@0 788 * note that for fewer operations, count is decremented here
michael@0 789 */
michael@0 790 --count;
michael@0 791 for(i=count; i>0; --i) {
michael@0 792 factor=factors[i];
michael@0 793 indexes[i]=(uint16_t)(code%factor);
michael@0 794 code/=factor;
michael@0 795 }
michael@0 796 /*
michael@0 797 * we don't need to calculate the last modulus because start<=code<=end
michael@0 798 * guarantees here that code<=factors[0]
michael@0 799 */
michael@0 800 indexes[0]=(uint16_t)code;
michael@0 801
michael@0 802 /* write each element */
michael@0 803 for(;;) {
michael@0 804 if(elementBases!=NULL) {
michael@0 805 *elementBases++=s;
michael@0 806 }
michael@0 807
michael@0 808 /* skip indexes[i] strings */
michael@0 809 factor=indexes[i];
michael@0 810 while(factor>0) {
michael@0 811 while(*s++!=0) {}
michael@0 812 --factor;
michael@0 813 }
michael@0 814 if(elements!=NULL) {
michael@0 815 *elements++=s;
michael@0 816 }
michael@0 817
michael@0 818 /* write element */
michael@0 819 while((c=*s++)!=0) {
michael@0 820 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
michael@0 821 }
michael@0 822
michael@0 823 /* we do not need to perform the rest of this loop for i==count - break here */
michael@0 824 if(i>=count) {
michael@0 825 break;
michael@0 826 }
michael@0 827
michael@0 828 /* skip the rest of the strings for this factors[i] */
michael@0 829 factor=(uint16_t)(factors[i]-indexes[i]-1);
michael@0 830 while(factor>0) {
michael@0 831 while(*s++!=0) {}
michael@0 832 --factor;
michael@0 833 }
michael@0 834
michael@0 835 ++i;
michael@0 836 }
michael@0 837
michael@0 838 /* zero-terminate */
michael@0 839 if(bufferLength>0) {
michael@0 840 *buffer=0;
michael@0 841 }
michael@0 842
michael@0 843 return bufferPos;
michael@0 844 }
michael@0 845
michael@0 846 /*
michael@0 847 * Important:
michael@0 848 * Parts of findAlgName() are almost the same as some of getAlgName().
michael@0 849 * Fixes must be applied to both.
michael@0 850 */
michael@0 851 static uint16_t
michael@0 852 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
michael@0 853 char *buffer, uint16_t bufferLength) {
michael@0 854 uint16_t bufferPos=0;
michael@0 855
michael@0 856 /* Only the normative character name can be algorithmic. */
michael@0 857 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
michael@0 858 /* zero-terminate */
michael@0 859 if(bufferLength>0) {
michael@0 860 *buffer=0;
michael@0 861 }
michael@0 862 return 0;
michael@0 863 }
michael@0 864
michael@0 865 switch(range->type) {
michael@0 866 case 0: {
michael@0 867 /* name = prefix hex-digits */
michael@0 868 const char *s=(const char *)(range+1);
michael@0 869 char c;
michael@0 870
michael@0 871 uint16_t i, count;
michael@0 872
michael@0 873 /* copy prefix */
michael@0 874 while((c=*s++)!=0) {
michael@0 875 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
michael@0 876 }
michael@0 877
michael@0 878 /* write hexadecimal code point value */
michael@0 879 count=range->variant;
michael@0 880
michael@0 881 /* zero-terminate */
michael@0 882 if(count<bufferLength) {
michael@0 883 buffer[count]=0;
michael@0 884 }
michael@0 885
michael@0 886 for(i=count; i>0;) {
michael@0 887 if(--i<bufferLength) {
michael@0 888 c=(char)(code&0xf);
michael@0 889 if(c<10) {
michael@0 890 c+='0';
michael@0 891 } else {
michael@0 892 c+='A'-10;
michael@0 893 }
michael@0 894 buffer[i]=c;
michael@0 895 }
michael@0 896 code>>=4;
michael@0 897 }
michael@0 898
michael@0 899 bufferPos+=count;
michael@0 900 break;
michael@0 901 }
michael@0 902 case 1: {
michael@0 903 /* name = prefix factorized-elements */
michael@0 904 uint16_t indexes[8];
michael@0 905 const uint16_t *factors=(const uint16_t *)(range+1);
michael@0 906 uint16_t count=range->variant;
michael@0 907 const char *s=(const char *)(factors+count);
michael@0 908 char c;
michael@0 909
michael@0 910 /* copy prefix */
michael@0 911 while((c=*s++)!=0) {
michael@0 912 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
michael@0 913 }
michael@0 914
michael@0 915 bufferPos+=writeFactorSuffix(factors, count,
michael@0 916 s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
michael@0 917 break;
michael@0 918 }
michael@0 919 default:
michael@0 920 /* undefined type */
michael@0 921 /* zero-terminate */
michael@0 922 if(bufferLength>0) {
michael@0 923 *buffer=0;
michael@0 924 }
michael@0 925 break;
michael@0 926 }
michael@0 927
michael@0 928 return bufferPos;
michael@0 929 }
michael@0 930
michael@0 931 /*
michael@0 932 * Important: enumAlgNames() and findAlgName() are almost the same.
michael@0 933 * Any fix must be applied to both.
michael@0 934 */
michael@0 935 static UBool
michael@0 936 enumAlgNames(AlgorithmicRange *range,
michael@0 937 UChar32 start, UChar32 limit,
michael@0 938 UEnumCharNamesFn *fn, void *context,
michael@0 939 UCharNameChoice nameChoice) {
michael@0 940 char buffer[200];
michael@0 941 uint16_t length;
michael@0 942
michael@0 943 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
michael@0 944 return TRUE;
michael@0 945 }
michael@0 946
michael@0 947 switch(range->type) {
michael@0 948 case 0: {
michael@0 949 char *s, *end;
michael@0 950 char c;
michael@0 951
michael@0 952 /* get the full name of the start character */
michael@0 953 length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
michael@0 954 if(length<=0) {
michael@0 955 return TRUE;
michael@0 956 }
michael@0 957
michael@0 958 /* call the enumerator function with this first character */
michael@0 959 if(!fn(context, start, nameChoice, buffer, length)) {
michael@0 960 return FALSE;
michael@0 961 }
michael@0 962
michael@0 963 /* go to the end of the name; all these names have the same length */
michael@0 964 end=buffer;
michael@0 965 while(*end!=0) {
michael@0 966 ++end;
michael@0 967 }
michael@0 968
michael@0 969 /* enumerate the rest of the names */
michael@0 970 while(++start<limit) {
michael@0 971 /* increment the hexadecimal number on a character-basis */
michael@0 972 s=end;
michael@0 973 for (;;) {
michael@0 974 c=*--s;
michael@0 975 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
michael@0 976 *s=(char)(c+1);
michael@0 977 break;
michael@0 978 } else if(c=='9') {
michael@0 979 *s='A';
michael@0 980 break;
michael@0 981 } else if(c=='F') {
michael@0 982 *s='0';
michael@0 983 }
michael@0 984 }
michael@0 985
michael@0 986 if(!fn(context, start, nameChoice, buffer, length)) {
michael@0 987 return FALSE;
michael@0 988 }
michael@0 989 }
michael@0 990 break;
michael@0 991 }
michael@0 992 case 1: {
michael@0 993 uint16_t indexes[8];
michael@0 994 const char *elementBases[8], *elements[8];
michael@0 995 const uint16_t *factors=(const uint16_t *)(range+1);
michael@0 996 uint16_t count=range->variant;
michael@0 997 const char *s=(const char *)(factors+count);
michael@0 998 char *suffix, *t;
michael@0 999 uint16_t prefixLength, i, idx;
michael@0 1000
michael@0 1001 char c;
michael@0 1002
michael@0 1003 /* name = prefix factorized-elements */
michael@0 1004
michael@0 1005 /* copy prefix */
michael@0 1006 suffix=buffer;
michael@0 1007 prefixLength=0;
michael@0 1008 while((c=*s++)!=0) {
michael@0 1009 *suffix++=c;
michael@0 1010 ++prefixLength;
michael@0 1011 }
michael@0 1012
michael@0 1013 /* append the suffix of the start character */
michael@0 1014 length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
michael@0 1015 s, (uint32_t)start-range->start,
michael@0 1016 indexes, elementBases, elements,
michael@0 1017 suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
michael@0 1018
michael@0 1019 /* call the enumerator function with this first character */
michael@0 1020 if(!fn(context, start, nameChoice, buffer, length)) {
michael@0 1021 return FALSE;
michael@0 1022 }
michael@0 1023
michael@0 1024 /* enumerate the rest of the names */
michael@0 1025 while(++start<limit) {
michael@0 1026 /* increment the indexes in lexical order bound by the factors */
michael@0 1027 i=count;
michael@0 1028 for (;;) {
michael@0 1029 idx=(uint16_t)(indexes[--i]+1);
michael@0 1030 if(idx<factors[i]) {
michael@0 1031 /* skip one index and its element string */
michael@0 1032 indexes[i]=idx;
michael@0 1033 s=elements[i];
michael@0 1034 while(*s++!=0) {
michael@0 1035 }
michael@0 1036 elements[i]=s;
michael@0 1037 break;
michael@0 1038 } else {
michael@0 1039 /* reset this index to 0 and its element string to the first one */
michael@0 1040 indexes[i]=0;
michael@0 1041 elements[i]=elementBases[i];
michael@0 1042 }
michael@0 1043 }
michael@0 1044
michael@0 1045 /* to make matters a little easier, just append all elements to the suffix */
michael@0 1046 t=suffix;
michael@0 1047 length=prefixLength;
michael@0 1048 for(i=0; i<count; ++i) {
michael@0 1049 s=elements[i];
michael@0 1050 while((c=*s++)!=0) {
michael@0 1051 *t++=c;
michael@0 1052 ++length;
michael@0 1053 }
michael@0 1054 }
michael@0 1055 /* zero-terminate */
michael@0 1056 *t=0;
michael@0 1057
michael@0 1058 if(!fn(context, start, nameChoice, buffer, length)) {
michael@0 1059 return FALSE;
michael@0 1060 }
michael@0 1061 }
michael@0 1062 break;
michael@0 1063 }
michael@0 1064 default:
michael@0 1065 /* undefined type */
michael@0 1066 break;
michael@0 1067 }
michael@0 1068
michael@0 1069 return TRUE;
michael@0 1070 }
michael@0 1071
michael@0 1072 /*
michael@0 1073 * findAlgName() is almost the same as enumAlgNames() except that it
michael@0 1074 * returns the code point for a name if it fits into the range.
michael@0 1075 * It returns 0xffff otherwise.
michael@0 1076 */
michael@0 1077 static UChar32
michael@0 1078 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
michael@0 1079 UChar32 code;
michael@0 1080
michael@0 1081 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
michael@0 1082 return 0xffff;
michael@0 1083 }
michael@0 1084
michael@0 1085 switch(range->type) {
michael@0 1086 case 0: {
michael@0 1087 /* name = prefix hex-digits */
michael@0 1088 const char *s=(const char *)(range+1);
michael@0 1089 char c;
michael@0 1090
michael@0 1091 uint16_t i, count;
michael@0 1092
michael@0 1093 /* compare prefix */
michael@0 1094 while((c=*s++)!=0) {
michael@0 1095 if((char)c!=*otherName++) {
michael@0 1096 return 0xffff;
michael@0 1097 }
michael@0 1098 }
michael@0 1099
michael@0 1100 /* read hexadecimal code point value */
michael@0 1101 count=range->variant;
michael@0 1102 code=0;
michael@0 1103 for(i=0; i<count; ++i) {
michael@0 1104 c=*otherName++;
michael@0 1105 if('0'<=c && c<='9') {
michael@0 1106 code=(code<<4)|(c-'0');
michael@0 1107 } else if('A'<=c && c<='F') {
michael@0 1108 code=(code<<4)|(c-'A'+10);
michael@0 1109 } else {
michael@0 1110 return 0xffff;
michael@0 1111 }
michael@0 1112 }
michael@0 1113
michael@0 1114 /* does it fit into the range? */
michael@0 1115 if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
michael@0 1116 return code;
michael@0 1117 }
michael@0 1118 break;
michael@0 1119 }
michael@0 1120 case 1: {
michael@0 1121 char buffer[64];
michael@0 1122 uint16_t indexes[8];
michael@0 1123 const char *elementBases[8], *elements[8];
michael@0 1124 const uint16_t *factors=(const uint16_t *)(range+1);
michael@0 1125 uint16_t count=range->variant;
michael@0 1126 const char *s=(const char *)(factors+count), *t;
michael@0 1127 UChar32 start, limit;
michael@0 1128 uint16_t i, idx;
michael@0 1129
michael@0 1130 char c;
michael@0 1131
michael@0 1132 /* name = prefix factorized-elements */
michael@0 1133
michael@0 1134 /* compare prefix */
michael@0 1135 while((c=*s++)!=0) {
michael@0 1136 if((char)c!=*otherName++) {
michael@0 1137 return 0xffff;
michael@0 1138 }
michael@0 1139 }
michael@0 1140
michael@0 1141 start=(UChar32)range->start;
michael@0 1142 limit=(UChar32)(range->end+1);
michael@0 1143
michael@0 1144 /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
michael@0 1145 writeFactorSuffix(factors, count, s, 0,
michael@0 1146 indexes, elementBases, elements, buffer, sizeof(buffer));
michael@0 1147
michael@0 1148 /* compare the first suffix */
michael@0 1149 if(0==uprv_strcmp(otherName, buffer)) {
michael@0 1150 return start;
michael@0 1151 }
michael@0 1152
michael@0 1153 /* enumerate and compare the rest of the suffixes */
michael@0 1154 while(++start<limit) {
michael@0 1155 /* increment the indexes in lexical order bound by the factors */
michael@0 1156 i=count;
michael@0 1157 for (;;) {
michael@0 1158 idx=(uint16_t)(indexes[--i]+1);
michael@0 1159 if(idx<factors[i]) {
michael@0 1160 /* skip one index and its element string */
michael@0 1161 indexes[i]=idx;
michael@0 1162 s=elements[i];
michael@0 1163 while(*s++!=0) {}
michael@0 1164 elements[i]=s;
michael@0 1165 break;
michael@0 1166 } else {
michael@0 1167 /* reset this index to 0 and its element string to the first one */
michael@0 1168 indexes[i]=0;
michael@0 1169 elements[i]=elementBases[i];
michael@0 1170 }
michael@0 1171 }
michael@0 1172
michael@0 1173 /* to make matters a little easier, just compare all elements of the suffix */
michael@0 1174 t=otherName;
michael@0 1175 for(i=0; i<count; ++i) {
michael@0 1176 s=elements[i];
michael@0 1177 while((c=*s++)!=0) {
michael@0 1178 if(c!=*t++) {
michael@0 1179 s=""; /* does not match */
michael@0 1180 i=99;
michael@0 1181 }
michael@0 1182 }
michael@0 1183 }
michael@0 1184 if(i<99 && *t==0) {
michael@0 1185 return start;
michael@0 1186 }
michael@0 1187 }
michael@0 1188 break;
michael@0 1189 }
michael@0 1190 default:
michael@0 1191 /* undefined type */
michael@0 1192 break;
michael@0 1193 }
michael@0 1194
michael@0 1195 return 0xffff;
michael@0 1196 }
michael@0 1197
michael@0 1198 /* sets of name characters, maximum name lengths ---------------------------- */
michael@0 1199
/*
 * Bit-set helpers for a set of byte values stored as eight 32-bit words:
 * word index = byte value >> 5, bit index = byte value & 0x1f.
 * The value is reduced to uint8_t first, so only its low 8 bits count.
 * Both macros evaluate c twice; do not pass expressions with side effects.
 * c is fully parenthesized so that any expression may be passed.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
michael@0 1202
michael@0 1203 static int32_t
michael@0 1204 calcStringSetLength(uint32_t set[8], const char *s) {
michael@0 1205 int32_t length=0;
michael@0 1206 char c;
michael@0 1207
michael@0 1208 while((c=*s++)!=0) {
michael@0 1209 SET_ADD(set, c);
michael@0 1210 ++length;
michael@0 1211 }
michael@0 1212 return length;
michael@0 1213 }
michael@0 1214
michael@0 1215 static int32_t
michael@0 1216 calcAlgNameSetsLengths(int32_t maxNameLength) {
michael@0 1217 AlgorithmicRange *range;
michael@0 1218 uint32_t *p;
michael@0 1219 uint32_t rangeCount;
michael@0 1220 int32_t length;
michael@0 1221
michael@0 1222 /* enumerate algorithmic ranges */
michael@0 1223 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
michael@0 1224 rangeCount=*p;
michael@0 1225 range=(AlgorithmicRange *)(p+1);
michael@0 1226 while(rangeCount>0) {
michael@0 1227 switch(range->type) {
michael@0 1228 case 0:
michael@0 1229 /* name = prefix + (range->variant times) hex-digits */
michael@0 1230 /* prefix */
michael@0 1231 length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
michael@0 1232 if(length>maxNameLength) {
michael@0 1233 maxNameLength=length;
michael@0 1234 }
michael@0 1235 break;
michael@0 1236 case 1: {
michael@0 1237 /* name = prefix factorized-elements */
michael@0 1238 const uint16_t *factors=(const uint16_t *)(range+1);
michael@0 1239 const char *s;
michael@0 1240 int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
michael@0 1241
michael@0 1242 /* prefix length */
michael@0 1243 s=(const char *)(factors+count);
michael@0 1244 length=calcStringSetLength(gNameSet, s);
michael@0 1245 s+=length+1; /* start of factor suffixes */
michael@0 1246
michael@0 1247 /* get the set and maximum factor suffix length for each factor */
michael@0 1248 for(i=0; i<count; ++i) {
michael@0 1249 maxFactorLength=0;
michael@0 1250 for(factor=factors[i]; factor>0; --factor) {
michael@0 1251 factorLength=calcStringSetLength(gNameSet, s);
michael@0 1252 s+=factorLength+1;
michael@0 1253 if(factorLength>maxFactorLength) {
michael@0 1254 maxFactorLength=factorLength;
michael@0 1255 }
michael@0 1256 }
michael@0 1257 length+=maxFactorLength;
michael@0 1258 }
michael@0 1259
michael@0 1260 if(length>maxNameLength) {
michael@0 1261 maxNameLength=length;
michael@0 1262 }
michael@0 1263 break;
michael@0 1264 }
michael@0 1265 default:
michael@0 1266 /* unknown type */
michael@0 1267 break;
michael@0 1268 }
michael@0 1269
michael@0 1270 range=(AlgorithmicRange *)((uint8_t *)range+range->size);
michael@0 1271 --rangeCount;
michael@0 1272 }
michael@0 1273 return maxNameLength;
michael@0 1274 }
michael@0 1275
michael@0 1276 static int32_t
michael@0 1277 calcExtNameSetsLengths(int32_t maxNameLength) {
michael@0 1278 int32_t i, length;
michael@0 1279
michael@0 1280 for(i=0; i<LENGTHOF(charCatNames); ++i) {
michael@0 1281 /*
michael@0 1282 * for each category, count the length of the category name
michael@0 1283 * plus 9=
michael@0 1284 * 2 for <>
michael@0 1285 * 1 for -
michael@0 1286 * 6 for most hex digits per code point
michael@0 1287 */
michael@0 1288 length=9+calcStringSetLength(gNameSet, charCatNames[i]);
michael@0 1289 if(length>maxNameLength) {
michael@0 1290 maxNameLength=length;
michael@0 1291 }
michael@0 1292 }
michael@0 1293 return maxNameLength;
michael@0 1294 }
michael@0 1295
michael@0 1296 static int32_t
michael@0 1297 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
michael@0 1298 uint32_t set[8],
michael@0 1299 const uint8_t **pLine, const uint8_t *lineLimit) {
michael@0 1300 const uint8_t *line=*pLine;
michael@0 1301 int32_t length=0, tokenLength;
michael@0 1302 uint16_t c, token;
michael@0 1303
michael@0 1304 while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
michael@0 1305 if(c>=tokenCount) {
michael@0 1306 /* implicit letter */
michael@0 1307 SET_ADD(set, c);
michael@0 1308 ++length;
michael@0 1309 } else {
michael@0 1310 token=tokens[c];
michael@0 1311 if(token==(uint16_t)(-2)) {
michael@0 1312 /* this is a lead byte for a double-byte token */
michael@0 1313 c=c<<8|*line++;
michael@0 1314 token=tokens[c];
michael@0 1315 }
michael@0 1316 if(token==(uint16_t)(-1)) {
michael@0 1317 /* explicit letter */
michael@0 1318 SET_ADD(set, c);
michael@0 1319 ++length;
michael@0 1320 } else {
michael@0 1321 /* count token word */
michael@0 1322 if(tokenLengths!=NULL) {
michael@0 1323 /* use cached token length */
michael@0 1324 tokenLength=tokenLengths[c];
michael@0 1325 if(tokenLength==0) {
michael@0 1326 tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
michael@0 1327 tokenLengths[c]=(int8_t)tokenLength;
michael@0 1328 }
michael@0 1329 } else {
michael@0 1330 tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
michael@0 1331 }
michael@0 1332 length+=tokenLength;
michael@0 1333 }
michael@0 1334 }
michael@0 1335 }
michael@0 1336
michael@0 1337 *pLine=line;
michael@0 1338 return length;
michael@0 1339 }
michael@0 1340
michael@0 1341 static void
michael@0 1342 calcGroupNameSetsLengths(int32_t maxNameLength) {
michael@0 1343 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
michael@0 1344
michael@0 1345 uint16_t *tokens=(uint16_t *)uCharNames+8;
michael@0 1346 uint16_t tokenCount=*tokens++;
michael@0 1347 uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
michael@0 1348
michael@0 1349 int8_t *tokenLengths;
michael@0 1350
michael@0 1351 const uint16_t *group;
michael@0 1352 const uint8_t *s, *line, *lineLimit;
michael@0 1353
michael@0 1354 int32_t groupCount, lineNumber, length;
michael@0 1355
michael@0 1356 tokenLengths=(int8_t *)uprv_malloc(tokenCount);
michael@0 1357 if(tokenLengths!=NULL) {
michael@0 1358 uprv_memset(tokenLengths, 0, tokenCount);
michael@0 1359 }
michael@0 1360
michael@0 1361 group=GET_GROUPS(uCharNames);
michael@0 1362 groupCount=*group++;
michael@0 1363
michael@0 1364 /* enumerate all groups */
michael@0 1365 while(groupCount>0) {
michael@0 1366 s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
michael@0 1367 s=expandGroupLengths(s, offsets, lengths);
michael@0 1368
michael@0 1369 /* enumerate all lines in each group */
michael@0 1370 for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
michael@0 1371 line=s+offsets[lineNumber];
michael@0 1372 length=lengths[lineNumber];
michael@0 1373 if(length==0) {
michael@0 1374 continue;
michael@0 1375 }
michael@0 1376
michael@0 1377 lineLimit=line+length;
michael@0 1378
michael@0 1379 /* read regular name */
michael@0 1380 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
michael@0 1381 if(length>maxNameLength) {
michael@0 1382 maxNameLength=length;
michael@0 1383 }
michael@0 1384 if(line==lineLimit) {
michael@0 1385 continue;
michael@0 1386 }
michael@0 1387
michael@0 1388 /* read Unicode 1.0 name */
michael@0 1389 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
michael@0 1390 if(length>maxNameLength) {
michael@0 1391 maxNameLength=length;
michael@0 1392 }
michael@0 1393 if(line==lineLimit) {
michael@0 1394 continue;
michael@0 1395 }
michael@0 1396
michael@0 1397 /* read ISO comment */
michael@0 1398 /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
michael@0 1399 }
michael@0 1400
michael@0 1401 group=NEXT_GROUP(group);
michael@0 1402 --groupCount;
michael@0 1403 }
michael@0 1404
michael@0 1405 if(tokenLengths!=NULL) {
michael@0 1406 uprv_free(tokenLengths);
michael@0 1407 }
michael@0 1408
michael@0 1409 /* set gMax... - name length last for threading */
michael@0 1410 gMaxNameLength=maxNameLength;
michael@0 1411 }
michael@0 1412
michael@0 1413 static UBool
michael@0 1414 calcNameSetsLengths(UErrorCode *pErrorCode) {
michael@0 1415 static const char extChars[]="0123456789ABCDEF<>-";
michael@0 1416 int32_t i, maxNameLength;
michael@0 1417
michael@0 1418 if(gMaxNameLength!=0) {
michael@0 1419 return TRUE;
michael@0 1420 }
michael@0 1421
michael@0 1422 if(!isDataLoaded(pErrorCode)) {
michael@0 1423 return FALSE;
michael@0 1424 }
michael@0 1425
michael@0 1426 /* set hex digits, used in various names, and <>-, used in extended names */
michael@0 1427 for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
michael@0 1428 SET_ADD(gNameSet, extChars[i]);
michael@0 1429 }
michael@0 1430
michael@0 1431 /* set sets and lengths from algorithmic names */
michael@0 1432 maxNameLength=calcAlgNameSetsLengths(0);
michael@0 1433
michael@0 1434 /* set sets and lengths from extended names */
michael@0 1435 maxNameLength=calcExtNameSetsLengths(maxNameLength);
michael@0 1436
michael@0 1437 /* set sets and lengths from group names, set global maximum values */
michael@0 1438 calcGroupNameSetsLengths(maxNameLength);
michael@0 1439
michael@0 1440 return TRUE;
michael@0 1441 }
michael@0 1442
michael@0 1443 /* public API --------------------------------------------------------------- */
michael@0 1444
michael@0 1445 U_CAPI int32_t U_EXPORT2
michael@0 1446 u_charName(UChar32 code, UCharNameChoice nameChoice,
michael@0 1447 char *buffer, int32_t bufferLength,
michael@0 1448 UErrorCode *pErrorCode) {
michael@0 1449 AlgorithmicRange *algRange;
michael@0 1450 uint32_t *p;
michael@0 1451 uint32_t i;
michael@0 1452 int32_t length;
michael@0 1453
michael@0 1454 /* check the argument values */
michael@0 1455 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1456 return 0;
michael@0 1457 } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
michael@0 1458 bufferLength<0 || (bufferLength>0 && buffer==NULL)
michael@0 1459 ) {
michael@0 1460 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1461 return 0;
michael@0 1462 }
michael@0 1463
michael@0 1464 if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
michael@0 1465 return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
michael@0 1466 }
michael@0 1467
michael@0 1468 length=0;
michael@0 1469
michael@0 1470 /* try algorithmic names first */
michael@0 1471 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
michael@0 1472 i=*p;
michael@0 1473 algRange=(AlgorithmicRange *)(p+1);
michael@0 1474 while(i>0) {
michael@0 1475 if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
michael@0 1476 length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
michael@0 1477 break;
michael@0 1478 }
michael@0 1479 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
michael@0 1480 --i;
michael@0 1481 }
michael@0 1482
michael@0 1483 if(i==0) {
michael@0 1484 if (nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 1485 length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
michael@0 1486 if (!length) {
michael@0 1487 /* extended character name */
michael@0 1488 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
michael@0 1489 }
michael@0 1490 } else {
michael@0 1491 /* normal character name */
michael@0 1492 length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
michael@0 1493 }
michael@0 1494 }
michael@0 1495
michael@0 1496 return u_terminateChars(buffer, bufferLength, length, pErrorCode);
michael@0 1497 }
michael@0 1498
michael@0 1499 U_CAPI int32_t U_EXPORT2
michael@0 1500 u_getISOComment(UChar32 /*c*/,
michael@0 1501 char *dest, int32_t destCapacity,
michael@0 1502 UErrorCode *pErrorCode) {
michael@0 1503 /* check the argument values */
michael@0 1504 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1505 return 0;
michael@0 1506 } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
michael@0 1507 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1508 return 0;
michael@0 1509 }
michael@0 1510
michael@0 1511 return u_terminateChars(dest, destCapacity, 0, pErrorCode);
michael@0 1512 }
michael@0 1513
michael@0 1514 U_CAPI UChar32 U_EXPORT2
michael@0 1515 u_charFromName(UCharNameChoice nameChoice,
michael@0 1516 const char *name,
michael@0 1517 UErrorCode *pErrorCode) {
michael@0 1518 char upper[120], lower[120];
michael@0 1519 FindName findName;
michael@0 1520 AlgorithmicRange *algRange;
michael@0 1521 uint32_t *p;
michael@0 1522 uint32_t i;
michael@0 1523 UChar32 cp = 0;
michael@0 1524 char c0;
michael@0 1525 UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
michael@0 1526
michael@0 1527 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1528 return error;
michael@0 1529 }
michael@0 1530
michael@0 1531 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
michael@0 1532 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1533 return error;
michael@0 1534 }
michael@0 1535
michael@0 1536 if(!isDataLoaded(pErrorCode)) {
michael@0 1537 return error;
michael@0 1538 }
michael@0 1539
michael@0 1540 /* construct the uppercase and lowercase of the name first */
michael@0 1541 for(i=0; i<sizeof(upper); ++i) {
michael@0 1542 if((c0=*name++)!=0) {
michael@0 1543 upper[i]=uprv_toupper(c0);
michael@0 1544 lower[i]=uprv_tolower(c0);
michael@0 1545 } else {
michael@0 1546 upper[i]=lower[i]=0;
michael@0 1547 break;
michael@0 1548 }
michael@0 1549 }
michael@0 1550 if(i==sizeof(upper)) {
michael@0 1551 /* name too long, there is no such character */
michael@0 1552 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
michael@0 1553 return error;
michael@0 1554 }
michael@0 1555
michael@0 1556 /* try extended names first */
michael@0 1557 if (lower[0] == '<') {
michael@0 1558 if (nameChoice == U_EXTENDED_CHAR_NAME) {
michael@0 1559 if (lower[--i] == '>') {
michael@0 1560 for (--i; lower[i] && lower[i] != '-'; --i) {
michael@0 1561 }
michael@0 1562
michael@0 1563 if (lower[i] == '-') { /* We've got a category. */
michael@0 1564 uint32_t cIdx;
michael@0 1565
michael@0 1566 lower[i] = 0;
michael@0 1567
michael@0 1568 for (++i; lower[i] != '>'; ++i) {
michael@0 1569 if (lower[i] >= '0' && lower[i] <= '9') {
michael@0 1570 cp = (cp << 4) + lower[i] - '0';
michael@0 1571 } else if (lower[i] >= 'a' && lower[i] <= 'f') {
michael@0 1572 cp = (cp << 4) + lower[i] - 'a' + 10;
michael@0 1573 } else {
michael@0 1574 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
michael@0 1575 return error;
michael@0 1576 }
michael@0 1577 }
michael@0 1578
michael@0 1579 /* Now validate the category name.
michael@0 1580 We could use a binary search, or a trie, if
michael@0 1581 we really wanted to. */
michael@0 1582
michael@0 1583 for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
michael@0 1584
michael@0 1585 if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
michael@0 1586 if (getCharCat(cp) == cIdx) {
michael@0 1587 return cp;
michael@0 1588 }
michael@0 1589 break;
michael@0 1590 }
michael@0 1591 }
michael@0 1592 }
michael@0 1593 }
michael@0 1594 }
michael@0 1595
michael@0 1596 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
michael@0 1597 return error;
michael@0 1598 }
michael@0 1599
michael@0 1600 /* try algorithmic names now */
michael@0 1601 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
michael@0 1602 i=*p;
michael@0 1603 algRange=(AlgorithmicRange *)(p+1);
michael@0 1604 while(i>0) {
michael@0 1605 if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
michael@0 1606 return cp;
michael@0 1607 }
michael@0 1608 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
michael@0 1609 --i;
michael@0 1610 }
michael@0 1611
michael@0 1612 /* normal character name */
michael@0 1613 findName.otherName=upper;
michael@0 1614 findName.code=error;
michael@0 1615 enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
michael@0 1616 if (findName.code == error) {
michael@0 1617 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
michael@0 1618 }
michael@0 1619 return findName.code;
michael@0 1620 }
michael@0 1621
michael@0 1622 U_CAPI void U_EXPORT2
michael@0 1623 u_enumCharNames(UChar32 start, UChar32 limit,
michael@0 1624 UEnumCharNamesFn *fn,
michael@0 1625 void *context,
michael@0 1626 UCharNameChoice nameChoice,
michael@0 1627 UErrorCode *pErrorCode) {
michael@0 1628 AlgorithmicRange *algRange;
michael@0 1629 uint32_t *p;
michael@0 1630 uint32_t i;
michael@0 1631
michael@0 1632 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1633 return;
michael@0 1634 }
michael@0 1635
michael@0 1636 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
michael@0 1637 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1638 return;
michael@0 1639 }
michael@0 1640
michael@0 1641 if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
michael@0 1642 limit = UCHAR_MAX_VALUE + 1;
michael@0 1643 }
michael@0 1644 if((uint32_t)start>=(uint32_t)limit) {
michael@0 1645 return;
michael@0 1646 }
michael@0 1647
michael@0 1648 if(!isDataLoaded(pErrorCode)) {
michael@0 1649 return;
michael@0 1650 }
michael@0 1651
michael@0 1652 /* interleave the data-driven ones with the algorithmic ones */
michael@0 1653 /* iterate over all algorithmic ranges; assume that they are in ascending order */
michael@0 1654 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
michael@0 1655 i=*p;
michael@0 1656 algRange=(AlgorithmicRange *)(p+1);
michael@0 1657 while(i>0) {
michael@0 1658 /* enumerate the character names before the current algorithmic range */
michael@0 1659 /* here: start<limit */
michael@0 1660 if((uint32_t)start<algRange->start) {
michael@0 1661 if((uint32_t)limit<=algRange->start) {
michael@0 1662 enumNames(uCharNames, start, limit, fn, context, nameChoice);
michael@0 1663 return;
michael@0 1664 }
michael@0 1665 if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
michael@0 1666 return;
michael@0 1667 }
michael@0 1668 start=(UChar32)algRange->start;
michael@0 1669 }
michael@0 1670 /* enumerate the character names in the current algorithmic range */
michael@0 1671 /* here: algRange->start<=start<limit */
michael@0 1672 if((uint32_t)start<=algRange->end) {
michael@0 1673 if((uint32_t)limit<=(algRange->end+1)) {
michael@0 1674 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
michael@0 1675 return;
michael@0 1676 }
michael@0 1677 if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
michael@0 1678 return;
michael@0 1679 }
michael@0 1680 start=(UChar32)algRange->end+1;
michael@0 1681 }
michael@0 1682 /* continue to the next algorithmic range (here: start<limit) */
michael@0 1683 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
michael@0 1684 --i;
michael@0 1685 }
michael@0 1686 /* enumerate the character names after the last algorithmic range */
michael@0 1687 enumNames(uCharNames, start, limit, fn, context, nameChoice);
michael@0 1688 }
michael@0 1689
michael@0 1690 U_CAPI int32_t U_EXPORT2
michael@0 1691 uprv_getMaxCharNameLength() {
michael@0 1692 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 1693 if(calcNameSetsLengths(&errorCode)) {
michael@0 1694 return gMaxNameLength;
michael@0 1695 } else {
michael@0 1696 return 0;
michael@0 1697 }
michael@0 1698 }
michael@0 1699
michael@0 1700 /**
michael@0 1701 * Converts the char set cset into a Unicode set uset.
michael@0 1702 * @param cset Set of 256 bit flags corresponding to a set of chars.
michael@0 1703 * @param uset USet to receive characters. Existing contents are deleted.
michael@0 1704 */
michael@0 1705 static void
michael@0 1706 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
michael@0 1707 UChar us[256];
michael@0 1708 char cs[256];
michael@0 1709
michael@0 1710 int32_t i, length;
michael@0 1711 UErrorCode errorCode;
michael@0 1712
michael@0 1713 errorCode=U_ZERO_ERROR;
michael@0 1714
michael@0 1715 if(!calcNameSetsLengths(&errorCode)) {
michael@0 1716 return;
michael@0 1717 }
michael@0 1718
michael@0 1719 /* build a char string with all chars that are used in character names */
michael@0 1720 length=0;
michael@0 1721 for(i=0; i<256; ++i) {
michael@0 1722 if(SET_CONTAINS(cset, i)) {
michael@0 1723 cs[length++]=(char)i;
michael@0 1724 }
michael@0 1725 }
michael@0 1726
michael@0 1727 /* convert the char string to a UChar string */
michael@0 1728 u_charsToUChars(cs, us, length);
michael@0 1729
michael@0 1730 /* add each UChar to the USet */
michael@0 1731 for(i=0; i<length; ++i) {
michael@0 1732 if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
michael@0 1733 sa->add(sa->set, us[i]);
michael@0 1734 }
michael@0 1735 }
michael@0 1736 }
michael@0 1737
michael@0 1738 /**
michael@0 1739 * Fills set with characters that are used in Unicode character names.
michael@0 1740 * @param set USet to receive characters.
michael@0 1741 */
michael@0 1742 U_CAPI void U_EXPORT2
michael@0 1743 uprv_getCharNameCharacters(const USetAdder *sa) {
michael@0 1744 charSetToUSet(gNameSet, sa);
michael@0 1745 }
michael@0 1746
michael@0 1747 /* data swapping ------------------------------------------------------------ */
michael@0 1748
michael@0 1749 /*
michael@0 1750 * The token table contains non-negative entries for token bytes,
michael@0 1751 * and -1 for bytes that represent themselves in the data file's charset.
michael@0 1752 * -2 entries are used for lead bytes.
michael@0 1753 *
michael@0 1754 * Direct bytes (-1 entries) must be translated from the input charset family
michael@0 1755 * to the output charset family.
michael@0 1756 * makeTokenMap() writes a permutation mapping for this.
michael@0 1757 * Use it once for single-/lead-byte tokens and once more for all trail byte
michael@0 1758 * tokens. (';' is an unused trail byte marked with -1.)
michael@0 1759 */
michael@0 1760 static void
michael@0 1761 makeTokenMap(const UDataSwapper *ds,
michael@0 1762 int16_t tokens[], uint16_t tokenCount,
michael@0 1763 uint8_t map[256],
michael@0 1764 UErrorCode *pErrorCode) {
michael@0 1765 UBool usedOutChar[256];
michael@0 1766 uint16_t i, j;
michael@0 1767 uint8_t c1, c2;
michael@0 1768
michael@0 1769 if(U_FAILURE(*pErrorCode)) {
michael@0 1770 return;
michael@0 1771 }
michael@0 1772
michael@0 1773 if(ds->inCharset==ds->outCharset) {
michael@0 1774 /* Same charset family: identity permutation */
michael@0 1775 for(i=0; i<256; ++i) {
michael@0 1776 map[i]=(uint8_t)i;
michael@0 1777 }
michael@0 1778 } else {
michael@0 1779 uprv_memset(map, 0, 256);
michael@0 1780 uprv_memset(usedOutChar, 0, 256);
michael@0 1781
michael@0 1782 if(tokenCount>256) {
michael@0 1783 tokenCount=256;
michael@0 1784 }
michael@0 1785
michael@0 1786 /* set the direct bytes (byte 0 always maps to itself) */
michael@0 1787 for(i=1; i<tokenCount; ++i) {
michael@0 1788 if(tokens[i]==-1) {
michael@0 1789 /* convert the direct byte character */
michael@0 1790 c1=(uint8_t)i;
michael@0 1791 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
michael@0 1792 if(U_FAILURE(*pErrorCode)) {
michael@0 1793 udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
michael@0 1794 i, ds->inCharset);
michael@0 1795 return;
michael@0 1796 }
michael@0 1797
michael@0 1798 /* enter the converted character into the map and mark it used */
michael@0 1799 map[c1]=c2;
michael@0 1800 usedOutChar[c2]=TRUE;
michael@0 1801 }
michael@0 1802 }
michael@0 1803
michael@0 1804 /* set the mappings for the rest of the permutation */
michael@0 1805 for(i=j=1; i<tokenCount; ++i) {
michael@0 1806 /* set mappings that were not set for direct bytes */
michael@0 1807 if(map[i]==0) {
michael@0 1808 /* set an output byte value that was not used as an output byte above */
michael@0 1809 while(usedOutChar[j]) {
michael@0 1810 ++j;
michael@0 1811 }
michael@0 1812 map[i]=(uint8_t)j++;
michael@0 1813 }
michael@0 1814 }
michael@0 1815
michael@0 1816 /*
michael@0 1817 * leave mappings at tokenCount and above unset if tokenCount<256
michael@0 1818 * because they won't be used
michael@0 1819 */
michael@0 1820 }
michael@0 1821 }
michael@0 1822
michael@0 1823 U_CAPI int32_t U_EXPORT2
michael@0 1824 uchar_swapNames(const UDataSwapper *ds,
michael@0 1825 const void *inData, int32_t length, void *outData,
michael@0 1826 UErrorCode *pErrorCode) {
michael@0 1827 const UDataInfo *pInfo;
michael@0 1828 int32_t headerSize;
michael@0 1829
michael@0 1830 const uint8_t *inBytes;
michael@0 1831 uint8_t *outBytes;
michael@0 1832
michael@0 1833 uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
michael@0 1834 offset, i, count, stringsCount;
michael@0 1835
michael@0 1836 const AlgorithmicRange *inRange;
michael@0 1837 AlgorithmicRange *outRange;
michael@0 1838
michael@0 1839 /* udata_swapDataHeader checks the arguments */
michael@0 1840 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 1841 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1842 return 0;
michael@0 1843 }
michael@0 1844
michael@0 1845 /* check data format and format version */
michael@0 1846 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 1847 if(!(
michael@0 1848 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
michael@0 1849 pInfo->dataFormat[1]==0x6e &&
michael@0 1850 pInfo->dataFormat[2]==0x61 &&
michael@0 1851 pInfo->dataFormat[3]==0x6d &&
michael@0 1852 pInfo->formatVersion[0]==1
michael@0 1853 )) {
michael@0 1854 udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
michael@0 1855 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 1856 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 1857 pInfo->formatVersion[0]);
michael@0 1858 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 1859 return 0;
michael@0 1860 }
michael@0 1861
michael@0 1862 inBytes=(const uint8_t *)inData+headerSize;
michael@0 1863 outBytes=(uint8_t *)outData+headerSize;
michael@0 1864 if(length<0) {
michael@0 1865 algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
michael@0 1866 } else {
michael@0 1867 length-=headerSize;
michael@0 1868 if( length<20 ||
michael@0 1869 (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
michael@0 1870 ) {
michael@0 1871 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
michael@0 1872 length);
michael@0 1873 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 1874 return 0;
michael@0 1875 }
michael@0 1876 }
michael@0 1877
michael@0 1878 if(length<0) {
michael@0 1879 /* preflighting: iterate through algorithmic ranges */
michael@0 1880 offset=algNamesOffset;
michael@0 1881 count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
michael@0 1882 offset+=4;
michael@0 1883
michael@0 1884 for(i=0; i<count; ++i) {
michael@0 1885 inRange=(const AlgorithmicRange *)(inBytes+offset);
michael@0 1886 offset+=ds->readUInt16(inRange->size);
michael@0 1887 }
michael@0 1888 } else {
michael@0 1889 /* swap data */
michael@0 1890 const uint16_t *p;
michael@0 1891 uint16_t *q, *temp;
michael@0 1892
michael@0 1893 int16_t tokens[512];
michael@0 1894 uint16_t tokenCount;
michael@0 1895
michael@0 1896 uint8_t map[256], trailMap[256];
michael@0 1897
michael@0 1898 /* copy the data for inaccessible bytes */
michael@0 1899 if(inBytes!=outBytes) {
michael@0 1900 uprv_memcpy(outBytes, inBytes, length);
michael@0 1901 }
michael@0 1902
michael@0 1903 /* the initial 4 offsets first */
michael@0 1904 tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
michael@0 1905 groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
michael@0 1906 groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
michael@0 1907 ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
michael@0 1908
michael@0 1909 /*
michael@0 1910 * now the tokens table
michael@0 1911 * it needs to be permutated along with the compressed name strings
michael@0 1912 */
michael@0 1913 p=(const uint16_t *)(inBytes+16);
michael@0 1914 q=(uint16_t *)(outBytes+16);
michael@0 1915
michael@0 1916 /* read and swap the tokenCount */
michael@0 1917 tokenCount=ds->readUInt16(*p);
michael@0 1918 ds->swapArray16(ds, p, 2, q, pErrorCode);
michael@0 1919 ++p;
michael@0 1920 ++q;
michael@0 1921
michael@0 1922 /* read the first 512 tokens and make the token maps */
michael@0 1923 if(tokenCount<=512) {
michael@0 1924 count=tokenCount;
michael@0 1925 } else {
michael@0 1926 count=512;
michael@0 1927 }
michael@0 1928 for(i=0; i<count; ++i) {
michael@0 1929 tokens[i]=udata_readInt16(ds, p[i]);
michael@0 1930 }
michael@0 1931 for(; i<512; ++i) {
michael@0 1932 tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
michael@0 1933 }
michael@0 1934 makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
michael@0 1935 makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
michael@0 1936 if(U_FAILURE(*pErrorCode)) {
michael@0 1937 return 0;
michael@0 1938 }
michael@0 1939
michael@0 1940 /*
michael@0 1941 * swap and permutate the tokens
michael@0 1942 * go through a temporary array to support in-place swapping
michael@0 1943 */
michael@0 1944 temp=(uint16_t *)uprv_malloc(tokenCount*2);
michael@0 1945 if(temp==NULL) {
michael@0 1946 udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
michael@0 1947 tokenCount);
michael@0 1948 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1949 return 0;
michael@0 1950 }
michael@0 1951
michael@0 1952 /* swap and permutate single-/lead-byte tokens */
michael@0 1953 for(i=0; i<tokenCount && i<256; ++i) {
michael@0 1954 ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
michael@0 1955 }
michael@0 1956
michael@0 1957 /* swap and permutate trail-byte tokens */
michael@0 1958 for(; i<tokenCount; ++i) {
michael@0 1959 ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
michael@0 1960 }
michael@0 1961
michael@0 1962 /* copy the result into the output and free the temporary array */
michael@0 1963 uprv_memcpy(q, temp, tokenCount*2);
michael@0 1964 uprv_free(temp);
michael@0 1965
michael@0 1966 /*
michael@0 1967 * swap the token strings but not a possible padding byte after
michael@0 1968 * the terminating NUL of the last string
michael@0 1969 */
michael@0 1970 udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
michael@0 1971 outBytes+tokenStringOffset, pErrorCode);
michael@0 1972 if(U_FAILURE(*pErrorCode)) {
michael@0 1973 udata_printError(ds, "uchar_swapNames(token strings) failed\n");
michael@0 1974 return 0;
michael@0 1975 }
michael@0 1976
michael@0 1977 /* swap the group table */
michael@0 1978 count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
michael@0 1979 ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
michael@0 1980 outBytes+groupsOffset, pErrorCode);
michael@0 1981
michael@0 1982 /*
michael@0 1983 * swap the group strings
michael@0 1984 * swap the string bytes but not the nibble-encoded string lengths
michael@0 1985 */
michael@0 1986 if(ds->inCharset!=ds->outCharset) {
michael@0 1987 uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
michael@0 1988
michael@0 1989 const uint8_t *inStrings, *nextInStrings;
michael@0 1990 uint8_t *outStrings;
michael@0 1991
michael@0 1992 uint8_t c;
michael@0 1993
michael@0 1994 inStrings=inBytes+groupStringOffset;
michael@0 1995 outStrings=outBytes+groupStringOffset;
michael@0 1996
michael@0 1997 stringsCount=algNamesOffset-groupStringOffset;
michael@0 1998
michael@0 1999 /* iterate through string groups until only a few padding bytes are left */
michael@0 2000 while(stringsCount>32) {
michael@0 2001 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
michael@0 2002
michael@0 2003 /* move past the length bytes */
michael@0 2004 stringsCount-=(uint32_t)(nextInStrings-inStrings);
michael@0 2005 outStrings+=nextInStrings-inStrings;
michael@0 2006 inStrings=nextInStrings;
michael@0 2007
michael@0 2008 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
michael@0 2009 stringsCount-=count;
michael@0 2010
michael@0 2011 /* swap the string bytes using map[] and trailMap[] */
michael@0 2012 while(count>0) {
michael@0 2013 c=*inStrings++;
michael@0 2014 *outStrings++=map[c];
michael@0 2015 if(tokens[c]!=-2) {
michael@0 2016 --count;
michael@0 2017 } else {
michael@0 2018 /* token lead byte: swap the trail byte, too */
michael@0 2019 *outStrings++=trailMap[*inStrings++];
michael@0 2020 count-=2;
michael@0 2021 }
michael@0 2022 }
michael@0 2023 }
michael@0 2024 }
michael@0 2025
michael@0 2026 /* swap the algorithmic ranges */
michael@0 2027 offset=algNamesOffset;
michael@0 2028 count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
michael@0 2029 ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
michael@0 2030 offset+=4;
michael@0 2031
michael@0 2032 for(i=0; i<count; ++i) {
michael@0 2033 if(offset>(uint32_t)length) {
michael@0 2034 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
michael@0 2035 length, i);
michael@0 2036 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 2037 return 0;
michael@0 2038 }
michael@0 2039
michael@0 2040 inRange=(const AlgorithmicRange *)(inBytes+offset);
michael@0 2041 outRange=(AlgorithmicRange *)(outBytes+offset);
michael@0 2042 offset+=ds->readUInt16(inRange->size);
michael@0 2043
michael@0 2044 ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
michael@0 2045 ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
michael@0 2046 switch(inRange->type) {
michael@0 2047 case 0:
michael@0 2048 /* swap prefix string */
michael@0 2049 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
michael@0 2050 outRange+1, pErrorCode);
michael@0 2051 if(U_FAILURE(*pErrorCode)) {
michael@0 2052 udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
michael@0 2053 i);
michael@0 2054 return 0;
michael@0 2055 }
michael@0 2056 break;
michael@0 2057 case 1:
michael@0 2058 {
michael@0 2059 /* swap factors and the prefix and factor strings */
michael@0 2060 uint32_t factorsCount;
michael@0 2061
michael@0 2062 factorsCount=inRange->variant;
michael@0 2063 p=(const uint16_t *)(inRange+1);
michael@0 2064 q=(uint16_t *)(outRange+1);
michael@0 2065 ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
michael@0 2066
michael@0 2067 /* swap the strings, up to the last terminating NUL */
michael@0 2068 p+=factorsCount;
michael@0 2069 q+=factorsCount;
michael@0 2070 stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
michael@0 2071 while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
michael@0 2072 --stringsCount;
michael@0 2073 }
michael@0 2074 ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
michael@0 2075 }
michael@0 2076 break;
michael@0 2077 default:
michael@0 2078 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
michael@0 2079 inRange->type, i);
michael@0 2080 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 2081 return 0;
michael@0 2082 }
michael@0 2083 }
michael@0 2084 }
michael@0 2085
michael@0 2086 return headerSize+(int32_t)offset;
michael@0 2087 }
michael@0 2088
michael@0 2089 U_NAMESPACE_END
michael@0 2090
michael@0 2091 /*
michael@0 2092 * Hey, Emacs, please set the following:
michael@0 2093 *
michael@0 2094 * Local Variables:
michael@0 2095 * indent-tabs-mode: nil
michael@0 2096 * End:
michael@0 2097 *
michael@0 2098 */

mercurial