intl/icu/source/tools/toolutil/ppucd.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2011-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: ppucd.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2011dec11
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "unicode/uchar.h"
michael@0 17 #include "charstr.h"
michael@0 18 #include "cstring.h"
michael@0 19 #include "ppucd.h"
michael@0 20 #include "uassert.h"
michael@0 21 #include "uparse.h"
michael@0 22
michael@0 23 #include <stdio.h>
michael@0 24 #include <string.h>
michael@0 25
// Number of elements in a fixed-size C array, as an int32_t.
// The whole expansion is parenthesized so that the cast cannot bind to
// operators that happen to surround the macro at the use site.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
michael@0 27
michael@0 28 U_NAMESPACE_BEGIN
michael@0 29
michael@0 30 PropertyNames::~PropertyNames() {}
michael@0 31
michael@0 32 int32_t
michael@0 33 PropertyNames::getPropertyEnum(const char *name) const {
michael@0 34 return u_getPropertyEnum(name);
michael@0 35 }
michael@0 36
michael@0 37 int32_t
michael@0 38 PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
michael@0 39 return u_getPropertyValueEnum((UProperty)property, name);
michael@0 40 }
michael@0 41
michael@0 42 UniProps::UniProps()
michael@0 43 : start(U_SENTINEL), end(U_SENTINEL),
michael@0 44 bmg(U_SENTINEL), bpb(U_SENTINEL),
michael@0 45 scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
michael@0 46 digitValue(-1), numericValue(NULL),
michael@0 47 name(NULL), nameAlias(NULL) {
michael@0 48 memset(binProps, 0, sizeof(binProps));
michael@0 49 memset(intProps, 0, sizeof(intProps));
michael@0 50 memset(age, 0, 4);
michael@0 51 }
michael@0 52
michael@0 53 UniProps::~UniProps() {}
michael@0 54
michael@0 55 const int32_t PreparsedUCD::kNumLineBuffers;
michael@0 56
michael@0 57 PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
michael@0 58 : icuPnames(new PropertyNames()), pnames(icuPnames),
michael@0 59 file(NULL),
michael@0 60 defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
michael@0 61 lineNumber(0),
michael@0 62 lineType(NO_LINE),
michael@0 63 fieldLimit(NULL), lineLimit(NULL) {
michael@0 64 if(U_FAILURE(errorCode)) { return; }
michael@0 65
michael@0 66 if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {
michael@0 67 filename=NULL;
michael@0 68 file=stdin;
michael@0 69 } else {
michael@0 70 file=fopen(filename, "r");
michael@0 71 }
michael@0 72 if(file==NULL) {
michael@0 73 perror("error opening preparsed UCD");
michael@0 74 fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
michael@0 75 errorCode=U_FILE_ACCESS_ERROR;
michael@0 76 return;
michael@0 77 }
michael@0 78
michael@0 79 memset(ucdVersion, 0, 4);
michael@0 80 lines[0][0]=0;
michael@0 81 }
michael@0 82
michael@0 83 PreparsedUCD::~PreparsedUCD() {
michael@0 84 if(file!=stdin) {
michael@0 85 fclose(file);
michael@0 86 }
michael@0 87 delete icuPnames;
michael@0 88 }
michael@0 89
// Keywords that can appear in the first field of a line, indexed by LineType
// (same order as the LineType values).
// The first two entries have no keyword; readLine() starts matching at
// EMPTY_LINE+1 and therefore never looks at them.
static const char *const lineTypeStrings[]={
    NULL,               // [0] no keyword
    NULL,               // [1] no keyword
    "ucd",              // [2]
    "property",         // [3]
    "binary",           // [4]
    "value",            // [5]
    "defaults",         // [6]
    "block",            // [7]
    "cp",               // [8]
    "algnamesrange"     // [9]
};
michael@0 103
michael@0 104 PreparsedUCD::LineType
michael@0 105 PreparsedUCD::readLine(UErrorCode &errorCode) {
michael@0 106 if(U_FAILURE(errorCode)) { return NO_LINE; }
michael@0 107 // Select the next available line buffer.
michael@0 108 while(!isLineBufferAvailable(lineIndex)) {
michael@0 109 ++lineIndex;
michael@0 110 if (lineIndex == kNumLineBuffers) {
michael@0 111 lineIndex = 0;
michael@0 112 }
michael@0 113 }
michael@0 114 char *line=lines[lineIndex];
michael@0 115 *line=0;
michael@0 116 lineLimit=fieldLimit=line;
michael@0 117 lineType=NO_LINE;
michael@0 118 char *result=fgets(line, sizeof(lines[0]), file);
michael@0 119 if(result==NULL) {
michael@0 120 if(ferror(file)) {
michael@0 121 perror("error reading preparsed UCD");
michael@0 122 fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
michael@0 123 errorCode=U_FILE_ACCESS_ERROR;
michael@0 124 }
michael@0 125 return NO_LINE;
michael@0 126 }
michael@0 127 ++lineNumber;
michael@0 128 if(*line=='#') {
michael@0 129 fieldLimit=strchr(line, 0);
michael@0 130 return lineType=EMPTY_LINE;
michael@0 131 }
michael@0 132 // Remove trailing /r/n.
michael@0 133 char c;
michael@0 134 char *limit=strchr(line, 0);
michael@0 135 while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
michael@0 136 // Remove trailing white space.
michael@0 137 while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
michael@0 138 *limit=0;
michael@0 139 lineLimit=limit;
michael@0 140 if(line==limit) {
michael@0 141 fieldLimit=limit;
michael@0 142 return lineType=EMPTY_LINE;
michael@0 143 }
michael@0 144 // Split by ';'.
michael@0 145 char *semi=line;
michael@0 146 while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; }
michael@0 147 fieldLimit=strchr(line, 0);
michael@0 148 // Determine the line type.
michael@0 149 int32_t type;
michael@0 150 for(type=EMPTY_LINE+1;; ++type) {
michael@0 151 if(type==LINE_TYPE_COUNT) {
michael@0 152 fprintf(stderr,
michael@0 153 "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
michael@0 154 line, (long)lineNumber);
michael@0 155 errorCode=U_PARSE_ERROR;
michael@0 156 return NO_LINE;
michael@0 157 }
michael@0 158 if(0==strcmp(line, lineTypeStrings[type])) {
michael@0 159 break;
michael@0 160 }
michael@0 161 }
michael@0 162 lineType=(LineType)type;
michael@0 163 if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
michael@0 164 u_versionFromString(ucdVersion, fieldLimit+1);
michael@0 165 }
michael@0 166 return lineType;
michael@0 167 }
michael@0 168
michael@0 169 const char *
michael@0 170 PreparsedUCD::firstField() {
michael@0 171 char *field=lines[lineIndex];
michael@0 172 fieldLimit=strchr(field, 0);
michael@0 173 return field;
michael@0 174 }
michael@0 175
michael@0 176 const char *
michael@0 177 PreparsedUCD::nextField() {
michael@0 178 if(fieldLimit==lineLimit) { return NULL; }
michael@0 179 char *field=fieldLimit+1;
michael@0 180 fieldLimit=strchr(field, 0);
michael@0 181 return field;
michael@0 182 }
michael@0 183
michael@0 184 const UniProps *
michael@0 185 PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
michael@0 186 if(U_FAILURE(errorCode)) { return NULL; }
michael@0 187 newValues.clear();
michael@0 188 if(!lineHasPropertyValues()) {
michael@0 189 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 190 return NULL;
michael@0 191 }
michael@0 192 firstField();
michael@0 193 const char *field=nextField();
michael@0 194 if(field==NULL) {
michael@0 195 // No range field after the type.
michael@0 196 fprintf(stderr,
michael@0 197 "error in preparsed UCD: missing default/block/cp range field "
michael@0 198 "(no second field) on line %ld\n",
michael@0 199 (long)lineNumber);
michael@0 200 errorCode=U_PARSE_ERROR;
michael@0 201 return NULL;
michael@0 202 }
michael@0 203 UChar32 start, end;
michael@0 204 if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; }
michael@0 205 UniProps *props;
michael@0 206 switch(lineType) {
michael@0 207 case DEFAULTS_LINE:
michael@0 208 if(defaultLineIndex>=0) {
michael@0 209 fprintf(stderr,
michael@0 210 "error in preparsed UCD: second line with default properties on line %ld\n",
michael@0 211 (long)lineNumber);
michael@0 212 errorCode=U_PARSE_ERROR;
michael@0 213 return NULL;
michael@0 214 }
michael@0 215 if(start!=0 || end!=0x10ffff) {
michael@0 216 fprintf(stderr,
michael@0 217 "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
michael@0 218 field, (long)lineNumber);
michael@0 219 errorCode=U_PARSE_ERROR;
michael@0 220 return NULL;
michael@0 221 }
michael@0 222 props=&defaultProps;
michael@0 223 defaultLineIndex=lineIndex;
michael@0 224 break;
michael@0 225 case BLOCK_LINE:
michael@0 226 blockProps=defaultProps; // Block inherits default properties.
michael@0 227 props=&blockProps;
michael@0 228 blockLineIndex=lineIndex;
michael@0 229 break;
michael@0 230 case CP_LINE:
michael@0 231 if(blockProps.start<=start && end<=blockProps.end) {
michael@0 232 // Code point range fully inside the last block inherits the block properties.
michael@0 233 cpProps=blockProps;
michael@0 234 } else if(start>blockProps.end || end<blockProps.start) {
michael@0 235 // Code point range fully outside the last block inherits the default properties.
michael@0 236 cpProps=defaultProps;
michael@0 237 } else {
michael@0 238 // Code point range partially overlapping with the last block is illegal.
michael@0 239 fprintf(stderr,
michael@0 240 "error in preparsed UCD: cp range %s on line %ld only "
michael@0 241 "partially overlaps with block range %04lX..%04lX\n",
michael@0 242 field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
michael@0 243 errorCode=U_PARSE_ERROR;
michael@0 244 return NULL;
michael@0 245 }
michael@0 246 props=&cpProps;
michael@0 247 break;
michael@0 248 default:
michael@0 249 // Will not occur because of the range check above.
michael@0 250 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 251 return NULL;
michael@0 252 }
michael@0 253 props->start=start;
michael@0 254 props->end=end;
michael@0 255 while((field=nextField())!=NULL) {
michael@0 256 if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; }
michael@0 257 }
michael@0 258 return props;
michael@0 259 }
michael@0 260
michael@0 261 static const struct {
michael@0 262 const char *name;
michael@0 263 int32_t prop;
michael@0 264 } ppucdProperties[]={
michael@0 265 { "Name_Alias", PPUCD_NAME_ALIAS },
michael@0 266 { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
michael@0 267 { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
michael@0 268 };
michael@0 269
michael@0 270 // Returns TRUE for "ok to continue parsing fields".
michael@0 271 UBool
michael@0 272 PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
michael@0 273 UErrorCode &errorCode) {
michael@0 274 CharString pBuffer;
michael@0 275 const char *p=field;
michael@0 276 const char *v=strchr(p, '=');
michael@0 277 int binaryValue;
michael@0 278 if(*p=='-') {
michael@0 279 if(v!=NULL) {
michael@0 280 fprintf(stderr,
michael@0 281 "error in preparsed UCD: mix of binary-property-no and "
michael@0 282 "enum-property syntax '%s' on line %ld\n",
michael@0 283 field, (long)lineNumber);
michael@0 284 errorCode=U_PARSE_ERROR;
michael@0 285 return FALSE;
michael@0 286 }
michael@0 287 binaryValue=0;
michael@0 288 ++p;
michael@0 289 } else if(v==NULL) {
michael@0 290 binaryValue=1;
michael@0 291 } else {
michael@0 292 binaryValue=-1;
michael@0 293 // Copy out the property name rather than modifying the field (writing a NUL).
michael@0 294 pBuffer.append(p, (int32_t)(v-p), errorCode);
michael@0 295 p=pBuffer.data();
michael@0 296 ++v;
michael@0 297 }
michael@0 298 int32_t prop=pnames->getPropertyEnum(p);
michael@0 299 if(prop<0) {
michael@0 300 for(int32_t i=0;; ++i) {
michael@0 301 if(i==LENGTHOF(ppucdProperties)) {
michael@0 302 // Ignore unknown property names.
michael@0 303 return TRUE;
michael@0 304 }
michael@0 305 if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
michael@0 306 prop=ppucdProperties[i].prop;
michael@0 307 U_ASSERT(prop>=0);
michael@0 308 break;
michael@0 309 }
michael@0 310 }
michael@0 311 }
michael@0 312 if(prop<UCHAR_BINARY_LIMIT) {
michael@0 313 if(binaryValue>=0) {
michael@0 314 props.binProps[prop]=(UBool)binaryValue;
michael@0 315 } else {
michael@0 316 // No binary value for a binary property.
michael@0 317 fprintf(stderr,
michael@0 318 "error in preparsed UCD: enum-property syntax '%s' "
michael@0 319 "for binary property on line %ld\n",
michael@0 320 field, (long)lineNumber);
michael@0 321 errorCode=U_PARSE_ERROR;
michael@0 322 }
michael@0 323 } else if(binaryValue>=0) {
michael@0 324 // Binary value for a non-binary property.
michael@0 325 fprintf(stderr,
michael@0 326 "error in preparsed UCD: binary-property syntax '%s' "
michael@0 327 "for non-binary property on line %ld\n",
michael@0 328 field, (long)lineNumber);
michael@0 329 errorCode=U_PARSE_ERROR;
michael@0 330 } else if (prop < UCHAR_INT_START) {
michael@0 331 fprintf(stderr,
michael@0 332 "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
michael@0 333 prop, (long)lineNumber);
michael@0 334 errorCode=U_PARSE_ERROR;
michael@0 335 } else if(prop<UCHAR_INT_LIMIT) {
michael@0 336 int32_t value=pnames->getPropertyValueEnum(prop, v);
michael@0 337 if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
michael@0 338 // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
michael@0 339 char *end;
michael@0 340 unsigned long ccc=uprv_strtoul(v, &end, 10);
michael@0 341 if(v<end && *end==0 && ccc<=254) {
michael@0 342 value=(int32_t)ccc;
michael@0 343 }
michael@0 344 }
michael@0 345 if(value==UCHAR_INVALID_CODE) {
michael@0 346 fprintf(stderr,
michael@0 347 "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
michael@0 348 field, (long)lineNumber);
michael@0 349 errorCode=U_PARSE_ERROR;
michael@0 350 } else {
michael@0 351 props.intProps[prop-UCHAR_INT_START]=value;
michael@0 352 }
michael@0 353 } else if(*v=='<') {
michael@0 354 // Do not parse default values like <code point>, just set null values.
michael@0 355 switch(prop) {
michael@0 356 case UCHAR_BIDI_MIRRORING_GLYPH:
michael@0 357 props.bmg=U_SENTINEL;
michael@0 358 break;
michael@0 359 case UCHAR_BIDI_PAIRED_BRACKET:
michael@0 360 props.bpb=U_SENTINEL;
michael@0 361 break;
michael@0 362 case UCHAR_SIMPLE_CASE_FOLDING:
michael@0 363 props.scf=U_SENTINEL;
michael@0 364 break;
michael@0 365 case UCHAR_SIMPLE_LOWERCASE_MAPPING:
michael@0 366 props.slc=U_SENTINEL;
michael@0 367 break;
michael@0 368 case UCHAR_SIMPLE_TITLECASE_MAPPING:
michael@0 369 props.stc=U_SENTINEL;
michael@0 370 break;
michael@0 371 case UCHAR_SIMPLE_UPPERCASE_MAPPING:
michael@0 372 props.suc=U_SENTINEL;
michael@0 373 break;
michael@0 374 case UCHAR_CASE_FOLDING:
michael@0 375 props.cf.remove();
michael@0 376 break;
michael@0 377 case UCHAR_LOWERCASE_MAPPING:
michael@0 378 props.lc.remove();
michael@0 379 break;
michael@0 380 case UCHAR_TITLECASE_MAPPING:
michael@0 381 props.tc.remove();
michael@0 382 break;
michael@0 383 case UCHAR_UPPERCASE_MAPPING:
michael@0 384 props.uc.remove();
michael@0 385 break;
michael@0 386 case UCHAR_SCRIPT_EXTENSIONS:
michael@0 387 props.scx.clear();
michael@0 388 break;
michael@0 389 default:
michael@0 390 fprintf(stderr,
michael@0 391 "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
michael@0 392 field, (long)lineNumber);
michael@0 393 errorCode=U_PARSE_ERROR;
michael@0 394 }
michael@0 395 } else {
michael@0 396 char c;
michael@0 397 switch(prop) {
michael@0 398 case UCHAR_NUMERIC_VALUE:
michael@0 399 props.numericValue=v;
michael@0 400 c=*v;
michael@0 401 if('0'<=c && c<='9' && v[1]==0) {
michael@0 402 props.digitValue=c-'0';
michael@0 403 } else {
michael@0 404 props.digitValue=-1;
michael@0 405 }
michael@0 406 break;
michael@0 407 case UCHAR_NAME:
michael@0 408 props.name=v;
michael@0 409 break;
michael@0 410 case UCHAR_AGE:
michael@0 411 u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric.
michael@0 412 break;
michael@0 413 case UCHAR_BIDI_MIRRORING_GLYPH:
michael@0 414 props.bmg=parseCodePoint(v, errorCode);
michael@0 415 break;
michael@0 416 case UCHAR_BIDI_PAIRED_BRACKET:
michael@0 417 props.bpb=parseCodePoint(v, errorCode);
michael@0 418 break;
michael@0 419 case UCHAR_SIMPLE_CASE_FOLDING:
michael@0 420 props.scf=parseCodePoint(v, errorCode);
michael@0 421 break;
michael@0 422 case UCHAR_SIMPLE_LOWERCASE_MAPPING:
michael@0 423 props.slc=parseCodePoint(v, errorCode);
michael@0 424 break;
michael@0 425 case UCHAR_SIMPLE_TITLECASE_MAPPING:
michael@0 426 props.stc=parseCodePoint(v, errorCode);
michael@0 427 break;
michael@0 428 case UCHAR_SIMPLE_UPPERCASE_MAPPING:
michael@0 429 props.suc=parseCodePoint(v, errorCode);
michael@0 430 break;
michael@0 431 case UCHAR_CASE_FOLDING:
michael@0 432 parseString(v, props.cf, errorCode);
michael@0 433 break;
michael@0 434 case UCHAR_LOWERCASE_MAPPING:
michael@0 435 parseString(v, props.lc, errorCode);
michael@0 436 break;
michael@0 437 case UCHAR_TITLECASE_MAPPING:
michael@0 438 parseString(v, props.tc, errorCode);
michael@0 439 break;
michael@0 440 case UCHAR_UPPERCASE_MAPPING:
michael@0 441 parseString(v, props.uc, errorCode);
michael@0 442 break;
michael@0 443 case PPUCD_NAME_ALIAS:
michael@0 444 props.nameAlias=v;
michael@0 445 break;
michael@0 446 case PPUCD_CONDITIONAL_CASE_MAPPINGS:
michael@0 447 case PPUCD_TURKIC_CASE_FOLDING:
michael@0 448 // No need to parse their values: They are hardcoded in the runtime library.
michael@0 449 break;
michael@0 450 case UCHAR_SCRIPT_EXTENSIONS:
michael@0 451 parseScriptExtensions(v, props.scx, errorCode);
michael@0 452 break;
michael@0 453 default:
michael@0 454 // Ignore unhandled properties.
michael@0 455 return TRUE;
michael@0 456 }
michael@0 457 }
michael@0 458 if(U_SUCCESS(errorCode)) {
michael@0 459 newValues.add((UChar32)prop);
michael@0 460 return TRUE;
michael@0 461 } else {
michael@0 462 return FALSE;
michael@0 463 }
michael@0 464 }
michael@0 465
michael@0 466 UBool
michael@0 467 PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
michael@0 468 if(U_FAILURE(errorCode)) { return FALSE; }
michael@0 469 if(lineType!=ALG_NAMES_RANGE_LINE) {
michael@0 470 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 471 return FALSE;
michael@0 472 }
michael@0 473 firstField();
michael@0 474 const char *field=nextField();
michael@0 475 if(field==NULL) {
michael@0 476 // No range field after the type.
michael@0 477 fprintf(stderr,
michael@0 478 "error in preparsed UCD: missing algnamesrange range field "
michael@0 479 "(no second field) on line %ld\n",
michael@0 480 (long)lineNumber);
michael@0 481 errorCode=U_PARSE_ERROR;
michael@0 482 return FALSE;
michael@0 483 }
michael@0 484 return parseCodePointRange(field, start, end, errorCode);
michael@0 485 }
michael@0 486
michael@0 487 UChar32
michael@0 488 PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
michael@0 489 char *end;
michael@0 490 uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16);
michael@0 491 if(end<=s || *end!=0 || value>=0x110000) {
michael@0 492 fprintf(stderr,
michael@0 493 "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
michael@0 494 s, (long)lineNumber);
michael@0 495 errorCode=U_PARSE_ERROR;
michael@0 496 return U_SENTINEL;
michael@0 497 }
michael@0 498 return (UChar32)value;
michael@0 499 }
michael@0 500
michael@0 501 UBool
michael@0 502 PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
michael@0 503 uint32_t st, e;
michael@0 504 u_parseCodePointRange(s, &st, &e, &errorCode);
michael@0 505 if(U_FAILURE(errorCode)) {
michael@0 506 fprintf(stderr,
michael@0 507 "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
michael@0 508 s, (long)lineNumber);
michael@0 509 return FALSE;
michael@0 510 }
michael@0 511 start=(UChar32)st;
michael@0 512 end=(UChar32)e;
michael@0 513 return TRUE;
michael@0 514 }
michael@0 515
michael@0 516 void
michael@0 517 PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
michael@0 518 UChar *buffer=uni.getBuffer(-1);
michael@0 519 int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
michael@0 520 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
michael@0 521 errorCode=U_ZERO_ERROR;
michael@0 522 uni.releaseBuffer(0);
michael@0 523 buffer=uni.getBuffer(length);
michael@0 524 length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
michael@0 525 }
michael@0 526 uni.releaseBuffer(length);
michael@0 527 if(U_FAILURE(errorCode)) {
michael@0 528 fprintf(stderr,
michael@0 529 "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
michael@0 530 s, (long)lineNumber);
michael@0 531 }
michael@0 532 }
michael@0 533
michael@0 534 void
michael@0 535 PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
michael@0 536 if(U_FAILURE(errorCode)) { return; }
michael@0 537 scx.clear();
michael@0 538 CharString scString;
michael@0 539 for(;;) {
michael@0 540 const char *scs;
michael@0 541 const char *scLimit=strchr(s, ' ');
michael@0 542 if(scLimit!=NULL) {
michael@0 543 scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
michael@0 544 if(U_FAILURE(errorCode)) { return; }
michael@0 545 } else {
michael@0 546 scs=s;
michael@0 547 }
michael@0 548 int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
michael@0 549 if(script==UCHAR_INVALID_CODE) {
michael@0 550 fprintf(stderr,
michael@0 551 "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
michael@0 552 scs, (long)lineNumber);
michael@0 553 errorCode=U_PARSE_ERROR;
michael@0 554 return;
michael@0 555 } else if(scx.contains(script)) {
michael@0 556 fprintf(stderr,
michael@0 557 "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
michael@0 558 scs, (long)lineNumber);
michael@0 559 errorCode=U_PARSE_ERROR;
michael@0 560 return;
michael@0 561 } else {
michael@0 562 scx.add(script);
michael@0 563 }
michael@0 564 if(scLimit!=NULL) {
michael@0 565 s=scLimit+1;
michael@0 566 } else {
michael@0 567 break;
michael@0 568 }
michael@0 569 }
michael@0 570 if(scx.isEmpty()) {
michael@0 571 fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
michael@0 572 errorCode=U_PARSE_ERROR;
michael@0 573 }
michael@0 574 }
michael@0 575
michael@0 576 U_NAMESPACE_END

mercurial