intl/icu/source/tools/toolutil/ppucd.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/toolutil/ppucd.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,576 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2011-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  ppucd.cpp
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2011dec11
    1.15 +*   created by: Markus W. Scherer
    1.16 +*/
    1.17 +
    1.18 +#include "unicode/utypes.h"
    1.19 +#include "unicode/uchar.h"
    1.20 +#include "charstr.h"
    1.21 +#include "cstring.h"
    1.22 +#include "ppucd.h"
    1.23 +#include "uassert.h"
    1.24 +#include "uparse.h"
    1.25 +
    1.26 +#include <stdio.h>
    1.27 +#include <string.h>
    1.28 +
    1.29 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.30 +
    1.31 +U_NAMESPACE_BEGIN
    1.32 +
    1.33 +PropertyNames::~PropertyNames() {}
    1.34 +
    1.35 +int32_t
    1.36 +PropertyNames::getPropertyEnum(const char *name) const {
    1.37 +    return u_getPropertyEnum(name);
    1.38 +}
    1.39 +
    1.40 +int32_t
    1.41 +PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
    1.42 +    return u_getPropertyValueEnum((UProperty)property, name);
    1.43 +}
    1.44 +
    1.45 +UniProps::UniProps()
    1.46 +        : start(U_SENTINEL), end(U_SENTINEL),
    1.47 +          bmg(U_SENTINEL), bpb(U_SENTINEL),
    1.48 +          scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
    1.49 +          digitValue(-1), numericValue(NULL),
    1.50 +          name(NULL), nameAlias(NULL) {
    1.51 +    memset(binProps, 0, sizeof(binProps));
    1.52 +    memset(intProps, 0, sizeof(intProps));
    1.53 +    memset(age, 0, 4);
    1.54 +}
    1.55 +
    1.56 +UniProps::~UniProps() {}
    1.57 +
    1.58 +const int32_t PreparsedUCD::kNumLineBuffers;
    1.59 +
    1.60 +PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
    1.61 +        : icuPnames(new PropertyNames()), pnames(icuPnames),
    1.62 +          file(NULL),
    1.63 +          defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
    1.64 +          lineNumber(0),
    1.65 +          lineType(NO_LINE),
    1.66 +          fieldLimit(NULL), lineLimit(NULL) {
    1.67 +    if(U_FAILURE(errorCode)) { return; }
    1.68 +
    1.69 +    if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {
    1.70 +        filename=NULL;
    1.71 +        file=stdin;
    1.72 +    } else {
    1.73 +        file=fopen(filename, "r");
    1.74 +    }
    1.75 +    if(file==NULL) {
    1.76 +        perror("error opening preparsed UCD");
    1.77 +        fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
    1.78 +        errorCode=U_FILE_ACCESS_ERROR;
    1.79 +        return;
    1.80 +    }
    1.81 +
    1.82 +    memset(ucdVersion, 0, 4);
    1.83 +    lines[0][0]=0;
    1.84 +}
    1.85 +
    1.86 +PreparsedUCD::~PreparsedUCD() {
    1.87 +    if(file!=stdin) {
    1.88 +        fclose(file);
    1.89 +    }
    1.90 +    delete icuPnames;
    1.91 +}
    1.92 +
    1.93 +// Same order as the LineType values.
    1.94 +static const char *lineTypeStrings[]={
    1.95 +    NULL,
    1.96 +    NULL,
    1.97 +    "ucd",
    1.98 +    "property",
    1.99 +    "binary",
   1.100 +    "value",
   1.101 +    "defaults",
   1.102 +    "block",
   1.103 +    "cp",
   1.104 +    "algnamesrange"
   1.105 +};
   1.106 +
   1.107 +PreparsedUCD::LineType
   1.108 +PreparsedUCD::readLine(UErrorCode &errorCode) {
   1.109 +    if(U_FAILURE(errorCode)) { return NO_LINE; }
   1.110 +    // Select the next available line buffer.
   1.111 +    while(!isLineBufferAvailable(lineIndex)) {
   1.112 +        ++lineIndex;
   1.113 +        if (lineIndex == kNumLineBuffers) {
   1.114 +            lineIndex = 0;
   1.115 +        }
   1.116 +    }
   1.117 +    char *line=lines[lineIndex];
   1.118 +    *line=0;
   1.119 +    lineLimit=fieldLimit=line;
   1.120 +    lineType=NO_LINE;
   1.121 +    char *result=fgets(line, sizeof(lines[0]), file);
   1.122 +    if(result==NULL) {
   1.123 +        if(ferror(file)) {
   1.124 +            perror("error reading preparsed UCD");
   1.125 +            fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
   1.126 +            errorCode=U_FILE_ACCESS_ERROR;
   1.127 +        }
   1.128 +        return NO_LINE;
   1.129 +    }
   1.130 +    ++lineNumber;
   1.131 +    if(*line=='#') {
   1.132 +        fieldLimit=strchr(line, 0);
   1.133 +        return lineType=EMPTY_LINE;
   1.134 +    }
   1.135 +    // Remove trailing /r/n.
   1.136 +    char c;
   1.137 +    char *limit=strchr(line, 0);
   1.138 +    while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
   1.139 +    // Remove trailing white space.
   1.140 +    while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
   1.141 +    *limit=0;
   1.142 +    lineLimit=limit;
   1.143 +    if(line==limit) {
   1.144 +        fieldLimit=limit;
   1.145 +        return lineType=EMPTY_LINE;
   1.146 +    }
   1.147 +    // Split by ';'.
   1.148 +    char *semi=line;
   1.149 +    while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; }
   1.150 +    fieldLimit=strchr(line, 0);
   1.151 +    // Determine the line type.
   1.152 +    int32_t type;
   1.153 +    for(type=EMPTY_LINE+1;; ++type) {
   1.154 +        if(type==LINE_TYPE_COUNT) {
   1.155 +            fprintf(stderr,
   1.156 +                    "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
   1.157 +                    line, (long)lineNumber);
   1.158 +            errorCode=U_PARSE_ERROR;
   1.159 +            return NO_LINE;
   1.160 +        }
   1.161 +        if(0==strcmp(line, lineTypeStrings[type])) {
   1.162 +            break;
   1.163 +        }
   1.164 +    }
   1.165 +    lineType=(LineType)type;
   1.166 +    if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
   1.167 +        u_versionFromString(ucdVersion, fieldLimit+1);
   1.168 +    }
   1.169 +    return lineType;
   1.170 +}
   1.171 +
   1.172 +const char *
   1.173 +PreparsedUCD::firstField() {
   1.174 +    char *field=lines[lineIndex];
   1.175 +    fieldLimit=strchr(field, 0);
   1.176 +    return field;
   1.177 +}
   1.178 +
   1.179 +const char *
   1.180 +PreparsedUCD::nextField() {
   1.181 +    if(fieldLimit==lineLimit) { return NULL; }
   1.182 +    char *field=fieldLimit+1;
   1.183 +    fieldLimit=strchr(field, 0);
   1.184 +    return field;
   1.185 +}
   1.186 +
   1.187 +const UniProps *
   1.188 +PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
   1.189 +    if(U_FAILURE(errorCode)) { return NULL; }
   1.190 +    newValues.clear();
   1.191 +    if(!lineHasPropertyValues()) {
   1.192 +        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.193 +        return NULL;
   1.194 +    }
   1.195 +    firstField();
   1.196 +    const char *field=nextField();
   1.197 +    if(field==NULL) {
   1.198 +        // No range field after the type.
   1.199 +        fprintf(stderr,
   1.200 +                "error in preparsed UCD: missing default/block/cp range field "
   1.201 +                "(no second field) on line %ld\n",
   1.202 +                (long)lineNumber);
   1.203 +        errorCode=U_PARSE_ERROR;
   1.204 +        return NULL;
   1.205 +    }
   1.206 +    UChar32 start, end;
   1.207 +    if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; }
   1.208 +    UniProps *props;
   1.209 +    switch(lineType) {
   1.210 +    case DEFAULTS_LINE:
   1.211 +        if(defaultLineIndex>=0) {
   1.212 +            fprintf(stderr,
   1.213 +                    "error in preparsed UCD: second line with default properties on line %ld\n",
   1.214 +                    (long)lineNumber);
   1.215 +            errorCode=U_PARSE_ERROR;
   1.216 +            return NULL;
   1.217 +        }
   1.218 +        if(start!=0 || end!=0x10ffff) {
   1.219 +            fprintf(stderr,
   1.220 +                    "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
   1.221 +                    field, (long)lineNumber);
   1.222 +            errorCode=U_PARSE_ERROR;
   1.223 +            return NULL;
   1.224 +        }
   1.225 +        props=&defaultProps;
   1.226 +        defaultLineIndex=lineIndex;
   1.227 +        break;
   1.228 +    case BLOCK_LINE:
   1.229 +        blockProps=defaultProps;  // Block inherits default properties.
   1.230 +        props=&blockProps;
   1.231 +        blockLineIndex=lineIndex;
   1.232 +        break;
   1.233 +    case CP_LINE:
   1.234 +        if(blockProps.start<=start && end<=blockProps.end) {
   1.235 +            // Code point range fully inside the last block inherits the block properties.
   1.236 +            cpProps=blockProps;
   1.237 +        } else if(start>blockProps.end || end<blockProps.start) {
   1.238 +            // Code point range fully outside the last block inherits the default properties.
   1.239 +            cpProps=defaultProps;
   1.240 +        } else {
   1.241 +            // Code point range partially overlapping with the last block is illegal.
   1.242 +            fprintf(stderr,
   1.243 +                    "error in preparsed UCD: cp range %s on line %ld only "
   1.244 +                    "partially overlaps with block range %04lX..%04lX\n",
   1.245 +                    field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
   1.246 +            errorCode=U_PARSE_ERROR;
   1.247 +            return NULL;
   1.248 +        }
   1.249 +        props=&cpProps;
   1.250 +        break;
   1.251 +    default:
   1.252 +        // Will not occur because of the range check above.
   1.253 +        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.254 +        return NULL;
   1.255 +    }
   1.256 +    props->start=start;
   1.257 +    props->end=end;
   1.258 +    while((field=nextField())!=NULL) {
   1.259 +        if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; }
   1.260 +    }
   1.261 +    return props;
   1.262 +}
   1.263 +
   1.264 +static const struct {
   1.265 +    const char *name;
   1.266 +    int32_t prop;
   1.267 +} ppucdProperties[]={
   1.268 +    { "Name_Alias", PPUCD_NAME_ALIAS },
   1.269 +    { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
   1.270 +    { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
   1.271 +};
   1.272 +
   1.273 +// Returns TRUE for "ok to continue parsing fields".
   1.274 +UBool
   1.275 +PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
   1.276 +                            UErrorCode &errorCode) {
   1.277 +    CharString pBuffer;
   1.278 +    const char *p=field;
   1.279 +    const char *v=strchr(p, '=');
   1.280 +    int binaryValue;
   1.281 +    if(*p=='-') {
   1.282 +        if(v!=NULL) {
   1.283 +            fprintf(stderr,
   1.284 +                    "error in preparsed UCD: mix of binary-property-no and "
   1.285 +                    "enum-property syntax '%s' on line %ld\n",
   1.286 +                    field, (long)lineNumber);
   1.287 +            errorCode=U_PARSE_ERROR;
   1.288 +            return FALSE;
   1.289 +        }
   1.290 +        binaryValue=0;
   1.291 +        ++p;
   1.292 +    } else if(v==NULL) {
   1.293 +        binaryValue=1;
   1.294 +    } else {
   1.295 +        binaryValue=-1;
   1.296 +        // Copy out the property name rather than modifying the field (writing a NUL).
   1.297 +        pBuffer.append(p, (int32_t)(v-p), errorCode);
   1.298 +        p=pBuffer.data();
   1.299 +        ++v;
   1.300 +    }
   1.301 +    int32_t prop=pnames->getPropertyEnum(p);
   1.302 +    if(prop<0) {
   1.303 +        for(int32_t i=0;; ++i) {
   1.304 +            if(i==LENGTHOF(ppucdProperties)) {
   1.305 +                // Ignore unknown property names.
   1.306 +                return TRUE;
   1.307 +            }
   1.308 +            if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
   1.309 +                prop=ppucdProperties[i].prop;
   1.310 +                U_ASSERT(prop>=0);
   1.311 +                break;
   1.312 +            }
   1.313 +        }
   1.314 +    }
   1.315 +    if(prop<UCHAR_BINARY_LIMIT) {
   1.316 +        if(binaryValue>=0) {
   1.317 +            props.binProps[prop]=(UBool)binaryValue;
   1.318 +        } else {
   1.319 +            // No binary value for a binary property.
   1.320 +            fprintf(stderr,
   1.321 +                    "error in preparsed UCD: enum-property syntax '%s' "
   1.322 +                    "for binary property on line %ld\n",
   1.323 +                    field, (long)lineNumber);
   1.324 +            errorCode=U_PARSE_ERROR;
   1.325 +        }
   1.326 +    } else if(binaryValue>=0) {
   1.327 +        // Binary value for a non-binary property.
   1.328 +        fprintf(stderr,
   1.329 +                "error in preparsed UCD: binary-property syntax '%s' "
   1.330 +                "for non-binary property on line %ld\n",
   1.331 +                field, (long)lineNumber);
   1.332 +        errorCode=U_PARSE_ERROR;
   1.333 +    } else if (prop < UCHAR_INT_START) {
   1.334 +        fprintf(stderr,
   1.335 +                "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
   1.336 +                prop, (long)lineNumber);
   1.337 +        errorCode=U_PARSE_ERROR;
   1.338 +    } else if(prop<UCHAR_INT_LIMIT) {
   1.339 +        int32_t value=pnames->getPropertyValueEnum(prop, v);
   1.340 +        if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
   1.341 +            // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
   1.342 +            char *end;
   1.343 +            unsigned long ccc=uprv_strtoul(v, &end, 10);
   1.344 +            if(v<end && *end==0 && ccc<=254) {
   1.345 +                value=(int32_t)ccc;
   1.346 +            }
   1.347 +        }
   1.348 +        if(value==UCHAR_INVALID_CODE) {
   1.349 +            fprintf(stderr,
   1.350 +                    "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
   1.351 +                    field, (long)lineNumber);
   1.352 +            errorCode=U_PARSE_ERROR;
   1.353 +        } else {
   1.354 +            props.intProps[prop-UCHAR_INT_START]=value;
   1.355 +        }
   1.356 +    } else if(*v=='<') {
   1.357 +        // Do not parse default values like <code point>, just set null values.
   1.358 +        switch(prop) {
   1.359 +        case UCHAR_BIDI_MIRRORING_GLYPH:
   1.360 +            props.bmg=U_SENTINEL;
   1.361 +            break;
   1.362 +        case UCHAR_BIDI_PAIRED_BRACKET:
   1.363 +            props.bpb=U_SENTINEL;
   1.364 +            break;
   1.365 +        case UCHAR_SIMPLE_CASE_FOLDING:
   1.366 +            props.scf=U_SENTINEL;
   1.367 +            break;
   1.368 +        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
   1.369 +            props.slc=U_SENTINEL;
   1.370 +            break;
   1.371 +        case UCHAR_SIMPLE_TITLECASE_MAPPING:
   1.372 +            props.stc=U_SENTINEL;
   1.373 +            break;
   1.374 +        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
   1.375 +            props.suc=U_SENTINEL;
   1.376 +            break;
   1.377 +        case UCHAR_CASE_FOLDING:
   1.378 +            props.cf.remove();
   1.379 +            break;
   1.380 +        case UCHAR_LOWERCASE_MAPPING:
   1.381 +            props.lc.remove();
   1.382 +            break;
   1.383 +        case UCHAR_TITLECASE_MAPPING:
   1.384 +            props.tc.remove();
   1.385 +            break;
   1.386 +        case UCHAR_UPPERCASE_MAPPING:
   1.387 +            props.uc.remove();
   1.388 +            break;
   1.389 +        case UCHAR_SCRIPT_EXTENSIONS:
   1.390 +            props.scx.clear();
   1.391 +            break;
   1.392 +        default:
   1.393 +            fprintf(stderr,
   1.394 +                    "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
   1.395 +                    field, (long)lineNumber);
   1.396 +            errorCode=U_PARSE_ERROR;
   1.397 +        }
   1.398 +    } else {
   1.399 +        char c;
   1.400 +        switch(prop) {
   1.401 +        case UCHAR_NUMERIC_VALUE:
   1.402 +            props.numericValue=v;
   1.403 +            c=*v;
   1.404 +            if('0'<=c && c<='9' && v[1]==0) {
   1.405 +                props.digitValue=c-'0';
   1.406 +            } else {
   1.407 +                props.digitValue=-1;
   1.408 +            }
   1.409 +            break;
   1.410 +        case UCHAR_NAME:
   1.411 +            props.name=v;
   1.412 +            break;
   1.413 +        case UCHAR_AGE:
   1.414 +            u_versionFromString(props.age, v);  // Writes 0.0.0.0 if v is not numeric.
   1.415 +            break;
   1.416 +        case UCHAR_BIDI_MIRRORING_GLYPH:
   1.417 +            props.bmg=parseCodePoint(v, errorCode);
   1.418 +            break;
   1.419 +        case UCHAR_BIDI_PAIRED_BRACKET:
   1.420 +            props.bpb=parseCodePoint(v, errorCode);
   1.421 +            break;
   1.422 +        case UCHAR_SIMPLE_CASE_FOLDING:
   1.423 +            props.scf=parseCodePoint(v, errorCode);
   1.424 +            break;
   1.425 +        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
   1.426 +            props.slc=parseCodePoint(v, errorCode);
   1.427 +            break;
   1.428 +        case UCHAR_SIMPLE_TITLECASE_MAPPING:
   1.429 +            props.stc=parseCodePoint(v, errorCode);
   1.430 +            break;
   1.431 +        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
   1.432 +            props.suc=parseCodePoint(v, errorCode);
   1.433 +            break;
   1.434 +        case UCHAR_CASE_FOLDING:
   1.435 +            parseString(v, props.cf, errorCode);
   1.436 +            break;
   1.437 +        case UCHAR_LOWERCASE_MAPPING:
   1.438 +            parseString(v, props.lc, errorCode);
   1.439 +            break;
   1.440 +        case UCHAR_TITLECASE_MAPPING:
   1.441 +            parseString(v, props.tc, errorCode);
   1.442 +            break;
   1.443 +        case UCHAR_UPPERCASE_MAPPING:
   1.444 +            parseString(v, props.uc, errorCode);
   1.445 +            break;
   1.446 +        case PPUCD_NAME_ALIAS:
   1.447 +            props.nameAlias=v;
   1.448 +            break;
   1.449 +        case PPUCD_CONDITIONAL_CASE_MAPPINGS:
   1.450 +        case PPUCD_TURKIC_CASE_FOLDING:
   1.451 +            // No need to parse their values: They are hardcoded in the runtime library.
   1.452 +            break;
   1.453 +        case UCHAR_SCRIPT_EXTENSIONS:
   1.454 +            parseScriptExtensions(v, props.scx, errorCode);
   1.455 +            break;
   1.456 +        default:
   1.457 +            // Ignore unhandled properties.
   1.458 +            return TRUE;
   1.459 +        }
   1.460 +    }
   1.461 +    if(U_SUCCESS(errorCode)) {
   1.462 +        newValues.add((UChar32)prop);
   1.463 +        return TRUE;
   1.464 +    } else {
   1.465 +        return FALSE;
   1.466 +    }
   1.467 +}
   1.468 +
   1.469 +UBool
   1.470 +PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
   1.471 +    if(U_FAILURE(errorCode)) { return FALSE; }
   1.472 +    if(lineType!=ALG_NAMES_RANGE_LINE) {
   1.473 +        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.474 +        return FALSE;
   1.475 +    }
   1.476 +    firstField();
   1.477 +    const char *field=nextField();
   1.478 +    if(field==NULL) {
   1.479 +        // No range field after the type.
   1.480 +        fprintf(stderr,
   1.481 +                "error in preparsed UCD: missing algnamesrange range field "
   1.482 +                "(no second field) on line %ld\n",
   1.483 +                (long)lineNumber);
   1.484 +        errorCode=U_PARSE_ERROR;
   1.485 +        return FALSE;
   1.486 +    }
   1.487 +    return parseCodePointRange(field, start, end, errorCode);
   1.488 +}
   1.489 +
   1.490 +UChar32
   1.491 +PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
   1.492 +    char *end;
   1.493 +    uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16);
   1.494 +    if(end<=s || *end!=0 || value>=0x110000) {
   1.495 +        fprintf(stderr,
   1.496 +                "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
   1.497 +                s, (long)lineNumber);
   1.498 +        errorCode=U_PARSE_ERROR;
   1.499 +        return U_SENTINEL;
   1.500 +    }
   1.501 +    return (UChar32)value;
   1.502 +}
   1.503 +
   1.504 +UBool
   1.505 +PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
   1.506 +    uint32_t st, e;
   1.507 +    u_parseCodePointRange(s, &st, &e, &errorCode);
   1.508 +    if(U_FAILURE(errorCode)) {
   1.509 +        fprintf(stderr,
   1.510 +                "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
   1.511 +                s, (long)lineNumber);
   1.512 +        return FALSE;
   1.513 +    }
   1.514 +    start=(UChar32)st;
   1.515 +    end=(UChar32)e;
   1.516 +    return TRUE;
   1.517 +}
   1.518 +
   1.519 +void
   1.520 +PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
   1.521 +    UChar *buffer=uni.getBuffer(-1);
   1.522 +    int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
   1.523 +    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
   1.524 +        errorCode=U_ZERO_ERROR;
   1.525 +        uni.releaseBuffer(0);
   1.526 +        buffer=uni.getBuffer(length);
   1.527 +        length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
   1.528 +    }
   1.529 +    uni.releaseBuffer(length);
   1.530 +    if(U_FAILURE(errorCode)) {
   1.531 +        fprintf(stderr,
   1.532 +                "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
   1.533 +                s, (long)lineNumber);
   1.534 +    }
   1.535 +}
   1.536 +
   1.537 +void
   1.538 +PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
   1.539 +    if(U_FAILURE(errorCode)) { return; }
   1.540 +    scx.clear();
   1.541 +    CharString scString;
   1.542 +    for(;;) {
   1.543 +        const char *scs;
   1.544 +        const char *scLimit=strchr(s, ' ');
   1.545 +        if(scLimit!=NULL) {
   1.546 +            scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
   1.547 +            if(U_FAILURE(errorCode)) { return; }
   1.548 +        } else {
   1.549 +            scs=s;
   1.550 +        }
   1.551 +        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
   1.552 +        if(script==UCHAR_INVALID_CODE) {
   1.553 +            fprintf(stderr,
   1.554 +                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
   1.555 +                    scs, (long)lineNumber);
   1.556 +            errorCode=U_PARSE_ERROR;
   1.557 +            return;
   1.558 +        } else if(scx.contains(script)) {
   1.559 +            fprintf(stderr,
   1.560 +                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
   1.561 +                    scs, (long)lineNumber);
   1.562 +            errorCode=U_PARSE_ERROR;
   1.563 +            return;
   1.564 +        } else {
   1.565 +            scx.add(script);
   1.566 +        }
   1.567 +        if(scLimit!=NULL) {
   1.568 +            s=scLimit+1;
   1.569 +        } else {
   1.570 +            break;
   1.571 +        }
   1.572 +    }
   1.573 +    if(scx.isEmpty()) {
   1.574 +        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
   1.575 +        errorCode=U_PARSE_ERROR;
   1.576 +    }
   1.577 +}
   1.578 +
   1.579 +U_NAMESPACE_END

mercurial