intl/icu/source/tools/toolutil/ppucd.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2011-2013, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  ppucd.h
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2011dec11
    12 *   created by: Markus W. Scherer
    13 */
    15 #ifndef __PPUCD_H__
    16 #define __PPUCD_H__
    18 #include "unicode/utypes.h"
    19 #include "unicode/uniset.h"
    20 #include "unicode/unistr.h"
    22 #include <stdio.h>
    24 /**
        * Additions to the uchar.h enum UProperty.
        *
        * The first constant is pinned to UCHAR_STRING_LIMIT and the remaining ones
        * take the next consecutive values, so the order of the enumerators below
        * determines their numeric values.
        * NOTE(review): presumably this keeps them clear of the constants that
        * uchar.h itself defines -- confirm against uchar.h.
        */
    25 enum {
    26     /** Name_Alias */
    27     PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT,
           /** NOTE(review): by its name, conditional case mappings -- confirm against the parser. */
    28     PPUCD_CONDITIONAL_CASE_MAPPINGS,
           /** NOTE(review): by its name, Turkic case folding -- confirm against the parser. */
    29     PPUCD_TURKIC_CASE_FOLDING
    30 };
    32 U_NAMESPACE_BEGIN
    34 class U_TOOLUTIL_API PropertyNames {
           // Lookup interface that maps property names and property value names
           // to integer codes. Every member is virtual, so a subclass can supply
           // its own lookups; PreparsedUCD::setPropertyNames() takes a pointer to
           // such an object. The member functions are only declared here.
    35 public:
    36     virtual ~PropertyNames();
           /**
            * Looks up a property by name.
            * @param name the property name to look up
            * @return the integer code for the property.
            *         NOTE(review): presumably a UProperty or PPUCD_* constant, with a
            *         dedicated value for unknown names -- confirm in the implementation.
            */
    37     virtual int32_t getPropertyEnum(const char *name) const;
           /**
            * Looks up a value of the given property by name.
            * @param property the property code that the value name belongs to
            * @param name the property value name to look up
            * @return the integer code for the value.
            *         NOTE(review): the result for unknown names is defined by the
            *         implementation, not by this header -- confirm there.
            */
    38     virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
    39 };
    41 struct U_TOOLUTIL_API UniProps {
           // Property values together with a start/end pair of code points.
           // PreparsedUCD::getProps() returns a pointer to one of these.
    42     UniProps();
    43     ~UniProps();
           /**
            * Returns the stored value of an integer-valued property.
            * @param prop property code; it is offset by UCHAR_INT_START to index
            *             intProps, and no range check is made, so it must lie in
            *             UCHAR_INT_START..UCHAR_INT_LIMIT-1
            * @return the intProps entry for prop
            */
    45     int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; }
           // Code point range these values belong to.
           // NOTE(review): presumably inclusive, like the "0000..007F" notation
           // shown for block lines -- confirm.
    47     UChar32 start, end;
           // One flag per binary property; the array has UCHAR_BINARY_LIMIT entries.
    48     UBool binProps[UCHAR_BINARY_LIMIT];
           // One value per integer property, indexed by property code minus
           // UCHAR_INT_START (see getIntProp()).
    49     int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
    50     UVersionInfo age;
           // NOTE(review): the short member names below look like Unicode short
           // property aliases (bmg/bpb: bidi mirroring glyph / paired bracket;
           // scf/slc/stc/suc: simple case folding and simple lower/title/upper
           // mappings) -- confirm against PropertyAliases.txt.
    51     UChar32 bmg, bpb;
    52     UChar32 scf, slc, stc, suc;
    53     int32_t digitValue;
           // NOTE(review): PreparsedUCD keeps several line buffers so that
           // properties can point into them; these three pointers are presumably
           // such aliases and not owned by this struct -- confirm.
    54     const char *numericValue;
    55     const char *name;
    56     const char *nameAlias;
           // NOTE(review): presumably the full (string) case folding and
           // lower/title/upper case mappings -- confirm.
    57     UnicodeString cf, lc, tc, uc;
           // NOTE(review): presumably the Script_Extensions values as a set -- confirm.
    58     UnicodeSet scx;
    59 };
    61 class U_TOOLUTIL_API PreparsedUCD {
           // Line-by-line reader for a preparsed UCD file.
           // readLine() classifies each line, nextField() iterates over its
           // ';'-separated fields, and getProps() turns the property fields of
           // the current line into a UniProps.
           // Objects of this class cannot be copied (see the private section).
    62 public:
    63     enum LineType {
    64         /** No line, end of file. */
    65         NO_LINE,
    66         /** Empty line. (Might contain a comment.) */
    67         EMPTY_LINE,
    69         /** ucd;6.1.0 */
    70         UNICODE_VERSION_LINE,
    72         /** property;Binary;Alpha;Alphabetic */
    73         PROPERTY_LINE,
    74         /** binary;N;No;F;False */
    75         BINARY_LINE,
    76         /** value;gc;Zs;Space_Separator */
    77         VALUE_LINE,
               // DEFAULTS_LINE, BLOCK_LINE and CP_LINE must stay consecutive and in
               // this order: lineHasPropertyValues() tests for this range.
    79         /** defaults;0000..10FFFF;age=NA;bc=L;... */
    80         DEFAULTS_LINE,
    81         /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
    82         BLOCK_LINE,
    83         /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
    84         CP_LINE,
    86         /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
    87         ALG_NAMES_RANGE_LINE,
               /** Number of line types; not a line type itself. */
    89         LINE_TYPE_COUNT
    90     };
    92     /**
    93      * Constructor.
    94      * Prepares this object for reading the preparsed UCD file named by filename.
            * NOTE(review): the file is presumably opened here and failures reported
            * through errorCode; the definition is not in this header -- confirm.
            * @param filename name of the preparsed UCD file
            * @param errorCode ICU in/out error code
    95      */
    96     PreparsedUCD(const char *filename, UErrorCode &errorCode);
    98     /** Destructor. */
    99     ~PreparsedUCD();
   101     /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */
   102     void setPropertyNames(const PropertyNames *pn) { pnames=pn; }
   104     /**
   105      * Reads a line from the preparsed UCD file.
   106      * Splits the line by replacing each ';' with a NUL.
            * @param errorCode ICU in/out error code
            * @return the type of the line that was read
   107      */
   108     LineType readLine(UErrorCode &errorCode);
   110     /** Returns the number of the line read by readLine(). */
   111     int32_t getLineNumber() const { return lineNumber; }
   113     /** Returns the line's next field, or NULL. */
   114     const char *nextField();
   116     /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
   117     const UVersionInfo &getUnicodeVersion() const { return ucdVersion; }
   119     /**
            * Returns TRUE if the current line has property values,
            * that is, if its type is DEFAULTS_LINE, BLOCK_LINE or CP_LINE.
            * Relies on these three LineType values being consecutive.
            */
   120     UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; }
   122     /**
   123      * Parses properties from the current line.
   124      * Clears newValues and sets UProperty codes for property values mentioned
   125      * on the current line (as opposed to being inherited).
   126      * Returns a pointer to the filled-in UniProps, or NULL if something went wrong.
   127      * The returned UniProps are usable until the next line of the same type is read.
   128      */
   129     const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);
   131     /**
   132      * Returns the code point range for the current algnamesrange line.
   133      * Calls & parses nextField().
   134      * Further nextField() calls will yield the range's type & prefix string.
   135      * Returns U_SUCCESS(errorCode).
   136      */
   137     UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);
   139 private:
           // Copying is not supported: this object owns icuPnames, holds a FILE
           // pointer, and its UniProps members can point into its own line
           // buffers, so a member-wise copy would share all of these with the
           // original. Declared private and intentionally not defined.
           PreparsedUCD(const PreparsedUCD &other);  // no copy constructor
           PreparsedUCD &operator=(const PreparsedUCD &other);  // no assignment operator

           /**
            * Returns TRUE if line buffer i is neither the one recorded in
            * defaultLineIndex nor the one recorded in blockLineIndex.
            */
   140     UBool isLineBufferAvailable(int32_t i) {
   141         return defaultLineIndex!=i && blockLineIndex!=i;
   142     }
   144     /** Resets the field iterator and returns the line's first field (the line type field). */
   145     const char *firstField();
           // Parsing helpers; each one reports problems through errorCode.
           // Their definitions are not in this header.
   147     UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
   148                         UErrorCode &errorCode);
   149     UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
   150     UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
   151     void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
   152     void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode);
           // Number of rows in the lines array below.
   154     static const int32_t kNumLineBuffers=3;
   156     PropertyNames *icuPnames;  // owned
   157     const PropertyNames *pnames;  // aliased
   158     FILE *file;
           // Indexes into the lines array. isLineBufferAvailable() treats the
           // buffers at defaultLineIndex and blockLineIndex as in use.
   159     int32_t defaultLineIndex, blockLineIndex, lineIndex;
   160     int32_t lineNumber;
   161     LineType lineType;
   162     char *fieldLimit;
   163     char *lineLimit;
   165     UVersionInfo ucdVersion;
   166     UniProps defaultProps, blockProps, cpProps;
   167     // Multiple lines so that default and block properties can maintain pointers
   168     // into their line buffers.
   169     char lines[kNumLineBuffers][4096];
   170 };
   172 U_NAMESPACE_END
   174 #endif  // __PPUCD_H__

mercurial