1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/ppucd.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,174 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2011-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: ppucd.h 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2011dec11 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#ifndef __PPUCD_H__ 1.19 +#define __PPUCD_H__ 1.20 + 1.21 +#include "unicode/utypes.h" 1.22 +#include "unicode/uniset.h" 1.23 +#include "unicode/unistr.h" 1.24 + 1.25 +#include <stdio.h> 1.26 + 1.27 +/** Additions to the uchar.h enum UProperty. */ 1.28 +enum { 1.29 + /** Name_Alias */ 1.30 + PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, 1.31 + PPUCD_CONDITIONAL_CASE_MAPPINGS, 1.32 + PPUCD_TURKIC_CASE_FOLDING 1.33 +}; 1.34 + 1.35 +U_NAMESPACE_BEGIN 1.36 + 1.37 +class U_TOOLUTIL_API PropertyNames { 1.38 +public: 1.39 + virtual ~PropertyNames(); 1.40 + virtual int32_t getPropertyEnum(const char *name) const; 1.41 + virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; 1.42 +}; 1.43 + 1.44 +struct U_TOOLUTIL_API UniProps { 1.45 + UniProps(); 1.46 + ~UniProps(); 1.47 + 1.48 + int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } 1.49 + 1.50 + UChar32 start, end; 1.51 + UBool binProps[UCHAR_BINARY_LIMIT]; 1.52 + int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; 1.53 + UVersionInfo age; 1.54 + UChar32 bmg, bpb; 1.55 + UChar32 scf, slc, stc, suc; 1.56 + int32_t digitValue; 1.57 + const char *numericValue; 1.58 + const char *name; 1.59 + const char *nameAlias; 1.60 + UnicodeString cf, lc, tc, uc; 1.61 + UnicodeSet scx; 1.62 +}; 1.63 + 1.64 +class U_TOOLUTIL_API PreparsedUCD { 1.65 +public: 1.66 + enum LineType { 1.67 + /** No line, end of file. */ 1.68 + NO_LINE, 1.69 + /** Empty line. (Might contain a comment.) */ 1.70 + EMPTY_LINE, 1.71 + 1.72 + /** ucd;6.1.0 */ 1.73 + UNICODE_VERSION_LINE, 1.74 + 1.75 + /** property;Binary;Alpha;Alphabetic */ 1.76 + PROPERTY_LINE, 1.77 + /** binary;N;No;F;False */ 1.78 + BINARY_LINE, 1.79 + /** value;gc;Zs;Space_Separator */ 1.80 + VALUE_LINE, 1.81 + 1.82 + /** defaults;0000..10FFFF;age=NA;bc=L;... */ 1.83 + DEFAULTS_LINE, 1.84 + /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ 1.85 + BLOCK_LINE, 1.86 + /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ 1.87 + CP_LINE, 1.88 + 1.89 + /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ 1.90 + ALG_NAMES_RANGE_LINE, 1.91 + 1.92 + LINE_TYPE_COUNT 1.93 + }; 1.94 + 1.95 + /** 1.96 + * Constructor. 1.97 + * Prepare this object for a new, empty package. 1.98 + */ 1.99 + PreparsedUCD(const char *filename, UErrorCode &errorCode); 1.100 + 1.101 + /** Destructor. */ 1.102 + ~PreparsedUCD(); 1.103 + 1.104 + /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ 1.105 + void setPropertyNames(const PropertyNames *pn) { pnames=pn; } 1.106 + 1.107 + /** 1.108 + * Reads a line from the preparsed UCD file. 1.109 + * Splits the line by replacing each ';' with a NUL. 1.110 + */ 1.111 + LineType readLine(UErrorCode &errorCode); 1.112 + 1.113 + /** Returns the number of the line read by readLine(). */ 1.114 + int32_t getLineNumber() const { return lineNumber; } 1.115 + 1.116 + /** Returns the line's next field, or NULL. */ 1.117 + const char *nextField(); 1.118 + 1.119 + /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ 1.120 + const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } 1.121 + 1.122 + /** Returns TRUE if the current line has property values. */ 1.123 + UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; } 1.124 + 1.125 + /** 1.126 + * Parses properties from the current line. 1.127 + * Clears newValues and sets UProperty codes for property values mentioned 1.128 + * on the current line (as opposed to being inherited). 1.129 + * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. 1.130 + * The returned UniProps are usable until the next line of the same type is read. 1.131 + */ 1.132 + const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); 1.133 + 1.134 + /** 1.135 + * Returns the code point range for the current algnamesrange line. 1.136 + * Calls & parses nextField(). 1.137 + * Further nextField() calls will yield the range's type & prefix string. 1.138 + * Returns U_SUCCESS(errorCode). 1.139 + */ 1.140 + UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); 1.141 + 1.142 +private: 1.143 + UBool isLineBufferAvailable(int32_t i) { 1.144 + return defaultLineIndex!=i && blockLineIndex!=i; 1.145 + } 1.146 + 1.147 + /** Resets the field iterator and returns the line's first field (the line type field). */ 1.148 + const char *firstField(); 1.149 + 1.150 + UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, 1.151 + UErrorCode &errorCode); 1.152 + UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); 1.153 + UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); 1.154 + void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); 1.155 + void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); 1.156 + 1.157 + static const int32_t kNumLineBuffers=3; 1.158 + 1.159 + PropertyNames *icuPnames; // owned 1.160 + const PropertyNames *pnames; // aliased 1.161 + FILE *file; 1.162 + int32_t defaultLineIndex, blockLineIndex, lineIndex; 1.163 + int32_t lineNumber; 1.164 + LineType lineType; 1.165 + char *fieldLimit; 1.166 + char *lineLimit; 1.167 + 1.168 + UVersionInfo ucdVersion; 1.169 + UniProps defaultProps, blockProps, cpProps; 1.170 + // Multiple lines so that default and block properties can maintain pointers 1.171 + // into their line buffers. 1.172 + char lines[kNumLineBuffers][4096]; 1.173 +}; 1.174 + 1.175 +U_NAMESPACE_END 1.176 + 1.177 +#endif // __PPUCD_H__