michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2011-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: ppucd.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2011dec11 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #ifndef __PPUCD_H__ michael@0: #define __PPUCD_H__ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uniset.h" michael@0: #include "unicode/unistr.h" michael@0: michael@0: #include michael@0: michael@0: /** Additions to the uchar.h enum UProperty. */ michael@0: enum { michael@0: /** Name_Alias */ michael@0: PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, michael@0: PPUCD_CONDITIONAL_CASE_MAPPINGS, michael@0: PPUCD_TURKIC_CASE_FOLDING michael@0: }; michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class U_TOOLUTIL_API PropertyNames { michael@0: public: michael@0: virtual ~PropertyNames(); michael@0: virtual int32_t getPropertyEnum(const char *name) const; michael@0: virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; michael@0: }; michael@0: michael@0: struct U_TOOLUTIL_API UniProps { michael@0: UniProps(); michael@0: ~UniProps(); michael@0: michael@0: int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } michael@0: michael@0: UChar32 start, end; michael@0: UBool binProps[UCHAR_BINARY_LIMIT]; michael@0: int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; michael@0: UVersionInfo age; michael@0: UChar32 bmg, bpb; michael@0: UChar32 scf, slc, stc, suc; michael@0: int32_t digitValue; michael@0: const char *numericValue; michael@0: const char *name; michael@0: const char *nameAlias; michael@0: UnicodeString cf, lc, tc, uc; michael@0: UnicodeSet scx; michael@0: }; michael@0: michael@0: class U_TOOLUTIL_API PreparsedUCD { michael@0: public: michael@0: enum LineType { michael@0: /** No line, end of file. */ michael@0: NO_LINE, michael@0: /** Empty line. (Might contain a comment.) */ michael@0: EMPTY_LINE, michael@0: michael@0: /** ucd;6.1.0 */ michael@0: UNICODE_VERSION_LINE, michael@0: michael@0: /** property;Binary;Alpha;Alphabetic */ michael@0: PROPERTY_LINE, michael@0: /** binary;N;No;F;False */ michael@0: BINARY_LINE, michael@0: /** value;gc;Zs;Space_Separator */ michael@0: VALUE_LINE, michael@0: michael@0: /** defaults;0000..10FFFF;age=NA;bc=L;... */ michael@0: DEFAULTS_LINE, michael@0: /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ michael@0: BLOCK_LINE, michael@0: /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ michael@0: CP_LINE, michael@0: michael@0: /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ michael@0: ALG_NAMES_RANGE_LINE, michael@0: michael@0: LINE_TYPE_COUNT michael@0: }; michael@0: michael@0: /** michael@0: * Constructor. michael@0: * Prepare this object for a new, empty package. michael@0: */ michael@0: PreparsedUCD(const char *filename, UErrorCode &errorCode); michael@0: michael@0: /** Destructor. */ michael@0: ~PreparsedUCD(); michael@0: michael@0: /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ michael@0: void setPropertyNames(const PropertyNames *pn) { pnames=pn; } michael@0: michael@0: /** michael@0: * Reads a line from the preparsed UCD file. michael@0: * Splits the line by replacing each ';' with a NUL. michael@0: */ michael@0: LineType readLine(UErrorCode &errorCode); michael@0: michael@0: /** Returns the number of the line read by readLine(). */ michael@0: int32_t getLineNumber() const { return lineNumber; } michael@0: michael@0: /** Returns the line's next field, or NULL. */ michael@0: const char *nextField(); michael@0: michael@0: /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ michael@0: const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } michael@0: michael@0: /** Returns TRUE if the current line has property values. */ michael@0: UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; } michael@0: michael@0: /** michael@0: * Parses properties from the current line. michael@0: * Clears newValues and sets UProperty codes for property values mentioned michael@0: * on the current line (as opposed to being inherited). michael@0: * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. michael@0: * The returned UniProps are usable until the next line of the same type is read. michael@0: */ michael@0: const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); michael@0: michael@0: /** michael@0: * Returns the code point range for the current algnamesrange line. michael@0: * Calls & parses nextField(). michael@0: * Further nextField() calls will yield the range's type & prefix string. michael@0: * Returns U_SUCCESS(errorCode). michael@0: */ michael@0: UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); michael@0: michael@0: private: michael@0: UBool isLineBufferAvailable(int32_t i) { michael@0: return defaultLineIndex!=i && blockLineIndex!=i; michael@0: } michael@0: michael@0: /** Resets the field iterator and returns the line's first field (the line type field). */ michael@0: const char *firstField(); michael@0: michael@0: UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, michael@0: UErrorCode &errorCode); michael@0: UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); michael@0: UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); michael@0: void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); michael@0: void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); michael@0: michael@0: static const int32_t kNumLineBuffers=3; michael@0: michael@0: PropertyNames *icuPnames; // owned michael@0: const PropertyNames *pnames; // aliased michael@0: FILE *file; michael@0: int32_t defaultLineIndex, blockLineIndex, lineIndex; michael@0: int32_t lineNumber; michael@0: LineType lineType; michael@0: char *fieldLimit; michael@0: char *lineLimit; michael@0: michael@0: UVersionInfo ucdVersion; michael@0: UniProps defaultProps, blockProps, cpProps; michael@0: // Multiple lines so that default and block properties can maintain pointers michael@0: // into their line buffers. michael@0: char lines[kNumLineBuffers][4096]; michael@0: }; michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif // __PPUCD_H__