intl/icu/source/tools/toolutil/ppucd.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2011-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: ppucd.h
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2011dec11
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #ifndef __PPUCD_H__
michael@0 16 #define __PPUCD_H__
michael@0 17
michael@0 18 #include "unicode/utypes.h"
michael@0 19 #include "unicode/uniset.h"
michael@0 20 #include "unicode/unistr.h"
michael@0 21
michael@0 22 #include <stdio.h>
michael@0 23
michael@0 24 /**
 * Additions to the uchar.h enum UProperty.
 * The first constant is set to UCHAR_STRING_LIMIT and the remaining
 * enumerators take the next consecutive values.
 * NOTE(review): presumably this numbering keeps these tool-only codes from
 * colliding with the UProperty constants below that limit -- confirm
 * against uchar.h.
 */
michael@0 25 enum {
michael@0 26 /** Name_Alias */
michael@0 27 PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT,
/** Equals PPUCD_NAME_ALIAS+1. NOTE(review): exact data source not visible in this header. */
michael@0 28 PPUCD_CONDITIONAL_CASE_MAPPINGS,
/** Equals PPUCD_NAME_ALIAS+2. NOTE(review): exact data source not visible in this header. */
michael@0 29 PPUCD_TURKIC_CASE_FOLDING
michael@0 30 };
michael@0 31
michael@0 32 U_NAMESPACE_BEGIN
michael@0 33
/**
 * Maps property names and property value names (C strings) to int32_t codes.
 * Both lookups are virtual, so a subclass can replace them; such an object
 * can be installed with PreparsedUCD::setPropertyNames().
 * NOTE(review): the value returned for an unrecognized name is not visible
 * in this header -- check the implementation before relying on it.
 */
michael@0 34 class U_TOOLUTIL_API PropertyNames {
michael@0 35 public:
michael@0 36 virtual ~PropertyNames();
/** Returns the integer code for the property with the given name. */
michael@0 37 virtual int32_t getPropertyEnum(const char *name) const;
/** Returns the integer code for the named value of the given property. */
michael@0 38 virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
michael@0 39 };
michael@0 40
/**
 * Plain data holder for the property values of one code point or range.
 * PreparsedUCD keeps three of these (defaults, block, code point) and
 * returns a pointer to one from getProps().
 */
michael@0 41 struct U_TOOLUTIL_API UniProps {
michael@0 42 UniProps();
michael@0 43 ~UniProps();
michael@0 44
/**
 * Returns the stored value of an integer property.
 * prop is rebased by UCHAR_INT_START to index intProps; there is no range
 * check, so the caller must pass UCHAR_INT_START<=prop<UCHAR_INT_LIMIT.
 */
michael@0 45 int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; }
michael@0 46
// NOTE(review): presumably the inclusive code point range these values apply to -- confirm.
michael@0 47 UChar32 start, end;
// One flag per binary property, indexed directly by the property code.
michael@0 48 UBool binProps[UCHAR_BINARY_LIMIT];
// One value per integer property, indexed by (property code - UCHAR_INT_START).
michael@0 49 int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
michael@0 50 UVersionInfo age;
// NOTE(review): the short member names below look like Unicode short property
// aliases (bmg, bpb, scf, slc, stc, suc, cf, lc, tc, uc, scx) -- confirm
// against PropertyAliases.txt.
michael@0 51 UChar32 bmg, bpb;
michael@0 52 UChar32 scf, slc, stc, suc;
michael@0 53 int32_t digitValue;
// NOTE(review): these const char * members are not owned strings as far as
// this header shows; the comment on PreparsedUCD::lines says default and
// block properties keep pointers into its line buffers -- confirm lifetime.
michael@0 54 const char *numericValue;
michael@0 55 const char *name;
michael@0 56 const char *nameAlias;
michael@0 57 UnicodeString cf, lc, tc, uc;
// Filled by PreparsedUCD::parseScriptExtensions().
michael@0 58 UnicodeSet scx;
michael@0 59 };
michael@0 60
/**
 * Line-oriented reader for a preparsed UCD file.
 * Typical use per the member docs: call readLine() to fetch and classify a
 * line, then nextField(), getProps() or getRangeForAlgNames() depending on
 * the returned LineType.
 * NOTE(review): the class holds an owned pointer (icuPnames) and a FILE *,
 * but no copy constructor or assignment operator is declared here --
 * confirm that instances are never copied.
 */
michael@0 61 class U_TOOLUTIL_API PreparsedUCD {
michael@0 62 public:
/** Kind of line returned by readLine(); each comment shows a sample line. */
michael@0 63 enum LineType {
michael@0 64 /** No line, end of file. */
michael@0 65 NO_LINE,
michael@0 66 /** Empty line. (Might contain a comment.) */
michael@0 67 EMPTY_LINE,
michael@0 68
michael@0 69 /** ucd;6.1.0 */
michael@0 70 UNICODE_VERSION_LINE,
michael@0 71
michael@0 72 /** property;Binary;Alpha;Alphabetic */
michael@0 73 PROPERTY_LINE,
michael@0 74 /** binary;N;No;F;False */
michael@0 75 BINARY_LINE,
michael@0 76 /** value;gc;Zs;Space_Separator */
michael@0 77 VALUE_LINE,
michael@0 78
// DEFAULTS_LINE, BLOCK_LINE and CP_LINE must stay contiguous and in this
// order: lineHasPropertyValues() tests DEFAULTS_LINE<=lineType<=CP_LINE.
michael@0 79 /** defaults;0000..10FFFF;age=NA;bc=L;... */
michael@0 80 DEFAULTS_LINE,
michael@0 81 /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
michael@0 82 BLOCK_LINE,
michael@0 83 /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
michael@0 84 CP_LINE,
michael@0 85
michael@0 86 /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
michael@0 87 ALG_NAMES_RANGE_LINE,
michael@0 88
/** Last enumerator; its value equals the number of line types above. */
michael@0 89 LINE_TYPE_COUNT
michael@0 90 };
michael@0 91
michael@0 92 /**
michael@0 93 * Constructor.
michael@0 94 * Takes the name of the preparsed UCD file that readLine() will read,
 * and an in/out ICU error code.
 * NOTE(review): the earlier wording here ("new, empty package") did not
 * match this signature. Presumably the file is opened here and failures
 * are reported through errorCode -- confirm in the implementation.
michael@0 95 */
michael@0 96 PreparsedUCD(const char *filename, UErrorCode &errorCode);
michael@0 97
michael@0 98 /** Destructor. */
michael@0 99 ~PreparsedUCD();
michael@0 100
michael@0 101 /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */
michael@0 102 void setPropertyNames(const PropertyNames *pn) { pnames=pn; }
michael@0 103
michael@0 104 /**
michael@0 105 * Reads a line from the preparsed UCD file.
michael@0 106 * Splits the line by replacing each ';' with a NUL.
 * Returns the type of the line that was read.
michael@0 107 */
michael@0 108 LineType readLine(UErrorCode &errorCode);
michael@0 109
michael@0 110 /** Returns the number of the line read by readLine(). */
michael@0 111 int32_t getLineNumber() const { return lineNumber; }
michael@0 112
michael@0 113 /** Returns the line's next field, or NULL. */
michael@0 114 const char *nextField();
michael@0 115
michael@0 116 /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
michael@0 117 const UVersionInfo &getUnicodeVersion() const { return ucdVersion; }
michael@0 118
michael@0 119 /**
 * Returns TRUE if the current line has property values,
 * that is, if it is a DEFAULTS_LINE, BLOCK_LINE or CP_LINE.
 * Implemented as a range test, so it depends on the LineType order.
 */
michael@0 120 UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; }
michael@0 121
michael@0 122 /**
michael@0 123 * Parses properties from the current line.
michael@0 124 * Clears newValues and sets UProperty codes for property values mentioned
michael@0 125 * on the current line (as opposed to being inherited).
michael@0 126 * Returns a pointer to the filled-in UniProps, or NULL if something went wrong.
michael@0 127 * The returned UniProps are usable until the next line of the same type is read.
michael@0 128 */
michael@0 129 const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);
michael@0 130
michael@0 131 /**
michael@0 132 * Returns the code point range for the current algnamesrange line.
michael@0 133 * Calls & parses nextField().
michael@0 134 * Further nextField() calls will yield the range's type & prefix string.
michael@0 135 * Returns U_SUCCESS(errorCode).
michael@0 136 */
michael@0 137 UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);
michael@0 138
michael@0 139 private:
/**
 * Returns TRUE if index i is neither defaultLineIndex nor blockLineIndex.
 * NOTE(review): presumably i indexes lines[] and a TRUE result means that
 * buffer may be overwritten by the next read -- confirm in the implementation.
 */
michael@0 140 UBool isLineBufferAvailable(int32_t i) {
michael@0 141 return defaultLineIndex!=i && blockLineIndex!=i;
michael@0 142 }
michael@0 143
michael@0 144 /** Resets the field iterator and returns the line's first field (the line type field). */
michael@0 145 const char *firstField();
michael@0 146
// Parsing helpers for individual fields. Each takes a C string (a field or
// part of one) and reports problems through errorCode.
// NOTE(review): exact return conventions are not visible in this header.
michael@0 147 UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
michael@0 148 UErrorCode &errorCode);
michael@0 149 UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
michael@0 150 UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
michael@0 151 void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
michael@0 152 void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode);
michael@0 153
// Number of rows in lines[].
michael@0 154 static const int32_t kNumLineBuffers=3;
michael@0 155
michael@0 156 PropertyNames *icuPnames; // owned
michael@0 157 const PropertyNames *pnames; // aliased
michael@0 158 FILE *file;
// NOTE(review): presumably indexes into lines[] for the buffers holding the
// latest defaults line, block line and current line -- confirm.
michael@0 159 int32_t defaultLineIndex, blockLineIndex, lineIndex;
// Value returned by getLineNumber().
michael@0 160 int32_t lineNumber;
// Type of the current line; tested by lineHasPropertyValues().
michael@0 161 LineType lineType;
// NOTE(review): presumably the field-iterator position and the end of the
// current line within its buffer -- confirm in the implementation.
michael@0 162 char *fieldLimit;
michael@0 163 char *lineLimit;
michael@0 164
// Value returned by getUnicodeVersion().
michael@0 165 UVersionInfo ucdVersion;
michael@0 166 UniProps defaultProps, blockProps, cpProps;
michael@0 167 // Multiple lines so that default and block properties can maintain pointers
michael@0 168 // into their line buffers.
// Each buffer holds up to 4096 chars.
michael@0 169 char lines[kNumLineBuffers][4096];
michael@0 170 };
michael@0 171
michael@0 172 U_NAMESPACE_END
michael@0 173
michael@0 174 #endif // __PPUCD_H__

mercurial