Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2011-2013, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | * file name: ppucd.h |
michael@0 | 7 | * encoding: US-ASCII |
michael@0 | 8 | * tab size: 8 (not used) |
michael@0 | 9 | * indentation:4 |
michael@0 | 10 | * |
michael@0 | 11 | * created on: 2011dec11 |
michael@0 | 12 | * created by: Markus W. Scherer |
michael@0 | 13 | */ |
michael@0 | 14 | |
michael@0 | 15 | #ifndef __PPUCD_H__ |
michael@0 | 16 | #define __PPUCD_H__ |
michael@0 | 17 | |
michael@0 | 18 | #include "unicode/utypes.h" |
michael@0 | 19 | #include "unicode/uniset.h" |
michael@0 | 20 | #include "unicode/unistr.h" |
michael@0 | 21 | |
michael@0 | 22 | #include <stdio.h> |
michael@0 | 23 | |
michael@0 | 24 | /** Additions to the uchar.h enum UProperty. The first addition takes the value UCHAR_STRING_LIMIT and the rest count upward from there. */ |
michael@0 | 25 | enum { |
michael@0 | 26 | /** Name_Alias */ |
michael@0 | 27 | PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT, |
/* NOTE(review): the next two constants have no description in this header; going by their names they presumably stand for conditional case mappings and Turkic case folding data -- confirm against the implementation. */
michael@0 | 28 | PPUCD_CONDITIONAL_CASE_MAPPINGS, |
michael@0 | 29 | PPUCD_TURKIC_CASE_FOLDING |
michael@0 | 30 | }; |
michael@0 | 31 | |
michael@0 | 32 | U_NAMESPACE_BEGIN |
michael@0 | 33 | |
/**
 * Looks up integer codes for property names and property value names.
 * The destructor and both lookup methods are virtual, so a caller can supply
 * its own implementation (PreparsedUCD::setPropertyNames() accepts one).
 */
michael@0 | 34 | class U_TOOLUTIL_API PropertyNames { |
michael@0 | 35 | public: |
michael@0 | 36 | virtual ~PropertyNames(); |
/* Returns the integer code for the property with the given name. NOTE(review): the result for an unknown name is not visible in this header -- confirm in the implementation. */
michael@0 | 37 | virtual int32_t getPropertyEnum(const char *name) const; |
/* Returns the integer code for the named value of the given property. NOTE(review): the result for an unknown property or name is not visible in this header -- confirm in the implementation. */
michael@0 | 38 | virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const; |
michael@0 | 39 | }; |
michael@0 | 40 | |
/**
 * Holder for the property values that PreparsedUCD::getProps() fills in.
 * NOTE(review): start and end presumably delimit the code point range that
 * the values apply to -- confirm in the implementation.
 */
michael@0 | 41 | struct U_TOOLUTIL_API UniProps { |
michael@0 | 42 | UniProps(); |
michael@0 | 43 | ~UniProps(); |
michael@0 | 44 | |
/*
 * Returns the stored value of an integer property.
 * prop is rebased by UCHAR_INT_START to index intProps; there is no range
 * check, so prop must satisfy UCHAR_INT_START<=prop<UCHAR_INT_LIMIT.
 */
michael@0 | 45 | int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; } |
michael@0 | 46 | |
michael@0 | 47 | UChar32 start, end; |
/* Indexed directly by binary property code (array size is UCHAR_BINARY_LIMIT). */
michael@0 | 48 | UBool binProps[UCHAR_BINARY_LIMIT]; |
/* Indexed by integer property code minus UCHAR_INT_START; see getIntProp(). */
michael@0 | 49 | int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]; |
michael@0 | 50 | UVersionInfo age; |
/*
 * NOTE(review): the abbreviated member names below look like UCD short
 * property aliases (bmg=Bidi_Mirroring_Glyph, bpb=Bidi_Paired_Bracket,
 * scf/slc/stc/suc=simple case folding and lower/title/upper mappings,
 * cf/lc/tc/uc=the full string forms, scx=Script_Extensions) -- confirm
 * against PropertyAliases.txt.
 */
michael@0 | 51 | UChar32 bmg, bpb; |
michael@0 | 52 | UChar32 scf, slc, stc, suc; |
michael@0 | 53 | int32_t digitValue; |
/* NOTE(review): nothing in this header shows who owns the const char * strings below; a comment in PreparsedUCD says property sets keep pointers into its line buffers, so presumably these point there -- confirm. */
michael@0 | 54 | const char *numericValue; |
michael@0 | 55 | const char *name; |
michael@0 | 56 | const char *nameAlias; |
michael@0 | 57 | UnicodeString cf, lc, tc, uc; |
michael@0 | 58 | UnicodeSet scx; |
michael@0 | 59 | }; |
michael@0 | 60 | |
/**
 * Line-oriented reader for a preparsed UCD file.
 * readLine() classifies each line as one of the LineType values and splits it
 * into ';'-separated fields; nextField(), getProps() and getRangeForAlgNames()
 * then give access to the contents of the current line.
 */
michael@0 | 61 | class U_TOOLUTIL_API PreparsedUCD { |
michael@0 | 62 | public: |
michael@0 | 63 | enum LineType { |
michael@0 | 64 | /** No line, end of file. */ |
michael@0 | 65 | NO_LINE, |
michael@0 | 66 | /** Empty line. (Might contain a comment.) */ |
michael@0 | 67 | EMPTY_LINE, |
michael@0 | 68 | |
michael@0 | 69 | /** ucd;6.1.0 */ |
michael@0 | 70 | UNICODE_VERSION_LINE, |
michael@0 | 71 | |
michael@0 | 72 | /** property;Binary;Alpha;Alphabetic */ |
michael@0 | 73 | PROPERTY_LINE, |
michael@0 | 74 | /** binary;N;No;F;False */ |
michael@0 | 75 | BINARY_LINE, |
michael@0 | 76 | /** value;gc;Zs;Space_Separator */ |
michael@0 | 77 | VALUE_LINE, |
michael@0 | 78 | |
/* DEFAULTS_LINE, BLOCK_LINE and CP_LINE must stay consecutive and in this order: lineHasPropertyValues() tests for the range DEFAULTS_LINE..CP_LINE. */
michael@0 | 79 | /** defaults;0000..10FFFF;age=NA;bc=L;... */ |
michael@0 | 80 | DEFAULTS_LINE, |
michael@0 | 81 | /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */ |
michael@0 | 82 | BLOCK_LINE, |
michael@0 | 83 | /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */ |
michael@0 | 84 | CP_LINE, |
michael@0 | 85 | |
michael@0 | 86 | /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */ |
michael@0 | 87 | ALG_NAMES_RANGE_LINE, |
michael@0 | 88 | |
/* Last enumerator, so its value equals the number of line types above; it is not a line type itself. */
michael@0 | 89 | LINE_TYPE_COUNT |
michael@0 | 90 | }; |
michael@0 | 91 | |
michael@0 | 92 | /** |
michael@0 | 93 | * Constructor for a reader of the preparsed UCD file named by filename. |
michael@0 | 94 | * NOTE(review): presumably opens the file (see the FILE *file member) and reports failure via errorCode -- confirm in the implementation. |
michael@0 | 95 | */ |
michael@0 | 96 | PreparsedUCD(const char *filename, UErrorCode &errorCode); |
michael@0 | 97 | |
michael@0 | 98 | /** Destructor. */ |
michael@0 | 99 | ~PreparsedUCD(); |
michael@0 | 100 | |
michael@0 | 101 | /** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */ |
michael@0 | 102 | void setPropertyNames(const PropertyNames *pn) { pnames=pn; } |
michael@0 | 103 | |
michael@0 | 104 | /** |
michael@0 | 105 | * Reads a line from the preparsed UCD file. |
michael@0 | 106 | * Splits the line by replacing each ';' with a NUL. |
michael@0 | 107 | */ |
michael@0 | 108 | LineType readLine(UErrorCode &errorCode); |
michael@0 | 109 | |
michael@0 | 110 | /** Returns the number of the line read by readLine(). */ |
michael@0 | 111 | int32_t getLineNumber() const { return lineNumber; } |
michael@0 | 112 | |
michael@0 | 113 | /** Returns the line's next field, or NULL. */ |
michael@0 | 114 | const char *nextField(); |
michael@0 | 115 | |
michael@0 | 116 | /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */ |
michael@0 | 117 | const UVersionInfo &getUnicodeVersion() const { return ucdVersion; } |
michael@0 | 118 | |
michael@0 | 119 | /** Returns TRUE if the current line has property values, that is, if its type is DEFAULTS_LINE, BLOCK_LINE or CP_LINE (tested as an enum range). */ |
michael@0 | 120 | UBool lineHasPropertyValues() const { return DEFAULTS_LINE<=lineType && lineType<=CP_LINE; } |
michael@0 | 121 | |
michael@0 | 122 | /** |
michael@0 | 123 | * Parses properties from the current line. |
michael@0 | 124 | * Clears newValues and sets UProperty codes for property values mentioned |
michael@0 | 125 | * on the current line (as opposed to being inherited). |
michael@0 | 126 | * Returns a pointer to the filled-in UniProps, or NULL if something went wrong. |
michael@0 | 127 | * The returned UniProps are usable until the next line of the same type is read. |
michael@0 | 128 | */ |
michael@0 | 129 | const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode); |
michael@0 | 130 | |
michael@0 | 131 | /** |
michael@0 | 132 | * Returns the code point range for the current algnamesrange line. |
michael@0 | 133 | * Calls & parses nextField(). |
michael@0 | 134 | * Further nextField() calls will yield the range's type & prefix string. |
michael@0 | 135 | * Returns U_SUCCESS(errorCode). |
michael@0 | 136 | */ |
michael@0 | 137 | UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode); |
michael@0 | 138 | |
michael@0 | 139 | private: |
/** Returns TRUE if line buffer i is referenced by neither defaultLineIndex nor blockLineIndex. */
michael@0 | 140 | UBool isLineBufferAvailable(int32_t i) { |
michael@0 | 141 | return defaultLineIndex!=i && blockLineIndex!=i; |
michael@0 | 142 | } |
michael@0 | 143 | |
michael@0 | 144 | /** Resets the field iterator and returns the line's first field (the line type field). */ |
michael@0 | 145 | const char *firstField(); |
michael@0 | 146 | |
/* Parsing helpers for individual fields; each one takes the field text and a UErrorCode. NOTE(review): exact error reporting and return conventions are not visible in this header. */
michael@0 | 147 | UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues, |
michael@0 | 148 | UErrorCode &errorCode); |
michael@0 | 149 | UChar32 parseCodePoint(const char *s, UErrorCode &errorCode); |
michael@0 | 150 | UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode); |
michael@0 | 151 | void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode); |
michael@0 | 152 | void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode); |
michael@0 | 153 | |
/* Number of rows in the lines array below. */
michael@0 | 154 | static const int32_t kNumLineBuffers=3; |
michael@0 | 155 | |
michael@0 | 156 | PropertyNames *icuPnames; // owned |
michael@0 | 157 | const PropertyNames *pnames; // aliased |
michael@0 | 158 | FILE *file; |
/* Indexes into lines. NOTE(review): presumably the buffers holding the most recent defaults line, the most recent block line, and the current line -- confirm in the implementation. */
michael@0 | 159 | int32_t defaultLineIndex, blockLineIndex, lineIndex; |
michael@0 | 160 | int32_t lineNumber; |
michael@0 | 161 | LineType lineType; |
michael@0 | 162 | char *fieldLimit; |
michael@0 | 163 | char *lineLimit; |
michael@0 | 164 | |
michael@0 | 165 | UVersionInfo ucdVersion; |
michael@0 | 166 | UniProps defaultProps, blockProps, cpProps; |
michael@0 | 167 | // Multiple lines so that default and block properties can maintain pointers |
michael@0 | 168 | // into their line buffers. Each buffer holds 4096 chars. |
michael@0 | 169 | char lines[kNumLineBuffers][4096]; |
michael@0 | 170 | }; |
michael@0 | 171 | |
michael@0 | 172 | U_NAMESPACE_END |
michael@0 | 173 | |
michael@0 | 174 | #endif // __PPUCD_H__ |