intl/icu/source/tools/toolutil/ucm.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2003-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: ucm.h
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2003jun20
michael@0 12 * created by: Markus W. Scherer
michael@0 13 *
michael@0 14 * Definitions for the .ucm file parser and handler module ucm.c.
michael@0 15 */
michael@0 16
michael@0 17 #ifndef __UCM_H__
michael@0 18 #define __UCM_H__
michael@0 19
michael@0 20 #include "unicode/utypes.h"
michael@0 21 #include "ucnvmbcs.h"
michael@0 22 #include "ucnv_ext.h"
michael@0 23 #include "filestrm.h"
michael@0 24 #include <stdio.h>
michael@0 25
michael@0 26 #if !UCONFIG_NO_CONVERSION
michael@0 27
michael@0 28 U_CDECL_BEGIN
michael@0 29
michael@0 30 /* constants for UCMapping.moveFlag (consumed by ucm_moveMappings()) */
michael@0 31 enum {
michael@0 32 UCM_MOVE_TO_EXT=1, /* mapping is to be moved from the base table to the extension table */
michael@0 33 UCM_REMOVE_MAPPING=2 /* NOTE(review): presumably "remove from the base table without moving" -- confirm in ucm.c */
michael@0 34 };
michael@0 35
michael@0 36 /*
michael@0 37 * Per-mapping data structure
michael@0 38 *
michael@0 39 * u if uLen==1: Unicode code point
michael@0 40 * else index to uLen code points
michael@0 41 * b if bLen<=4: up to 4 bytes
michael@0 42 * else index to bLen bytes
michael@0 43 * uLen number of code points
michael@0 44 * bLen number of bytes (<=4: stored inline in b.bytes; >4: b.idx is an index to them)
michael@0 45 * bIsMultipleChars indicates that the bytes contain more than one sequence
michael@0 46 * according to the state table (NOTE(review): no member of this name is declared below -- possibly a stale entry)
michael@0 47 * f flag for roundtrip (0), fallback (1), sub mapping (2), reverse fallback (3)
michael@0 48 * or "good one-way" mapping (4).
michael@0 49 * Same values as in the source file after |
michael@0 50 */
michael@0 51 typedef struct UCMapping {
michael@0 52 UChar32 u; /* code point itself, or index to the code points; see UCM_GET_CODE_POINTS() */
michael@0 53 union {
michael@0 54 uint32_t idx; /* used when bLen>4 */
michael@0 55 uint8_t bytes[4]; /* used when bLen<=4 */
michael@0 56 } b;
michael@0 57 int8_t uLen, bLen, f, moveFlag; /* moveFlag takes the UCM_MOVE_TO_EXT / UCM_REMOVE_MAPPING constants */
michael@0 58 } UCMapping;
michael@0 59
michael@0 60 /* constants for UCMTable.flagsType (stored in an int8_t field) */
michael@0 61 enum {
michael@0 62 UCM_FLAGS_INITIAL, /* 0: no mappings parsed yet */
michael@0 63 UCM_FLAGS_EXPLICIT, /* 1: .ucm file has mappings with | fallback indicators */
michael@0 64 UCM_FLAGS_IMPLICIT, /* 2: .ucm file has mappings without | fallback indicators, later wins */
michael@0 65 UCM_FLAGS_MIXED /* 3: both implicit and explicit */
michael@0 66 };
michael@0 67
michael@0 68 typedef struct UCMTable { /* a set of mappings plus the out-of-line code point and byte storage that the mappings index */
michael@0 69 UCMapping *mappings; /* array of mappings */
michael@0 70 int32_t mappingsCapacity, mappingsLength; /* capacity and used length of mappings[] */
michael@0 71
michael@0 72 UChar32 *codePoints; /* UCM_GET_CODE_POINTS() indexes this with UCMapping.u when uLen!=1 */
michael@0 73 int32_t codePointsCapacity, codePointsLength; /* capacity and used length of codePoints[] */
michael@0 74
michael@0 75 uint8_t *bytes; /* UCM_GET_BYTES() indexes this with UCMapping.b.idx when bLen>4 */
michael@0 76 int32_t bytesCapacity, bytesLength; /* capacity and used length of bytes[] */
michael@0 77
michael@0 78 /* index map for mapping by bytes first */
michael@0 79 int32_t *reverseMap;
michael@0 80
michael@0 81 uint8_t unicodeMask; /* NOTE(review): meaning of the mask bits is not visible in this header */
michael@0 82 int8_t flagsType; /* UCM_FLAGS_INITIAL etc. */
michael@0 83 UBool isSorted; /* NOTE(review): presumably maintained by ucm_sortTable() -- confirm */
michael@0 84 } UCMTable;
michael@0 85
michael@0 86 enum { /* NOTE(review): presumably flag values for UCMStates.stateFlags[] -- confirm in ucmstate.c */
michael@0 87 MBCS_STATE_FLAG_DIRECT=1,
michael@0 88 MBCS_STATE_FLAG_SURROGATES, /* implicitly 2 */
michael@0 89
michael@0 90 MBCS_STATE_FLAG_READY=16
michael@0 91 };
michael@0 92
michael@0 93 typedef struct UCMStates { /* state table data; embedded by value in UCMFile as the 'states' member */
michael@0 94 int32_t stateTable[MBCS_MAX_STATE_COUNT][256]; /* one row of 256 entries per state -- presumably one entry per byte value; confirm */
michael@0 95 uint32_t stateFlags[MBCS_MAX_STATE_COUNT], /* one entry per state */
michael@0 96 stateOffsetSum[MBCS_MAX_STATE_COUNT]; /* one entry per state */
michael@0 97
michael@0 98 int32_t countStates, minCharLength, maxCharLength, countToUCodeUnits;
michael@0 99 int8_t conversionType, outputType;
michael@0 100 } UCMStates;
michael@0 101
michael@0 102 typedef struct UCMFile { /* top-level handle returned by ucm_open(): two mapping tables plus the state data */
michael@0 103 UCMTable *base, *ext; /* base table and extension table */
michael@0 104 UCMStates states; /* stored by value, not by pointer */
michael@0 105
michael@0 106 char baseName[UCNV_MAX_CONVERTER_NAME_LENGTH]; /* fixed-size buffer */
michael@0 107 } UCMFile;
michael@0 108
michael@0 109 /* simple accesses ---------------------------------------------------------- */
michael@0 110
michael@0 111 #define UCM_GET_CODE_POINTS(t, m) \
michael@0 112 (((m)->uLen==1) ? &(m)->u : (t)->codePoints+(m)->u) /* pointer to the code points of mapping m: &m->u itself if uLen==1, else t->codePoints at index m->u; evaluates m more than once, so pass an expression without side effects */
michael@0 113
michael@0 114 #define UCM_GET_BYTES(t, m) \
michael@0 115 (((m)->bLen<=4) ? (m)->b.bytes : (t)->bytes+(m)->b.idx) /* pointer to the bytes of mapping m: the inline m->b.bytes if bLen<=4, else t->bytes at index m->b.idx; evaluates m more than once, so pass an expression without side effects */
michael@0 116
michael@0 117 /* APIs --------------------------------------------------------------------- */
michael@0 118
michael@0 119 U_CAPI UCMFile * U_EXPORT2
michael@0 120 ucm_open(void); /* NOTE(review): presumably allocates a UCMFile that is released with ucm_close() -- confirm in ucm.c */
michael@0 121
michael@0 122 U_CAPI void U_EXPORT2
michael@0 123 ucm_close(UCMFile *ucm);
michael@0 124
michael@0 125 U_CAPI UBool U_EXPORT2
michael@0 126 ucm_parseHeaderLine(UCMFile *ucm,
michael@0 127 char *line, char **pKey, char **pValue); /* line is non-const; pKey and pValue are output pointers -- NOTE(review): presumably set to point into line; confirm */
michael@0 128
michael@0 129 /* @return -1: illegal bytes; 0: suitable for base table; 1: needs to go into extension table */
michael@0 130 U_CAPI int32_t U_EXPORT2
michael@0 131 ucm_mappingType(UCMStates *baseStates,
michael@0 132 UCMapping *m,
michael@0 133 UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
michael@0 134 uint8_t bytes[UCNV_EXT_MAX_BYTES]);
michael@0 135
michael@0 136 /* add a mapping to the base or extension table of ucm, as appropriate */
michael@0 137 U_CAPI UBool U_EXPORT2
michael@0 138 ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates,
michael@0 139 UCMapping *m,
michael@0 140 UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
michael@0 141 uint8_t bytes[UCNV_EXT_MAX_BYTES]); /* NOTE(review): the UBool result presumably reports success -- confirm in ucm.c */
michael@0 142
michael@0 143 U_CAPI UBool U_EXPORT2
michael@0 144 ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates); /* takes a text line instead of a pre-parsed UCMapping; NOTE(review): presumably parses it and then adds it like ucm_addMappingAuto() -- confirm */
michael@0 145
michael@0 146
michael@0 147 U_CAPI UCMTable * U_EXPORT2
michael@0 148 ucm_openTable(void); /* NOTE(review): presumably allocates a UCMTable that is released with ucm_closeTable() -- confirm */
michael@0 149
michael@0 150 U_CAPI void U_EXPORT2
michael@0 151 ucm_closeTable(UCMTable *table);
michael@0 152
michael@0 153 U_CAPI void U_EXPORT2
michael@0 154 ucm_resetTable(UCMTable *table); /* NOTE(review): presumably empties the table for reuse without freeing it -- confirm */
michael@0 155
michael@0 156 U_CAPI void U_EXPORT2
michael@0 157 ucm_sortTable(UCMTable *t); /* see UCMTable.isSorted and UCMTable.reverseMap */
michael@0 158
michael@0 159 /*
michael@0 160 * Remove mappings with their move flag (UCMapping.moveFlag) set from the base table
michael@0 161 * and move some of them (those with UCM_MOVE_TO_EXT) to the extension table.
michael@0 162 */
michael@0 163 U_CAPI void U_EXPORT2
michael@0 164 ucm_moveMappings(UCMTable *base, UCMTable *ext);
michael@0 165
michael@0 166 /**
michael@0 167 * Read a table from a .ucm file, from just after the CHARMAP line up to
michael@0 168 * and including the END CHARMAP line.
michael@0 169 */
michael@0 170 U_CAPI void U_EXPORT2
michael@0 171 ucm_readTable(UCMFile *ucm, FileStream* convFile,
michael@0 172 UBool forBase, UCMStates *baseStates,
michael@0 173 UErrorCode *pErrorCode); /* returns void; pErrorCode is the only error-reporting channel in the signature */
michael@0 174
michael@0 175 /**
michael@0 176 * Check the validity of the mappings in ext against a base table's states;
michael@0 177 * necessary for extension-only tables that were read before their base tables.
michael@0 178 */
michael@0 179 U_CAPI UBool U_EXPORT2
michael@0 180 ucm_checkValidity(UCMTable *ext, UCMStates *baseStates); /* NOTE(review): the UBool result presumably means "all mappings valid" -- confirm */
michael@0 181
michael@0 182 /**
michael@0 183 * Check a base table against an extension table.
michael@0 184 * Set moveTarget!=NULL if it is possible to move mappings from the base.
michael@0 185 * This is the case where base and extension tables are parsed from a single file
michael@0 186 * (moveTarget==ext)
michael@0 187 * or when delta file mappings are subtracted from a base table.
michael@0 188 *
michael@0 189 * When a base table cannot be modified because a delta file is parsed in makeconv,
michael@0 190 * then set moveTarget=NULL.
michael@0 191 *
michael@0 192 * If intersectBase is set, then mappings that exist in the base table but not in
michael@0 193 * the extension table are moved to moveTarget instead of showing an error.
michael@0 194 *
michael@0 195 * Special mode:
michael@0 196 * If intersectBase==2 for a DBCS extension table, then SBCS mappings are
michael@0 197 * not moved out of the base unless their Unicode input requires it.
michael@0 198 * This helps ucmkbase generate base tables for DBCS-only extension .cnv files.
michael@0 199 *
michael@0 200 * For both tables in the same file, the extension table is automatically
michael@0 201 * built.
michael@0 202 * For separate files, the extension file can use a complete mapping table (.ucm file),
michael@0 203 * so that common mappings need not be stripped out manually.
michael@0 204 *
michael@0 205 *
michael@0 206 * Sort both tables, and then for each mapping direction:
michael@0 207 *
michael@0 208 * If intersectBase is TRUE and the base table contains a mapping
michael@0 209 * that does not exist in the extension table, then this mapping is moved
michael@0 210 * to moveTarget.
michael@0 211 *
michael@0 212 * - otherwise -
michael@0 213 *
michael@0 214 * If the base table contains a mapping for which the input sequence is
michael@0 215 * the same as the extension input, then
michael@0 216 * - if the output is the same: remove the extension mapping
michael@0 217 * - else: error
michael@0 218 *
michael@0 219 * If the base table contains a mapping for which the input sequence is
michael@0 220 * a prefix of the extension input, then
michael@0 221 * - if moveTarget!=NULL: move the base mapping to the moveTarget table
michael@0 222 * - else: error
michael@0 223 *
michael@0 224 * @return FALSE in case of an irreparable error
michael@0 225 */
michael@0 226 U_CAPI UBool U_EXPORT2
michael@0 227 ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
michael@0 228 UCMTable *moveTarget, UBool intersectBase); /* intersectBase is declared UBool but also carries the special value 2 described above */
michael@0 229
michael@0 230 U_CAPI void U_EXPORT2
michael@0 231 ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode); /* output goes to the stdio stream f; NOTE(review): byUnicode presumably selects ordering by Unicode rather than by bytes -- confirm */
michael@0 232
michael@0 233 U_CAPI void U_EXPORT2
michael@0 234 ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f); /* single-mapping counterpart of ucm_printTable(); table is needed because m may index into its codePoints/bytes arrays */
michael@0 235
michael@0 236
michael@0 237 U_CAPI void U_EXPORT2
michael@0 238 ucm_addState(UCMStates *states, const char *s); /* NOTE(review): s is presumably the text of one state-table row from the .ucm header -- confirm in ucmstate.c */
michael@0 239
michael@0 240 U_CAPI void U_EXPORT2
michael@0 241 ucm_processStates(UCMStates *states, UBool ignoreSISOCheck);
michael@0 242
michael@0 243 U_CAPI int32_t U_EXPORT2
michael@0 244 ucm_countChars(UCMStates *states,
michael@0 245 const uint8_t *bytes, int32_t length); /* bytes/length describe the input byte sequence; NOTE(review): result is presumably the number of characters per the state table, with a negative value on error -- confirm */
michael@0 246
michael@0 247
michael@0 248 U_CAPI int8_t U_EXPORT2
michael@0 249 ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps); /* bytes is the output buffer; NOTE(review): the int8_t result is presumably the parsed byte count (cf. UCMapping.bLen) and *ps the parse position -- confirm */
michael@0 250
michael@0 251 U_CAPI UBool U_EXPORT2
michael@0 252 ucm_parseMappingLine(UCMapping *m,
michael@0 253 UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
michael@0 254 uint8_t bytes[UCNV_EXT_MAX_BYTES],
michael@0 255 const char *line); /* line is the only const (input-only) argument; m, codePoints and bytes are writable */
michael@0 256
michael@0 257 U_CAPI void U_EXPORT2
michael@0 258 ucm_addMapping(UCMTable *table,
michael@0 259 UCMapping *m,
michael@0 260 UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
michael@0 261 uint8_t bytes[UCNV_EXT_MAX_BYTES]); /* adds to the given table directly, unlike ucm_addMappingAuto() which chooses between base and extension */
michael@0 262
michael@0 263 /* very makeconv-specific functions ----------------------------------------- */
michael@0 264
michael@0 265 /* finalize and optimize states after the toUnicode mappings are processed */
michael@0 266 U_CAPI void U_EXPORT2
michael@0 267 ucm_optimizeStates(UCMStates *states,
michael@0 268 uint16_t **pUnicodeCodeUnits,
michael@0 269 _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
michael@0 270 UBool verbose); /* pUnicodeCodeUnits is passed by address -- NOTE(review): presumably so the array can be replaced; confirm in ucmstate.c */
michael@0 271
michael@0 272 /* moved here because it is used inside ucmstate.c */
michael@0 273 U_CAPI int32_t U_EXPORT2
michael@0 274 ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
michael@0 275 uint32_t offset); /* NOTE(review): presumably returns the index of the fallback with this offset, or a negative value if none -- confirm */
michael@0 276
michael@0 277 /* very rptp2ucm-specific functions ----------------------------------------- */
michael@0 278
michael@0 279 /*
michael@0 280 * Input: Separate tables with mappings from/to Unicode (fromUTable, toUTable),
michael@0 281 * subchar (with its length) and subchar1 (0 if none).
michael@0 282 * All mappings must have flag 0.
michael@0 283 *
michael@0 284 * Output: fromUTable will contain the union of mappings with the correct
michael@0 285 * precision flags, and be sorted.
michael@0 286 */
michael@0 287 U_CAPI void U_EXPORT2
michael@0 288 ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable,
michael@0 289 const uint8_t *subchar, int32_t subcharLength,
michael@0 290 uint8_t subchar1);
michael@0 291
michael@0 292 U_CAPI UBool U_EXPORT2
michael@0 293 ucm_separateMappings(UCMFile *ucm, UBool isSISO); /* NOTE(review): presumably distributes the file's mappings between ucm->base and ucm->ext -- confirm in ucm.c */
michael@0 294
michael@0 295 U_CDECL_END
michael@0 296
michael@0 297 #endif
michael@0 298
michael@0 299 #endif
michael@0 300

mercurial