intl/icu/source/tools/toolutil/ucm.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2  *******************************************************************************
     3  *   Copyright (C) 2003-2013, International Business Machines
     4  *   Corporation and others.  All Rights Reserved.
     5  *******************************************************************************
     6  *   file name:  ucm.h
     7  *   encoding:   US-ASCII
     8  *   tab size:   8 (not used)
     9  *   indentation:4
    10  *
    11  *   created on: 2003jun20
    12  *   created by: Markus W. Scherer
    13  *
    14  *   Definitions for the .ucm file parser and handler module ucm.c.
    15  */
    17 #ifndef __UCM_H__
    18 #define __UCM_H__
    20 #include "unicode/utypes.h"
    21 #include "ucnvmbcs.h"
    22 #include "ucnv_ext.h"
    23 #include "filestrm.h"
    24 #include <stdio.h>
    26 #if !UCONFIG_NO_CONVERSION
    28 U_CDECL_BEGIN
    30 /* constants for UCMapping.moveFlag */
    31 enum {
    32     UCM_MOVE_TO_EXT=1,      /* ucm_moveMappings() takes the mapping out of the base table and puts it into the extension table */
    33     UCM_REMOVE_MAPPING=2    /* ucm_moveMappings() takes the mapping out of the base table; NOTE(review): presumably it is simply dropped -- confirm in ucm.c */
    34 };
    36 /*
    37  * Per-mapping data structure: one mapping between a Unicode sequence and a byte sequence.
    38  *
    39  * u if uLen==1: Unicode code point
    40  *   else index into UCMTable.codePoints of uLen code points (see UCM_GET_CODE_POINTS)
    41  * b if bLen<=4: up to 4 bytes, stored inline in b.bytes
    42  *   else b.idx is the index into UCMTable.bytes of bLen bytes (see UCM_GET_BYTES)
    43  * uLen number of code points
    44  * bLen number of bytes
    45  * bIsMultipleChars indicates that the bytes contain more than one sequence
    46  *                  according to the state table (NOTE(review): no such member is declared in the struct below -- stale entry?)
    47  * f flag for roundtrip (0), fallback (1), sub mapping (2), reverse fallback (3)
    48  *   or "good one-way" mapping (4).
    49  *   Same values as in the source file after |
    50  */
    51 typedef struct UCMapping {
    52     UChar32 u;
    53     union {
    54         uint32_t idx;
    55         uint8_t bytes[4];
    56     } b;
    57     int8_t uLen, bLen, f, moveFlag; /* moveFlag takes the UCM_MOVE_TO_EXT / UCM_REMOVE_MAPPING constants */
    58 } UCMapping;
    60 /* constants for UCMTable.flagsType */
    61 enum { /* no explicit initializers: the values are 0..3 in declaration order */
    62     UCM_FLAGS_INITIAL,  /* no mappings parsed yet */
    63     UCM_FLAGS_EXPLICIT, /* .ucm file has mappings with | fallback indicators */
    64     UCM_FLAGS_IMPLICIT, /* .ucm file has mappings without | fallback indicators, later wins */
    65     UCM_FLAGS_MIXED     /* both implicit and explicit */
    66 };
    68 typedef struct UCMTable {
    69     UCMapping *mappings; /* the per-mapping records of this table */
    70     int32_t mappingsCapacity, mappingsLength; /* NOTE(review): presumably allocated vs. used element counts, here and for the two pairs below -- confirm in ucm.c */
    72     UChar32 *codePoints; /* code point sequences for mappings with uLen!=1; indexed by UCMapping.u (see UCM_GET_CODE_POINTS) */
    73     int32_t codePointsCapacity, codePointsLength;
    75     uint8_t *bytes; /* byte sequences for mappings with bLen>4; indexed by UCMapping.b.idx (see UCM_GET_BYTES) */
    76     int32_t bytesCapacity, bytesLength;
    78     /* index map for mapping by bytes first */
    79     int32_t *reverseMap;
    81     uint8_t unicodeMask;
    82     int8_t flagsType; /* UCM_FLAGS_INITIAL etc. */
    83     UBool isSorted; /* NOTE(review): presumably set once ucm_sortTable() has run -- confirm in ucm.c */
    84 } UCMTable;
    86 enum { /* NOTE(review): presumably flag values for UCMStates.stateFlags[] -- confirm in ucmstate.c */
    87     MBCS_STATE_FLAG_DIRECT=1,
    88     MBCS_STATE_FLAG_SURROGATES, /* no initializer: the value is 2 */
    90     MBCS_STATE_FLAG_READY=16
    91 };
    93 typedef struct UCMStates {
    94     int32_t stateTable[MBCS_MAX_STATE_COUNT][256]; /* 256 entries per state; NOTE(review): presumably one entry per input byte value -- confirm */
    95     uint32_t stateFlags[MBCS_MAX_STATE_COUNT], /* one flags word and one offset sum per state */
    96              stateOffsetSum[MBCS_MAX_STATE_COUNT];
    98     int32_t countStates, minCharLength, maxCharLength, countToUCodeUnits;
    99     int8_t conversionType, outputType;
   100 } UCMStates;
   102 typedef struct UCMFile {
   103     UCMTable *base, *ext; /* base and extension mapping tables */
   104     UCMStates states; /* embedded by value, not a pointer */
   106     char baseName[UCNV_MAX_CONVERTER_NAME_LENGTH]; /* NOTE(review): presumably the base converter's name for extension-only files -- confirm in ucm.c */
   107 } UCMFile;
   109 /* simple accesses ---------------------------------------------------------- */
   111 #define UCM_GET_CODE_POINTS(t, m) \
   112     (((m)->uLen==1) ? &(m)->u : (t)->codePoints+(m)->u) /* pointer to the code point(s) of mapping m in table t; m is expanded more than once, so pass an expression without side effects */
   114 #define UCM_GET_BYTES(t, m) \
   115     (((m)->bLen<=4) ? (m)->b.bytes : (t)->bytes+(m)->b.idx) /* pointer to the bytes of mapping m in table t; m is expanded more than once, so pass an expression without side effects */
   117 /* APIs --------------------------------------------------------------------- */
   119 U_CAPI UCMFile * U_EXPORT2
   120 ucm_open(void); /* NOTE(review): presumably returns a newly allocated UCMFile that must be released with ucm_close() -- confirm in ucm.c */
   122 U_CAPI void U_EXPORT2
   123 ucm_close(UCMFile *ucm); /* NOTE(review): presumably the counterpart of ucm_open() -- confirm in ucm.c */
   125 U_CAPI UBool U_EXPORT2
   126 ucm_parseHeaderLine(UCMFile *ucm,
   127                     char *line, char **pKey, char **pValue); /* NOTE(review): line is non-const and pKey/pValue are outputs, so this presumably splits line in place and returns pointers into it -- confirm in ucm.c */
   129 /* Classify mapping m. @return -1: illegal bytes; 0: suitable for base table; 1: needs to go into extension table */
   130 U_CAPI int32_t U_EXPORT2
   131 ucm_mappingType(UCMStates *baseStates,
   132                 UCMapping *m,
   133                 UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
   134                 uint8_t bytes[UCNV_EXT_MAX_BYTES]);
   136 /* add a mapping to the base or extension table as appropriate; NOTE(review): the UBool result presumably reports success -- confirm in ucm.c */
   137 U_CAPI UBool U_EXPORT2
   138 ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates,
   139                    UCMapping *m,
   140                    UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
   141                    uint8_t bytes[UCNV_EXT_MAX_BYTES]);
   143 U_CAPI UBool U_EXPORT2
   144 ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates); /* NOTE(review): presumably ucm_parseMappingLine() on line followed by ucm_addMappingAuto() -- confirm in ucm.c */
   147 U_CAPI UCMTable * U_EXPORT2
   148 ucm_openTable(void); /* NOTE(review): presumably returns a newly allocated, empty UCMTable to be released with ucm_closeTable() -- confirm in ucm.c */
   150 U_CAPI void U_EXPORT2
   151 ucm_closeTable(UCMTable *table);
   153 U_CAPI void U_EXPORT2
   154 ucm_resetTable(UCMTable *table); /* NOTE(review): presumably empties the table without freeing it -- confirm in ucm.c */
   156 U_CAPI void U_EXPORT2
   157 ucm_sortTable(UCMTable *t);
   159 /*
   160  * Remove mappings with their move flag set from the base table (base)
   161  * and move those flagged with UCM_MOVE_TO_EXT to the extension table (ext).
   162  */
   163 U_CAPI void U_EXPORT2
   164 ucm_moveMappings(UCMTable *base, UCMTable *ext);
   166 /**
   167  * Read a table from a .ucm file, starting after the CHARMAP line and
   168  * continuing through the END CHARMAP line (inclusive).
   169  */
   170 U_CAPI void U_EXPORT2
   171 ucm_readTable(UCMFile *ucm, FileStream* convFile,
   172               UBool forBase, UCMStates *baseStates,
   173               UErrorCode *pErrorCode);
   175 /**
   176  * Check the validity of mappings against a base table's states;
   177  * necessary for extension-only tables that were read before their base tables.
   178  */
   179 U_CAPI UBool U_EXPORT2
   180 ucm_checkValidity(UCMTable *ext, UCMStates *baseStates); /* NOTE(review): presumably returns TRUE when every mapping in ext is valid -- confirm in ucm.c */
   182 /**
   183  * Check a base table against an extension table.
   184  * Pass moveTarget!=NULL if mappings may be moved out of the base table.
   185  * This is the case where base and extension tables are parsed from a single file
   186  * (moveTarget==ext)
   187  * or when delta file mappings are subtracted from a base table.
   188  *
   189  * When a base table cannot be modified because a delta file is parsed in makeconv,
   190  * then set moveTarget=NULL.
   191  *
   192  * if(intersectBase) then mappings that exist in the base table but not in
   193  * the extension table are moved to moveTarget instead of showing an error.
   194  *
   195  * Special mode:
   196  * If intersectBase==2 for a DBCS extension table, then SBCS mappings are
   197  * not moved out of the base unless their Unicode input requires it.
   198  * This helps ucmkbase generate base tables for DBCS-only extension .cnv files.
   199  *
   200  * For both tables in the same file, the extension table is automatically
   201  * built.
   202  * For separate files, the extension file can use a complete mapping table (.ucm file),
   203  * so that common mappings need not be stripped out manually.
   204  *
   205  *
   206  * Sort both tables, and then for each mapping direction:
   207  *
   208  * If intersectBase is TRUE and the base table contains a mapping
   209  * that does not exist in the extension table, then this mapping is moved
   210  * to moveTarget.
   211  *
   212  * - otherwise -
   213  *
   214  * If the base table contains a mapping for which the input sequence is
   215  * the same as the extension input, then
   216  * - if the output is the same: remove the extension mapping
   217  * - else: error
   218  *
   219  * If the base table contains a mapping for which the input sequence is
   220  * a prefix of the extension input, then
   221  * - if moveTarget!=NULL: move the base mapping to the moveTarget table
   222  * - else: error
   223  *
   224  * @return FALSE in case of an irreparable error
   225  */
   226 U_CAPI UBool U_EXPORT2
   227 ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
   228                  UCMTable *moveTarget, UBool intersectBase); /* NOTE(review): intersectBase is declared UBool, yet the description above gives the value 2 a special meaning -- confirm that UBool can carry it */
   230 U_CAPI void U_EXPORT2
   231 ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode); /* NOTE(review): byUnicode presumably selects Unicode order vs. byte order (UCMTable.reverseMap) for the output -- confirm in ucm.c */
   233 U_CAPI void U_EXPORT2
   234 ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f); /* table is needed to reach the code points/bytes of m when they are not stored inline (see UCM_GET_CODE_POINTS / UCM_GET_BYTES) */
   237 U_CAPI void U_EXPORT2
   238 ucm_addState(UCMStates *states, const char *s); /* NOTE(review): s is presumably the text of one state-table line from the .ucm header -- confirm in ucmstate.c */
   240 U_CAPI void U_EXPORT2
   241 ucm_processStates(UCMStates *states, UBool ignoreSISOCheck);
   243 U_CAPI int32_t U_EXPORT2
   244 ucm_countChars(UCMStates *states,
   245                const uint8_t *bytes, int32_t length); /* NOTE(review): presumably the number of characters that the length bytes form according to states; error return value unknown -- confirm in ucmstate.c */
   248 U_CAPI int8_t U_EXPORT2
   249 ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps); /* NOTE(review): presumably returns the number of bytes parsed and advances *ps -- confirm in ucm.c */
   251 U_CAPI UBool U_EXPORT2
   252 ucm_parseMappingLine(UCMapping *m,
   253                      UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
   254                      uint8_t bytes[UCNV_EXT_MAX_BYTES],
   255                      const char *line); /* line is const here: parsing does not modify the caller's text */
   257 U_CAPI void U_EXPORT2
   258 ucm_addMapping(UCMTable *table,
   259                UCMapping *m,
   260                UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
   261                uint8_t bytes[UCNV_EXT_MAX_BYTES]); /* unlike ucm_addMappingAuto(), the caller names the target table directly */
   263 /* very makeconv-specific functions ----------------------------------------- */
   265 /* finalize and optimize states after the toUnicode mappings are processed; pUnicodeCodeUnits is a pointer to the caller's array pointer (NOTE(review): presumably so the array can be replaced -- confirm in ucmstate.c) */
   266 U_CAPI void U_EXPORT2
   267 ucm_optimizeStates(UCMStates *states,
   268                    uint16_t **pUnicodeCodeUnits,
   269                    _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
   270                    UBool verbose);
   272 /* moved here because it is used inside ucmstate.c; NOTE(review): presumably returns the index in toUFallbacks of the entry for offset, or a negative value if none -- confirm */
   273 U_CAPI int32_t U_EXPORT2
   274 ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
   275                  uint32_t offset);
   277 /* very rptp2ucm-specific functions ----------------------------------------- */
   279 /*
   280  * Input: Separate tables with mappings from Unicode (fromUTable) and to Unicode (toUTable),
   281  * plus subchar (subcharLength bytes) and subchar1 (0 if none).
   282  * All mappings must have flag 0.
   283  *
   284  * Output: fromUTable will contain the union of mappings with the correct
   285  * precision flags, and be sorted.
   286  */
   287 U_CAPI void U_EXPORT2
   288 ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable,
   289                 const uint8_t *subchar, int32_t subcharLength,
   290                 uint8_t subchar1);
   292 U_CAPI UBool U_EXPORT2
   293 ucm_separateMappings(UCMFile *ucm, UBool isSISO); /* NOTE(review): presumably splits the mappings of ucm between its base and ext tables -- confirm in ucm.c */
   295 U_CDECL_END
   297 #endif
   299 #endif

mercurial