michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2000-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: ucol_elm.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created 02/22/2001 michael@0: * created by: Vladimir Weinstein michael@0: * michael@0: * This program reads the Franctional UCA table and generates michael@0: * internal format for UCA table as well as inverse UCA table. michael@0: * It then writes binary files containing the data: ucadata.dat michael@0: * & invuca.dat michael@0: */ michael@0: #ifndef UCOL_UCAELEMS_H michael@0: #define UCOL_UCAELEMS_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uniset.h" michael@0: #include "ucol_tok.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "ucol_imp.h" michael@0: michael@0: #ifdef UCOL_DEBUG michael@0: #include "cmemory.h" michael@0: #include michael@0: #endif michael@0: michael@0: U_CDECL_BEGIN michael@0: michael@0: /* This is the maximum trie capacity for the mapping trie. michael@0: Due to current limitations in genuca and the design of UTrie, michael@0: this number can't be more than 256K. michael@0: As of Unicode 5, it currently could safely go to 128K without michael@0: a problem. Normally, less than 32K are tailored. michael@0: */ michael@0: #define UCOL_ELM_TRIE_CAPACITY 0x40000 michael@0: michael@0: /* This is the maxmun capacity for temparay combining class michael@0: * table. The table will be compacted after scanning all the michael@0: * Unicode codepoints. michael@0: */ michael@0: #define UCOL_MAX_CM_TAB 0x10000 michael@0: michael@0: michael@0: typedef struct { michael@0: uint32_t *CEs; michael@0: int32_t position; michael@0: int32_t size; michael@0: } ExpansionTable; michael@0: michael@0: typedef struct { michael@0: UChar prefixChars[128]; michael@0: UChar *prefix; michael@0: uint32_t prefixSize; michael@0: UChar uchars[128]; michael@0: UChar *cPoints; michael@0: uint32_t cSize; /* Number of characters in sequence - for contraction */ michael@0: uint32_t noOfCEs; /* Number of collation elements */ michael@0: uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */ michael@0: uint32_t mapCE; /* This is the value element maps in original table */ michael@0: uint32_t sizePrim[128]; michael@0: uint32_t sizeSec[128]; michael@0: uint32_t sizeTer[128]; michael@0: UBool caseBit; michael@0: UBool isThai; michael@0: } UCAElements; michael@0: michael@0: typedef struct { michael@0: uint32_t *endExpansionCE; michael@0: UBool *isV; michael@0: int32_t position; michael@0: int32_t size; michael@0: uint8_t maxLSize; michael@0: uint8_t maxVSize; michael@0: uint8_t maxTSize; michael@0: } MaxJamoExpansionTable; michael@0: michael@0: typedef struct { michael@0: uint32_t *endExpansionCE; michael@0: uint8_t *expansionCESize; michael@0: int32_t position; michael@0: int32_t size; michael@0: } MaxExpansionTable; michael@0: michael@0: typedef struct { michael@0: uint16_t index[256]; /* index of cPoints by combining class 0-255. */ michael@0: UChar *cPoints; /* code point array of all combining marks */ michael@0: uint32_t size; /* total number of combining marks */ michael@0: } CombinClassTable; michael@0: michael@0: typedef struct { michael@0: /*CompactEIntArray *mapping; */ michael@0: UNewTrie *mapping; michael@0: ExpansionTable *expansions; michael@0: struct CntTable *contractions; michael@0: UCATableHeader *image; michael@0: UColOptionSet *options; michael@0: MaxExpansionTable *maxExpansions; michael@0: MaxJamoExpansionTable *maxJamoExpansions; michael@0: uint8_t *unsafeCP; michael@0: uint8_t *contrEndCP; michael@0: const UCollator *UCA; michael@0: UHashtable *prefixLookup; michael@0: CombinClassTable *cmLookup; /* combining class lookup for tailoring. */ michael@0: } tempUCATable; michael@0: michael@0: typedef struct { michael@0: UChar cp; michael@0: uint16_t cClass; // combining class michael@0: }CompData; michael@0: michael@0: typedef struct { michael@0: CompData *precomp; michael@0: int32_t precompLen; michael@0: UChar *decomp; michael@0: int32_t decompLen; michael@0: UChar *comp; michael@0: int32_t compLen; michael@0: uint16_t curClass; michael@0: uint16_t tailoringCM; michael@0: int32_t cmPos; michael@0: }tempTailorContext; michael@0: michael@0: U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status); michael@0: U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t); michael@0: U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status); michael@0: U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status); michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, michael@0: icu::UnicodeSet *closed, UErrorCode *status); michael@0: michael@0: U_CDECL_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */ michael@0: michael@0: #endif