michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2002-2010, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: propsvec.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2002feb22 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * Store bits (Unicode character properties) in bit set vectors. michael@0: */ michael@0: michael@0: #ifndef __UPROPSVEC_H__ michael@0: #define __UPROPSVEC_H__ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "utrie.h" michael@0: #include "utrie2.h" michael@0: michael@0: U_CDECL_BEGIN michael@0: michael@0: /** michael@0: * Unicode Properties Vectors associated with code point ranges. michael@0: * michael@0: * Rows of uint32_t integers in a contiguous array store michael@0: * the range limits and the properties vectors. michael@0: * michael@0: * Logically, each row has a certain number of uint32_t values, michael@0: * which is set via the upvec_open() "columns" parameter. michael@0: * michael@0: * Internally, two additional columns are stored. michael@0: * In each internal row, michael@0: * row[0] contains the start code point and michael@0: * row[1] contains the limit code point, michael@0: * which is the start of the next range. michael@0: * michael@0: * Initially, there is only one "normal" row for michael@0: * range [0..0x110000[ with values 0. michael@0: * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. michael@0: * michael@0: * It would be possible to store only one range boundary per row, michael@0: * but self-contained rows allow to later sort them by contents. michael@0: */ michael@0: struct UPropsVectors; michael@0: typedef struct UPropsVectors UPropsVectors; michael@0: michael@0: /* michael@0: * Special pseudo code points for storing the initialValue and the errorValue, michael@0: * which are used to initialize a UTrie2 or similar. michael@0: */ michael@0: #define UPVEC_FIRST_SPECIAL_CP 0x110000 michael@0: #define UPVEC_INITIAL_VALUE_CP 0x110000 michael@0: #define UPVEC_ERROR_VALUE_CP 0x110001 michael@0: #define UPVEC_MAX_CP 0x110001 michael@0: michael@0: /* michael@0: * Special pseudo code point used in upvec_compact() signalling the end of michael@0: * delivering special values and the beginning of delivering real ones. michael@0: * Stable value, unlike UPVEC_MAX_CP which might grow over time. michael@0: */ michael@0: #define UPVEC_START_REAL_VALUES_CP 0x200000 michael@0: michael@0: /* michael@0: * Open a UPropsVectors object. michael@0: * @param columns Number of value integers (uint32_t) per row. michael@0: */ michael@0: U_CAPI UPropsVectors * U_EXPORT2 michael@0: upvec_open(int32_t columns, UErrorCode *pErrorCode); michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: upvec_close(UPropsVectors *pv); michael@0: michael@0: /* michael@0: * In rows for code points [start..end], select the column, michael@0: * reset the mask bits and set the value bits (ANDed with the mask). michael@0: * michael@0: * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: upvec_setValue(UPropsVectors *pv, michael@0: UChar32 start, UChar32 end, michael@0: int32_t column, michael@0: uint32_t value, uint32_t mask, michael@0: UErrorCode *pErrorCode); michael@0: michael@0: /* michael@0: * Logically const but must not be used on the same pv concurrently! michael@0: * Always returns 0 if called after upvec_compact(). michael@0: */ michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); michael@0: michael@0: /* michael@0: * pRangeStart and pRangeEnd can be NULL. michael@0: * @return NULL if rowIndex out of range and for illegal arguments, michael@0: * or if called after upvec_compact() michael@0: */ michael@0: U_CAPI uint32_t * U_EXPORT2 michael@0: upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, michael@0: UChar32 *pRangeStart, UChar32 *pRangeEnd); michael@0: michael@0: /* michael@0: * Compact the vectors: michael@0: * - modify the memory michael@0: * - keep only unique vectors michael@0: * - store them contiguously from the beginning of the memory michael@0: * - for each (non-unique) row, call the handler function michael@0: * michael@0: * The handler's rowIndex is the index of the row in the compacted michael@0: * memory block. michael@0: * (Therefore, it starts at 0 increases in increments of the columns value.) michael@0: * michael@0: * In a first phase, only special values are delivered (each exactly once), michael@0: * with start==end both equalling a special pseudo code point. michael@0: * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP michael@0: * where rowIndex is the length of the compacted array, michael@0: * and the row is arbitrary (but not NULL). michael@0: * Then, in the second phase, the handler is called for each row of real values. michael@0: */ michael@0: typedef void U_CALLCONV michael@0: UPVecCompactHandler(void *context, michael@0: UChar32 start, UChar32 end, michael@0: int32_t rowIndex, uint32_t *row, int32_t columns, michael@0: UErrorCode *pErrorCode); michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); michael@0: michael@0: /* michael@0: * Get the vectors array after calling upvec_compact(). michael@0: * The caller must not modify nor release the returned array. michael@0: * Returns NULL if called before upvec_compact(). michael@0: */ michael@0: U_CAPI const uint32_t * U_EXPORT2 michael@0: upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); michael@0: michael@0: /* michael@0: * Get a clone of the vectors array after calling upvec_compact(). michael@0: * The caller owns the returned array and must uprv_free() it. michael@0: * Returns NULL if called before upvec_compact(). michael@0: */ michael@0: U_CAPI uint32_t * U_EXPORT2 michael@0: upvec_cloneArray(const UPropsVectors *pv, michael@0: int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); michael@0: michael@0: /* michael@0: * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted michael@0: * vectors array, and freeze the trie. michael@0: */ michael@0: U_CAPI UTrie2 * U_EXPORT2 michael@0: upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); michael@0: michael@0: struct UPVecToUTrie2Context { michael@0: UTrie2 *trie; michael@0: int32_t initialValue; michael@0: int32_t errorValue; michael@0: int32_t maxValue; michael@0: }; michael@0: typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; michael@0: michael@0: /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ michael@0: U_CAPI void U_CALLCONV michael@0: upvec_compactToUTrie2Handler(void *context, michael@0: UChar32 start, UChar32 end, michael@0: int32_t rowIndex, uint32_t *row, int32_t columns, michael@0: UErrorCode *pErrorCode); michael@0: michael@0: U_CDECL_END michael@0: michael@0: #endif