intl/icu/source/common/propsvec.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/propsvec.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,176 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2002-2010, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  propsvec.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2002feb22
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   Store bits (Unicode character properties) in bit set vectors.
    1.20 +*/
    1.21 +
    1.22 +#ifndef __UPROPSVEC_H__
    1.23 +#define __UPROPSVEC_H__
    1.24 +
    1.25 +#include "unicode/utypes.h"
    1.26 +#include "utrie.h"
    1.27 +#include "utrie2.h"
    1.28 +
    1.29 +U_CDECL_BEGIN
    1.30 +
    1.31 +/**
    1.32 + * Unicode properties vectors associated with code point ranges.
    1.33 + *
    1.34 + * Rows of uint32_t integers in one contiguous array store
    1.35 + * both the range limits and the properties vectors.
    1.36 + *
    1.37 + * Logically, each row holds a fixed number of uint32_t values;
    1.38 + * that number is the "columns" argument passed to upvec_open().
    1.39 + *
    1.40 + * Internally, two additional columns are stored. In each internal row,
    1.41 + * row[0] contains the start code point (inclusive) and
    1.42 + * row[1] contains the limit code point (exclusive),
    1.43 + * which is the start of the next range.
    1.44 + *
    1.45 + * Initially, there is only one "normal" row, covering the
    1.46 + * half-open range [0..0x110000[ with all values 0.
    1.47 + * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
    1.48 + *
    1.49 + * It would be possible to store only one range boundary per row, but
    1.50 + * self-contained rows can later be sorted by their contents.
    1.51 + * The struct is only forward-declared in this header.
    1.52 + */
    1.53 +struct UPropsVectors;
    1.54 +typedef struct UPropsVectors UPropsVectors;
    1.55 +
    1.56 +/*
    1.57 + * Special pseudo code points, just past the real code point range, under which
    1.58 + * the initialValue and the errorValue for initializing a UTrie2 or similar are stored.
    1.59 + */
    1.60 +#define UPVEC_FIRST_SPECIAL_CP 0x110000
    1.61 +#define UPVEC_INITIAL_VALUE_CP 0x110000
    1.62 +#define UPVEC_ERROR_VALUE_CP 0x110001
    1.63 +#define UPVEC_MAX_CP 0x110001
    1.64 +
    1.65 +/*
    1.66 + * Special pseudo code point that upvec_compact() passes to its handler (as both
    1.67 + * start and end) to signal that all special values have been delivered and that
    1.68 + * real values follow. Stable value, unlike UPVEC_MAX_CP which might grow over time.
    1.69 + */
    1.70 +#define UPVEC_START_REAL_VALUES_CP 0x200000
    1.71 +
    1.72 +/* Open a UPropsVectors object.
    1.73 + * @param columns    Number of value integers (uint32_t) per row.
    1.74 + * @param pErrorCode ICU error code pointer; presumably set on failure (TODO confirm in propsvec.c).
    1.75 + * @return The opened object; presumably NULL on failure (TODO confirm in propsvec.c). */
    1.76 +U_CAPI UPropsVectors * U_EXPORT2
    1.77 +upvec_open(int32_t columns, UErrorCode *pErrorCode);
    1.78 +/* Close a UPropsVectors object. NOTE(review): presumably releases pv and its memory -- confirm in propsvec.c. */
    1.79 +U_CAPI void U_EXPORT2
    1.80 +upvec_close(UPropsVectors *pv);
    1.81 +
    1.82 +/*
    1.83 + * In the rows for code points [start..end] (both inclusive), select the given
    1.84 + * column, clear the bits in mask and then set the bits of (value AND mask).
    1.85 + * Only bits covered by mask are affected by this call.
    1.86 + * Sets *pErrorCode to U_NO_WRITE_PERMISSION if called after upvec_compact().
    1.87 + */
    1.88 +U_CAPI void U_EXPORT2
    1.89 +upvec_setValue(UPropsVectors *pv,
    1.90 +               UChar32 start, UChar32 end,
    1.91 +               int32_t column,
    1.92 +               uint32_t value, uint32_t mask,
    1.93 +               UErrorCode *pErrorCode);
    1.94 +
    1.95 +/* NOTE(review): presumably returns the value stored for code point c in the given column -- confirm in propsvec.c.
    1.96 + * Logically const, but must not be called concurrently on the same pv!
    1.97 + * Always returns 0 if called after upvec_compact().
    1.98 + */
    1.99 +U_CAPI uint32_t U_EXPORT2
   1.100 +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
   1.101 +
   1.102 +/* Get a row by its index. pRangeStart and pRangeEnd can be NULL;
   1.103 + * NOTE(review): otherwise they presumably receive the row's code point range -- confirm in propsvec.c.
   1.104 + * @return NULL if rowIndex is out of range, for illegal arguments,
   1.105 + *         or if called after upvec_compact().
   1.106 + */
   1.107 +U_CAPI uint32_t * U_EXPORT2
   1.108 +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
   1.109 +             UChar32 *pRangeStart, UChar32 *pRangeEnd);
   1.110 +
   1.111 +/*
   1.112 + * Handler callback type for upvec_compact(), which compacts the vectors:
   1.113 + * - modify the memory
   1.114 + * - keep only unique vectors
   1.115 + * - store them contiguously from the beginning of the memory
   1.116 + * - for each (non-unique) row, call the handler function
   1.117 + *
   1.118 + * The handler's rowIndex is the index of the row in the compacted memory block.
   1.119 + * (Therefore, it starts at 0 and increases in increments of the columns value.)
   1.120 + * NOTE(review): context is presumably the pointer given to upvec_compact() -- confirm.
   1.121 + *
   1.122 + * In a first phase, only special values are delivered (each exactly once),
   1.123 + * with start==end both equalling a special pseudo code point.
   1.124 + * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP,
   1.125 + * where rowIndex is the length of the compacted array
   1.126 + * and the row is arbitrary (but not NULL).
   1.127 + * Then, in the second phase, the handler is called for each row of real values.
   1.128 + */
   1.129 +typedef void U_CALLCONV
   1.130 +UPVecCompactHandler(void *context,
   1.131 +                    UChar32 start, UChar32 end,
   1.132 +                    int32_t rowIndex, uint32_t *row, int32_t columns,
   1.133 +                    UErrorCode *pErrorCode);
   1.134 +/* Compact the vectors of pv; see the comment before UPVecCompactHandler for the procedure and the handler calls. Afterwards upvec_setValue() sets U_NO_WRITE_PERMISSION, upvec_getValue() returns 0 and upvec_getRow() returns NULL. */
   1.135 +U_CAPI void U_EXPORT2
   1.136 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
   1.137 +
   1.138 +/* Get the vectors array; only available after calling upvec_compact().
   1.139 + * NOTE(review): pRows and pColumns presumably receive the array's dimensions -- confirm in propsvec.c.
   1.140 + * The caller must neither modify nor release the returned array.
   1.141 + * Returns NULL if called before upvec_compact().
   1.142 + */
   1.143 +U_CAPI const uint32_t * U_EXPORT2
   1.144 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
   1.145 +
   1.146 +/* Get a clone of the vectors array; only available after calling upvec_compact().
   1.147 + * NOTE(review): pRows and pColumns presumably receive the array's dimensions -- confirm in propsvec.c.
   1.148 + * The caller owns the returned array and must uprv_free() it.
   1.149 + * Returns NULL if called before upvec_compact().
   1.150 + */
   1.151 +U_CAPI uint32_t * U_EXPORT2
   1.152 +upvec_cloneArray(const UPropsVectors *pv,
   1.153 +                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
   1.154 +
   1.155 +/*
   1.156 + * Call upvec_compact() on pv, create a 16-bit UTrie2 with indexes into the
   1.157 + * compacted vectors array, freeze the trie, and return it.
   1.158 + */
   1.159 +U_CAPI UTrie2 * U_EXPORT2
   1.160 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
   1.161 +/* Context object for upvec_compactToUTrie2Handler(), which creates the trie and stores the rowIndex values. */
   1.162 +struct UPVecToUTrie2Context {
   1.163 +    UTrie2 *trie; /* the trie created by upvec_compactToUTrie2Handler() */
   1.164 +    int32_t initialValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_INITIAL_VALUE_CP -- confirm in propsvec.c */
   1.165 +    int32_t errorValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_ERROR_VALUE_CP -- confirm in propsvec.c */
   1.166 +    int32_t maxValue; /* NOTE(review): presumably the largest rowIndex stored -- confirm in propsvec.c */
   1.167 +};
   1.168 +typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
   1.169 +
   1.170 +/* Handler with the UPVecCompactHandler signature: context must point to a UPVecToUTrie2Context; creates the trie and stores the rowIndex values. */
   1.171 +U_CAPI void U_CALLCONV
   1.172 +upvec_compactToUTrie2Handler(void *context,
   1.173 +                             UChar32 start, UChar32 end,
   1.174 +                             int32_t rowIndex, uint32_t *row, int32_t columns,
   1.175 +                             UErrorCode *pErrorCode);
   1.176 +
   1.177 +U_CDECL_END
   1.178 +
   1.179 +#endif

mercurial