intl/icu/source/common/propsvec.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2002-2010, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: propsvec.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2002feb22
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * Store bits (Unicode character properties) in bit set vectors.
michael@0 17 */
michael@0 18
michael@0 19 #ifndef __UPROPSVEC_H__
michael@0 20 #define __UPROPSVEC_H__
michael@0 21
michael@0 22 #include "unicode/utypes.h"
michael@0 23 #include "utrie.h"
michael@0 24 #include "utrie2.h"
michael@0 25
michael@0 26 U_CDECL_BEGIN
michael@0 27
michael@0 28 /**
michael@0 29 * Unicode Properties Vectors associated with code point ranges.
michael@0 30 *
michael@0 31 * Rows of uint32_t integers in a contiguous array store
michael@0 32 * the range limits and the properties vectors.
michael@0 33 *
michael@0 34 * Logically, each row has a certain number of uint32_t values,
michael@0 35 * which is set via the upvec_open() "columns" parameter.
michael@0 36 *
michael@0 37 * Internally, two additional columns are stored.
michael@0 38 * In each internal row,
michael@0 39 * row[0] contains the start code point and
michael@0 40 * row[1] contains the limit code point,
michael@0 41 * which is the start of the next range.
michael@0 42 *
michael@0 43 * Initially, there is only one "normal" row for
michael@0 44 * range [0..0x110000[ with values 0.
michael@0 45 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
michael@0 46 *
michael@0 47 * It would be possible to store only one range boundary per row,
michael@0 48 * but self-contained rows allow sorting them later by contents.
michael@0 49 */
michael@0 50 struct UPropsVectors;
michael@0 51 typedef struct UPropsVectors UPropsVectors;
michael@0 52
michael@0 53 /*
michael@0 54 * Special pseudo code points for storing the initialValue and the errorValue,
michael@0 55 * which are used to initialize a UTrie2 or similar.
michael@0 56 */
michael@0 57 #define UPVEC_FIRST_SPECIAL_CP 0x110000
michael@0 58 #define UPVEC_INITIAL_VALUE_CP 0x110000
michael@0 59 #define UPVEC_ERROR_VALUE_CP 0x110001
michael@0 60 #define UPVEC_MAX_CP 0x110001
michael@0 61
michael@0 62 /*
michael@0 63 * Special pseudo code point used in upvec_compact() signalling the end of
michael@0 64 * delivering special values and the beginning of delivering real ones.
michael@0 65 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
michael@0 66 */
michael@0 67 #define UPVEC_START_REAL_VALUES_CP 0x200000
michael@0 68
michael@0 69 /*
michael@0 70 * Open a UPropsVectors object.
michael@0 71 * @param columns Number of value integers (uint32_t) per row.
michael@0 72 */
michael@0 73 U_CAPI UPropsVectors * U_EXPORT2
michael@0 74 upvec_open(int32_t columns, UErrorCode *pErrorCode);
michael@0 75
michael@0 76 U_CAPI void U_EXPORT2
michael@0 77 upvec_close(UPropsVectors *pv);
michael@0 78
michael@0 79 /*
michael@0 80 * In rows for code points [start..end], select the column,
michael@0 81 * reset the mask bits and set the value bits (ANDed with the mask).
michael@0 82 *
michael@0 83 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
michael@0 84 */
michael@0 85 U_CAPI void U_EXPORT2
michael@0 86 upvec_setValue(UPropsVectors *pv,
michael@0 87 UChar32 start, UChar32 end,
michael@0 88 int32_t column,
michael@0 89 uint32_t value, uint32_t mask,
michael@0 90 UErrorCode *pErrorCode);
michael@0 91
michael@0 92 /*
michael@0 93 * Logically const but must not be used on the same pv concurrently!
michael@0 94 * Always returns 0 if called after upvec_compact().
michael@0 95 */
michael@0 96 U_CAPI uint32_t U_EXPORT2
michael@0 97 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
michael@0 98
michael@0 99 /*
michael@0 100 * pRangeStart and pRangeEnd can be NULL.
michael@0 101 * @return NULL if rowIndex out of range and for illegal arguments,
michael@0 102 * or if called after upvec_compact()
michael@0 103 */
michael@0 104 U_CAPI uint32_t * U_EXPORT2
michael@0 105 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
michael@0 106 UChar32 *pRangeStart, UChar32 *pRangeEnd);
michael@0 107
michael@0 108 /*
michael@0 109 * Compact the vectors:
michael@0 110 * - modify the memory
michael@0 111 * - keep only unique vectors
michael@0 112 * - store them contiguously from the beginning of the memory
michael@0 113 * - for each (non-unique) row, call the handler function
michael@0 114 *
michael@0 115 * The handler's rowIndex is the index of the row in the compacted
michael@0 116 * memory block.
michael@0 117 * (Therefore, it starts at 0 and increases in increments of the columns value.)
michael@0 118 *
michael@0 119 * In a first phase, only special values are delivered (each exactly once),
michael@0 120 * with start==end both equalling a special pseudo code point.
michael@0 121 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
michael@0 122 * where rowIndex is the length of the compacted array,
michael@0 123 * and the row is arbitrary (but not NULL).
michael@0 124 * Then, in the second phase, the handler is called for each row of real values.
michael@0 125 */
michael@0 126 typedef void U_CALLCONV
michael@0 127 UPVecCompactHandler(void *context,
michael@0 128 UChar32 start, UChar32 end,
michael@0 129 int32_t rowIndex, uint32_t *row, int32_t columns,
michael@0 130 UErrorCode *pErrorCode);
michael@0 131
michael@0 132 U_CAPI void U_EXPORT2
michael@0 133 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
michael@0 134
michael@0 135 /*
michael@0 136 * Get the vectors array after calling upvec_compact().
michael@0 137 * The caller must neither modify nor release the returned array.
michael@0 138 * Returns NULL if called before upvec_compact().
michael@0 139 */
michael@0 140 U_CAPI const uint32_t * U_EXPORT2
michael@0 141 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
michael@0 142
michael@0 143 /*
michael@0 144 * Get a clone of the vectors array after calling upvec_compact().
michael@0 145 * The caller owns the returned array and must uprv_free() it.
michael@0 146 * Returns NULL if called before upvec_compact().
michael@0 147 */
michael@0 148 U_CAPI uint32_t * U_EXPORT2
michael@0 149 upvec_cloneArray(const UPropsVectors *pv,
michael@0 150 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
michael@0 151
michael@0 152 /*
michael@0 153 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
michael@0 154 * vectors array, and freeze the trie.
michael@0 155 */
michael@0 156 U_CAPI UTrie2 * U_EXPORT2
michael@0 157 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
michael@0 158
michael@0 159 struct UPVecToUTrie2Context { /* context object passed to upvec_compactToUTrie2Handler() */
michael@0 160 UTrie2 *trie; /* the trie that the handler creates and fills with rowIndex values */
michael@0 161 int32_t initialValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_INITIAL_VALUE_CP - confirm in propsvec.c */
michael@0 162 int32_t errorValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_ERROR_VALUE_CP - confirm in propsvec.c */
michael@0 163 int32_t maxValue; /* NOTE(review): meaning is not shown in this header - confirm in propsvec.c */
michael@0 164 };
michael@0 165 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
michael@0 166
michael@0 167 /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
michael@0 168 U_CAPI void U_CALLCONV
michael@0 169 upvec_compactToUTrie2Handler(void *context,
michael@0 170 UChar32 start, UChar32 end,
michael@0 171 int32_t rowIndex, uint32_t *row, int32_t columns,
michael@0 172 UErrorCode *pErrorCode);
michael@0 173
michael@0 174 U_CDECL_END
michael@0 175
michael@0 176 #endif

mercurial