michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 2007, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * file name: bmpset.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2007jan29 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #ifndef __BMPSET_H__ michael@0: #define __BMPSET_H__ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uniset.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: /* michael@0: * Helper class for frozen UnicodeSets, implements contains() and span() michael@0: * optimized for BMP code points. Structured to be UTF-8-friendly. michael@0: * michael@0: * ASCII: Look up bytes. michael@0: * 2-byte characters: Bits organized vertically. michael@0: * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, michael@0: * with mixed for illegal ranges. michael@0: * Supplementary characters: Call contains() on the parent set. michael@0: */ michael@0: class BMPSet : public UMemory { michael@0: public: michael@0: BMPSet(const int32_t *parentList, int32_t parentListLength); michael@0: BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength); michael@0: virtual ~BMPSet(); michael@0: michael@0: virtual UBool contains(UChar32 c) const; michael@0: michael@0: /* michael@0: * Span the initial substring for which each character c has spanCondition==contains(c). michael@0: * It must be length>0 and spanCondition==0 or 1. michael@0: * @return The string pointer which limits the span. 
michael@0: */ michael@0: const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; michael@0: michael@0: /* michael@0: * Span the trailing substring for which each character c has spanCondition==contains(c). michael@0: * It must be length>0 and spanCondition==0 or 1. michael@0: * @return The start of the span. michael@0: */ michael@0: int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; michael@0: michael@0: private: michael@0: void initBits(); michael@0: void overrideIllegal(); michael@0: michael@0: /** michael@0: * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the michael@0: * binary search is restricted for finding code points in a certain range. michael@0: * michael@0: * For restricting the search for finding in the range start..end, michael@0: * pass in michael@0: * lo=findCodePoint(start) and michael@0: * hi=findCodePoint(end) michael@0: * with 0<=lo<=hi