intl/icu/source/common/brkeng.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /**
michael@0 2 ************************************************************************************
michael@0 3 * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
michael@0 4 * All Rights Reserved. *
michael@0 5 ************************************************************************************
michael@0 6 */
michael@0 7
michael@0 8 #ifndef BRKENG_H
michael@0 9 #define BRKENG_H
michael@0 10
michael@0 11 #include "unicode/utypes.h"
michael@0 12 #include "unicode/uobject.h"
michael@0 13 #include "unicode/utext.h"
michael@0 14 #include "unicode/uscript.h"
michael@0 15
michael@0 16 U_NAMESPACE_BEGIN
michael@0 17
michael@0 18 class UnicodeSet;
michael@0 19 class UStack;
michael@0 20 class DictionaryMatcher;
michael@0 21
michael@0 22 /*******************************************************************
michael@0 23 * LanguageBreakEngine
michael@0 24 */
michael@0 25
michael@0 26 /**
michael@0 27 * <p>LanguageBreakEngines implement language-specific knowledge for
michael@0 28 * finding text boundaries within a run of characters belonging to a
michael@0 29 * specific set. The boundaries will be of a specific kind, e.g. word,
michael@0 30 * line, etc.</p>
michael@0 31 *
michael@0 32 * <p>LanguageBreakEngines should normally be implemented so as to
michael@0 33 * be shared between threads without locking.</p>
michael@0 34 */
michael@0 35 class LanguageBreakEngine : public UMemory {
michael@0 36 public:
michael@0 37
michael@0 38 /**
michael@0 39 * <p>Default constructor.</p>
michael@0 40 *
michael@0 41 */
michael@0 42 LanguageBreakEngine();
michael@0 43
michael@0 44 /**
michael@0 45 * <p>Virtual destructor.</p>
michael@0 46 */
michael@0 47 virtual ~LanguageBreakEngine();
michael@0 48
michael@0 49 /**
michael@0 50 * <p>Indicate whether this engine handles a particular character for
michael@0 51 * a particular kind of break.</p>
michael@0 52 *
michael@0 53 * @param c A character which begins a run that the engine might handle
michael@0 54 * @param breakType The type of text break which the caller wants to determine
michael@0 55 * @return TRUE if this engine handles the particular character and break
michael@0 56 * type.
michael@0 57 */
michael@0 58 virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
michael@0 59
michael@0 60 /**
michael@0 61 * <p>Find any breaks within a run in the supplied text.</p>
michael@0 62 *
michael@0 63 * @param text A UText representing the text. The
michael@0 64 * iterator is left at the end of the run of characters which the engine
michael@0 65 * is capable of handling.
michael@0 66 * @param startPos The start of the run within the supplied text.
michael@0 67 * @param endPos The end of the run within the supplied text.
michael@0 68 * @param reverse Whether the caller is looking for breaks in a reverse
michael@0 69 * direction.
michael@0 70 * @param breakType The type of break desired, or -1.
michael@0 71 * @param foundBreaks A UStack, passed by reference, holding the breaks found, if any
michael@0 72 * @return The number of breaks found.
michael@0 73 */
michael@0 74 virtual int32_t findBreaks( UText *text,
michael@0 75 int32_t startPos,
michael@0 76 int32_t endPos,
michael@0 77 UBool reverse,
michael@0 78 int32_t breakType,
michael@0 79 UStack &foundBreaks ) const = 0;
michael@0 80
michael@0 81 };
michael@0 82
michael@0 83 /*******************************************************************
michael@0 84 * LanguageBreakFactory
michael@0 85 */
michael@0 86
michael@0 87 /**
michael@0 88 * <p>A LanguageBreakFactory finds and returns a LanguageBreakEngine
michael@0 89 * that can determine breaks for characters in a specific set, if
michael@0 90 * such an object can be found.</p>
michael@0 91 *
michael@0 92 * <p>If a LanguageBreakFactory is to be shared between threads,
michael@0 93 * appropriate synchronization must be used; there is none internal
michael@0 94 * to the factory.</p>
michael@0 95 *
michael@0 96 * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
michael@0 97 * normally be shared between threads without synchronization, unless
michael@0 98 * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
michael@0 99 *
michael@0 100 * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
michael@0 101 * it returns when it itself is deleted, unless the specific subclass of
michael@0 102 * LanguageBreakFactory indicates otherwise. Naturally, the factory should
michael@0 103 * not be deleted until the LanguageBreakEngines it has returned are no
michael@0 104 * longer needed.</p>
michael@0 105 */
michael@0 106 class LanguageBreakFactory : public UMemory {
michael@0 107 public:
michael@0 108
michael@0 109 /**
michael@0 110 * <p>Default constructor.</p>
michael@0 111 *
michael@0 112 */
michael@0 113 LanguageBreakFactory();
michael@0 114
michael@0 115 /**
michael@0 116 * <p>Virtual destructor.</p>
michael@0 117 */
michael@0 118 virtual ~LanguageBreakFactory();
michael@0 119
michael@0 120 /**
michael@0 121 * <p>Find and return a LanguageBreakEngine that can find the desired
michael@0 122 * kind of break for the set of characters to which the supplied
michael@0 123 * character belongs. It is up to the set of available engines to
michael@0 124 * determine what the sets of characters are.</p>
michael@0 125 *
michael@0 126 * @param c A character that begins a run for which a LanguageBreakEngine is
michael@0 127 * sought.
michael@0 128 * @param breakType The kind of text break for which a LanguageBreakEngine is
michael@0 129 * sought.
michael@0 130 * @return A LanguageBreakEngine with the desired characteristics, or NULL.
michael@0 131 */
michael@0 132 virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
michael@0 133
michael@0 134 };
michael@0 135
michael@0 136 /*******************************************************************
michael@0 137 * UnhandledEngine
michael@0 138 */
michael@0 139
michael@0 140 /**
michael@0 141 * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
michael@0 142 * handles characters that no other LanguageBreakEngine is available to
michael@0 143 * handle. It is told the character and the type of break; at its
michael@0 144 * discretion it may handle more than the specified character (e.g.,
michael@0 145 * the entire script to which that character belongs).</p>
michael@0 146 *
michael@0 147 * <p>UnhandledEngines may not be shared between threads without
michael@0 148 * external synchronization.</p>
michael@0 149 */
michael@0 150
michael@0 151 class UnhandledEngine : public LanguageBreakEngine {
michael@0 152 private:
michael@0 153
michael@0 154 /**
michael@0 155 * The sets of characters handled, for each break type
michael@0 156 * @internal
michael@0 157 */
michael@0 158
michael@0 159 UnicodeSet *fHandled[4];
michael@0 160
michael@0 161 public:
michael@0 162
michael@0 163 /**
michael@0 164 * <p>Constructor.</p>
michael@0 165 * @param status A UErrorCode passed by reference; presumably receives any construction error (confirm in the implementation).
michael@0 166 */
michael@0 167 UnhandledEngine(UErrorCode &status);
michael@0 168
michael@0 169 /**
michael@0 170 * <p>Virtual destructor.</p>
michael@0 171 */
michael@0 172 virtual ~UnhandledEngine();
michael@0 173
michael@0 174 /**
michael@0 175 * <p>Indicate whether this engine handles a particular character for
michael@0 176 * a particular kind of break.</p>
michael@0 177 *
michael@0 178 * @param c A character which begins a run that the engine might handle
michael@0 179 * @param breakType The type of text break which the caller wants to determine
michael@0 180 * @return TRUE if this engine handles the particular character and break
michael@0 181 * type.
michael@0 182 */
michael@0 183 virtual UBool handles(UChar32 c, int32_t breakType) const;
michael@0 184
michael@0 185 /**
michael@0 186 * <p>Find any breaks within a run in the supplied text.</p>
michael@0 187 *
michael@0 188 * @param text A UText representing the text. The
michael@0 189 * iterator is left at the end of the run of characters which the engine
michael@0 190 * is capable of handling.
michael@0 191 * @param startPos The start of the run within the supplied text.
michael@0 192 * @param endPos The end of the run within the supplied text.
michael@0 193 * @param reverse Whether the caller is looking for breaks in a reverse
michael@0 194 * direction.
michael@0 195 * @param breakType The type of break desired, or -1.
michael@0 196 * @param foundBreaks A UStack, passed by reference, holding the breaks found, if any
michael@0 197 * @return The number of breaks found.
michael@0 198 */
michael@0 199 virtual int32_t findBreaks( UText *text,
michael@0 200 int32_t startPos,
michael@0 201 int32_t endPos,
michael@0 202 UBool reverse,
michael@0 203 int32_t breakType,
michael@0 204 UStack &foundBreaks ) const;
michael@0 205
michael@0 206 /**
michael@0 207 * <p>Tell the engine to handle a particular character and break type.</p>
michael@0 208 *
michael@0 209 * @param c A character which the engine should handle
michael@0 210 * @param breakType The type of text break for which the engine should handle c
michael@0 211 */
michael@0 212 virtual void handleCharacter(UChar32 c, int32_t breakType);
michael@0 213
michael@0 214 };
michael@0 215
michael@0 216 /*******************************************************************
michael@0 217 * ICULanguageBreakFactory
michael@0 218 */
michael@0 219
michael@0 220 /**
michael@0 221 * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
michael@0 222 * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
michael@0 223 * data in the ICU data file.</p>
michael@0 224 */
michael@0 225 class ICULanguageBreakFactory : public LanguageBreakFactory {
michael@0 226 private:
michael@0 227
michael@0 228 /**
michael@0 229 * The stack of break engines created by this factory
michael@0 230 * @internal
michael@0 231 */
michael@0 232
michael@0 233 UStack *fEngines;
michael@0 234
michael@0 235 public:
michael@0 236
michael@0 237 /**
michael@0 238 * <p>Standard constructor.</p>
michael@0 239 * @param status A UErrorCode passed by reference; presumably receives any construction error (confirm in the implementation).
michael@0 240 */
michael@0 241 ICULanguageBreakFactory(UErrorCode &status);
michael@0 242
michael@0 243 /**
michael@0 244 * <p>Virtual destructor.</p>
michael@0 245 */
michael@0 246 virtual ~ICULanguageBreakFactory();
michael@0 247
michael@0 248 /**
michael@0 249 * <p>Find and return a LanguageBreakEngine that can find the desired
michael@0 250 * kind of break for the set of characters to which the supplied
michael@0 251 * character belongs. It is up to the set of available engines to
michael@0 252 * determine what the sets of characters are.</p>
michael@0 253 *
michael@0 254 * @param c A character that begins a run for which a LanguageBreakEngine is
michael@0 255 * sought.
michael@0 256 * @param breakType The kind of text break for which a LanguageBreakEngine is
michael@0 257 * sought.
michael@0 258 * @return A LanguageBreakEngine with the desired characteristics, or NULL.
michael@0 259 */
michael@0 260 virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
michael@0 261
michael@0 262 protected:
michael@0 263 /**
michael@0 264 * <p>Create a LanguageBreakEngine for the set of characters to which
michael@0 265 * the supplied character belongs, for the specified break type.</p>
michael@0 266 *
michael@0 267 * @param c A character that begins a run for which a LanguageBreakEngine is
michael@0 268 * sought.
michael@0 269 * @param breakType The kind of text break for which a LanguageBreakEngine is
michael@0 270 * sought.
michael@0 271 * @return A LanguageBreakEngine with the desired characteristics, or NULL.
michael@0 272 */
michael@0 273 virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
michael@0 274
michael@0 275 /**
michael@0 276 * <p>Create a DictionaryMatcher for the specified script and break type.</p>
michael@0 277 * @param script An ISO 15924 script code that identifies the dictionary to be
michael@0 278 * created.
michael@0 279 * @param breakType The kind of text break for which a dictionary is
michael@0 280 * sought.
michael@0 281 * @return A DictionaryMatcher with the desired characteristics, or NULL.
michael@0 282 */
michael@0 283 virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
michael@0 284 };
michael@0 285
michael@0 286 U_NAMESPACE_END
michael@0 287
michael@0 288 /* BRKENG_H */
michael@0 289 #endif

mercurial