intl/icu/source/common/brkeng.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /**
     2  ************************************************************************************
     3  * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
     4  * All Rights Reserved.                                                             *
     5  ************************************************************************************
     6  */
     8 #ifndef BRKENG_H
     9 #define BRKENG_H
    11 #include "unicode/utypes.h"
    12 #include "unicode/uobject.h"
    13 #include "unicode/utext.h"
    14 #include "unicode/uscript.h"
    16 U_NAMESPACE_BEGIN
    18 class UnicodeSet;
    19 class UStack;
    20 class DictionaryMatcher;
    22 /*******************************************************************
    23  * LanguageBreakEngine
    24  */
    26 /**
    27  * <p>LanguageBreakEngines implement language-specific knowledge for
    28  * finding text boundaries within a run of characters belonging to a
    29  * specific set. The boundaries will be of a specific kind, e.g. word,
    30  * line, etc.</p>
    31  *
    32  * <p>LanguageBreakEngines should normally be implemented so as to
    33  * be shared between threads without locking.</p>
    34  */
    35 class LanguageBreakEngine : public UMemory {
    36  public:
    38   /**
    39    * <p>Default constructor.</p>
    40    *
    41    */
    42   LanguageBreakEngine();
    44   /**
    45    * <p>Virtual destructor.</p>
    46    */
    47   virtual ~LanguageBreakEngine();
    49  /**
    50   * <p>Indicate whether this engine handles a particular character for
    51   * a particular kind of break. Pure virtual: subclasses must implement it.</p>
    52   *
    53   * @param c A character which begins a run that the engine might handle
    54   * @param breakType The type of text break which the caller wants to determine
    55   * @return TRUE if this engine handles the particular character and break
    56   * type.
    57   */
    58   virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
    60  /**
    61   * <p>Find any breaks within a run in the supplied text. Pure virtual.</p>
    62   *
    63   * @param text A UText representing the text. The
    64   * iterator is left at the end of the run of characters which the engine
    65   * is capable of handling.
    66   * @param startPos The start of the run within the supplied text.
    67   * @param endPos The end of the run within the supplied text.
    68   * @param reverse Whether the caller is looking for breaks in a reverse
    69   * direction.
    70   * @param breakType The type of break desired, or -1.
    71   * @param foundBreaks A UStack, passed by reference, for the breaks found, if any
    72   * @return The number of breaks found.
    73   */
    74   virtual int32_t findBreaks( UText *text,
    75                               int32_t startPos,
    76                               int32_t endPos,
    77                               UBool reverse,
    78                               int32_t breakType,
    79                               UStack &foundBreaks ) const = 0;
    81 };
    83 /*******************************************************************
    84  * LanguageBreakFactory
    85  */
    87 /**
    88  * <p>A LanguageBreakFactory finds and returns a LanguageBreakEngine
    89  * that can determine breaks for characters in a specific set, if
    90  * such an object can be found.</p>
    91  *
    92  * <p>If a LanguageBreakFactory is to be shared between threads,
    93  * appropriate synchronization must be used; there is none internal
    94  * to the factory.</p>
    95  *
    96  * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
    97  * normally be shared between threads without synchronization, unless
    98  * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
    99  *
   100  * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
   101  * it returns when it itself is deleted, unless the specific subclass of
   102  * LanguageBreakFactory indicates otherwise. Naturally, the factory should
   103  * not be deleted until the LanguageBreakEngines it has returned are no
   104  * longer needed.</p>
   105  */
   106 class LanguageBreakFactory : public UMemory {
   107  public:
   109   /**
   110    * <p>Default constructor.</p>
   111    *
   112    */
   113   LanguageBreakFactory();
   115   /**
   116    * <p>Virtual destructor.</p>
   117    */
   118   virtual ~LanguageBreakFactory();
   120  /**
   121   * <p>Find and return a LanguageBreakEngine that can find the desired
   122   * kind of break for the set of characters to which the supplied
   123   * character belongs. It is up to the set of available engines to
   124   * determine what the sets of characters are. Pure virtual.</p>
   125   *
   126   * @param c A character that begins a run for which a LanguageBreakEngine is
   127   * sought.
   128   * @param breakType The kind of text break for which a LanguageBreakEngine is
   129   * sought.
   130   * @return A LanguageBreakEngine with the desired characteristics, or 0 if none can be found.
   131   */
   132   virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
   134 };
   136 /*******************************************************************
   137  * UnhandledEngine
   138  */
   140 /**
   141  * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
   142  * handles characters that no other LanguageBreakEngine is available to
   143  * handle. It is told the character and the type of break; at its
   144  * discretion it may handle more than the specified character (e.g.,
   145  * the entire script to which that character belongs).</p>
   146  *
   147  * <p>UnhandledEngines may not be shared between threads without
   148  * external synchronization.</p>
   149  */
   151 class UnhandledEngine : public LanguageBreakEngine {
   152  private:
   154     /**
   155      * The sets of characters handled, for each break type (the array has 4 slots)
   156      * @internal
   157      */
   159   UnicodeSet    *fHandled[4];
   161  public:
   163   /**
   164    * <p>Constructor.</p>
   165    * @param status A UErrorCode passed by reference (presumably reports failure - TODO confirm)
   166    */
   167   UnhandledEngine(UErrorCode &status);
   169   /**
   170    * <p>Virtual destructor.</p>
   171    */
   172   virtual ~UnhandledEngine();
   174  /**
   175   * <p>Indicate whether this engine handles a particular character for
   176   * a particular kind of break.</p>
   177   *
   178   * @param c A character which begins a run that the engine might handle
   179   * @param breakType The type of text break which the caller wants to determine
   180   * @return TRUE if this engine handles the particular character and break
   181   * type.
   182   */
   183   virtual UBool handles(UChar32 c, int32_t breakType) const;
   185  /**
   186   * <p>Find any breaks within a run in the supplied text.</p>
   187   *
   188   * @param text A UText representing the text. The
   189   * iterator is left at the end of the run of characters which the engine
   190   * is capable of handling.
   191   * @param startPos The start of the run within the supplied text.
   192   * @param endPos The end of the run within the supplied text.
   193   * @param reverse Whether the caller is looking for breaks in a reverse
   194   * direction.
   195   * @param breakType The type of break desired, or -1.
   196   * @param foundBreaks A UStack, passed by reference, for the breaks found, if any
   197   * @return The number of breaks found.
   198   */
   199   virtual int32_t findBreaks( UText *text,
   200                               int32_t startPos,
   201                               int32_t endPos,
   202                               UBool reverse,
   203                               int32_t breakType,
   204                               UStack &foundBreaks ) const;
   206  /**
   207   * <p>Tell the engine to handle a particular character and break type.</p>
   208   *
   209   * @param c A character which the engine should handle
   210   * @param breakType The type of text break for which the engine should handle c
   211   */
   212   virtual void handleCharacter(UChar32 c, int32_t breakType);
   214 };
   216 /*******************************************************************
   217  * ICULanguageBreakFactory
   218  */
   220 /**
   221  * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
   222  * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
   223  * data in the ICU data file.</p>
   224  */
   225 class ICULanguageBreakFactory : public LanguageBreakFactory {
   226  private:
   228     /**
   229      * The stack of break engines created by this factory
   230      * @internal
   231      */
   233   UStack    *fEngines;
   235  public:
   237   /**
   238    * <p>Standard constructor.</p>
   239    * @param status A UErrorCode passed by reference (presumably reports failure - TODO confirm)
   240    */
   241   ICULanguageBreakFactory(UErrorCode &status);
   243   /**
   244    * <p>Virtual destructor.</p>
   245    */
   246   virtual ~ICULanguageBreakFactory();
   248  /**
   249   * <p>Find and return a LanguageBreakEngine that can find the desired
   250   * kind of break for the set of characters to which the supplied
   251   * character belongs. It is up to the set of available engines to
   252   * determine what the sets of characters are.</p>
   253   *
   254   * @param c A character that begins a run for which a LanguageBreakEngine is
   255   * sought.
   256   * @param breakType The kind of text break for which a LanguageBreakEngine is
   257   * sought.
   258   * @return A LanguageBreakEngine with the desired characteristics, or 0 (NULL).
   259   */
   260   virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
   262 protected:
   263  /**
   264   * <p>Create a LanguageBreakEngine for the set of characters to which
   265   * the supplied character belongs, for the specified break type.</p>
   266   *
   267   * @param c A character that begins a run for which a LanguageBreakEngine is
   268   * sought.
   269   * @param breakType The kind of text break for which a LanguageBreakEngine is
   270   * sought.
   271   * @return A LanguageBreakEngine with the desired characteristics, or 0 (NULL).
   272   */
   273   virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
   275   /**
   276    * <p>Create a DictionaryMatcher for the specified script and break type.</p>
   277    * @param script An ISO 15924 script code that identifies the dictionary to be
   278    * created.
   279    * @param breakType The kind of text break for which a dictionary is
   280    * sought.
   281    * @return A DictionaryMatcher with the desired characteristics, or NULL.
   282    */
   283   virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
   284 };
   286 U_NAMESPACE_END
   288     /* BRKENG_H */
   289 #endif

mercurial