content/base/public/nsLineBreaker.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/base/public/nsLineBreaker.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,224 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#ifndef NSLINEBREAKER_H_
    1.10 +#define NSLINEBREAKER_H_
    1.11 +
    1.12 +#include "nsString.h"
    1.13 +#include "nsTArray.h"
    1.14 +#include "nsILineBreaker.h"
    1.15 +
    1.16 +class nsIAtom;
    1.17 +class nsHyphenator;
    1.18 +
    1.19 +/**
    1.20 + * A receiver of line break data.
    1.21 + */
    1.22 +class nsILineBreakSink {
    1.23 +public:
    1.24 +  /**
    1.25 +   * Sets the break data for a substring of the associated text chunk. One or
    1.26 +   * more calls are made; together the substrings cover the entire chunk, and
    1.27 +   * they may overlap (a character's break-before state may be set repeatedly).
    1.28 +   * @param aStart/aLength presumably the substring's offset/length - confirm.
    1.29 +   * @param aBreakBefore the break-before states for the characters in the
    1.30 +   * substring, as enum values from gfxTextRun::CompressedGlyph:
    1.31 +   *    FLAG_BREAK_TYPE_NONE     - no linebreak is allowed here
    1.32 +   *    FLAG_BREAK_TYPE_NORMAL   - a normal (whitespace) linebreak
    1.33 +   *    FLAG_BREAK_TYPE_HYPHEN   - a hyphenation point
    1.34 +   */
    1.35 +  virtual void SetBreaks(uint32_t aStart, uint32_t aLength, uint8_t* aBreakBefore) = 0;
    1.36 +  
    1.37 +  /**
    1.38 +   * Indicates which characters should be capitalized. Only called if
    1.39 +   * nsLineBreaker::BREAK_NEED_CAPITALIZATION was requested for the chunk.
    1.40 +   */
    1.41 +  virtual void SetCapitalization(uint32_t aStart, uint32_t aLength, bool* aCapitalize) = 0;
    1.42 +};
    1.43 +
    1.44 +/**
    1.45 + * A line-breaking state machine. You feed text into it via AppendText calls
    1.46 + * and it computes the possible line breaks. Because break decisions can
    1.47 + * require a lot of context, the breaks for a piece of text are sometimes not
    1.48 + * known until later text has been seen (or all text ends). So breaks are
    1.49 + * returned via a call to SetBreaks on the nsILineBreakSink object passed
    1.50 + * with each text chunk, which might happen during the corresponding AppendText
    1.51 + * call, or might happen during a later AppendText call or even a Reset()
    1.52 + * call.
    1.53 + * 
    1.54 + * The linebreak results MUST NOT depend on how the text is broken up
    1.55 + * into AppendText calls.
    1.56 + * 
    1.57 + * The current strategy is that we break the overall text into
    1.58 + * whitespace-delimited "words". Then those words are passed to the nsILineBreaker
    1.59 + * service for deeper analysis if they contain a "complex" character as described
    1.60 + * below.
    1.61 + * 
    1.62 + * This class also handles detection of which characters should be capitalized
    1.63 + * for text-transform:capitalize. This is a good place to handle that because
    1.64 + * we have all the context we need.
    1.65 + */
    1.66 +class nsLineBreaker {
    1.67 +public:
    1.68 +  nsLineBreaker();
    1.69 +  ~nsLineBreaker();
    1.70 +  // The "breakable whitespace" test; simply forwards to NS_IsSpace.
    1.71 +  static inline bool IsSpace(char16_t u) { return NS_IsSpace(u); }
    1.72 +  // True for everything except ASCII digits, ASCII letters and U+000A.
    1.73 +  static inline bool IsComplexASCIIChar(char16_t u)
    1.74 +  {
    1.75 +    return !((0x0030 <= u && u <= 0x0039) ||
    1.76 +             (0x0041 <= u && u <= 0x005A) ||
    1.77 +             (0x0061 <= u && u <= 0x007A) ||
    1.78 +             (0x000a == u));
    1.79 +  }
    1.80 +  // NOTE(review): IsComplexASCIIChar(u) is true for every u > 0x7A, so the ranges below add nothing.
    1.81 +  static inline bool IsComplexChar(char16_t u)
    1.82 +  {
    1.83 +    return IsComplexASCIIChar(u) ||
    1.84 +           NS_NeedsPlatformNativeHandling(u) ||
    1.85 +           (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
    1.86 +           (0x2000 <= u && u <= 0x21ff) || // Punctuation and Symbols
    1.87 +           (0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks
    1.88 +           (0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Ideographs
    1.89 +           (0xff00 <= u && u <= 0xffef);   // Halfwidth and Fullwidth Forms
    1.90 +  }
    1.91 +
    1.92 +  // Break opportunities exist at the end of each run of breakable whitespace
    1.93 +  // (see IsSpace above). Break opportunities can also exist between pairs of
    1.94 +  // non-whitespace characters, as determined by nsILineBreaker. We pass a whitespace-
    1.95 +  // delimited word to nsILineBreaker if it contains at least one character
    1.96 +  // matching IsComplexChar.
    1.97 +  // We provide flags to control on a per-chunk basis where breaks are allowed.
    1.98 +  // At any character boundary, exactly one text chunk governs whether a
    1.99 +  // break is allowed at that boundary.
   1.100 +  //
   1.101 +  // We operate on text after whitespace processing has been applied, so
   1.102 +  // other characters (e.g. tabs and newlines) may have been converted to
   1.103 +  // spaces.
   1.104 +
   1.105 +  /**
   1.106 +   * Flags passed with each chunk of text.
   1.107 +   */
   1.108 +  enum {
   1.109 +    /**
   1.110 +     * Do not introduce a break opportunity at the start of this chunk of text.
   1.111 +     */
   1.112 +    BREAK_SUPPRESS_INITIAL = 0x01,
   1.113 +    /**
   1.114 +     * Do not introduce a break opportunity in the interior of this chunk of text.
   1.115 +     * Also, whitespace in this chunk is treated as non-breakable.
   1.116 +     */
   1.117 +    BREAK_SUPPRESS_INSIDE = 0x02,
   1.118 +    /**
   1.119 +     * The sink currently is already set up to have no breaks in it;
   1.120 +     * if no breaks are possible, nsLineBreaker does not need to call
   1.121 +     * SetBreaks on it. This is useful when handling large quantities of
   1.122 +     * preformatted text; the textruns will never have any breaks set on them,
   1.123 +     * and there is no need to ever actually scan the text for breaks, except
   1.124 +     * at the end of textruns in case context is needed for following breakable
   1.125 +     * text.
   1.126 +     */
   1.127 +    BREAK_SKIP_SETTING_NO_BREAKS = 0x04,
   1.128 +    /**
   1.129 +     * We need to be notified of characters that should be capitalized
   1.130 +     * (as in text-transform:capitalize) in this chunk of text.
   1.131 +     */
   1.132 +    BREAK_NEED_CAPITALIZATION = 0x08,
   1.133 +    /**
   1.134 +     * Auto-hyphenation is enabled, so we need to get a hyphenator
   1.135 +     * (if available) and use it to find breakpoints.
   1.136 +     */
   1.137 +    BREAK_USE_AUTO_HYPHENATION = 0x10
   1.138 +  };
   1.139 +
   1.140 +  /**
   1.141 +   * Append "invisible whitespace". This acts like whitespace, but there is
   1.142 +   * no actual text associated with it. Only the BREAK_SUPPRESS_INSIDE flag
   1.143 +   * is relevant here.
   1.144 +   */
   1.145 +  nsresult AppendInvisibleWhitespace(uint32_t aFlags);
   1.146 +
   1.147 +  /**
   1.148 +   * Feed Unicode text into the linebreaker for analysis. aLength must be
   1.149 +   * nonzero.
   1.150 +   * @param aSink can be null if the breaks are not actually needed (we may
   1.151 +   * still be setting up state for later breaks)
   1.152 +   */
   1.153 +  nsresult AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
   1.154 +                      uint32_t aFlags, nsILineBreakSink* aSink);
   1.155 +  /**
   1.156 +   * Feed 8-bit text into the linebreaker for analysis. aLength must be nonzero.
   1.157 +   * @param aSink can be null if the breaks are not actually needed (we may
   1.158 +   * still be setting up state for later breaks)
   1.159 +   */
   1.160 +  nsresult AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
   1.161 +                      uint32_t aFlags, nsILineBreakSink* aSink);
   1.162 +  /**
   1.163 +   * Reset all state. This means the current run has ended; any outstanding
   1.164 +   * calls through nsILineBreakSink are made, and all outstanding references to
   1.165 +   * nsILineBreakSink objects are dropped.
   1.166 +   * After this call, this linebreaker can be reused.
   1.167 +   * This must be called at least once between any call to AppendText() and
   1.168 +   * destroying the object.
   1.169 +   * @param aTrailingBreak this is set to true when there is a break opportunity
   1.170 +   * at the end of the text. This will normally only be declared true when there
   1.171 +   * is breakable whitespace at the end.
   1.172 +   */
   1.173 +  nsresult Reset(bool* aTrailingBreak);
   1.174 +
   1.175 +  /**
   1.176 +   * Set word-break mode for linebreaker.  This is set by word-break property.
   1.177 +   * @param aMode is nsILineBreaker::kWordBreak_* value.
   1.178 +   */
   1.179 +  void SetWordBreak(uint8_t aMode) { mWordBreak = aMode; }
   1.180 +
   1.181 +private:
   1.182 +  // One entry in the list of text sources that make up the "current word"
   1.183 +  // (i.e., run of text which does not contain any whitespace). All the
   1.184 +  // mLengths are nonzero; these cannot overlap.
   1.185 +  struct TextItem {
   1.186 +    TextItem(nsILineBreakSink* aSink, uint32_t aSinkOffset, uint32_t aLength,
   1.187 +             uint32_t aFlags)
   1.188 +      : mSink(aSink), mSinkOffset(aSinkOffset), mLength(aLength), mFlags(aFlags) {}
   1.189 +
   1.190 +    nsILineBreakSink* mSink;
   1.191 +    uint32_t          mSinkOffset;
   1.192 +    uint32_t          mLength;
   1.193 +    uint32_t          mFlags;
   1.194 +  };
   1.195 +
   1.196 +  // State for the nonwhitespace "word" that started in previous text and hasn't
   1.197 +  // finished yet.
   1.198 +
   1.199 +  // When the current word ends, this computes the linebreak opportunities
   1.200 +  // *inside* the word (excluding either end) and sets them through the
   1.201 +  // appropriate sink(s). Then we clear the current word state.
   1.202 +  nsresult FlushCurrentWord();
   1.203 +  // NOTE(review): undocumented; presumably tracks the language of the current word - confirm.
   1.204 +  void UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage);
   1.205 +  // NOTE(review): undocumented; presumably marks hyphen breaks in aBreakState - confirm.
   1.206 +  void FindHyphenationPoints(nsHyphenator *aHyphenator,
   1.207 +                             const char16_t *aTextStart,
   1.208 +                             const char16_t *aTextLimit,
   1.209 +                             uint8_t *aBreakState);
   1.210 +  // NOTE(review): presumably the characters of the current, unfinished word - confirm.
   1.211 +  nsAutoTArray<char16_t,100> mCurrentWord;
   1.212 +  // All the items that contribute to mCurrentWord
   1.213 +  nsAutoTArray<TextItem,2>    mTextItems;
   1.214 +  nsIAtom*                    mCurrentWordLanguage;
   1.215 +  bool                        mCurrentWordContainsMixedLang;
   1.216 +  bool                        mCurrentWordContainsComplexChar;
   1.217 +
   1.218 +  // True if the previous character was breakable whitespace
   1.219 +  bool                        mAfterBreakableSpace;
   1.220 +  // True if a break must be allowed at the current position because
   1.221 +  // a run of breakable whitespace ends here
   1.222 +  bool                        mBreakHere;
   1.223 +  // line break mode by "word-break" style
   1.224 +  uint8_t                     mWordBreak;
   1.225 +};
   1.226 +
   1.227 +#endif /*NSLINEBREAKER_H_*/

mercurial