content/base/public/nsLineBreaker.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #ifndef NSLINEBREAKER_H_
michael@0 7 #define NSLINEBREAKER_H_
michael@0 8
michael@0 9 #include "nsString.h"
michael@0 10 #include "nsTArray.h"
michael@0 11 #include "nsILineBreaker.h"
michael@0 12
michael@0 13 class nsIAtom;
michael@0 14 class nsHyphenator;
michael@0 15
michael@0 16 /**
michael@0 17 * A receiver of line break data. A sink is passed with each text chunk given to nsLineBreaker::AppendText and is called back with the results computed for that chunk.
michael@0 18 */
michael@0 19 class nsILineBreakSink { // NOTE(review): no virtual destructor is declared here, so sinks should not be deleted through an nsILineBreakSink pointer
michael@0 20 public:
michael@0 21 /**
michael@0 22 * Sets the break data for a substring of the associated text chunk. The substring is identified by aStart and aLength (presumably an offset into the chunk and a character count; confirm against the implementation).
michael@0 23 * One or more of these calls will be performed; the union of all substrings
michael@0 24 * will cover the entire text chunk. Substrings may overlap (i.e., we may
michael@0 25 * set the break-before state of a character more than once).
michael@0 26 * @param aBreakBefore the break-before states for the characters in the substring.
michael@0 27 * These are enum values from gfxTextRun::CompressedGlyph:
michael@0 28 * FLAG_BREAK_TYPE_NONE - no linebreak is allowed here
michael@0 29 * FLAG_BREAK_TYPE_NORMAL - a normal (whitespace) linebreak
michael@0 30 * FLAG_BREAK_TYPE_HYPHEN - a hyphenation point
michael@0 31 */
michael@0 32 virtual void SetBreaks(uint32_t aStart, uint32_t aLength, uint8_t* aBreakBefore) = 0;
michael@0 33
michael@0 34 /**
michael@0 35 * Indicates which characters should be capitalized. Only called if
michael@0 36 * BREAK_NEED_CAPITALIZATION was requested. NOTE(review): aCapitalize presumably holds one flag per character of the substring described by aStart and aLength; confirm against the implementation.
michael@0 37 */
michael@0 38 virtual void SetCapitalization(uint32_t aStart, uint32_t aLength, bool* aCapitalize) = 0;
michael@0 39 };
michael@0 40
michael@0 41 /**
michael@0 42 * A line-breaking state machine. You feed text into it via AppendText calls
michael@0 43 * and it computes the possible line breaks. Because break decisions can
michael@0 44 * require a lot of context, the breaks for a piece of text are sometimes not
michael@0 45 * known until later text has been seen (or all text ends). So breaks are
michael@0 46 * returned via a call to SetBreaks on the nsILineBreakSink object passed
michael@0 47 * with each text chunk, which might happen during the corresponding AppendText
michael@0 48 * call, or might happen during a later AppendText call or even a Reset()
michael@0 49 * call.
michael@0 50 *
michael@0 51 * The linebreak results MUST NOT depend on how the text is broken up
michael@0 52 * into AppendText calls.
michael@0 53 *
michael@0 54 * The current strategy is that we break the overall text into
michael@0 55 * whitespace-delimited "words". Then those words are passed to the nsILineBreaker
michael@0 56 * service for deeper analysis if they contain a "complex" character as described
michael@0 57 * below.
michael@0 58 *
michael@0 59 * This class also handles detection of which characters should be capitalized
michael@0 60 * for text-transform:capitalize. This is a good place to handle that because
michael@0 61 * we have all the context we need.
michael@0 62 */
michael@0 63 class nsLineBreaker {
michael@0 64 public:
michael@0 65 nsLineBreaker();
michael@0 66 ~nsLineBreaker();
michael@0 67
michael@0 68 static inline bool IsSpace(char16_t u) { return NS_IsSpace(u); } // forwards to NS_IsSpace
michael@0 69 // Returns false only for ASCII digits, ASCII letters and line feed (U+000A); every other value, including all non-ASCII ones, yields true.
michael@0 70 static inline bool IsComplexASCIIChar(char16_t u)
michael@0 71 {
michael@0 72 return !((0x0030 <= u && u <= 0x0039) || // digits 0-9
michael@0 73 (0x0041 <= u && u <= 0x005A) || // uppercase A-Z
michael@0 74 (0x0061 <= u && u <= 0x007A) || // lowercase a-z
michael@0 75 (0x000a == u)); // line feed
michael@0 76 }
michael@0 77 // Returns true if u makes its word a candidate for deeper analysis by nsILineBreaker. NOTE(review): IsComplexASCIIChar is already true for every value outside its four excluded ranges, so the remaining terms look unable to change the result; confirm whether that is intended.
michael@0 78 static inline bool IsComplexChar(char16_t u)
michael@0 79 {
michael@0 80 return IsComplexASCIIChar(u) ||
michael@0 81 NS_NeedsPlatformNativeHandling(u) ||
michael@0 82 (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
michael@0 83 (0x2000 <= u && u <= 0x21ff) || // Punctuations and Symbols
michael@0 84 (0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks
michael@0 85 (0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Ideographs
michael@0 86 (0xff00 <= u && u <= 0xffef); // Halfwidth and Fullwidth Forms
michael@0 87 }
michael@0 88
michael@0 89 // Break opportunities exist at the end of each run of breakable whitespace
michael@0 90 // (see IsSpace above). Break opportunities can also exist between pairs of
michael@0 91 // non-whitespace characters, as determined by nsILineBreaker. We pass a whitespace-
michael@0 92 // delimited word to nsILineBreaker if it contains at least one character
michael@0 93 // matching IsComplexChar.
michael@0 94 // We provide flags to control on a per-chunk basis where breaks are allowed.
michael@0 95 // At any character boundary, exactly one text chunk governs whether a
michael@0 96 // break is allowed at that boundary.
michael@0 97 //
michael@0 98 // We operate on text after whitespace processing has been applied, so
michael@0 99 // other characters (e.g. tabs and newlines) may have been converted to
michael@0 100 // spaces.
michael@0 101
michael@0 102 /**
michael@0 103 * Flags passed with each chunk of text.
michael@0 104 */
michael@0 105 enum {
michael@0 106 /**
michael@0 107 * Do not introduce a break opportunity at the start of this chunk of text.
michael@0 108 */
michael@0 109 BREAK_SUPPRESS_INITIAL = 0x01,
michael@0 110 /**
michael@0 111 * Do not introduce a break opportunity in the interior of this chunk of text.
michael@0 112 * Also, whitespace in this chunk is treated as non-breakable.
michael@0 113 */
michael@0 114 BREAK_SUPPRESS_INSIDE = 0x02,
michael@0 115 /**
michael@0 116 * The sink is already set up to have no breaks in it;
michael@0 117 * if no breaks are possible, nsLineBreaker does not need to call
michael@0 118 * SetBreaks on it. This is useful when handling large quantities of
michael@0 119 * preformatted text; the textruns will never have any breaks set on them,
michael@0 120 * and there is no need to ever actually scan the text for breaks, except
michael@0 121 * at the end of textruns in case context is needed for following breakable
michael@0 122 * text.
michael@0 123 */
michael@0 124 BREAK_SKIP_SETTING_NO_BREAKS = 0x04,
michael@0 125 /**
michael@0 126 * We need to be notified of characters that should be capitalized
michael@0 127 * (as in text-transform:capitalize) in this chunk of text.
michael@0 128 */
michael@0 129 BREAK_NEED_CAPITALIZATION = 0x08,
michael@0 130 /**
michael@0 131 * Auto-hyphenation is enabled, so we need to get a hyphenator
michael@0 132 * (if available) and use it to find breakpoints.
michael@0 133 */
michael@0 134 BREAK_USE_AUTO_HYPHENATION = 0x10
michael@0 135 };
michael@0 136
michael@0 137 /**
michael@0 138 * Append "invisible whitespace". This acts like whitespace, but there is
michael@0 139 * no actual text associated with it. Only the BREAK_SUPPRESS_INSIDE flag
michael@0 140 * is relevant here.
michael@0 141 */
michael@0 142 nsresult AppendInvisibleWhitespace(uint32_t aFlags);
michael@0 143
michael@0 144 /**
michael@0 145 * Feed Unicode text into the linebreaker for analysis. aLength must be
michael@0 146 * nonzero. aFlags is a combination of the BREAK_* flags declared in this class.
michael@0 147 * @param aSink can be null if the breaks are not actually needed (we may
michael@0 148 * still be setting up state for later breaks)
michael@0 149 */
michael@0 150 nsresult AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
michael@0 151 uint32_t aFlags, nsILineBreakSink* aSink);
michael@0 152 /**
michael@0 153 * Feed 8-bit text into the linebreaker for analysis. aLength must be nonzero. aFlags is a combination of the BREAK_* flags declared in this class.
michael@0 154 * @param aSink can be null if the breaks are not actually needed (we may
michael@0 155 * still be setting up state for later breaks)
michael@0 156 */
michael@0 157 nsresult AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
michael@0 158 uint32_t aFlags, nsILineBreakSink* aSink);
michael@0 159 /**
michael@0 160 * Reset all state. This means the current run has ended; any outstanding
michael@0 161 * calls through nsILineBreakSink are made, and all outstanding references to
michael@0 162 * nsILineBreakSink objects are dropped.
michael@0 163 * After this call, this linebreaker can be reused.
michael@0 164 * This must be called at least once between any call to AppendText() and
michael@0 165 * destroying the object.
michael@0 166 * @param aTrailingBreak this is set to true when there is a break opportunity
michael@0 167 * at the end of the text. This will normally only be declared true when there
michael@0 168 * is breakable whitespace at the end.
michael@0 169 */
michael@0 170 nsresult Reset(bool* aTrailingBreak);
michael@0 171
michael@0 172 /**
michael@0 173 * Set the word-break mode for this linebreaker, as given by the word-break property.
michael@0 174 * @param aMode is nsILineBreaker::kWordBreak_* value.
michael@0 175 */
michael@0 176 void SetWordBreak(uint8_t aMode) { mWordBreak = aMode; }
michael@0 177
michael@0 178 private:
michael@0 179 // This is a list of text sources that make up the "current word" (i.e.,
michael@0 180 // run of text which does not contain any whitespace). All the mLengths
michael@0 181 // are nonzero, and the items cannot overlap.
michael@0 182 struct TextItem {
michael@0 183 TextItem(nsILineBreakSink* aSink, uint32_t aSinkOffset, uint32_t aLength,
michael@0 184 uint32_t aFlags)
michael@0 185 : mSink(aSink), mSinkOffset(aSinkOffset), mLength(aLength), mFlags(aFlags) {}
michael@0 186
michael@0 187 nsILineBreakSink* mSink;
michael@0 188 uint32_t mSinkOffset;
michael@0 189 uint32_t mLength;
michael@0 190 uint32_t mFlags;
michael@0 191 };
michael@0 192
michael@0 193 // State for the nonwhitespace "word" that started in previous text and hasn't
michael@0 194 // finished yet.
michael@0 195
michael@0 196 // When the current word ends, this computes the linebreak opportunities
michael@0 197 // *inside* the word (excluding either end) and sets them through the
michael@0 198 // appropriate sink(s). Then we clear the current word state.
michael@0 199 nsresult FlushCurrentWord();
michael@0 200 // NOTE(review): presumably records aHyphenationLanguage as the language of the current word and tracks mixed-language words; confirm in the implementation.
michael@0 201 void UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage);
michael@0 202 // NOTE(review): presumably marks hyphenation points for the text between aTextStart and aTextLimit in aBreakState using aHyphenator; confirm in the implementation.
michael@0 203 void FindHyphenationPoints(nsHyphenator *aHyphenator,
michael@0 204 const char16_t *aTextStart,
michael@0 205 const char16_t *aTextLimit,
michael@0 206 uint8_t *aBreakState);
michael@0 207 // Characters of the current word
michael@0 208 nsAutoTArray<char16_t,100> mCurrentWord;
michael@0 209 // All the items that contribute to mCurrentWord
michael@0 210 nsAutoTArray<TextItem,2> mTextItems;
michael@0 211 nsIAtom* mCurrentWordLanguage; // NOTE(review): raw pointer; ownership is not visible in this header
michael@0 212 bool mCurrentWordContainsMixedLang; // presumably set when the current word spans more than one hyphenation language; confirm
michael@0 213 bool mCurrentWordContainsComplexChar; // presumably set when some character of the current word satisfies IsComplexChar; confirm
michael@0 214
michael@0 215 // True if the previous character was breakable whitespace
michael@0 216 bool mAfterBreakableSpace;
michael@0 217 // True if a break must be allowed at the current position because
michael@0 218 // a run of breakable whitespace ends here
michael@0 219 bool mBreakHere;
michael@0 220 // line break mode by "word-break" style
michael@0 221 uint8_t mWordBreak;
michael@0 222 };
michael@0 223
michael@0 224 #endif /*NSLINEBREAKER_H_*/

mercurial