1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/base/public/nsLineBreaker.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,224 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#ifndef NSLINEBREAKER_H_ 1.10 +#define NSLINEBREAKER_H_ 1.11 + 1.12 +#include "nsString.h" 1.13 +#include "nsTArray.h" 1.14 +#include "nsILineBreaker.h" 1.15 + 1.16 +class nsIAtom; 1.17 +class nsHyphenator; 1.18 + 1.19 +/** 1.20 + * A receiver of line break data. 1.21 + */ 1.22 +class nsILineBreakSink { 1.23 +public: 1.24 + /** 1.25 + * Sets the break data for a substring of the associated text chunk. 1.26 + * One or more of these calls will be performed; the union of all substrings 1.27 + * will cover the entire text chunk. Substrings may overlap (i.e., we may 1.28 + * set the break-before state of a character more than once). 1.29 + * @param aBreakBefore the break-before states for the characters in the substring. 1.30 + * These are enum values from gfxTextRun::CompressedGlyph: 1.31 + * FLAG_BREAK_TYPE_NONE - no linebreak is allowed here 1.32 + * FLAG_BREAK_TYPE_NORMAL - a normal (whitespace) linebreak 1.33 + * FLAG_BREAK_TYPE_HYPHEN - a hyphenation point 1.34 + */ 1.35 + virtual void SetBreaks(uint32_t aStart, uint32_t aLength, uint8_t* aBreakBefore) = 0; 1.36 + 1.37 + /** 1.38 + * Indicates which characters should be capitalized. Only called if 1.39 + * BREAK_NEED_CAPITALIZATION was requested. 1.40 + */ 1.41 + virtual void SetCapitalization(uint32_t aStart, uint32_t aLength, bool* aCapitalize) = 0; 1.42 +}; 1.43 + 1.44 +/** 1.45 + * A line-breaking state machine. You feed text into it via AppendText calls 1.46 + * and it computes the possible line breaks. Because break decisions can 1.47 + * require a lot of context, the breaks for a piece of text are sometimes not 1.48 + * known until later text has been seen (or all text ends). So breaks are 1.49 + * returned via a call to SetBreaks on the nsILineBreakSink object passed 1.50 + * with each text chunk, which might happen during the corresponding AppendText 1.51 + * call, or might happen during a later AppendText call or even a Reset() 1.52 + * call. 1.53 + * 1.54 + * The linebreak results MUST NOT depend on how the text is broken up 1.55 + * into AppendText calls. 1.56 + * 1.57 + * The current strategy is that we break the overall text into 1.58 + * whitespace-delimited "words". Then those words are passed to the nsILineBreaker 1.59 + * service for deeper analysis if they contain a "complex" character as described 1.60 + * below. 1.61 + * 1.62 + * This class also handles detection of which characters should be capitalized 1.63 + * for text-transform:capitalize. This is a good place to handle that because 1.64 + * we have all the context we need. 1.65 + */ 1.66 +class nsLineBreaker { 1.67 +public: 1.68 + nsLineBreaker(); 1.69 + ~nsLineBreaker(); 1.70 + 1.71 + static inline bool IsSpace(char16_t u) { return NS_IsSpace(u); } 1.72 + 1.73 + static inline bool IsComplexASCIIChar(char16_t u) 1.74 + { 1.75 + return !((0x0030 <= u && u <= 0x0039) || 1.76 + (0x0041 <= u && u <= 0x005A) || 1.77 + (0x0061 <= u && u <= 0x007A) || 1.78 + (0x000a == u)); 1.79 + } 1.80 + 1.81 + static inline bool IsComplexChar(char16_t u) 1.82 + { 1.83 + return IsComplexASCIIChar(u) || 1.84 + NS_NeedsPlatformNativeHandling(u) || 1.85 + (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo 1.86 + (0x2000 <= u && u <= 0x21ff) || // Punctuations and Symbols 1.87 + (0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks 1.88 + (0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Idographs 1.89 + (0xff00 <= u && u <= 0xffef); // Halfwidth and Fullwidth Forms 1.90 + } 1.91 + 1.92 + // Break opportunities exist at the end of each run of breakable whitespace 1.93 + // (see IsSpace above). Break opportunities can also exist between pairs of 1.94 + // non-whitespace characters, as determined by nsILineBreaker. We pass a whitespace- 1.95 + // delimited word to nsILineBreaker if it contains at least one character 1.96 + // matching IsComplexChar. 1.97 + // We provide flags to control on a per-chunk basis where breaks are allowed. 1.98 + // At any character boundary, exactly one text chunk governs whether a 1.99 + // break is allowed at that boundary. 1.100 + // 1.101 + // We operate on text after whitespace processing has been applied, so 1.102 + // other characters (e.g. tabs and newlines) may have been converted to 1.103 + // spaces. 1.104 + 1.105 + /** 1.106 + * Flags passed with each chunk of text. 1.107 + */ 1.108 + enum { 1.109 + /* 1.110 + * Do not introduce a break opportunity at the start of this chunk of text. 1.111 + */ 1.112 + BREAK_SUPPRESS_INITIAL = 0x01, 1.113 + /** 1.114 + * Do not introduce a break opportunity in the interior of this chunk of text. 1.115 + * Also, whitespace in this chunk is treated as non-breakable. 1.116 + */ 1.117 + BREAK_SUPPRESS_INSIDE = 0x02, 1.118 + /** 1.119 + * The sink currently is already set up to have no breaks in it; 1.120 + * if no breaks are possible, nsLineBreaker does not need to call 1.121 + * SetBreaks on it. This is useful when handling large quantities of 1.122 + * preformatted text; the textruns will never have any breaks set on them, 1.123 + * and there is no need to ever actually scan the text for breaks, except 1.124 + * at the end of textruns in case context is needed for following breakable 1.125 + * text. 1.126 + */ 1.127 + BREAK_SKIP_SETTING_NO_BREAKS = 0x04, 1.128 + /** 1.129 + * We need to be notified of characters that should be capitalized 1.130 + * (as in text-transform:capitalize) in this chunk of text. 1.131 + */ 1.132 + BREAK_NEED_CAPITALIZATION = 0x08, 1.133 + /** 1.134 + * Auto-hyphenation is enabled, so we need to get a hyphenator 1.135 + * (if available) and use it to find breakpoints. 1.136 + */ 1.137 + BREAK_USE_AUTO_HYPHENATION = 0x10 1.138 + }; 1.139 + 1.140 + /** 1.141 + * Append "invisible whitespace". This acts like whitespace, but there is 1.142 + * no actual text associated with it. Only the BREAK_SUPPRESS_INSIDE flag 1.143 + * is relevant here. 1.144 + */ 1.145 + nsresult AppendInvisibleWhitespace(uint32_t aFlags); 1.146 + 1.147 + /** 1.148 + * Feed Unicode text into the linebreaker for analysis. aLength must be 1.149 + * nonzero. 1.150 + * @param aSink can be null if the breaks are not actually needed (we may 1.151 + * still be setting up state for later breaks) 1.152 + */ 1.153 + nsresult AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength, 1.154 + uint32_t aFlags, nsILineBreakSink* aSink); 1.155 + /** 1.156 + * Feed 8-bit text into the linebreaker for analysis. aLength must be nonzero. 1.157 + * @param aSink can be null if the breaks are not actually needed (we may 1.158 + * still be setting up state for later breaks) 1.159 + */ 1.160 + nsresult AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength, 1.161 + uint32_t aFlags, nsILineBreakSink* aSink); 1.162 + /** 1.163 + * Reset all state. This means the current run has ended; any outstanding 1.164 + * calls through nsILineBreakSink are made, and all outstanding references to 1.165 + * nsILineBreakSink objects are dropped. 1.166 + * After this call, this linebreaker can be reused. 1.167 + * This must be called at least once between any call to AppendText() and 1.168 + * destroying the object. 1.169 + * @param aTrailingBreak this is set to true when there is a break opportunity 1.170 + * at the end of the text. This will normally only be declared true when there 1.171 + * is breakable whitespace at the end. 1.172 + */ 1.173 + nsresult Reset(bool* aTrailingBreak); 1.174 + 1.175 + /* 1.176 + * Set word-break mode for linebreaker. This is set by word-break property. 1.177 + * @param aMode is nsILineBreaker::kWordBreak_* value. 1.178 + */ 1.179 + void SetWordBreak(uint8_t aMode) { mWordBreak = aMode; } 1.180 + 1.181 +private: 1.182 + // This is a list of text sources that make up the "current word" (i.e., 1.183 + // run of text which does not contain any whitespace). All the mLengths 1.184 + // are are nonzero, these cannot overlap. 1.185 + struct TextItem { 1.186 + TextItem(nsILineBreakSink* aSink, uint32_t aSinkOffset, uint32_t aLength, 1.187 + uint32_t aFlags) 1.188 + : mSink(aSink), mSinkOffset(aSinkOffset), mLength(aLength), mFlags(aFlags) {} 1.189 + 1.190 + nsILineBreakSink* mSink; 1.191 + uint32_t mSinkOffset; 1.192 + uint32_t mLength; 1.193 + uint32_t mFlags; 1.194 + }; 1.195 + 1.196 + // State for the nonwhitespace "word" that started in previous text and hasn't 1.197 + // finished yet. 1.198 + 1.199 + // When the current word ends, this computes the linebreak opportunities 1.200 + // *inside* the word (excluding either end) and sets them through the 1.201 + // appropriate sink(s). Then we clear the current word state. 1.202 + nsresult FlushCurrentWord(); 1.203 + 1.204 + void UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage); 1.205 + 1.206 + void FindHyphenationPoints(nsHyphenator *aHyphenator, 1.207 + const char16_t *aTextStart, 1.208 + const char16_t *aTextLimit, 1.209 + uint8_t *aBreakState); 1.210 + 1.211 + nsAutoTArray<char16_t,100> mCurrentWord; 1.212 + // All the items that contribute to mCurrentWord 1.213 + nsAutoTArray<TextItem,2> mTextItems; 1.214 + nsIAtom* mCurrentWordLanguage; 1.215 + bool mCurrentWordContainsMixedLang; 1.216 + bool mCurrentWordContainsComplexChar; 1.217 + 1.218 + // True if the previous character was breakable whitespace 1.219 + bool mAfterBreakableSpace; 1.220 + // True if a break must be allowed at the current position because 1.221 + // a run of breakable whitespace ends here 1.222 + bool mBreakHere; 1.223 + // line break mode by "word-break" style 1.224 + uint8_t mWordBreak; 1.225 +}; 1.226 + 1.227 +#endif /*NSLINEBREAKER_H_*/