michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef NSLINEBREAKER_H_ michael@0: #define NSLINEBREAKER_H_ michael@0: michael@0: #include "nsString.h" michael@0: #include "nsTArray.h" michael@0: #include "nsILineBreaker.h" michael@0: michael@0: class nsIAtom; michael@0: class nsHyphenator; michael@0: michael@0: /** michael@0: * A receiver of line break data. michael@0: */ michael@0: class nsILineBreakSink { michael@0: public: michael@0: /** michael@0: * Sets the break data for a substring of the associated text chunk. michael@0: * One or more of these calls will be performed; the union of all substrings michael@0: * will cover the entire text chunk. Substrings may overlap (i.e., we may michael@0: * set the break-before state of a character more than once). michael@0: * @param aBreakBefore the break-before states for the characters in the substring. michael@0: * These are enum values from gfxTextRun::CompressedGlyph: michael@0: * FLAG_BREAK_TYPE_NONE - no linebreak is allowed here michael@0: * FLAG_BREAK_TYPE_NORMAL - a normal (whitespace) linebreak michael@0: * FLAG_BREAK_TYPE_HYPHEN - a hyphenation point michael@0: */ michael@0: virtual void SetBreaks(uint32_t aStart, uint32_t aLength, uint8_t* aBreakBefore) = 0; michael@0: michael@0: /** michael@0: * Indicates which characters should be capitalized. Only called if michael@0: * BREAK_NEED_CAPITALIZATION was requested. michael@0: */ michael@0: virtual void SetCapitalization(uint32_t aStart, uint32_t aLength, bool* aCapitalize) = 0; michael@0: }; michael@0: michael@0: /** michael@0: * A line-breaking state machine. You feed text into it via AppendText calls michael@0: * and it computes the possible line breaks. 
 * Because break decisions can
 * require a lot of context, the breaks for a piece of text are sometimes not
 * known until later text has been seen (or all text ends). So breaks are
 * returned via a call to SetBreaks on the nsILineBreakSink object passed
 * with each text chunk, which might happen during the corresponding AppendText
 * call, or might happen during a later AppendText call or even a Reset()
 * call.
 *
 * The linebreak results MUST NOT depend on how the text is broken up
 * into AppendText calls.
 *
 * The current strategy is that we break the overall text into
 * whitespace-delimited "words". Then those words are passed to the nsILineBreaker
 * service for deeper analysis if they contain a "complex" character as described
 * below.
 *
 * This class also handles detection of which characters should be capitalized
 * for text-transform:capitalize. This is a good place to handle that because
 * we have all the context we need.
michael@0: */ michael@0: class nsLineBreaker { michael@0: public: michael@0: nsLineBreaker(); michael@0: ~nsLineBreaker(); michael@0: michael@0: static inline bool IsSpace(char16_t u) { return NS_IsSpace(u); } michael@0: michael@0: static inline bool IsComplexASCIIChar(char16_t u) michael@0: { michael@0: return !((0x0030 <= u && u <= 0x0039) || michael@0: (0x0041 <= u && u <= 0x005A) || michael@0: (0x0061 <= u && u <= 0x007A) || michael@0: (0x000a == u)); michael@0: } michael@0: michael@0: static inline bool IsComplexChar(char16_t u) michael@0: { michael@0: return IsComplexASCIIChar(u) || michael@0: NS_NeedsPlatformNativeHandling(u) || michael@0: (0x1100 <= u && u <= 0x11ff) || // Hangul Jamo michael@0: (0x2000 <= u && u <= 0x21ff) || // Punctuations and Symbols michael@0: (0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks michael@0: (0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Idographs michael@0: (0xff00 <= u && u <= 0xffef); // Halfwidth and Fullwidth Forms michael@0: } michael@0: michael@0: // Break opportunities exist at the end of each run of breakable whitespace michael@0: // (see IsSpace above). Break opportunities can also exist between pairs of michael@0: // non-whitespace characters, as determined by nsILineBreaker. We pass a whitespace- michael@0: // delimited word to nsILineBreaker if it contains at least one character michael@0: // matching IsComplexChar. michael@0: // We provide flags to control on a per-chunk basis where breaks are allowed. michael@0: // At any character boundary, exactly one text chunk governs whether a michael@0: // break is allowed at that boundary. michael@0: // michael@0: // We operate on text after whitespace processing has been applied, so michael@0: // other characters (e.g. tabs and newlines) may have been converted to michael@0: // spaces. michael@0: michael@0: /** michael@0: * Flags passed with each chunk of text. 
michael@0: */ michael@0: enum { michael@0: /* michael@0: * Do not introduce a break opportunity at the start of this chunk of text. michael@0: */ michael@0: BREAK_SUPPRESS_INITIAL = 0x01, michael@0: /** michael@0: * Do not introduce a break opportunity in the interior of this chunk of text. michael@0: * Also, whitespace in this chunk is treated as non-breakable. michael@0: */ michael@0: BREAK_SUPPRESS_INSIDE = 0x02, michael@0: /** michael@0: * The sink currently is already set up to have no breaks in it; michael@0: * if no breaks are possible, nsLineBreaker does not need to call michael@0: * SetBreaks on it. This is useful when handling large quantities of michael@0: * preformatted text; the textruns will never have any breaks set on them, michael@0: * and there is no need to ever actually scan the text for breaks, except michael@0: * at the end of textruns in case context is needed for following breakable michael@0: * text. michael@0: */ michael@0: BREAK_SKIP_SETTING_NO_BREAKS = 0x04, michael@0: /** michael@0: * We need to be notified of characters that should be capitalized michael@0: * (as in text-transform:capitalize) in this chunk of text. michael@0: */ michael@0: BREAK_NEED_CAPITALIZATION = 0x08, michael@0: /** michael@0: * Auto-hyphenation is enabled, so we need to get a hyphenator michael@0: * (if available) and use it to find breakpoints. michael@0: */ michael@0: BREAK_USE_AUTO_HYPHENATION = 0x10 michael@0: }; michael@0: michael@0: /** michael@0: * Append "invisible whitespace". This acts like whitespace, but there is michael@0: * no actual text associated with it. Only the BREAK_SUPPRESS_INSIDE flag michael@0: * is relevant here. michael@0: */ michael@0: nsresult AppendInvisibleWhitespace(uint32_t aFlags); michael@0: michael@0: /** michael@0: * Feed Unicode text into the linebreaker for analysis. aLength must be michael@0: * nonzero. 
michael@0: * @param aSink can be null if the breaks are not actually needed (we may michael@0: * still be setting up state for later breaks) michael@0: */ michael@0: nsresult AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength, michael@0: uint32_t aFlags, nsILineBreakSink* aSink); michael@0: /** michael@0: * Feed 8-bit text into the linebreaker for analysis. aLength must be nonzero. michael@0: * @param aSink can be null if the breaks are not actually needed (we may michael@0: * still be setting up state for later breaks) michael@0: */ michael@0: nsresult AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength, michael@0: uint32_t aFlags, nsILineBreakSink* aSink); michael@0: /** michael@0: * Reset all state. This means the current run has ended; any outstanding michael@0: * calls through nsILineBreakSink are made, and all outstanding references to michael@0: * nsILineBreakSink objects are dropped. michael@0: * After this call, this linebreaker can be reused. michael@0: * This must be called at least once between any call to AppendText() and michael@0: * destroying the object. michael@0: * @param aTrailingBreak this is set to true when there is a break opportunity michael@0: * at the end of the text. This will normally only be declared true when there michael@0: * is breakable whitespace at the end. michael@0: */ michael@0: nsresult Reset(bool* aTrailingBreak); michael@0: michael@0: /* michael@0: * Set word-break mode for linebreaker. This is set by word-break property. michael@0: * @param aMode is nsILineBreaker::kWordBreak_* value. michael@0: */ michael@0: void SetWordBreak(uint8_t aMode) { mWordBreak = aMode; } michael@0: michael@0: private: michael@0: // This is a list of text sources that make up the "current word" (i.e., michael@0: // run of text which does not contain any whitespace). All the mLengths michael@0: // are are nonzero, these cannot overlap. 
michael@0: struct TextItem { michael@0: TextItem(nsILineBreakSink* aSink, uint32_t aSinkOffset, uint32_t aLength, michael@0: uint32_t aFlags) michael@0: : mSink(aSink), mSinkOffset(aSinkOffset), mLength(aLength), mFlags(aFlags) {} michael@0: michael@0: nsILineBreakSink* mSink; michael@0: uint32_t mSinkOffset; michael@0: uint32_t mLength; michael@0: uint32_t mFlags; michael@0: }; michael@0: michael@0: // State for the nonwhitespace "word" that started in previous text and hasn't michael@0: // finished yet. michael@0: michael@0: // When the current word ends, this computes the linebreak opportunities michael@0: // *inside* the word (excluding either end) and sets them through the michael@0: // appropriate sink(s). Then we clear the current word state. michael@0: nsresult FlushCurrentWord(); michael@0: michael@0: void UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage); michael@0: michael@0: void FindHyphenationPoints(nsHyphenator *aHyphenator, michael@0: const char16_t *aTextStart, michael@0: const char16_t *aTextLimit, michael@0: uint8_t *aBreakState); michael@0: michael@0: nsAutoTArray mCurrentWord; michael@0: // All the items that contribute to mCurrentWord michael@0: nsAutoTArray mTextItems; michael@0: nsIAtom* mCurrentWordLanguage; michael@0: bool mCurrentWordContainsMixedLang; michael@0: bool mCurrentWordContainsComplexChar; michael@0: michael@0: // True if the previous character was breakable whitespace michael@0: bool mAfterBreakableSpace; michael@0: // True if a break must be allowed at the current position because michael@0: // a run of breakable whitespace ends here michael@0: bool mBreakHere; michael@0: // line break mode by "word-break" style michael@0: uint8_t mWordBreak; michael@0: }; michael@0: michael@0: #endif /*NSLINEBREAKER_H_*/