michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef mozInlineSpellWordUtil_h michael@0: #define mozInlineSpellWordUtil_h michael@0: michael@0: #include "nsCOMPtr.h" michael@0: #include "nsIDOMDocument.h" michael@0: #include "nsIDocument.h" michael@0: #include "nsString.h" michael@0: #include "nsTArray.h" michael@0: michael@0: //#define DEBUG_SPELLCHECK michael@0: michael@0: class nsRange; michael@0: class nsINode; michael@0: michael@0: /** michael@0: * This class extracts text from the DOM and builds it into a single string. michael@0: * The string includes whitespace breaks whereever non-inline elements begin michael@0: * and end. This string is broken into "real words", following somewhat michael@0: * complex rules; for example substrings that look like URLs or michael@0: * email addresses are treated as single words, but otherwise many kinds of michael@0: * punctuation are treated as word separators. GetNextWord provides a way michael@0: * to iterate over these "real words". michael@0: * michael@0: * The basic operation is: michael@0: * michael@0: * 1. Call Init with the weak pointer to the editor that you're using. michael@0: * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop michael@0: * at the word boundary after that. If SetEnd is not called, we'll stop michael@0: * at the end of the document's root element. michael@0: * 3. Call SetPosition to initialize the current position inside the michael@0: * previously given range. michael@0: * 4. Call GetNextWord over and over until it returns false. michael@0: */ michael@0: michael@0: class mozInlineSpellWordUtil michael@0: { michael@0: public: michael@0: struct NodeOffset { michael@0: nsINode* mNode; michael@0: int32_t mOffset; michael@0: michael@0: NodeOffset(nsINode* aNode, int32_t aOffset) : michael@0: mNode(aNode), mOffset(aOffset) {} michael@0: }; michael@0: michael@0: mozInlineSpellWordUtil() michael@0: : mRootNode(nullptr), michael@0: mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0), michael@0: mNextWordIndex(-1), mSoftTextValid(false) {} michael@0: michael@0: nsresult Init(nsWeakPtr aWeakEditor); michael@0: michael@0: nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset); michael@0: michael@0: // sets the current position, this should be inside the range. If we are in michael@0: // the middle of a word, we'll move to its start. michael@0: nsresult SetPosition(nsINode* aNode, int32_t aOffset); michael@0: michael@0: // Given a point inside or immediately following a word, this returns the michael@0: // DOM range that exactly encloses that word's characters. The current michael@0: // position will be at the end of the word. This will find the previous michael@0: // word if the current position is space, so if you care that the point is michael@0: // inside the word, you should check the range. michael@0: // michael@0: // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called michael@0: // before you actually generate the range you are interested in and iterate michael@0: // the words in it. michael@0: nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset, michael@0: nsRange** aRange); michael@0: michael@0: // Moves to the the next word in the range, and retrieves it's text and range. michael@0: // An empty word and a nullptr range are returned when we are done checking. michael@0: // aSkipChecking will be set if the word is "special" and shouldn't be michael@0: // checked (e.g., an email address). michael@0: nsresult GetNextWord(nsAString& aText, nsRange** aRange, michael@0: bool* aSkipChecking); michael@0: michael@0: // Call to normalize some punctuation. This function takes an autostring michael@0: // so we can access characters directly. michael@0: static void NormalizeWord(nsSubstring& aWord); michael@0: michael@0: nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; } michael@0: nsIDocument* GetDocument() const { return mDocument; } michael@0: nsINode* GetRootNode() { return mRootNode; } michael@0: michael@0: private: michael@0: michael@0: // cached stuff for the editor, set by Init michael@0: nsCOMPtr mDOMDocument; michael@0: nsCOMPtr mDocument; michael@0: michael@0: // range to check, see SetPosition and SetEnd michael@0: nsINode* mRootNode; michael@0: NodeOffset mSoftBegin; michael@0: NodeOffset mSoftEnd; michael@0: michael@0: // DOM text covering the soft range, with newlines added at block boundaries michael@0: nsString mSoftText; michael@0: // A list of where we extracted text from, ordered by mSoftTextOffset. A given michael@0: // DOM node appears at most once in this list. michael@0: struct DOMTextMapping { michael@0: NodeOffset mNodeOffset; michael@0: int32_t mSoftTextOffset; michael@0: int32_t mLength; michael@0: michael@0: DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength) michael@0: : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset), michael@0: mLength(aLength) {} michael@0: }; michael@0: nsTArray mSoftTextDOMMapping; michael@0: michael@0: // A list of the "real words" in mSoftText, ordered by mSoftTextOffset michael@0: struct RealWord { michael@0: int32_t mSoftTextOffset; michael@0: int32_t mLength; michael@0: bool mCheckableWord; michael@0: michael@0: RealWord(int32_t aOffset, int32_t aLength, bool aCheckable) michael@0: : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {} michael@0: int32_t EndOffset() const { return mSoftTextOffset + mLength; } michael@0: }; michael@0: nsTArray mRealWords; michael@0: int32_t mNextWordIndex; michael@0: michael@0: bool mSoftTextValid; michael@0: michael@0: void InvalidateWords() { mSoftTextValid = false; } michael@0: void EnsureWords(); michael@0: michael@0: int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); michael@0: // Map an offset into mSoftText to a DOM position. Note that two DOM positions michael@0: // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb michael@0: // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So, michael@0: // aHintBefore controls which position we return ... if aHint is eEnd michael@0: // then the position indicates the END of a range so we return (A,4). Otherwise michael@0: // the position indicates the START of a range so we return (B,0). michael@0: enum DOMMapHint { HINT_BEGIN, HINT_END }; michael@0: NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, michael@0: DOMMapHint aHint); michael@0: // Finds the index of the real word containing aSoftTextOffset, or -1 if none michael@0: // If it's exactly between two words, then if aHint is HINT_BEGIN, return the michael@0: // later word (favouring the assumption that it's the BEGINning of a word), michael@0: // otherwise return the earlier word (assuming it's the END of a word). michael@0: // If aSearchForward is true, then if we don't find a word at the given michael@0: // position, search forward until we do find a word and return that (if found). michael@0: int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, michael@0: bool aSearchForward); michael@0: michael@0: // build mSoftText and mSoftTextDOMMapping michael@0: void BuildSoftText(); michael@0: // Build mRealWords array michael@0: void BuildRealWords(); michael@0: michael@0: void SplitDOMWord(int32_t aStart, int32_t aEnd); michael@0: michael@0: // Convenience functions, object must be initialized michael@0: nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange); michael@0: nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange); michael@0: }; michael@0: michael@0: #endif