1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,166 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#ifndef mozInlineSpellWordUtil_h 1.10 +#define mozInlineSpellWordUtil_h 1.11 + 1.12 +#include "nsCOMPtr.h" 1.13 +#include "nsIDOMDocument.h" 1.14 +#include "nsIDocument.h" 1.15 +#include "nsString.h" 1.16 +#include "nsTArray.h" 1.17 + 1.18 +//#define DEBUG_SPELLCHECK 1.19 + 1.20 +class nsRange; 1.21 +class nsINode; 1.22 + 1.23 +/** 1.24 + * This class extracts text from the DOM and builds it into a single string. 1.25 + * The string includes whitespace breaks wherever non-inline elements begin 1.26 + * and end. This string is broken into "real words", following somewhat 1.27 + * complex rules; for example substrings that look like URLs or 1.28 + * email addresses are treated as single words, but otherwise many kinds of 1.29 + * punctuation are treated as word separators. GetNextWord provides a way 1.30 + * to iterate over these "real words". 1.31 + * 1.32 + * The basic operation is: 1.33 + * 1.34 + * 1. Call Init with the weak pointer to the editor that you're using. 1.35 + * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop 1.36 + * at the word boundary after that. If SetEnd is not called, we'll stop 1.37 + * at the end of the document's root element. 1.38 + * 3. Call SetPosition to initialize the current position inside the 1.39 + * previously given range. 1.40 + * 4. Call GetNextWord over and over until it returns an empty word and a null range. 
1.41 + */ 1.42 + 1.43 +class mozInlineSpellWordUtil 1.44 +{ 1.45 +public: 1.46 + struct NodeOffset { 1.47 + nsINode* mNode; 1.48 + int32_t mOffset; 1.49 + 1.50 + NodeOffset(nsINode* aNode, int32_t aOffset) : 1.51 + mNode(aNode), mOffset(aOffset) {} 1.52 + }; 1.53 + 1.54 + mozInlineSpellWordUtil() 1.55 + : mRootNode(nullptr), 1.56 + mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0), 1.57 + mNextWordIndex(-1), mSoftTextValid(false) {} 1.58 + 1.59 + nsresult Init(nsWeakPtr aWeakEditor); 1.60 + 1.61 + nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset); 1.62 + 1.63 + // sets the current position, this should be inside the range. If we are in 1.64 + // the middle of a word, we'll move to its start. 1.65 + nsresult SetPosition(nsINode* aNode, int32_t aOffset); 1.66 + 1.67 + // Given a point inside or immediately following a word, this returns the 1.68 + // DOM range that exactly encloses that word's characters. The current 1.69 + // position will be at the end of the word. This will find the previous 1.70 + // word if the current position is space, so if you care that the point is 1.71 + // inside the word, you should check the range. 1.72 + // 1.73 + // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called 1.74 + // before you actually generate the range you are interested in and iterate 1.75 + // the words in it. 1.76 + nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset, 1.77 + nsRange** aRange); 1.78 + 1.79 + // Moves to the the next word in the range, and retrieves it's text and range. 1.80 + // An empty word and a nullptr range are returned when we are done checking. 1.81 + // aSkipChecking will be set if the word is "special" and shouldn't be 1.82 + // checked (e.g., an email address). 1.83 + nsresult GetNextWord(nsAString& aText, nsRange** aRange, 1.84 + bool* aSkipChecking); 1.85 + 1.86 + // Call to normalize some punctuation. This function takes an autostring 1.87 + // so we can access characters directly. 
1.88 + static void NormalizeWord(nsSubstring& aWord); 1.89 + 1.90 + nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; } 1.91 + nsIDocument* GetDocument() const { return mDocument; } 1.92 + nsINode* GetRootNode() { return mRootNode; } 1.93 + 1.94 +private: 1.95 + 1.96 + // cached stuff for the editor, set by Init 1.97 + nsCOMPtr<nsIDOMDocument> mDOMDocument; 1.98 + nsCOMPtr<nsIDocument> mDocument; 1.99 + 1.100 + // range to check, see SetPosition and SetEnd 1.101 + nsINode* mRootNode; 1.102 + NodeOffset mSoftBegin; 1.103 + NodeOffset mSoftEnd; 1.104 + 1.105 + // DOM text covering the soft range, with newlines added at block boundaries 1.106 + nsString mSoftText; 1.107 + // A list of where we extracted text from, ordered by mSoftTextOffset. A given 1.108 + // DOM node appears at most once in this list. 1.109 + struct DOMTextMapping { 1.110 + NodeOffset mNodeOffset; 1.111 + int32_t mSoftTextOffset; 1.112 + int32_t mLength; 1.113 + 1.114 + DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength) 1.115 + : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset), 1.116 + mLength(aLength) {} 1.117 + }; 1.118 + nsTArray<DOMTextMapping> mSoftTextDOMMapping; 1.119 + 1.120 + // A list of the "real words" in mSoftText, ordered by mSoftTextOffset 1.121 + struct RealWord { 1.122 + int32_t mSoftTextOffset; 1.123 + int32_t mLength; 1.124 + bool mCheckableWord; 1.125 + 1.126 + RealWord(int32_t aOffset, int32_t aLength, bool aCheckable) 1.127 + : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {} 1.128 + int32_t EndOffset() const { return mSoftTextOffset + mLength; } 1.129 + }; 1.130 + nsTArray<RealWord> mRealWords; 1.131 + int32_t mNextWordIndex; 1.132 + 1.133 + bool mSoftTextValid; 1.134 + 1.135 + void InvalidateWords() { mSoftTextValid = false; } 1.136 + void EnsureWords(); 1.137 + 1.138 + int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); 1.139 + // Map an offset into mSoftText to a DOM position. 
Note that two DOM positions 1.140 + // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb 1.141 + // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So, 1.142 + // aHintBefore controls which position we return ... if aHint is eEnd 1.143 + // then the position indicates the END of a range so we return (A,4). Otherwise 1.144 + // the position indicates the START of a range so we return (B,0). 1.145 + enum DOMMapHint { HINT_BEGIN, HINT_END }; 1.146 + NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, 1.147 + DOMMapHint aHint); 1.148 + // Finds the index of the real word containing aSoftTextOffset, or -1 if none 1.149 + // If it's exactly between two words, then if aHint is HINT_BEGIN, return the 1.150 + // later word (favouring the assumption that it's the BEGINning of a word), 1.151 + // otherwise return the earlier word (assuming it's the END of a word). 1.152 + // If aSearchForward is true, then if we don't find a word at the given 1.153 + // position, search forward until we do find a word and return that (if found). 1.154 + int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, 1.155 + bool aSearchForward); 1.156 + 1.157 + // build mSoftText and mSoftTextDOMMapping 1.158 + void BuildSoftText(); 1.159 + // Build mRealWords array 1.160 + void BuildRealWords(); 1.161 + 1.162 + void SplitDOMWord(int32_t aStart, int32_t aEnd); 1.163 + 1.164 + // Convenience functions, object must be initialized 1.165 + nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange); 1.166 + nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange); 1.167 +}; 1.168 + 1.169 +#endif