|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #ifndef mozInlineSpellWordUtil_h |
|
7 #define mozInlineSpellWordUtil_h |
|
8 |
|
9 #include "nsCOMPtr.h" |
|
10 #include "nsIDOMDocument.h" |
|
11 #include "nsIDocument.h" |
|
12 #include "nsString.h" |
|
13 #include "nsTArray.h" |
|
14 |
|
15 //#define DEBUG_SPELLCHECK |
|
16 |
|
17 class nsRange; |
|
18 class nsINode; |
|
19 |
|
/**
 * This class extracts text from the DOM and builds it into a single string.
 * The string includes whitespace breaks wherever non-inline elements begin
 * and end. This string is broken into "real words", following somewhat
 * complex rules; for example substrings that look like URLs or
 * email addresses are treated as single words, but otherwise many kinds of
 * punctuation are treated as word separators. GetNextWord provides a way
 * to iterate over these "real words".
 *
 * The basic operation is:
 *
 * 1. Call Init with the weak pointer to the editor that you're using.
 * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
 *    at the word boundary after that. If SetEnd is not called, we'll stop
 *    at the end of the document's root element.
 * 3. Call SetPosition to initialize the current position inside the
 *    previously given range.
 * 4. Call GetNextWord over and over until it returns false.
 */
|
39 |
|
40 class mozInlineSpellWordUtil |
|
41 { |
|
42 public: |
|
43 struct NodeOffset { |
|
44 nsINode* mNode; |
|
45 int32_t mOffset; |
|
46 |
|
47 NodeOffset(nsINode* aNode, int32_t aOffset) : |
|
48 mNode(aNode), mOffset(aOffset) {} |
|
49 }; |
|
50 |
|
51 mozInlineSpellWordUtil() |
|
52 : mRootNode(nullptr), |
|
53 mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0), |
|
54 mNextWordIndex(-1), mSoftTextValid(false) {} |
|
55 |
|
56 nsresult Init(nsWeakPtr aWeakEditor); |
|
57 |
|
58 nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset); |
|
59 |
|
60 // sets the current position, this should be inside the range. If we are in |
|
61 // the middle of a word, we'll move to its start. |
|
62 nsresult SetPosition(nsINode* aNode, int32_t aOffset); |
|
63 |
|
64 // Given a point inside or immediately following a word, this returns the |
|
65 // DOM range that exactly encloses that word's characters. The current |
|
66 // position will be at the end of the word. This will find the previous |
|
67 // word if the current position is space, so if you care that the point is |
|
68 // inside the word, you should check the range. |
|
69 // |
|
70 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called |
|
71 // before you actually generate the range you are interested in and iterate |
|
72 // the words in it. |
|
73 nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset, |
|
74 nsRange** aRange); |
|
75 |
|
76 // Moves to the the next word in the range, and retrieves it's text and range. |
|
77 // An empty word and a nullptr range are returned when we are done checking. |
|
78 // aSkipChecking will be set if the word is "special" and shouldn't be |
|
79 // checked (e.g., an email address). |
|
80 nsresult GetNextWord(nsAString& aText, nsRange** aRange, |
|
81 bool* aSkipChecking); |
|
82 |
|
83 // Call to normalize some punctuation. This function takes an autostring |
|
84 // so we can access characters directly. |
|
85 static void NormalizeWord(nsSubstring& aWord); |
|
86 |
|
87 nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; } |
|
88 nsIDocument* GetDocument() const { return mDocument; } |
|
89 nsINode* GetRootNode() { return mRootNode; } |
|
90 |
|
91 private: |
|
92 |
|
93 // cached stuff for the editor, set by Init |
|
94 nsCOMPtr<nsIDOMDocument> mDOMDocument; |
|
95 nsCOMPtr<nsIDocument> mDocument; |
|
96 |
|
97 // range to check, see SetPosition and SetEnd |
|
98 nsINode* mRootNode; |
|
99 NodeOffset mSoftBegin; |
|
100 NodeOffset mSoftEnd; |
|
101 |
|
102 // DOM text covering the soft range, with newlines added at block boundaries |
|
103 nsString mSoftText; |
|
104 // A list of where we extracted text from, ordered by mSoftTextOffset. A given |
|
105 // DOM node appears at most once in this list. |
|
106 struct DOMTextMapping { |
|
107 NodeOffset mNodeOffset; |
|
108 int32_t mSoftTextOffset; |
|
109 int32_t mLength; |
|
110 |
|
111 DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength) |
|
112 : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset), |
|
113 mLength(aLength) {} |
|
114 }; |
|
115 nsTArray<DOMTextMapping> mSoftTextDOMMapping; |
|
116 |
|
117 // A list of the "real words" in mSoftText, ordered by mSoftTextOffset |
|
118 struct RealWord { |
|
119 int32_t mSoftTextOffset; |
|
120 int32_t mLength; |
|
121 bool mCheckableWord; |
|
122 |
|
123 RealWord(int32_t aOffset, int32_t aLength, bool aCheckable) |
|
124 : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {} |
|
125 int32_t EndOffset() const { return mSoftTextOffset + mLength; } |
|
126 }; |
|
127 nsTArray<RealWord> mRealWords; |
|
128 int32_t mNextWordIndex; |
|
129 |
|
130 bool mSoftTextValid; |
|
131 |
|
132 void InvalidateWords() { mSoftTextValid = false; } |
|
133 void EnsureWords(); |
|
134 |
|
135 int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); |
|
136 // Map an offset into mSoftText to a DOM position. Note that two DOM positions |
|
137 // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb |
|
138 // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So, |
|
139 // aHintBefore controls which position we return ... if aHint is eEnd |
|
140 // then the position indicates the END of a range so we return (A,4). Otherwise |
|
141 // the position indicates the START of a range so we return (B,0). |
|
142 enum DOMMapHint { HINT_BEGIN, HINT_END }; |
|
143 NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, |
|
144 DOMMapHint aHint); |
|
145 // Finds the index of the real word containing aSoftTextOffset, or -1 if none |
|
146 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the |
|
147 // later word (favouring the assumption that it's the BEGINning of a word), |
|
148 // otherwise return the earlier word (assuming it's the END of a word). |
|
149 // If aSearchForward is true, then if we don't find a word at the given |
|
150 // position, search forward until we do find a word and return that (if found). |
|
151 int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, |
|
152 bool aSearchForward); |
|
153 |
|
154 // build mSoftText and mSoftTextDOMMapping |
|
155 void BuildSoftText(); |
|
156 // Build mRealWords array |
|
157 void BuildRealWords(); |
|
158 |
|
159 void SplitDOMWord(int32_t aStart, int32_t aEnd); |
|
160 |
|
161 // Convenience functions, object must be initialized |
|
162 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange); |
|
163 nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange); |
|
164 }; |
|
165 |
|
166 #endif |