Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef mozInlineSpellWordUtil_h
7 #define mozInlineSpellWordUtil_h
9 #include "nsCOMPtr.h"
10 #include "nsIDOMDocument.h"
11 #include "nsIDocument.h"
12 #include "nsString.h"
13 #include "nsTArray.h"
15 //#define DEBUG_SPELLCHECK
17 class nsRange;
18 class nsINode;
20 /**
21 * This class extracts text from the DOM and builds it into a single string.
22 * The string includes whitespace breaks whereever non-inline elements begin
23 * and end. This string is broken into "real words", following somewhat
24 * complex rules; for example substrings that look like URLs or
25 * email addresses are treated as single words, but otherwise many kinds of
26 * punctuation are treated as word separators. GetNextWord provides a way
27 * to iterate over these "real words".
28 *
29 * The basic operation is:
30 *
31 * 1. Call Init with the weak pointer to the editor that you're using.
32 * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
33 * at the word boundary after that. If SetEnd is not called, we'll stop
34 * at the end of the document's root element.
35 * 3. Call SetPosition to initialize the current position inside the
36 * previously given range.
37 * 4. Call GetNextWord over and over until it returns false.
38 */
40 class mozInlineSpellWordUtil
41 {
42 public:
43 struct NodeOffset {
44 nsINode* mNode;
45 int32_t mOffset;
47 NodeOffset(nsINode* aNode, int32_t aOffset) :
48 mNode(aNode), mOffset(aOffset) {}
49 };
51 mozInlineSpellWordUtil()
52 : mRootNode(nullptr),
53 mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0),
54 mNextWordIndex(-1), mSoftTextValid(false) {}
56 nsresult Init(nsWeakPtr aWeakEditor);
58 nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset);
60 // sets the current position, this should be inside the range. If we are in
61 // the middle of a word, we'll move to its start.
62 nsresult SetPosition(nsINode* aNode, int32_t aOffset);
64 // Given a point inside or immediately following a word, this returns the
65 // DOM range that exactly encloses that word's characters. The current
66 // position will be at the end of the word. This will find the previous
67 // word if the current position is space, so if you care that the point is
68 // inside the word, you should check the range.
69 //
70 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
71 // before you actually generate the range you are interested in and iterate
72 // the words in it.
73 nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset,
74 nsRange** aRange);
76 // Moves to the the next word in the range, and retrieves it's text and range.
77 // An empty word and a nullptr range are returned when we are done checking.
78 // aSkipChecking will be set if the word is "special" and shouldn't be
79 // checked (e.g., an email address).
80 nsresult GetNextWord(nsAString& aText, nsRange** aRange,
81 bool* aSkipChecking);
83 // Call to normalize some punctuation. This function takes an autostring
84 // so we can access characters directly.
85 static void NormalizeWord(nsSubstring& aWord);
87 nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }
88 nsIDocument* GetDocument() const { return mDocument; }
89 nsINode* GetRootNode() { return mRootNode; }
91 private:
93 // cached stuff for the editor, set by Init
94 nsCOMPtr<nsIDOMDocument> mDOMDocument;
95 nsCOMPtr<nsIDocument> mDocument;
97 // range to check, see SetPosition and SetEnd
98 nsINode* mRootNode;
99 NodeOffset mSoftBegin;
100 NodeOffset mSoftEnd;
102 // DOM text covering the soft range, with newlines added at block boundaries
103 nsString mSoftText;
104 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
105 // DOM node appears at most once in this list.
106 struct DOMTextMapping {
107 NodeOffset mNodeOffset;
108 int32_t mSoftTextOffset;
109 int32_t mLength;
111 DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength)
112 : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
113 mLength(aLength) {}
114 };
115 nsTArray<DOMTextMapping> mSoftTextDOMMapping;
117 // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
118 struct RealWord {
119 int32_t mSoftTextOffset;
120 int32_t mLength;
121 bool mCheckableWord;
123 RealWord(int32_t aOffset, int32_t aLength, bool aCheckable)
124 : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
125 int32_t EndOffset() const { return mSoftTextOffset + mLength; }
126 };
127 nsTArray<RealWord> mRealWords;
128 int32_t mNextWordIndex;
130 bool mSoftTextValid;
132 void InvalidateWords() { mSoftTextValid = false; }
133 void EnsureWords();
135 int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
136 // Map an offset into mSoftText to a DOM position. Note that two DOM positions
137 // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
138 // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
139 // aHintBefore controls which position we return ... if aHint is eEnd
140 // then the position indicates the END of a range so we return (A,4). Otherwise
141 // the position indicates the START of a range so we return (B,0).
142 enum DOMMapHint { HINT_BEGIN, HINT_END };
143 NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
144 DOMMapHint aHint);
145 // Finds the index of the real word containing aSoftTextOffset, or -1 if none
146 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
147 // later word (favouring the assumption that it's the BEGINning of a word),
148 // otherwise return the earlier word (assuming it's the END of a word).
149 // If aSearchForward is true, then if we don't find a word at the given
150 // position, search forward until we do find a word and return that (if found).
151 int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
152 bool aSearchForward);
154 // build mSoftText and mSoftTextDOMMapping
155 void BuildSoftText();
156 // Build mRealWords array
157 void BuildRealWords();
159 void SplitDOMWord(int32_t aStart, int32_t aEnd);
161 // Convenience functions, object must be initialized
162 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);
163 nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);
164 };
166 #endif