extensions/spellcheck/src/mozInlineSpellWordUtil.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #ifndef mozInlineSpellWordUtil_h
michael@0 7 #define mozInlineSpellWordUtil_h
michael@0 8
michael@0 9 #include "nsCOMPtr.h"
michael@0 10 #include "nsIDOMDocument.h"
michael@0 11 #include "nsIDocument.h"
michael@0 12 #include "nsString.h"
michael@0 13 #include "nsTArray.h"
michael@0 14
michael@0 15 //#define DEBUG_SPELLCHECK
michael@0 16
michael@0 17 class nsRange;
michael@0 18 class nsINode;
michael@0 19
michael@0 20 /**
michael@0 21 * This class extracts text from the DOM and builds it into a single string.
michael@0 22 * The string includes whitespace breaks wherever non-inline elements begin
michael@0 23 * and end. This string is broken into "real words", following somewhat
michael@0 24 * complex rules; for example substrings that look like URLs or
michael@0 25 * email addresses are treated as single words, but otherwise many kinds of
michael@0 26 * punctuation are treated as word separators. GetNextWord provides a way
michael@0 27 * to iterate over these "real words".
michael@0 28 *
michael@0 29 * The basic operation is:
michael@0 30 *
michael@0 31 * 1. Call Init with the weak pointer to the editor that you're using.
michael@0 32 * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
michael@0 33 * at the word boundary after that. If SetEnd is not called, we'll stop
michael@0 34 * at the end of the document's root element.
michael@0 35 * 3. Call SetPosition to initialize the current position inside the
michael@0 36 * previously given range.
michael@0 37 * 4. Call GetNextWord over and over until it returns false.
michael@0 38 */
michael@0 39
michael@0 40 class mozInlineSpellWordUtil
michael@0 41 {
michael@0 42 public:
michael@0 43 struct NodeOffset { // a DOM position: a node plus an integer offset
michael@0 44 nsINode* mNode;
michael@0 45 int32_t mOffset;
michael@0 46
michael@0 47 NodeOffset(nsINode* aNode, int32_t aOffset) :
michael@0 48 mNode(aNode), mOffset(aOffset) {}
michael@0 49 };
michael@0 50
michael@0 51 mozInlineSpellWordUtil()
michael@0 52 : mRootNode(nullptr),
michael@0 53 mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0),
michael@0 54 mNextWordIndex(-1), mSoftTextValid(false) {}
michael@0 55
michael@0 56 nsresult Init(nsWeakPtr aWeakEditor); // takes the weak pointer to the editor in use
michael@0 57
michael@0 58 nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset); // sets where to stop spellchecking
michael@0 59
michael@0 60 // Sets the current position, which should be inside the range. If we are in
michael@0 61 // the middle of a word, we'll move to its start.
michael@0 62 nsresult SetPosition(nsINode* aNode, int32_t aOffset);
michael@0 63
michael@0 64 // Given a point inside or immediately following a word, this returns the
michael@0 65 // DOM range that exactly encloses that word's characters. The current
michael@0 66 // position will be at the end of the word. This will find the previous
michael@0 67 // word if the current position is space, so if you care that the point is
michael@0 68 // inside the word, you should check the range.
michael@0 69 //
michael@0 70 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
michael@0 71 // before you actually generate the range you are interested in and iterate
michael@0 72 // the words in it.
michael@0 73 nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset,
michael@0 74 nsRange** aRange);
michael@0 75
michael@0 76 // Moves to the next word in the range, and retrieves its text and range.
michael@0 77 // An empty word and a nullptr range are returned when we are done checking.
michael@0 78 // aSkipChecking will be set if the word is "special" and shouldn't be
michael@0 79 // checked (e.g., an email address).
michael@0 80 nsresult GetNextWord(nsAString& aText, nsRange** aRange,
michael@0 81 bool* aSkipChecking);
michael@0 82
michael@0 83 // Call to normalize some punctuation. This function takes an autostring
michael@0 84 // so we can access characters directly.
michael@0 85 static void NormalizeWord(nsSubstring& aWord);
michael@0 86
michael@0 87 nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }
michael@0 88 nsIDocument* GetDocument() const { return mDocument; }
michael@0 89 nsINode* GetRootNode() { return mRootNode; } // NOTE(review): not const, unlike the two getters above
michael@0 90
michael@0 91 private:
michael@0 92
michael@0 93 // cached stuff for the editor, set by Init
michael@0 94 nsCOMPtr<nsIDOMDocument> mDOMDocument;
michael@0 95 nsCOMPtr<nsIDocument> mDocument;
michael@0 96
michael@0 97 // range to check, see SetPosition and SetEnd
michael@0 98 nsINode* mRootNode;
michael@0 99 NodeOffset mSoftBegin;
michael@0 100 NodeOffset mSoftEnd;
michael@0 101
michael@0 102 // DOM text covering the soft range, with newlines added at block boundaries
michael@0 103 nsString mSoftText;
michael@0 104 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
michael@0 105 // DOM node appears at most once in this list.
michael@0 106 struct DOMTextMapping {
michael@0 107 NodeOffset mNodeOffset;
michael@0 108 int32_t mSoftTextOffset;
michael@0 109 int32_t mLength;
michael@0 110
michael@0 111 DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength)
michael@0 112 : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
michael@0 113 mLength(aLength) {}
michael@0 114 };
michael@0 115 nsTArray<DOMTextMapping> mSoftTextDOMMapping;
michael@0 116
michael@0 117 // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
michael@0 118 struct RealWord {
michael@0 119 int32_t mSoftTextOffset;
michael@0 120 int32_t mLength;
michael@0 121 bool mCheckableWord;
michael@0 122
michael@0 123 RealWord(int32_t aOffset, int32_t aLength, bool aCheckable)
michael@0 124 : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
michael@0 125 int32_t EndOffset() const { return mSoftTextOffset + mLength; } // offset one past the word's last character
michael@0 126 };
michael@0 127 nsTArray<RealWord> mRealWords;
michael@0 128 int32_t mNextWordIndex; // starts at -1; presumably an index into mRealWords -- TODO confirm
michael@0 129
michael@0 130 bool mSoftTextValid; // starts false; cleared again by InvalidateWords()
michael@0 131
michael@0 132 void InvalidateWords() { mSoftTextValid = false; }
michael@0 133 void EnsureWords();
michael@0 134
michael@0 135 int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
michael@0 136 // Map an offset into mSoftText to a DOM position. Note that two DOM positions
michael@0 137 // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
michael@0 138 // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
michael@0 139 // aHint controls which position we return ... if aHint is HINT_END
michael@0 140 // then the position indicates the END of a range so we return (A,4). Otherwise
michael@0 141 // the position indicates the START of a range so we return (B,0).
michael@0 142 enum DOMMapHint { HINT_BEGIN, HINT_END };
michael@0 143 NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
michael@0 144 DOMMapHint aHint);
michael@0 145 // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
michael@0 146 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
michael@0 147 // later word (favouring the assumption that it's the BEGINning of a word),
michael@0 148 // otherwise return the earlier word (assuming it's the END of a word).
michael@0 149 // If aSearchForward is true, then if we don't find a word at the given
michael@0 150 // position, search forward until we do find a word and return that (if found).
michael@0 151 int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
michael@0 152 bool aSearchForward);
michael@0 153
michael@0 154 // build mSoftText and mSoftTextDOMMapping
michael@0 155 void BuildSoftText();
michael@0 156 // Build mRealWords array
michael@0 157 void BuildRealWords();
michael@0 158
michael@0 159 void SplitDOMWord(int32_t aStart, int32_t aEnd); // NOTE(review): undocumented; aStart/aEnd are presumably mSoftText offsets -- confirm
michael@0 160
michael@0 161 // Convenience functions, object must be initialized
michael@0 162 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);
michael@0 163 nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);
michael@0 164 };
michael@0 165
michael@0 166 #endif

mercurial