Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #ifndef mozInlineSpellWordUtil_h |
michael@0 | 7 | #define mozInlineSpellWordUtil_h |
michael@0 | 8 | |
michael@0 | 9 | #include "nsCOMPtr.h" |
michael@0 | 10 | #include "nsIDOMDocument.h" |
michael@0 | 11 | #include "nsIDocument.h" |
michael@0 | 12 | #include "nsString.h" |
michael@0 | 13 | #include "nsTArray.h" |
michael@0 | 14 | |
michael@0 | 15 | //#define DEBUG_SPELLCHECK |
michael@0 | 16 | |
michael@0 | 17 | class nsRange; |
michael@0 | 18 | class nsINode; |
michael@0 | 19 | |
michael@0 | 20 | /** |
michael@0 | 21 | * This class extracts text from the DOM and builds it into a single string. |
michael@0 | 22 | * The string includes whitespace breaks wherever non-inline elements begin |
michael@0 | 23 | * and end. This string is broken into "real words", following somewhat |
michael@0 | 24 | * complex rules; for example substrings that look like URLs or |
michael@0 | 25 | * email addresses are treated as single words, but otherwise many kinds of |
michael@0 | 26 | * punctuation are treated as word separators. GetNextWord provides a way |
michael@0 | 27 | * to iterate over these "real words". |
michael@0 | 28 | * |
michael@0 | 29 | * The basic operation is: |
michael@0 | 30 | * |
michael@0 | 31 | * 1. Call Init with the weak pointer to the editor that you're using. |
michael@0 | 32 | * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop |
michael@0 | 33 | * at the word boundary after that. If SetEnd is not called, we'll stop |
michael@0 | 34 | * at the end of the document's root element. |
michael@0 | 35 | * 3. Call SetPosition to initialize the current position inside the |
michael@0 | 36 | * previously given range. |
michael@0 | 37 | * 4. Call GetNextWord over and over until it returns false. |
michael@0 | 38 | */ |
michael@0 | 39 | |
michael@0 | 40 | class mozInlineSpellWordUtil |
michael@0 | 41 | { |
michael@0 | 42 | public: |
michael@0 | 43 | struct NodeOffset { |
michael@0 | 44 | nsINode* mNode; |
michael@0 | 45 | int32_t mOffset; |
michael@0 | 46 | |
michael@0 | 47 | NodeOffset(nsINode* aNode, int32_t aOffset) : |
michael@0 | 48 | mNode(aNode), mOffset(aOffset) {} |
michael@0 | 49 | }; |
michael@0 | 50 | |
michael@0 | 51 | mozInlineSpellWordUtil() |
michael@0 | 52 | : mRootNode(nullptr), |
michael@0 | 53 | mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0), |
michael@0 | 54 | mNextWordIndex(-1), mSoftTextValid(false) {} |
michael@0 | 55 | |
michael@0 | 56 | nsresult Init(nsWeakPtr aWeakEditor); |
michael@0 | 57 | |
michael@0 | 58 | nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset); |
michael@0 | 59 | |
michael@0 | 60 | // Sets the current position; this should be inside the range. If we are in |
michael@0 | 61 | // the middle of a word, we'll move to its start. |
michael@0 | 62 | nsresult SetPosition(nsINode* aNode, int32_t aOffset); |
michael@0 | 63 | |
michael@0 | 64 | // Given a point inside or immediately following a word, this returns the |
michael@0 | 65 | // DOM range that exactly encloses that word's characters. The current |
michael@0 | 66 | // position will be at the end of the word. This will find the previous |
michael@0 | 67 | // word if the current position is space, so if you care that the point is |
michael@0 | 68 | // inside the word, you should check the range. |
michael@0 | 69 | // |
michael@0 | 70 | // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called |
michael@0 | 71 | // before you actually generate the range you are interested in and iterate |
michael@0 | 72 | // the words in it. |
michael@0 | 73 | nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset, |
michael@0 | 74 | nsRange** aRange); |
michael@0 | 75 | |
michael@0 | 76 | // Moves to the next word in the range, and retrieves its text and range. |
michael@0 | 77 | // An empty word and a nullptr range are returned when we are done checking. |
michael@0 | 78 | // aSkipChecking will be set if the word is "special" and shouldn't be |
michael@0 | 79 | // checked (e.g., an email address). |
michael@0 | 80 | nsresult GetNextWord(nsAString& aText, nsRange** aRange, |
michael@0 | 81 | bool* aSkipChecking); |
michael@0 | 82 | |
michael@0 | 83 | // Call to normalize some punctuation. This function takes a non-const |
michael@0 | 84 | // nsSubstring reference so we can access characters directly. |
michael@0 | 85 | static void NormalizeWord(nsSubstring& aWord); |
michael@0 | 86 | |
michael@0 | 87 | nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; } |
michael@0 | 88 | nsIDocument* GetDocument() const { return mDocument; } |
michael@0 | 89 | nsINode* GetRootNode() { return mRootNode; } |
michael@0 | 90 | |
michael@0 | 91 | private: |
michael@0 | 92 | |
michael@0 | 93 | // cached stuff for the editor, set by Init |
michael@0 | 94 | nsCOMPtr<nsIDOMDocument> mDOMDocument; |
michael@0 | 95 | nsCOMPtr<nsIDocument> mDocument; |
michael@0 | 96 | |
michael@0 | 97 | // range to check, see SetPosition and SetEnd |
michael@0 | 98 | nsINode* mRootNode; |
michael@0 | 99 | NodeOffset mSoftBegin; |
michael@0 | 100 | NodeOffset mSoftEnd; |
michael@0 | 101 | |
michael@0 | 102 | // DOM text covering the soft range, with newlines added at block boundaries |
michael@0 | 103 | nsString mSoftText; |
michael@0 | 104 | // A list of where we extracted text from, ordered by mSoftTextOffset. A given |
michael@0 | 105 | // DOM node appears at most once in this list. |
michael@0 | 106 | struct DOMTextMapping { |
michael@0 | 107 | NodeOffset mNodeOffset; |
michael@0 | 108 | int32_t mSoftTextOffset; |
michael@0 | 109 | int32_t mLength; |
michael@0 | 110 | |
michael@0 | 111 | DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength) |
michael@0 | 112 | : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset), |
michael@0 | 113 | mLength(aLength) {} |
michael@0 | 114 | }; |
michael@0 | 115 | nsTArray<DOMTextMapping> mSoftTextDOMMapping; |
michael@0 | 116 | |
michael@0 | 117 | // A list of the "real words" in mSoftText, ordered by mSoftTextOffset |
michael@0 | 118 | struct RealWord { |
michael@0 | 119 | int32_t mSoftTextOffset; |
michael@0 | 120 | int32_t mLength; |
michael@0 | 121 | bool mCheckableWord; |
michael@0 | 122 | |
michael@0 | 123 | RealWord(int32_t aOffset, int32_t aLength, bool aCheckable) |
michael@0 | 124 | : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {} |
michael@0 | 125 | int32_t EndOffset() const { return mSoftTextOffset + mLength; } |
michael@0 | 126 | }; |
michael@0 | 127 | nsTArray<RealWord> mRealWords; |
michael@0 | 128 | int32_t mNextWordIndex; |
michael@0 | 129 | |
michael@0 | 130 | bool mSoftTextValid; |
michael@0 | 131 | |
michael@0 | 132 | void InvalidateWords() { mSoftTextValid = false; } |
michael@0 | 133 | void EnsureWords(); |
michael@0 | 134 | |
michael@0 | 135 | int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); |
michael@0 | 136 | // Map an offset into mSoftText to a DOM position. Note that two DOM positions |
michael@0 | 137 | // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb |
michael@0 | 138 | // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So, |
michael@0 | 139 | // aHint controls which position we return ... if aHint is HINT_END |
michael@0 | 140 | // then the position indicates the END of a range so we return (A,4). Otherwise |
michael@0 | 141 | // the position indicates the START of a range so we return (B,0). |
michael@0 | 142 | enum DOMMapHint { HINT_BEGIN, HINT_END }; |
michael@0 | 143 | NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset, |
michael@0 | 144 | DOMMapHint aHint); |
michael@0 | 145 | // Finds the index of the real word containing aSoftTextOffset, or -1 if none. |
michael@0 | 146 | // If it's exactly between two words, then if aHint is HINT_BEGIN, return the |
michael@0 | 147 | // later word (favouring the assumption that it's the BEGINning of a word), |
michael@0 | 148 | // otherwise return the earlier word (assuming it's the END of a word). |
michael@0 | 149 | // If aSearchForward is true, then if we don't find a word at the given |
michael@0 | 150 | // position, search forward until we do find a word and return that (if found). |
michael@0 | 151 | int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint, |
michael@0 | 152 | bool aSearchForward); |
michael@0 | 153 | |
michael@0 | 154 | // Build mSoftText and mSoftTextDOMMapping |
michael@0 | 155 | void BuildSoftText(); |
michael@0 | 156 | // Build mRealWords array |
michael@0 | 157 | void BuildRealWords(); |
michael@0 | 158 | |
michael@0 | 159 | void SplitDOMWord(int32_t aStart, int32_t aEnd); |
michael@0 | 160 | |
michael@0 | 161 | // Convenience functions, object must be initialized |
michael@0 | 162 | nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange); |
michael@0 | 163 | nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange); |
michael@0 | 164 | }; |
michael@0 | 165 | |
michael@0 | 166 | #endif |