The Tor Browser: extensions/spellcheck/src/mozInlineSpellWordUtil.h@6474c204b198

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     6 #ifndef mozInlineSpellWordUtil_h

     7 #define mozInlineSpellWordUtil_h

     9 #include "nsCOMPtr.h"

    10 #include "nsIDOMDocument.h"

    11 #include "nsIDocument.h"

    12 #include "nsString.h"

    13 #include "nsTArray.h"

    15 //#define DEBUG_SPELLCHECK

    17 class nsRange;

    18 class nsINode;

    20 /**

    21  *    This class extracts text from the DOM and builds it into a single string.

    22  *    The string includes whitespace breaks whereever non-inline elements begin

    23  *    and end. This string is broken into "real words", following somewhat

    24  *    complex rules; for example substrings that look like URLs or

    25  *    email addresses are treated as single words, but otherwise many kinds of

    26  *    punctuation are treated as word separators. GetNextWord provides a way

    27  *    to iterate over these "real words".

    28  *

    29  *    The basic operation is:

    30  *

    31  *    1. Call Init with the weak pointer to the editor that you're using.

    32  *    2. Call SetEnd to set where you want to stop spellchecking. We'll stop

    33  *       at the word boundary after that. If SetEnd is not called, we'll stop

    34  *       at the end of the document's root element.

    35  *    3. Call SetPosition to initialize the current position inside the

    36  *       previously given range.

    37  *    4. Call GetNextWord over and over until it returns false.

    38  */

    40 class mozInlineSpellWordUtil

    41 {

    42 public:

    43   struct NodeOffset {

    44     nsINode* mNode;

    45     int32_t  mOffset;

    47     NodeOffset(nsINode* aNode, int32_t aOffset) :

    48       mNode(aNode), mOffset(aOffset) {}

    49   };

    51   mozInlineSpellWordUtil()

    52     : mRootNode(nullptr),

    53       mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0),

    54       mNextWordIndex(-1), mSoftTextValid(false) {}

    56   nsresult Init(nsWeakPtr aWeakEditor);

    58   nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset);

    60   // sets the current position, this should be inside the range. If we are in

    61   // the middle of a word, we'll move to its start.

    62   nsresult SetPosition(nsINode* aNode, int32_t aOffset);

    64   // Given a point inside or immediately following a word, this returns the

    65   // DOM range that exactly encloses that word's characters. The current

    66   // position will be at the end of the word. This will find the previous

    67   // word if the current position is space, so if you care that the point is

    68   // inside the word, you should check the range.

    69   //

    70   // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called

    71   // before you actually generate the range you are interested in and iterate

    72   // the words in it.

    73   nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset,

    74                            nsRange** aRange);

    76   // Moves to the the next word in the range, and retrieves it's text and range.

    77   // An empty word and a nullptr range are returned when we are done checking.

    78   // aSkipChecking will be set if the word is "special" and shouldn't be

    79   // checked (e.g., an email address).

    80   nsresult GetNextWord(nsAString& aText, nsRange** aRange,

    81                        bool* aSkipChecking);

    83   // Call to normalize some punctuation. This function takes an autostring

    84   // so we can access characters directly.

    85   static void NormalizeWord(nsSubstring& aWord);

    87   nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }

    88   nsIDocument* GetDocument() const { return mDocument; }

    89   nsINode* GetRootNode() { return mRootNode; }

    91 private:

    93   // cached stuff for the editor, set by Init

    94   nsCOMPtr<nsIDOMDocument> mDOMDocument;

    95   nsCOMPtr<nsIDocument>         mDocument;

    97   // range to check, see SetPosition and SetEnd

    98   nsINode*    mRootNode;

    99   NodeOffset  mSoftBegin;

   100   NodeOffset  mSoftEnd;

   102   // DOM text covering the soft range, with newlines added at block boundaries

   103   nsString mSoftText;

   104   // A list of where we extracted text from, ordered by mSoftTextOffset. A given

   105   // DOM node appears at most once in this list.

   106   struct DOMTextMapping {

   107     NodeOffset mNodeOffset;

   108     int32_t    mSoftTextOffset;

   109     int32_t    mLength;

   111     DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength)

   112       : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),

   113         mLength(aLength) {}

   114   };

   115   nsTArray<DOMTextMapping> mSoftTextDOMMapping;

   117   // A list of the "real words" in mSoftText, ordered by mSoftTextOffset

   118   struct RealWord {

   119     int32_t      mSoftTextOffset;

   120     int32_t      mLength;

   121     bool mCheckableWord;

   123     RealWord(int32_t aOffset, int32_t aLength, bool aCheckable)

   124       : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}

   125     int32_t EndOffset() const { return mSoftTextOffset + mLength; }

   126   };

   127   nsTArray<RealWord> mRealWords;

   128   int32_t            mNextWordIndex;

   130   bool mSoftTextValid;

   132   void InvalidateWords() { mSoftTextValid = false; }

   133   void EnsureWords();

   135   int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);

   136   // Map an offset into mSoftText to a DOM position. Note that two DOM positions

   137   // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb

   138   // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,

   139   // aHintBefore controls which position we return ... if aHint is eEnd

   140   // then the position indicates the END of a range so we return (A,4). Otherwise

   141   // the position indicates the START of a range so we return (B,0).

   142   enum DOMMapHint { HINT_BEGIN, HINT_END };

   143   NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,

   144                                             DOMMapHint aHint);

   145   // Finds the index of the real word containing aSoftTextOffset, or -1 if none

   146   // If it's exactly between two words, then if aHint is HINT_BEGIN, return the

   147   // later word (favouring the assumption that it's the BEGINning of a word),

   148   // otherwise return the earlier word (assuming it's the END of a word).

   149   // If aSearchForward is true, then if we don't find a word at the given

   150   // position, search forward until we do find a word and return that (if found).

   151   int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,

   152                                  bool aSearchForward);

   154   // build mSoftText and mSoftTextDOMMapping

   155   void BuildSoftText();

   156   // Build mRealWords array

   157   void BuildRealWords();

   159   void SplitDOMWord(int32_t aStart, int32_t aEnd);

   161   // Convenience functions, object must be initialized

   162   nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);

   163   nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);

   164 };

   166 #endif

The Tor Browser / file revision