extensions/spellcheck/src/mozInlineSpellWordUtil.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,166 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#ifndef mozInlineSpellWordUtil_h
    1.10 +#define mozInlineSpellWordUtil_h
    1.11 +
    1.12 +#include "nsCOMPtr.h"
    1.13 +#include "nsIDOMDocument.h"
    1.14 +#include "nsIDocument.h"
    1.15 +#include "nsString.h"
    1.16 +#include "nsTArray.h"
    1.17 +
    1.18 +//#define DEBUG_SPELLCHECK
    1.19 +
    1.20 +class nsRange;
    1.21 +class nsINode;
    1.22 +
    1.23 +/**
    1.24 + *    This class extracts text from the DOM and builds it into a single string.
    1.25 + *    The string includes whitespace breaks wherever non-inline elements begin
    1.26 + *    and end. This string is broken into "real words", following somewhat
    1.27 + *    complex rules; for example substrings that look like URLs or
    1.28 + *    email addresses are treated as single words, but otherwise many kinds of
    1.29 + *    punctuation are treated as word separators. GetNextWord provides a way
    1.30 + *    to iterate over these "real words".
    1.31 + *
    1.32 + *    The basic operation is:
    1.33 + *
    1.34 + *    1. Call Init with the weak pointer to the editor that you're using.
    1.35 + *    2. Call SetEnd to set where you want to stop spellchecking. We'll stop
    1.36 + *       at the word boundary after that. If SetEnd is not called, we'll stop
    1.37 + *       at the end of the document's root element.
    1.38 + *    3. Call SetPosition to initialize the current position inside the
    1.39 + *       previously given range.
    1.40 + *    4. Call GetNextWord over and over until it returns false.
    1.41 + */
    1.42 +
    1.43 +class mozInlineSpellWordUtil
    1.44 +{
    1.45 +public:
    1.46 +  struct NodeOffset {
    1.47 +    nsINode* mNode;
    1.48 +    int32_t  mOffset;
    1.49 +    
    1.50 +    NodeOffset(nsINode* aNode, int32_t aOffset) :
    1.51 +      mNode(aNode), mOffset(aOffset) {}
    1.52 +  };
    1.53 +
    1.54 +  mozInlineSpellWordUtil()
    1.55 +    : mRootNode(nullptr),
    1.56 +      mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0),
    1.57 +      mNextWordIndex(-1), mSoftTextValid(false) {}
    1.58 +
    1.59 +  nsresult Init(nsWeakPtr aWeakEditor);
    1.60 +
    1.61 +  nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset);
    1.62 +
    1.63 +  // Sets the current position; this should be inside the range. If we are in
    1.64 +  // the middle of a word, we'll move to its start.
    1.65 +  nsresult SetPosition(nsINode* aNode, int32_t aOffset);
    1.66 +
    1.67 +  // Given a point inside or immediately following a word, this returns the
    1.68 +  // DOM range that exactly encloses that word's characters. The current
    1.69 +  // position will be at the end of the word. This will find the previous
    1.70 +  // word if the current position is space, so if you care that the point is
    1.71 +  // inside the word, you should check the range.
    1.72 +  //
    1.73 +  // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
    1.74 +  // before you actually generate the range you are interested in and iterate
    1.75 +  // the words in it.
    1.76 +  nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset,
    1.77 +                           nsRange** aRange);
    1.78 +
    1.79 +  // Moves to the next word in the range, and retrieves its text and range.
    1.80 +  // An empty word and a nullptr range are returned when we are done checking.
    1.81 +  // aSkipChecking will be set if the word is "special" and shouldn't be
    1.82 +  // checked (e.g., an email address).
    1.83 +  nsresult GetNextWord(nsAString& aText, nsRange** aRange,
    1.84 +                       bool* aSkipChecking);
    1.85 +
    1.86 +  // Call to normalize some punctuation. This function takes a mutable nsSubstring
    1.87 +  // so we can access characters directly.
    1.88 +  static void NormalizeWord(nsSubstring& aWord);
    1.89 +
    1.90 +  nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }
    1.91 +  nsIDocument* GetDocument() const { return mDocument; }
    1.92 +  nsINode* GetRootNode() { return mRootNode; }
    1.93 +  
    1.94 +private:
    1.95 +
    1.96 +  // cached stuff for the editor, set by Init
    1.97 +  nsCOMPtr<nsIDOMDocument> mDOMDocument;
    1.98 +  nsCOMPtr<nsIDocument>         mDocument;
    1.99 +
   1.100 +  // range to check, see SetPosition and SetEnd
   1.101 +  nsINode*    mRootNode;
   1.102 +  NodeOffset  mSoftBegin;
   1.103 +  NodeOffset  mSoftEnd;
   1.104 +
   1.105 +  // DOM text covering the soft range, with newlines added at block boundaries
   1.106 +  nsString mSoftText;
   1.107 +  // A list of where we extracted text from, ordered by mSoftTextOffset. A given
   1.108 +  // DOM node appears at most once in this list.
   1.109 +  struct DOMTextMapping {
   1.110 +    NodeOffset mNodeOffset;
   1.111 +    int32_t    mSoftTextOffset;
   1.112 +    int32_t    mLength;
   1.113 +    
   1.114 +    DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength)
   1.115 +      : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
   1.116 +        mLength(aLength) {}
   1.117 +  };
   1.118 +  nsTArray<DOMTextMapping> mSoftTextDOMMapping;
   1.119 +  
   1.120 +  // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
   1.121 +  struct RealWord {
   1.122 +    int32_t      mSoftTextOffset;
   1.123 +    int32_t      mLength;
   1.124 +    bool mCheckableWord;
   1.125 +    
   1.126 +    RealWord(int32_t aOffset, int32_t aLength, bool aCheckable)
   1.127 +      : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
   1.128 +    int32_t EndOffset() const { return mSoftTextOffset + mLength; }
   1.129 +  };
   1.130 +  nsTArray<RealWord> mRealWords;
   1.131 +  int32_t            mNextWordIndex;
   1.132 +
   1.133 +  bool mSoftTextValid;
   1.134 +
   1.135 +  void InvalidateWords() { mSoftTextValid = false; }
   1.136 +  void EnsureWords();
   1.137 +  
   1.138 +  int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
   1.139 +  // Map an offset into mSoftText to a DOM position. Note that two DOM positions
   1.140 +  // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
   1.141 +  // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
   1.142 +  // aHint controls which position we return ... if aHint is HINT_END
   1.143 +  // then the position indicates the END of a range so we return (A,4). Otherwise
   1.144 +  // the position indicates the START of a range so we return (B,0).
   1.145 +  enum DOMMapHint { HINT_BEGIN, HINT_END };
   1.146 +  NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
   1.147 +                                            DOMMapHint aHint);
   1.148 +  // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
   1.149 +  // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
   1.150 +  // later word (favouring the assumption that it's the BEGINning of a word),
   1.151 +  // otherwise return the earlier word (assuming it's the END of a word).
   1.152 +  // If aSearchForward is true, then if we don't find a word at the given
   1.153 +  // position, search forward until we do find a word and return that (if found).
   1.154 +  int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
   1.155 +                                 bool aSearchForward);
   1.156 +    
   1.157 +  // Build mSoftText and mSoftTextDOMMapping
   1.158 +  void BuildSoftText();
   1.159 +  // Build mRealWords array
   1.160 +  void BuildRealWords();
   1.161 +
   1.162 +  void SplitDOMWord(int32_t aStart, int32_t aEnd);
   1.163 +
   1.164 +  // Convenience functions, object must be initialized
   1.165 +  nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);
   1.166 +  nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);
   1.167 +};
   1.168 +
   1.169 +#endif

mercurial