extensions/spellcheck/src/mozInlineSpellWordUtil.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/src/mozInlineSpellWordUtil.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1039 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "mozInlineSpellWordUtil.h"
    1.10 +#include "nsDebug.h"
    1.11 +#include "nsIAtom.h"
    1.12 +#include "nsComponentManagerUtils.h"
    1.13 +#include "nsIDOMCSSStyleDeclaration.h"
    1.14 +#include "nsIDOMElement.h"
    1.15 +#include "nsIDOMRange.h"
    1.16 +#include "nsIEditor.h"
    1.17 +#include "nsIDOMNode.h"
    1.18 +#include "nsIDOMHTMLBRElement.h"
    1.19 +#include "nsUnicharUtilCIID.h"
    1.20 +#include "nsUnicodeProperties.h"
    1.21 +#include "nsServiceManagerUtils.h"
    1.22 +#include "nsIContent.h"
    1.23 +#include "nsTextFragment.h"
    1.24 +#include "mozilla/dom/Element.h"
    1.25 +#include "nsRange.h"
    1.26 +#include "nsContentUtils.h"
    1.27 +#include "nsIFrame.h"
    1.28 +#include <algorithm>
    1.29 +
    1.30 +using namespace mozilla;
    1.31 +
    1.32 +// IsIgnorableCharacter
    1.33 +//
    1.34 +//    These characters are ones that we should ignore in input.
    1.35 +
    1.36 +inline bool IsIgnorableCharacter(char16_t ch)
    1.37 +{
    1.38 +  return (ch == 0xAD ||   // SOFT HYPHEN
    1.39 +          ch == 0x1806);  // MONGOLIAN TODO SOFT HYPHEN
    1.40 +}
    1.41 +
    1.42 +// IsConditionalPunctuation
    1.43 +//
    1.44 +//    Some characters (like apostrophes) require characters on each side to be
    1.45 +//    part of a word, and are otherwise punctuation.
    1.46 +
    1.47 +inline bool IsConditionalPunctuation(char16_t ch)
    1.48 +{
    1.49 +  return (ch == '\'' ||
    1.50 +          ch == 0x2019 || // RIGHT SINGLE QUOTATION MARK
    1.51 +          ch == 0x00B7); // MIDDLE DOT
    1.52 +}
    1.53 +
    1.54 +// mozInlineSpellWordUtil::Init
    1.55 +
    1.56 +nsresult
    1.57 +mozInlineSpellWordUtil::Init(nsWeakPtr aWeakEditor)
    1.58 +{
    1.59 +  nsresult rv;
    1.60 +
    1.61 +  // getting the editor can fail commonly because the editor was detached, so
    1.62 +  // don't assert
    1.63 +  nsCOMPtr<nsIEditor> editor = do_QueryReferent(aWeakEditor, &rv);
    1.64 +  if (NS_FAILED(rv))
    1.65 +    return rv;
    1.66 +
    1.67 +  nsCOMPtr<nsIDOMDocument> domDoc;
    1.68 +  rv = editor->GetDocument(getter_AddRefs(domDoc));
    1.69 +  NS_ENSURE_SUCCESS(rv, rv);
    1.70 +  NS_ENSURE_TRUE(domDoc, NS_ERROR_NULL_POINTER);
    1.71 +
    1.72 +  mDOMDocument = domDoc;
    1.73 +  mDocument = do_QueryInterface(domDoc);
    1.74 +
    1.75 +  // Find the root node for the editor. For contenteditable we'll need something
    1.76 +  // cleverer here.
    1.77 +  nsCOMPtr<nsIDOMElement> rootElt;
    1.78 +  rv = editor->GetRootElement(getter_AddRefs(rootElt));
    1.79 +  NS_ENSURE_SUCCESS(rv, rv);
    1.80 +
    1.81 +  nsCOMPtr<nsINode> rootNode = do_QueryInterface(rootElt);
    1.82 +  mRootNode = rootNode;
    1.83 +  NS_ASSERTION(mRootNode, "GetRootElement returned null *and* claimed to suceed!");
    1.84 +  return NS_OK;
    1.85 +}
    1.86 +
    1.87 +static inline bool
    1.88 +IsTextNode(nsINode* aNode)
    1.89 +{
    1.90 +  return aNode->IsNodeOfType(nsINode::eTEXT);
    1.91 +}
    1.92 +
    1.93 +typedef void (* OnLeaveNodeFunPtr)(nsINode* aNode, void* aClosure);
    1.94 +
    1.95 +// Find the next node in the DOM tree in preorder.
    1.96 +// Calls OnLeaveNodeFunPtr when the traversal leaves a node, which is
    1.97 +// why we can't just use GetNextNode here, sadly.
    1.98 +static nsINode*
    1.99 +FindNextNode(nsINode* aNode, nsINode* aRoot,
   1.100 +             OnLeaveNodeFunPtr aOnLeaveNode, void* aClosure)
   1.101 +{
   1.102 +  NS_PRECONDITION(aNode, "Null starting node?");
   1.103 +
   1.104 +  nsINode* next = aNode->GetFirstChild();
   1.105 +  if (next)
   1.106 +    return next;
   1.107 +  
   1.108 +  // Don't look at siblings or otherwise outside of aRoot
   1.109 +  if (aNode == aRoot)
   1.110 +    return nullptr;
   1.111 +
   1.112 +  next = aNode->GetNextSibling();
   1.113 +  if (next)
   1.114 +    return next;
   1.115 +
   1.116 +  // Go up
   1.117 +  for (;;) {
   1.118 +    if (aOnLeaveNode) {
   1.119 +      aOnLeaveNode(aNode, aClosure);
   1.120 +    }
   1.121 +    
   1.122 +    next = aNode->GetParent();
   1.123 +    if (next == aRoot || ! next)
   1.124 +      return nullptr;
   1.125 +    aNode = next;
   1.126 +    
   1.127 +    next = aNode->GetNextSibling();
   1.128 +    if (next)
   1.129 +      return next;
   1.130 +  }
   1.131 +}
   1.132 +
   1.133 +// aNode is not a text node. Find the first text node starting at aNode/aOffset
   1.134 +// in a preorder DOM traversal.
   1.135 +static nsINode*
   1.136 +FindNextTextNode(nsINode* aNode, int32_t aOffset, nsINode* aRoot)
   1.137 +{
   1.138 +  NS_PRECONDITION(aNode, "Null starting node?");
   1.139 +  NS_ASSERTION(!IsTextNode(aNode), "FindNextTextNode should start with a non-text node");
   1.140 +
   1.141 +  nsINode* checkNode;
   1.142 +  // Need to start at the aOffset'th child
   1.143 +  nsIContent* child = aNode->GetChildAt(aOffset);
   1.144 +
   1.145 +  if (child) {
   1.146 +    checkNode = child;
   1.147 +  } else {
   1.148 +    // aOffset was beyond the end of the child list. 
   1.149 +    // goto next node after the last descendant of aNode in
   1.150 +    // a preorder DOM traversal.
   1.151 +    checkNode = aNode->GetNextNonChildNode(aRoot);
   1.152 +  }
   1.153 +  
   1.154 +  while (checkNode && !IsTextNode(checkNode)) {
   1.155 +    checkNode = checkNode->GetNextNode(aRoot);
   1.156 +  }
   1.157 +  return checkNode;
   1.158 +}
   1.159 +
   1.160 +// mozInlineSpellWordUtil::SetEnd
   1.161 +//
   1.162 +//    We have two ranges "hard" and "soft". The hard boundary is simply
   1.163 +//    the scope of the root node. The soft boundary is that which is set
   1.164 +//    by the caller of this class by calling this function. If this function is
   1.165 +//    not called, the soft boundary is the same as the hard boundary.
   1.166 +//
   1.167 +//    When we reach the soft boundary (mSoftEnd), we keep
   1.168 +//    going until we reach the end of a word. This allows the caller to set the
   1.169 +//    end of the range to anything, and we will always check whole multiples of
   1.170 +//    words. When we reach the hard boundary we stop no matter what.
   1.171 +//
   1.172 +//    There is no beginning soft boundary. This is because we only go to the
   1.173 +//    previous node once, when finding the previous word boundary in
   1.174 +//    SetPosition(). You might think of the soft boundary as being this initial
   1.175 +//    position.
   1.176 +
   1.177 +nsresult
   1.178 +mozInlineSpellWordUtil::SetEnd(nsINode* aEndNode, int32_t aEndOffset)
   1.179 +{
   1.180 +  NS_PRECONDITION(aEndNode, "Null end node?");
   1.181 +
   1.182 +  NS_ASSERTION(mRootNode, "Not initialized");
   1.183 +
   1.184 +  InvalidateWords();
   1.185 +
   1.186 +  if (!IsTextNode(aEndNode)) {
   1.187 +    // End at the start of the first text node after aEndNode/aEndOffset.
   1.188 +    aEndNode = FindNextTextNode(aEndNode, aEndOffset, mRootNode);
   1.189 +    aEndOffset = 0;
   1.190 +  }
   1.191 +  mSoftEnd = NodeOffset(aEndNode, aEndOffset);
   1.192 +  return NS_OK;
   1.193 +}
   1.194 +
   1.195 +nsresult
   1.196 +mozInlineSpellWordUtil::SetPosition(nsINode* aNode, int32_t aOffset)
   1.197 +{
   1.198 +  InvalidateWords();
   1.199 +
   1.200 +  if (!IsTextNode(aNode)) {
   1.201 +    // Start at the start of the first text node after aNode/aOffset.
   1.202 +    aNode = FindNextTextNode(aNode, aOffset, mRootNode);
   1.203 +    aOffset = 0;
   1.204 +  }
   1.205 +  mSoftBegin = NodeOffset(aNode, aOffset);
   1.206 +
   1.207 +  EnsureWords();
   1.208 +  
   1.209 +  int32_t textOffset = MapDOMPositionToSoftTextOffset(mSoftBegin);
   1.210 +  if (textOffset < 0)
   1.211 +    return NS_OK;
   1.212 +  mNextWordIndex = FindRealWordContaining(textOffset, HINT_END, true);
   1.213 +  return NS_OK;
   1.214 +}
   1.215 +
   1.216 +void
   1.217 +mozInlineSpellWordUtil::EnsureWords()
   1.218 +{
   1.219 +  if (mSoftTextValid)
   1.220 +    return;
   1.221 +  BuildSoftText();
   1.222 +  BuildRealWords();
   1.223 +  mSoftTextValid = true;
   1.224 +}
   1.225 +
   1.226 +nsresult
   1.227 +mozInlineSpellWordUtil::MakeRangeForWord(const RealWord& aWord, nsRange** aRange)
   1.228 +{
   1.229 +  NodeOffset begin = MapSoftTextOffsetToDOMPosition(aWord.mSoftTextOffset, HINT_BEGIN);
   1.230 +  NodeOffset end = MapSoftTextOffsetToDOMPosition(aWord.EndOffset(), HINT_END);
   1.231 +  return MakeRange(begin, end, aRange);
   1.232 +}
   1.233 +
   1.234 +// mozInlineSpellWordUtil::GetRangeForWord
   1.235 +
   1.236 +nsresult
   1.237 +mozInlineSpellWordUtil::GetRangeForWord(nsIDOMNode* aWordNode,
   1.238 +                                        int32_t aWordOffset,
   1.239 +                                        nsRange** aRange)
   1.240 +{
   1.241 +  // Set our soft end and start
   1.242 +  nsCOMPtr<nsINode> wordNode = do_QueryInterface(aWordNode);
   1.243 +  NodeOffset pt = NodeOffset(wordNode, aWordOffset);
   1.244 +  
   1.245 +  InvalidateWords();
   1.246 +  mSoftBegin = mSoftEnd = pt;
   1.247 +  EnsureWords();
   1.248 +  
   1.249 +  int32_t offset = MapDOMPositionToSoftTextOffset(pt);
   1.250 +  if (offset < 0)
   1.251 +    return MakeRange(pt, pt, aRange);
   1.252 +  int32_t wordIndex = FindRealWordContaining(offset, HINT_BEGIN, false);
   1.253 +  if (wordIndex < 0)
   1.254 +    return MakeRange(pt, pt, aRange);
   1.255 +  return MakeRangeForWord(mRealWords[wordIndex], aRange);
   1.256 +}
   1.257 +
   1.258 +// This is to fix characters that the spellchecker may not like
   1.259 +static void
   1.260 +NormalizeWord(const nsSubstring& aInput, int32_t aPos, int32_t aLen, nsAString& aOutput)
   1.261 +{
   1.262 +  aOutput.Truncate();
   1.263 +  for (int32_t i = 0; i < aLen; i++) {
   1.264 +    char16_t ch = aInput.CharAt(i + aPos);
   1.265 +
   1.266 +    // remove ignorable characters from the word
   1.267 +    if (IsIgnorableCharacter(ch))
   1.268 +      continue;
   1.269 +
   1.270 +    // the spellchecker doesn't handle curly apostrophes in all languages
   1.271 +    if (ch == 0x2019) { // RIGHT SINGLE QUOTATION MARK
   1.272 +      ch = '\'';
   1.273 +    }
   1.274 +
   1.275 +    aOutput.Append(ch);
   1.276 +  }
   1.277 +}
   1.278 +
   1.279 +// mozInlineSpellWordUtil::GetNextWord
   1.280 +//
   1.281 +//    FIXME-optimization: we shouldn't have to generate a range every single
   1.282 +//    time. It would be better if the inline spellchecker didn't require a
   1.283 +//    range unless the word was misspelled. This may or may not be possible.
   1.284 +
   1.285 +nsresult
   1.286 +mozInlineSpellWordUtil::GetNextWord(nsAString& aText, nsRange** aRange,
   1.287 +                                    bool* aSkipChecking)
   1.288 +{
   1.289 +#ifdef DEBUG_SPELLCHECK
   1.290 +  printf("GetNextWord called; mNextWordIndex=%d\n", mNextWordIndex);
   1.291 +#endif
   1.292 +
   1.293 +  if (mNextWordIndex < 0 ||
   1.294 +      mNextWordIndex >= int32_t(mRealWords.Length())) {
   1.295 +    mNextWordIndex = -1;
   1.296 +    *aRange = nullptr;
   1.297 +    *aSkipChecking = true;
   1.298 +    return NS_OK;
   1.299 +  }
   1.300 +  
   1.301 +  const RealWord& word = mRealWords[mNextWordIndex];
   1.302 +  nsresult rv = MakeRangeForWord(word, aRange);
   1.303 +  NS_ENSURE_SUCCESS(rv, rv);
   1.304 +  ++mNextWordIndex;
   1.305 +  *aSkipChecking = !word.mCheckableWord;
   1.306 +  ::NormalizeWord(mSoftText, word.mSoftTextOffset, word.mLength, aText);
   1.307 +
   1.308 +#ifdef DEBUG_SPELLCHECK
   1.309 +  printf("GetNextWord returning: %s (skip=%d)\n",
   1.310 +         NS_ConvertUTF16toUTF8(aText).get(), *aSkipChecking);
   1.311 +#endif
   1.312 +  
   1.313 +  return NS_OK;
   1.314 +}
   1.315 +
   1.316 +// mozInlineSpellWordUtil::MakeRange
   1.317 +//
   1.318 +//    Convenience function for creating a range over the current document.
   1.319 +
   1.320 +nsresult
   1.321 +mozInlineSpellWordUtil::MakeRange(NodeOffset aBegin, NodeOffset aEnd,
   1.322 +                                  nsRange** aRange)
   1.323 +{
   1.324 +  NS_ENSURE_ARG_POINTER(aBegin.mNode);
   1.325 +  if (!mDOMDocument)
   1.326 +    return NS_ERROR_NOT_INITIALIZED;
   1.327 +
   1.328 +  nsRefPtr<nsRange> range = new nsRange(aBegin.mNode);
   1.329 +  nsresult rv = range->Set(aBegin.mNode, aBegin.mOffset,
   1.330 +                           aEnd.mNode, aEnd.mOffset);
   1.331 +  NS_ENSURE_SUCCESS(rv, rv);
   1.332 +  range.forget(aRange);
   1.333 +
   1.334 +  return NS_OK;
   1.335 +}
   1.336 +
   1.337 +/*********** DOM text extraction ************/
   1.338 +
   1.339 +// IsDOMWordSeparator
   1.340 +//
   1.341 +//    Determines if the given character should be considered as a DOM Word
   1.342 +//    separator. Basically, this is whitespace, although it could also have
   1.343 +//    certain punctuation that we know ALWAYS breaks words. This is important.
   1.344 +//    For example, we can't have any punctuation that could appear in a URL
   1.345 +//    or email address in this, because those need to always fit into a single
   1.346 +//    DOM word.
   1.347 +
   1.348 +static bool
   1.349 +IsDOMWordSeparator(char16_t ch)
   1.350 +{
   1.351 +  // simple spaces
   1.352 +  if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
   1.353 +    return true;
   1.354 +
   1.355 +  // complex spaces - check only if char isn't ASCII (uncommon)
   1.356 +  if (ch >= 0xA0 &&
   1.357 +      (ch == 0x00A0 ||  // NO-BREAK SPACE
   1.358 +       ch == 0x2002 ||  // EN SPACE
   1.359 +       ch == 0x2003 ||  // EM SPACE
   1.360 +       ch == 0x2009 ||  // THIN SPACE
   1.361 +       ch == 0x3000))   // IDEOGRAPHIC SPACE
   1.362 +    return true;
   1.363 +
   1.364 +  // otherwise not a space
   1.365 +  return false;
   1.366 +}
   1.367 +
   1.368 +static inline bool
   1.369 +IsBRElement(nsINode* aNode)
   1.370 +{
   1.371 +  return aNode->IsElement() &&
   1.372 +         aNode->AsElement()->IsHTML(nsGkAtoms::br);
   1.373 +}
   1.374 +
   1.375 +/**
   1.376 + * Given a TextNode, checks to see if there's a DOM word separator before
   1.377 + * aBeforeOffset within it. This function does not modify aSeparatorOffset when
   1.378 + * it returns false.
   1.379 + *
   1.380 + * @param aNode the TextNode to check.
   1.381 + * @param aBeforeOffset the offset in the TextNode before which we will search
   1.382 + *        for the DOM separator. You can pass INT32_MAX to search the entire
   1.383 + *        length of the string.
   1.384 + * @param aSeparatorOffset will be set to the offset of the first separator it
   1.385 + *        encounters. Will not be written to if no separator is found.
   1.386 + * @returns True if it found a separator.
   1.387 + */
   1.388 +static bool
   1.389 +TextNodeContainsDOMWordSeparator(nsINode* aNode,
   1.390 +                                 int32_t aBeforeOffset,
   1.391 +                                 int32_t* aSeparatorOffset)
   1.392 +{
   1.393 +  // aNode is actually an nsIContent, since it's eTEXT
   1.394 +  nsIContent* content = static_cast<nsIContent*>(aNode);
   1.395 +  const nsTextFragment* textFragment = content->GetText();
   1.396 +  NS_ASSERTION(textFragment, "Where is our text?");
   1.397 +  for (int32_t i = std::min(aBeforeOffset, int32_t(textFragment->GetLength())) - 1; i >= 0; --i) {
   1.398 +    if (IsDOMWordSeparator(textFragment->CharAt(i))) {
   1.399 +      // Be greedy, find as many separators as we can
   1.400 +      for (int32_t j = i - 1; j >= 0; --j) {
   1.401 +        if (IsDOMWordSeparator(textFragment->CharAt(j))) {
   1.402 +          i = j;
   1.403 +        } else {
   1.404 +          break;
   1.405 +        }
   1.406 +      }
   1.407 +      *aSeparatorOffset = i;
   1.408 +      return true;
   1.409 +    }
   1.410 +  }
   1.411 +  return false;
   1.412 +}
   1.413 +
   1.414 +/**
   1.415 + * Check if there's a DOM word separator before aBeforeOffset in this node.
   1.416 + * Always returns true if it's a BR element.
   1.417 + * aSeparatorOffset is set to the index of the first character in the last
   1.418 + * separator if any is found (0 for BR elements).
   1.419 + *
   1.420 + * This function does not modify aSeparatorOffset when it returns false.
   1.421 + */
   1.422 +static bool
   1.423 +ContainsDOMWordSeparator(nsINode* aNode, int32_t aBeforeOffset,
   1.424 +                         int32_t* aSeparatorOffset)
   1.425 +{
   1.426 +  if (IsBRElement(aNode)) {
   1.427 +    *aSeparatorOffset = 0;
   1.428 +    return true;
   1.429 +  }
   1.430 +
   1.431 +  if (!IsTextNode(aNode))
   1.432 +    return false;
   1.433 +
   1.434 +  return TextNodeContainsDOMWordSeparator(aNode, aBeforeOffset,
   1.435 +                                          aSeparatorOffset);
   1.436 +}
   1.437 +
   1.438 +static bool
   1.439 +IsBreakElement(nsINode* aNode)
   1.440 +{
   1.441 +  if (!aNode->IsElement()) {
   1.442 +    return false;
   1.443 +  }
   1.444 +
   1.445 +  dom::Element *element = aNode->AsElement();
   1.446 +    
   1.447 +  if (element->IsHTML(nsGkAtoms::br))
   1.448 +    return true;
   1.449 +
   1.450 +  // If we don't have a frame, we don't consider ourselves a break
   1.451 +  // element.  In particular, words can span us.
   1.452 +  if (!element->GetPrimaryFrame())
   1.453 +    return false;
   1.454 +
   1.455 +  // Anything that's not an inline element is a break element.
   1.456 +  // XXXbz should replaced inlines be break elements, though?
   1.457 +  return element->GetPrimaryFrame()->StyleDisplay()->mDisplay !=
   1.458 +    NS_STYLE_DISPLAY_INLINE;
   1.459 +}
   1.460 +
   1.461 +struct CheckLeavingBreakElementClosure {
   1.462 +  bool          mLeftBreakElement;
   1.463 +};
   1.464 +
   1.465 +static void
   1.466 +CheckLeavingBreakElement(nsINode* aNode, void* aClosure)
   1.467 +{
   1.468 +  CheckLeavingBreakElementClosure* cl =
   1.469 +    static_cast<CheckLeavingBreakElementClosure*>(aClosure);
   1.470 +  if (!cl->mLeftBreakElement && IsBreakElement(aNode)) {
   1.471 +    cl->mLeftBreakElement = true;
   1.472 +  }
   1.473 +}
   1.474 +
   1.475 +void
   1.476 +mozInlineSpellWordUtil::NormalizeWord(nsSubstring& aWord)
   1.477 +{
   1.478 +  nsAutoString result;
   1.479 +  ::NormalizeWord(aWord, 0, aWord.Length(), result);
   1.480 +  aWord = result;
   1.481 +}
   1.482 +
// Rebuilds mSoftText, the flat string the word splitter works on, together
// with mSoftTextDOMMapping, which records for each contributing text node
// the node/offset it came from, its position in mSoftText and its length.
// The string starts at a word boundary at or before mSoftBegin and runs
// until a word boundary at or after mSoftEnd. May move mSoftBegin backwards.
void
mozInlineSpellWordUtil::BuildSoftText()
{
  // First we have to work backwards from mSoftBegin to find a text node
  // containing a DOM word separator, a non-inline-element
  // boundary, or the hard start node. That's where we'll start building the
  // soft string from.
  nsINode* node = mSoftBegin.mNode;
  int32_t firstOffsetInNode = 0;
  // Only the starting node is searched up to the soft-begin offset; every
  // earlier node is searched in full (see the INT32_MAX assignment below).
  int32_t checkBeforeOffset = mSoftBegin.mOffset;
  while (node) {
    if (ContainsDOMWordSeparator(node, checkBeforeOffset, &firstOffsetInNode)) {
      if (node == mSoftBegin.mNode) {
        // If we find a word separator on the first node, look at the preceding
        // word on the text node as well.
        int32_t newOffset = 0;
        if (firstOffsetInNode > 0) {
          // Try to find the previous word boundary in the current node. If
          // we can't find one, start checking previous sibling nodes (if any
          // adjacent ones exist) to see if we can find any text nodes with
          // DOM word separators. We bail out as soon as we see a node that is
          // not a text node, or we run out of previous sibling nodes. In the
          // event that we simply cannot find any preceding word separator, the
          // offset is set to 0, and the soft text beginning node is set to the
          // "most previous" text node before the original starting node, or
          // kept at the original starting node if no previous text nodes exist.
          //
          // NOTE(review): `node` itself is not updated when mSoftBegin.mNode
          // moves to a previous sibling, so the forward pass below still
          // starts at the original node while firstOffsetInNode may hold an
          // offset found in that sibling -- confirm this is intended.
          if (!ContainsDOMWordSeparator(node, firstOffsetInNode - 1,
                                        &newOffset)) {
            nsINode* prevNode = node->GetPreviousSibling();
            while (prevNode && IsTextNode(prevNode)) {
              mSoftBegin.mNode = prevNode;
              if (TextNodeContainsDOMWordSeparator(prevNode, INT32_MAX,
                                                   &newOffset)) {
                break;
              }
              prevNode = prevNode->GetPreviousSibling();
            }
          }
        }
        firstOffsetInNode = newOffset;
        mSoftBegin.mOffset = newOffset;
      }
      break;
    }
    checkBeforeOffset = INT32_MAX;
    if (IsBreakElement(node)) {
      // Since GetPreviousContent follows tree *preorder*, we're about to traverse
      // up out of 'node'. Since node induces breaks (e.g., it's a block),
      // don't bother trying to look outside it, just stop now.
      break;
    }
    // GetPreviousContent below expects mRootNode to be an ancestor of node.
    if (!nsContentUtils::ContentIsDescendantOf(node, mRootNode)) {
      break;
    }
    node = node->GetPreviousContent(mRootNode);
  }

  // Now build up the string moving forward through the DOM until we reach
  // the soft end and *then* see a DOM word separator, a non-inline-element
  // boundary, or the hard end node.
  mSoftText.Truncate();
  mSoftTextDOMMapping.Clear();
  bool seenSoftEnd = false;
  while (node) {
    if (node == mSoftEnd.mNode) {
      seenSoftEnd = true;
    }

    // Set when this node is the last one that contributes to the soft text.
    bool exit = false;
    if (IsTextNode(node)) {
      nsIContent* content = static_cast<nsIContent*>(node);
      NS_ASSERTION(content, "Where is our content?");
      const nsTextFragment* textFragment = content->GetText();
      NS_ASSERTION(textFragment, "Where is our text?");
      int32_t lastOffsetInNode = textFragment->GetLength();

      if (seenSoftEnd) {
        // check whether we can stop after this
        for (int32_t i = node == mSoftEnd.mNode ? mSoftEnd.mOffset : 0;
             i < int32_t(textFragment->GetLength()); ++i) {
          if (IsDOMWordSeparator(textFragment->CharAt(i))) {
            exit = true;
            // stop at the first separator after the soft end point
            lastOffsetInNode = i;
            break;
          }
        }
      }

      if (firstOffsetInNode < lastOffsetInNode) {
        // Record where this node's text lands in mSoftText before appending
        // it, so the mapping's soft-text offset is the pre-append length.
        int32_t len = lastOffsetInNode - firstOffsetInNode;
        mSoftTextDOMMapping.AppendElement(
          DOMTextMapping(NodeOffset(node, firstOffsetInNode), mSoftText.Length(), len));

        bool ok = textFragment->AppendTo(mSoftText, firstOffsetInNode, len,
                                         mozilla::fallible_t());
        if (!ok) {
            // probably out of memory, remove from mSoftTextDOMMapping
            mSoftTextDOMMapping.RemoveElementAt(mSoftTextDOMMapping.Length() - 1);
            exit = true;
        }
      }

      // Only the first text node is entered part-way through.
      firstOffsetInNode = 0;
    }

    if (exit)
      break;

    CheckLeavingBreakElementClosure closure = { false };
    node = FindNextNode(node, mRootNode, CheckLeavingBreakElement, &closure);
    if (closure.mLeftBreakElement || (node && IsBreakElement(node))) {
      // We left, or are entering, a break element (e.g., block). Maybe we can
      // stop now.
      if (seenSoftEnd)
        break;
      // Record the break as a space so that words on either side of it are
      // not joined together.
      mSoftText.Append(' ');
    }
  }
  
#ifdef DEBUG_SPELLCHECK
  printf("Got DOM string: %s\n", NS_ConvertUTF16toUTF8(mSoftText).get());
#endif
}
   1.611 +
   1.612 +void
   1.613 +mozInlineSpellWordUtil::BuildRealWords()
   1.614 +{
   1.615 +  // This is pretty simple. We just have to walk mSoftText, tokenizing it
   1.616 +  // into "real words".
   1.617 +  // We do an outer traversal of words delimited by IsDOMWordSeparator, calling
   1.618 +  // SplitDOMWord on each of those DOM words
   1.619 +  int32_t wordStart = -1;
   1.620 +  mRealWords.Clear();
   1.621 +  for (int32_t i = 0; i < int32_t(mSoftText.Length()); ++i) {
   1.622 +    if (IsDOMWordSeparator(mSoftText.CharAt(i))) {
   1.623 +      if (wordStart >= 0) {
   1.624 +        SplitDOMWord(wordStart, i);
   1.625 +        wordStart = -1;
   1.626 +      }
   1.627 +    } else {
   1.628 +      if (wordStart < 0) {
   1.629 +        wordStart = i;
   1.630 +      }
   1.631 +    }
   1.632 +  }
   1.633 +  if (wordStart >= 0) {
   1.634 +    SplitDOMWord(wordStart, mSoftText.Length());
   1.635 +  }
   1.636 +}
   1.637 +
   1.638 +/*********** DOM/realwords<->mSoftText mapping functions ************/
   1.639 +
   1.640 +int32_t
   1.641 +mozInlineSpellWordUtil::MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset)
   1.642 +{
   1.643 +  if (!mSoftTextValid) {
   1.644 +    NS_ERROR("Soft text must be valid if we're to map into it");
   1.645 +    return -1;
   1.646 +  }
   1.647 +  
   1.648 +  for (int32_t i = 0; i < int32_t(mSoftTextDOMMapping.Length()); ++i) {
   1.649 +    const DOMTextMapping& map = mSoftTextDOMMapping[i];
   1.650 +    if (map.mNodeOffset.mNode == aNodeOffset.mNode) {
   1.651 +      // Allow offsets at either end of the string, in particular, allow the
   1.652 +      // offset that's at the end of the contributed string
   1.653 +      int32_t offsetInContributedString =
   1.654 +        aNodeOffset.mOffset - map.mNodeOffset.mOffset;
   1.655 +      if (offsetInContributedString >= 0 &&
   1.656 +          offsetInContributedString <= map.mLength)
   1.657 +        return map.mSoftTextOffset + offsetInContributedString;
   1.658 +      return -1;
   1.659 +    }
   1.660 +  }
   1.661 +  return -1;
   1.662 +}
   1.663 +
   1.664 +mozInlineSpellWordUtil::NodeOffset
   1.665 +mozInlineSpellWordUtil::MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
   1.666 +                                                       DOMMapHint aHint)
   1.667 +{
   1.668 +  NS_ASSERTION(mSoftTextValid, "Soft text must be valid if we're to map out of it");
   1.669 +  if (!mSoftTextValid)
   1.670 +    return NodeOffset(nullptr, -1);
   1.671 +  
   1.672 +  // The invariant is that the range start..end includes the last mapping,
   1.673 +  // if any, such that mSoftTextOffset <= aSoftTextOffset
   1.674 +  int32_t start = 0;
   1.675 +  int32_t end = mSoftTextDOMMapping.Length();
   1.676 +  while (end - start >= 2) {
   1.677 +    int32_t mid = (start + end)/2;
   1.678 +    const DOMTextMapping& map = mSoftTextDOMMapping[mid];
   1.679 +    if (map.mSoftTextOffset > aSoftTextOffset) {
   1.680 +      end = mid;
   1.681 +    } else {
   1.682 +      start = mid;
   1.683 +    }
   1.684 +  }
   1.685 +  
   1.686 +  if (start >= end)
   1.687 +    return NodeOffset(nullptr, -1);
   1.688 +
   1.689 +  // 'start' is now the last mapping, if any, such that
   1.690 +  // mSoftTextOffset <= aSoftTextOffset.
   1.691 +  // If we're doing HINT_END, then we may want to return the end of the
   1.692 +  // the previous mapping instead of the start of this mapping
   1.693 +  if (aHint == HINT_END && start > 0) {
   1.694 +    const DOMTextMapping& map = mSoftTextDOMMapping[start - 1];
   1.695 +    if (map.mSoftTextOffset + map.mLength == aSoftTextOffset)
   1.696 +      return NodeOffset(map.mNodeOffset.mNode, map.mNodeOffset.mOffset + map.mLength);
   1.697 +  }
   1.698 +  
   1.699 +  // We allow ourselves to return the end of this mapping even if we're
   1.700 +  // doing HINT_START. This will only happen if there is no mapping which this
   1.701 +  // point is the start of. I'm not 100% sure this is OK...
   1.702 +  const DOMTextMapping& map = mSoftTextDOMMapping[start];
   1.703 +  int32_t offset = aSoftTextOffset - map.mSoftTextOffset;
   1.704 +  if (offset >= 0 && offset <= map.mLength)
   1.705 +    return NodeOffset(map.mNodeOffset.mNode, map.mNodeOffset.mOffset + offset);
   1.706 +    
   1.707 +  return NodeOffset(nullptr, -1);
   1.708 +}
   1.709 +
   1.710 +int32_t
   1.711 +mozInlineSpellWordUtil::FindRealWordContaining(int32_t aSoftTextOffset,
   1.712 +    DOMMapHint aHint, bool aSearchForward)
   1.713 +{
   1.714 +  NS_ASSERTION(mSoftTextValid, "Soft text must be valid if we're to map out of it");
   1.715 +  if (!mSoftTextValid)
   1.716 +    return -1;
   1.717 +
   1.718 +  // The invariant is that the range start..end includes the last word,
   1.719 +  // if any, such that mSoftTextOffset <= aSoftTextOffset
   1.720 +  int32_t start = 0;
   1.721 +  int32_t end = mRealWords.Length();
   1.722 +  while (end - start >= 2) {
   1.723 +    int32_t mid = (start + end)/2;
   1.724 +    const RealWord& word = mRealWords[mid];
   1.725 +    if (word.mSoftTextOffset > aSoftTextOffset) {
   1.726 +      end = mid;
   1.727 +    } else {
   1.728 +      start = mid;
   1.729 +    }
   1.730 +  }
   1.731 +  
   1.732 +  if (start >= end)
   1.733 +    return -1;
   1.734 +
   1.735 +  // 'start' is now the last word, if any, such that
   1.736 +  // mSoftTextOffset <= aSoftTextOffset.
   1.737 +  // If we're doing HINT_END, then we may want to return the end of the
   1.738 +  // the previous word instead of the start of this word
   1.739 +  if (aHint == HINT_END && start > 0) {
   1.740 +    const RealWord& word = mRealWords[start - 1];
   1.741 +    if (word.mSoftTextOffset + word.mLength == aSoftTextOffset)
   1.742 +      return start - 1;
   1.743 +  }
   1.744 +  
   1.745 +  // We allow ourselves to return the end of this word even if we're
   1.746 +  // doing HINT_START. This will only happen if there is no word which this
   1.747 +  // point is the start of. I'm not 100% sure this is OK...
   1.748 +  const RealWord& word = mRealWords[start];
   1.749 +  int32_t offset = aSoftTextOffset - word.mSoftTextOffset;
   1.750 +  if (offset >= 0 && offset <= word.mLength)
   1.751 +    return start;
   1.752 +
   1.753 +  if (aSearchForward) {
   1.754 +    if (mRealWords[0].mSoftTextOffset > aSoftTextOffset) {
   1.755 +      // All words have mSoftTextOffset > aSoftTextOffset
   1.756 +      return 0;
   1.757 +    }
   1.758 +    // 'start' is the last word such that mSoftTextOffset <= aSoftTextOffset.
   1.759 +    // Word start+1, if it exists, will be the first with
   1.760 +    // mSoftTextOffset > aSoftTextOffset.
   1.761 +    if (start + 1 < int32_t(mRealWords.Length()))
   1.762 +      return start + 1;
   1.763 +  }
   1.764 +
   1.765 +  return -1;
   1.766 +}
   1.767 +
   1.768 +/*********** Word Splitting ************/
   1.769 +
   1.770 +// classifies a given character in the DOM word
   1.771 +enum CharClass {
   1.772 +  CHAR_CLASS_WORD,
   1.773 +  CHAR_CLASS_SEPARATOR,
   1.774 +  CHAR_CLASS_END_OF_INPUT };
   1.775 +
   1.776 +// Encapsulates DOM-word to real-word splitting
   1.777 +struct MOZ_STACK_CLASS WordSplitState
   1.778 +{
   1.779 +  mozInlineSpellWordUtil*    mWordUtil;
   1.780 +  const nsDependentSubstring mDOMWordText;
   1.781 +  int32_t                    mDOMWordOffset;
   1.782 +  CharClass                  mCurCharClass;
   1.783 +
   1.784 +  WordSplitState(mozInlineSpellWordUtil* aWordUtil,
   1.785 +                 const nsString& aString, int32_t aStart, int32_t aLen)
   1.786 +    : mWordUtil(aWordUtil), mDOMWordText(aString, aStart, aLen),
   1.787 +      mDOMWordOffset(0), mCurCharClass(CHAR_CLASS_END_OF_INPUT) {}
   1.788 +
   1.789 +  CharClass ClassifyCharacter(int32_t aIndex, bool aRecurse) const;
   1.790 +  void Advance();
   1.791 +  void AdvanceThroughSeparators();
   1.792 +  void AdvanceThroughWord();
   1.793 +
   1.794 +  // Finds special words like email addresses and URLs that may start at the
   1.795 +  // current position, and returns their length, or 0 if not found. This allows
   1.796 +  // arbitrary word breaking rules to be used for these special entities, as
   1.797 +  // long as they can not contain whitespace.
   1.798 +  bool IsSpecialWord();
   1.799 +
   1.800 +  // Similar to IsSpecialWord except that this takes a split word as
   1.801 +  // input. This checks for things that do not require special word-breaking
   1.802 +  // rules.
   1.803 +  bool ShouldSkipWord(int32_t aStart, int32_t aLength);
   1.804 +};
   1.805 +
   1.806 +// WordSplitState::ClassifyCharacter
   1.807 +
   1.808 +CharClass
   1.809 +WordSplitState::ClassifyCharacter(int32_t aIndex, bool aRecurse) const
   1.810 +{
   1.811 +  NS_ASSERTION(aIndex >= 0 && aIndex <= int32_t(mDOMWordText.Length()),
   1.812 +               "Index out of range");
   1.813 +  if (aIndex == int32_t(mDOMWordText.Length()))
   1.814 +    return CHAR_CLASS_SEPARATOR;
   1.815 +
   1.816 +  // this will classify the character, we want to treat "ignorable" characters
   1.817 +  // such as soft hyphens, and also ZWJ and ZWNJ as word characters.
   1.818 +  nsIUGenCategory::nsUGenCategory
   1.819 +    charCategory = mozilla::unicode::GetGenCategory(mDOMWordText[aIndex]);
   1.820 +  if (charCategory == nsIUGenCategory::kLetter ||
   1.821 +      IsIgnorableCharacter(mDOMWordText[aIndex]) ||
   1.822 +      mDOMWordText[aIndex] == 0x200C /* ZWNJ */ ||
   1.823 +      mDOMWordText[aIndex] == 0x200D /* ZWJ */)
   1.824 +    return CHAR_CLASS_WORD;
   1.825 +
   1.826 +  // If conditional punctuation is surrounded immediately on both sides by word
   1.827 +  // characters it also counts as a word character.
   1.828 +  if (IsConditionalPunctuation(mDOMWordText[aIndex])) {
   1.829 +    if (!aRecurse) {
   1.830 +      // not allowed to look around, this punctuation counts like a separator
   1.831 +      return CHAR_CLASS_SEPARATOR;
   1.832 +    }
   1.833 +
   1.834 +    // check the left-hand character
   1.835 +    if (aIndex == 0)
   1.836 +      return CHAR_CLASS_SEPARATOR;
   1.837 +    if (ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD)
   1.838 +      return CHAR_CLASS_SEPARATOR;
   1.839 +    // If the previous charatcer is a word-char, make sure that it's not a
   1.840 +    // special dot character.
   1.841 +    if (mDOMWordText[aIndex - 1] == '.')
   1.842 +      return CHAR_CLASS_SEPARATOR;
   1.843 +
   1.844 +    // now we know left char is a word-char, check the right-hand character
   1.845 +    if (aIndex == int32_t(mDOMWordText.Length()) - 1)
   1.846 +      return CHAR_CLASS_SEPARATOR;
   1.847 +    if (ClassifyCharacter(aIndex + 1, false) != CHAR_CLASS_WORD)
   1.848 +      return CHAR_CLASS_SEPARATOR;
   1.849 +    // If the next charatcer is a word-char, make sure that it's not a
   1.850 +    // special dot character.
   1.851 +    if (mDOMWordText[aIndex + 1] == '.')
   1.852 +      return CHAR_CLASS_SEPARATOR;
   1.853 +
   1.854 +    // char on either side is a word, this counts as a word
   1.855 +    return CHAR_CLASS_WORD;
   1.856 +  }
   1.857 +
   1.858 +  // The dot character, if appearing at the end of a word, should
   1.859 +  // be considered part of that word.  Example: "etc.", or
   1.860 +  // abbreviations
   1.861 +  if (aIndex > 0 &&
   1.862 +      mDOMWordText[aIndex] == '.' &&
   1.863 +      mDOMWordText[aIndex - 1] != '.' &&
   1.864 +      ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD) {
   1.865 +    return CHAR_CLASS_WORD;
   1.866 +  }
   1.867 +
   1.868 +  // all other punctuation
   1.869 +  if (charCategory == nsIUGenCategory::kSeparator ||
   1.870 +      charCategory == nsIUGenCategory::kOther ||
   1.871 +      charCategory == nsIUGenCategory::kPunctuation ||
   1.872 +      charCategory == nsIUGenCategory::kSymbol) {
   1.873 +    // Don't break on hyphens, as hunspell handles them on its own.
   1.874 +    if (aIndex > 0 &&
   1.875 +        mDOMWordText[aIndex] == '-' &&
   1.876 +        mDOMWordText[aIndex - 1] != '-' &&
   1.877 +        ClassifyCharacter(aIndex - 1, false) == CHAR_CLASS_WORD) {
   1.878 +      // A hyphen is only meaningful as a separator inside a word
   1.879 +      // if the previous and next characters are a word character.
   1.880 +      if (aIndex == int32_t(mDOMWordText.Length()) - 1)
   1.881 +        return CHAR_CLASS_SEPARATOR;
   1.882 +      if (mDOMWordText[aIndex + 1] != '.' &&
   1.883 +          ClassifyCharacter(aIndex + 1, false) == CHAR_CLASS_WORD)
   1.884 +        return CHAR_CLASS_WORD;
   1.885 +    }
   1.886 +    return CHAR_CLASS_SEPARATOR;
   1.887 +  }
   1.888 +
   1.889 +  // any other character counts as a word
   1.890 +  return CHAR_CLASS_WORD;
   1.891 +}
   1.892 +
   1.893 +
   1.894 +// WordSplitState::Advance
   1.895 +
   1.896 +void
   1.897 +WordSplitState::Advance()
   1.898 +{
   1.899 +  NS_ASSERTION(mDOMWordOffset >= 0, "Negative word index");
   1.900 +  NS_ASSERTION(mDOMWordOffset < (int32_t)mDOMWordText.Length(),
   1.901 +               "Length beyond end");
   1.902 +
   1.903 +  mDOMWordOffset ++;
   1.904 +  if (mDOMWordOffset >= (int32_t)mDOMWordText.Length())
   1.905 +    mCurCharClass = CHAR_CLASS_END_OF_INPUT;
   1.906 +  else
   1.907 +    mCurCharClass = ClassifyCharacter(mDOMWordOffset, true);
   1.908 +}
   1.909 +
   1.910 +
   1.911 +// WordSplitState::AdvanceThroughSeparators
   1.912 +
   1.913 +void
   1.914 +WordSplitState::AdvanceThroughSeparators()
   1.915 +{
   1.916 +  while (mCurCharClass == CHAR_CLASS_SEPARATOR)
   1.917 +    Advance();
   1.918 +}
   1.919 +
   1.920 +// WordSplitState::AdvanceThroughWord
   1.921 +
   1.922 +void
   1.923 +WordSplitState::AdvanceThroughWord()
   1.924 +{
   1.925 +  while (mCurCharClass == CHAR_CLASS_WORD)
   1.926 +    Advance();
   1.927 +}
   1.928 +
   1.929 +
   1.930 +// WordSplitState::IsSpecialWord
   1.931 +
   1.932 +bool
   1.933 +WordSplitState::IsSpecialWord()
   1.934 +{
   1.935 +  // Search for email addresses. We simply define these as any sequence of
   1.936 +  // characters with an '@' character in the middle. The DOM word is already
   1.937 +  // split on whitepace, so we know that everything to the end is the address
   1.938 +  int32_t firstColon = -1;
   1.939 +  for (int32_t i = mDOMWordOffset;
   1.940 +       i < int32_t(mDOMWordText.Length()); i ++) {
   1.941 +    if (mDOMWordText[i] == '@') {
   1.942 +      // only accept this if there are unambiguous word characters (don't bother
   1.943 +      // recursing to disambiguate apostrophes) on each side. This prevents
   1.944 +      // classifying, e.g. "@home" as an email address
   1.945 +
   1.946 +      // Use this condition to only accept words with '@' in the middle of
   1.947 +      // them. It works, but the inlinespellcker doesn't like this. The problem
   1.948 +      // is that you type "fhsgfh@" that's a misspelled word followed by a
   1.949 +      // symbol, but when you type another letter "fhsgfh@g" that first word
   1.950 +      // need to be unmarked misspelled. It doesn't do this. it only checks the
   1.951 +      // current position for potentially removing a spelling range.
   1.952 +      if (i > 0 && ClassifyCharacter(i - 1, false) == CHAR_CLASS_WORD &&
   1.953 +          i < (int32_t)mDOMWordText.Length() - 1 &&
   1.954 +          ClassifyCharacter(i + 1, false) == CHAR_CLASS_WORD) {
   1.955 +        return true;
   1.956 +      }
   1.957 +    } else if (mDOMWordText[i] == ':' && firstColon < 0) {
   1.958 +      firstColon = i;
   1.959 +
   1.960 +      // If the first colon is followed by a slash, consider it a URL
   1.961 +      // This will catch things like asdf://foo.com
   1.962 +      if (firstColon < (int32_t)mDOMWordText.Length() - 1 &&
   1.963 +          mDOMWordText[firstColon + 1] == '/') {
   1.964 +        return true;
   1.965 +      }
   1.966 +    }
   1.967 +  }
   1.968 +
   1.969 +  // Check the text before the first colon against some known protocols. It
   1.970 +  // is impossible to check against all protocols, especially since you can
   1.971 +  // plug in new protocols. We also don't want to waste time here checking
   1.972 +  // against a lot of obscure protocols.
   1.973 +  if (firstColon > mDOMWordOffset) {
   1.974 +    nsString protocol(Substring(mDOMWordText, mDOMWordOffset,
   1.975 +                      firstColon - mDOMWordOffset));
   1.976 +    if (protocol.EqualsIgnoreCase("http") ||
   1.977 +        protocol.EqualsIgnoreCase("https") ||
   1.978 +        protocol.EqualsIgnoreCase("news") ||
   1.979 +        protocol.EqualsIgnoreCase("file") ||
   1.980 +        protocol.EqualsIgnoreCase("javascript") ||
   1.981 +        protocol.EqualsIgnoreCase("data") ||
   1.982 +        protocol.EqualsIgnoreCase("ftp")) {
   1.983 +      return true;
   1.984 +    }
   1.985 +  }
   1.986 +
   1.987 +  // not anything special
   1.988 +  return false;
   1.989 +}
   1.990 +
   1.991 +// WordSplitState::ShouldSkipWord
   1.992 +
   1.993 +bool
   1.994 +WordSplitState::ShouldSkipWord(int32_t aStart, int32_t aLength)
   1.995 +{
   1.996 +  int32_t last = aStart + aLength;
   1.997 +
   1.998 +  // check to see if the word contains a digit
   1.999 +  for (int32_t i = aStart; i < last; i ++) {
  1.1000 +    if (unicode::GetGenCategory(mDOMWordText[i]) == nsIUGenCategory::kNumber) {
  1.1001 +      return true;
  1.1002 +    }
  1.1003 +  }
  1.1004 +
  1.1005 +  // not special
  1.1006 +  return false;
  1.1007 +}
  1.1008 +
  1.1009 +// mozInlineSpellWordUtil::SplitDOMWord
  1.1010 +
  1.1011 +void
  1.1012 +mozInlineSpellWordUtil::SplitDOMWord(int32_t aStart, int32_t aEnd)
  1.1013 +{
  1.1014 +  WordSplitState state(this, mSoftText, aStart, aEnd - aStart);
  1.1015 +  state.mCurCharClass = state.ClassifyCharacter(0, true);
  1.1016 +
  1.1017 +  state.AdvanceThroughSeparators();
  1.1018 +  if (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT &&
  1.1019 +      state.IsSpecialWord()) {
  1.1020 +    int32_t specialWordLength = state.mDOMWordText.Length() - state.mDOMWordOffset;
  1.1021 +    mRealWords.AppendElement(
  1.1022 +        RealWord(aStart + state.mDOMWordOffset, specialWordLength, false));
  1.1023 +
  1.1024 +    return;
  1.1025 +  }
  1.1026 +
  1.1027 +  while (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT) {
  1.1028 +    state.AdvanceThroughSeparators();
  1.1029 +    if (state.mCurCharClass == CHAR_CLASS_END_OF_INPUT)
  1.1030 +      break;
  1.1031 +
  1.1032 +    // save the beginning of the word
  1.1033 +    int32_t wordOffset = state.mDOMWordOffset;
  1.1034 +
  1.1035 +    // find the end of the word
  1.1036 +    state.AdvanceThroughWord();
  1.1037 +    int32_t wordLen = state.mDOMWordOffset - wordOffset;
  1.1038 +    mRealWords.AppendElement(
  1.1039 +      RealWord(aStart + wordOffset, wordLen,
  1.1040 +               !state.ShouldSkipWord(wordOffset, wordLen)));
  1.1041 +  }
  1.1042 +}

mercurial