Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 //#define DEBUG_FIND 1
8 #include "nsFind.h"
9 #include "nsContentCID.h"
10 #include "nsIContent.h"
11 #include "nsIDOMNode.h"
12 #include "nsIDOMNodeList.h"
13 #include "nsISelection.h"
14 #include "nsISelectionController.h"
15 #include "nsIFrame.h"
16 #include "nsITextControlFrame.h"
17 #include "nsIFormControl.h"
18 #include "nsIEditor.h"
19 #include "nsIPlaintextEditor.h"
20 #include "nsTextFragment.h"
21 #include "nsString.h"
22 #include "nsIAtom.h"
23 #include "nsServiceManagerUtils.h"
24 #include "nsUnicharUtils.h"
25 #include "nsIDOMElement.h"
26 #include "nsIWordBreaker.h"
27 #include "nsCRT.h"
28 #include "nsRange.h"
29 #include "nsContentUtils.h"
30 #include "mozilla/DebugOnly.h"
32 using namespace mozilla;
34 // Yikes! Casting a char to unichar can fill with ones!
35 #define CHAR_TO_UNICHAR(c) ((char16_t)(const unsigned char)c)
37 static NS_DEFINE_CID(kCContentIteratorCID, NS_CONTENTITERATOR_CID);
38 static NS_DEFINE_CID(kCPreContentIteratorCID, NS_PRECONTENTITERATOR_CID);
40 #define CH_QUOTE ((char16_t) 0x22)
41 #define CH_APOSTROPHE ((char16_t) 0x27)
42 #define CH_LEFT_SINGLE_QUOTE ((char16_t) 0x2018)
43 #define CH_RIGHT_SINGLE_QUOTE ((char16_t) 0x2019)
44 #define CH_LEFT_DOUBLE_QUOTE ((char16_t) 0x201C)
45 #define CH_RIGHT_DOUBLE_QUOTE ((char16_t) 0x201D)
47 #define CH_SHY ((char16_t) 0xAD)
49 // nsFind::Find casts CH_SHY to char before calling StripChars
50 // This works correctly if and only if CH_SHY <= 255
51 PR_STATIC_ASSERT(CH_SHY <= 255);
53 // -----------------------------------------------------------------------
54 // nsFindContentIterator is a special iterator that also goes through
55 // any existing <textarea>'s or text <input>'s editor to lookup the
56 // anonymous DOM content there.
57 //
58 // Details:
59 // 1) We use two iterators: The "outer-iterator" goes through the
60 // normal DOM. The "inner-iterator" goes through the anonymous DOM
61 // inside the editor.
62 //
63 // 2) [MaybeSetupInnerIterator] As soon as the outer-iterator's current
64 // node is changed, a check is made to see if the node is a <textarea> or
65 // a text <input> node. If so, an inner-iterator is created to lookup the
66 // anynomous contents of the editor underneath the text control.
67 //
68 // 3) When the inner-iterator is created, we position the outer-iterator
69 // 'after' (or 'before' in backward search) the text control to avoid
70 // revisiting that control.
71 //
72 // 4) As a consequence of searching through text controls, we can be
73 // called via FindNext with the current selection inside a <textarea>
74 // or a text <input>. This means that we can be given an initial search
75 // range that stretches across the anonymous DOM and the normal DOM. To
76 // cater for this situation, we split the anonymous part into the
77 // inner-iterator and then reposition the outer-iterator outside.
78 //
79 // 5) The implementation assumes that First() and Next() are only called
80 // in find-forward mode, while Last() and Prev() are used in find-backward.
82 class nsFindContentIterator : public nsIContentIterator
83 {
84 public:
85 nsFindContentIterator(bool aFindBackward)
86 : mStartOffset(0),
87 mEndOffset(0),
88 mFindBackward(aFindBackward)
89 {
90 }
92 virtual ~nsFindContentIterator()
93 {
94 }
96 // nsISupports
97 NS_DECL_CYCLE_COLLECTING_ISUPPORTS
98 NS_DECL_CYCLE_COLLECTION_CLASS(nsFindContentIterator)
100 // nsIContentIterator
101 virtual nsresult Init(nsINode* aRoot)
102 {
103 NS_NOTREACHED("internal error");
104 return NS_ERROR_NOT_IMPLEMENTED;
105 }
106 virtual nsresult Init(nsIDOMRange* aRange)
107 {
108 NS_NOTREACHED("internal error");
109 return NS_ERROR_NOT_IMPLEMENTED;
110 }
111 // Not a range because one of the endpoints may be anonymous.
112 nsresult Init(nsIDOMNode* aStartNode, int32_t aStartOffset,
113 nsIDOMNode* aEndNode, int32_t aEndOffset);
114 virtual void First();
115 virtual void Last();
116 virtual void Next();
117 virtual void Prev();
118 virtual nsINode* GetCurrentNode();
119 virtual bool IsDone();
120 virtual nsresult PositionAt(nsINode* aCurNode);
122 private:
123 nsCOMPtr<nsIContentIterator> mOuterIterator;
124 nsCOMPtr<nsIContentIterator> mInnerIterator;
125 // Can't use a range here, since we want to represent part of the
126 // flattened tree, including native anonymous content.
127 nsCOMPtr<nsIDOMNode> mStartNode;
128 int32_t mStartOffset;
129 nsCOMPtr<nsIDOMNode> mEndNode;
130 int32_t mEndOffset;
132 nsCOMPtr<nsIContent> mStartOuterContent;
133 nsCOMPtr<nsIContent> mEndOuterContent;
134 bool mFindBackward;
136 void Reset();
137 void MaybeSetupInnerIterator();
138 void SetupInnerIterator(nsIContent* aContent);
139 };
141 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsFindContentIterator)
142 NS_INTERFACE_MAP_ENTRY(nsIContentIterator)
143 NS_INTERFACE_MAP_ENTRY(nsISupports)
144 NS_INTERFACE_MAP_END
146 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsFindContentIterator)
147 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsFindContentIterator)
149 NS_IMPL_CYCLE_COLLECTION(nsFindContentIterator, mOuterIterator, mInnerIterator,
150 mStartOuterContent, mEndOuterContent, mEndNode, mStartNode)
153 nsresult
154 nsFindContentIterator::Init(nsIDOMNode* aStartNode, int32_t aStartOffset,
155 nsIDOMNode* aEndNode, int32_t aEndOffset)
156 {
157 NS_ENSURE_ARG_POINTER(aStartNode);
158 NS_ENSURE_ARG_POINTER(aEndNode);
159 if (!mOuterIterator) {
160 if (mFindBackward) {
161 // Use post-order in the reverse case, so we get parents
162 // before children in case we want to prevent descending
163 // into a node.
164 mOuterIterator = do_CreateInstance(kCContentIteratorCID);
165 }
166 else {
167 // Use pre-order in the forward case, so we get parents
168 // before children in case we want to prevent descending
169 // into a node.
170 mOuterIterator = do_CreateInstance(kCPreContentIteratorCID);
171 }
172 NS_ENSURE_ARG_POINTER(mOuterIterator);
173 }
175 // Set up the search "range" that we will examine
176 mStartNode = aStartNode;
177 mStartOffset = aStartOffset;
178 mEndNode = aEndNode;
179 mEndOffset = aEndOffset;
181 return NS_OK;
182 }
184 void
185 nsFindContentIterator::First()
186 {
187 Reset();
188 }
190 void
191 nsFindContentIterator::Last()
192 {
193 Reset();
194 }
196 void
197 nsFindContentIterator::Next()
198 {
199 if (mInnerIterator) {
200 mInnerIterator->Next();
201 if (!mInnerIterator->IsDone())
202 return;
204 // by construction, mOuterIterator is already on the next node
205 }
206 else {
207 mOuterIterator->Next();
208 }
209 MaybeSetupInnerIterator();
210 }
212 void
213 nsFindContentIterator::Prev()
214 {
215 if (mInnerIterator) {
216 mInnerIterator->Prev();
217 if (!mInnerIterator->IsDone())
218 return;
220 // by construction, mOuterIterator is already on the previous node
221 }
222 else {
223 mOuterIterator->Prev();
224 }
225 MaybeSetupInnerIterator();
226 }
228 nsINode*
229 nsFindContentIterator::GetCurrentNode()
230 {
231 if (mInnerIterator && !mInnerIterator->IsDone()) {
232 return mInnerIterator->GetCurrentNode();
233 }
234 return mOuterIterator->GetCurrentNode();
235 }
237 bool
238 nsFindContentIterator::IsDone() {
239 if (mInnerIterator && !mInnerIterator->IsDone()) {
240 return false;
241 }
242 return mOuterIterator->IsDone();
243 }
245 nsresult
246 nsFindContentIterator::PositionAt(nsINode* aCurNode)
247 {
248 nsINode* oldNode = mOuterIterator->GetCurrentNode();
249 nsresult rv = mOuterIterator->PositionAt(aCurNode);
250 if (NS_SUCCEEDED(rv)) {
251 MaybeSetupInnerIterator();
252 }
253 else {
254 mOuterIterator->PositionAt(oldNode);
255 if (mInnerIterator)
256 rv = mInnerIterator->PositionAt(aCurNode);
257 }
258 return rv;
259 }
261 void
262 nsFindContentIterator::Reset()
263 {
264 mInnerIterator = nullptr;
265 mStartOuterContent = nullptr;
266 mEndOuterContent = nullptr;
268 // As a consequence of searching through text controls, we may have been
269 // initialized with a selection inside a <textarea> or a text <input>.
271 // see if the start node is an anonymous text node inside a text control
272 nsCOMPtr<nsIContent> startContent(do_QueryInterface(mStartNode));
273 if (startContent) {
274 mStartOuterContent = startContent->FindFirstNonChromeOnlyAccessContent();
275 }
277 // see if the end node is an anonymous text node inside a text control
278 nsCOMPtr<nsIContent> endContent(do_QueryInterface(mEndNode));
279 if (endContent) {
280 mEndOuterContent = endContent->FindFirstNonChromeOnlyAccessContent();
281 }
283 // Note: OK to just set up the outer iterator here; if our range has a native
284 // anonymous endpoint we'll end up setting up an inner iterator, and
285 // reset the outer one in the process.
286 nsCOMPtr<nsINode> node = do_QueryInterface(mStartNode);
287 NS_ENSURE_TRUE_VOID(node);
289 nsCOMPtr<nsIDOMRange> range = nsFind::CreateRange(node);
290 range->SetStart(mStartNode, mStartOffset);
291 range->SetEnd(mEndNode, mEndOffset);
292 mOuterIterator->Init(range);
294 if (!mFindBackward) {
295 if (mStartOuterContent != startContent) {
296 // the start node was an anonymous text node
297 SetupInnerIterator(mStartOuterContent);
298 if (mInnerIterator)
299 mInnerIterator->First();
300 }
301 if (!mOuterIterator->IsDone())
302 mOuterIterator->First();
303 }
304 else {
305 if (mEndOuterContent != endContent) {
306 // the end node was an anonymous text node
307 SetupInnerIterator(mEndOuterContent);
308 if (mInnerIterator)
309 mInnerIterator->Last();
310 }
311 if (!mOuterIterator->IsDone())
312 mOuterIterator->Last();
313 }
315 // if we didn't create an inner-iterator, the boundary node could still be
316 // a text control, in which case we also need an inner-iterator straightaway
317 if (!mInnerIterator) {
318 MaybeSetupInnerIterator();
319 }
320 }
322 void
323 nsFindContentIterator::MaybeSetupInnerIterator()
324 {
325 mInnerIterator = nullptr;
327 nsCOMPtr<nsIContent> content =
328 do_QueryInterface(mOuterIterator->GetCurrentNode());
329 if (!content || !content->IsNodeOfType(nsINode::eHTML_FORM_CONTROL))
330 return;
332 nsCOMPtr<nsIFormControl> formControl(do_QueryInterface(content));
333 if (!formControl->IsTextControl(true)) {
334 return;
335 }
337 SetupInnerIterator(content);
338 if (mInnerIterator) {
339 if (!mFindBackward) {
340 mInnerIterator->First();
341 // finish setup: position mOuterIterator on the actual "next"
342 // node (this completes its re-init, @see SetupInnerIterator)
343 if (!mOuterIterator->IsDone())
344 mOuterIterator->First();
345 }
346 else {
347 mInnerIterator->Last();
348 // finish setup: position mOuterIterator on the actual "previous"
349 // node (this completes its re-init, @see SetupInnerIterator)
350 if (!mOuterIterator->IsDone())
351 mOuterIterator->Last();
352 }
353 }
354 }
356 void
357 nsFindContentIterator::SetupInnerIterator(nsIContent* aContent)
358 {
359 if (!aContent) {
360 return;
361 }
362 NS_ASSERTION(!aContent->IsRootOfNativeAnonymousSubtree(), "invalid call");
364 nsITextControlFrame* tcFrame = do_QueryFrame(aContent->GetPrimaryFrame());
365 if (!tcFrame)
366 return;
368 nsCOMPtr<nsIEditor> editor;
369 tcFrame->GetEditor(getter_AddRefs(editor));
370 if (!editor)
371 return;
373 // don't mess with disabled input fields
374 uint32_t editorFlags = 0;
375 editor->GetFlags(&editorFlags);
376 if (editorFlags & nsIPlaintextEditor::eEditorDisabledMask)
377 return;
379 nsCOMPtr<nsIDOMElement> rootElement;
380 editor->GetRootElement(getter_AddRefs(rootElement));
382 nsCOMPtr<nsIDOMRange> innerRange = nsFind::CreateRange(aContent);
383 nsCOMPtr<nsIDOMRange> outerRange = nsFind::CreateRange(aContent);
384 if (!innerRange || !outerRange) {
385 return;
386 }
388 // now create the inner-iterator
389 mInnerIterator = do_CreateInstance(kCPreContentIteratorCID);
391 if (mInnerIterator) {
392 innerRange->SelectNodeContents(rootElement);
394 // fix up the inner bounds, we may have to only lookup a portion
395 // of the text control if the current node is a boundary point
396 if (aContent == mStartOuterContent) {
397 innerRange->SetStart(mStartNode, mStartOffset);
398 }
399 if (aContent == mEndOuterContent) {
400 innerRange->SetEnd(mEndNode, mEndOffset);
401 }
402 // Note: we just init here. We do First() or Last() later.
403 mInnerIterator->Init(innerRange);
405 // make sure to place the outer-iterator outside
406 // the text control so that we don't go there again.
407 nsresult res1, res2;
408 nsCOMPtr<nsIDOMNode> outerNode(do_QueryInterface(aContent));
409 if (!mFindBackward) { // find forward
410 // cut the outer-iterator after the current node
411 res1 = outerRange->SetEnd(mEndNode, mEndOffset);
412 res2 = outerRange->SetStartAfter(outerNode);
413 }
414 else { // find backward
415 // cut the outer-iterator before the current node
416 res1 = outerRange->SetStart(mStartNode, mStartOffset);
417 res2 = outerRange->SetEndBefore(outerNode);
418 }
419 if (NS_FAILED(res1) || NS_FAILED(res2)) {
420 // we are done with the outer-iterator, the
421 // inner-iterator will traverse what we want
422 outerRange->Collapse(true);
423 }
425 // Note: we just re-init here, using the segment of our search range that
426 // is yet to be visited. Thus when we later do mOuterIterator->First() [or
427 // mOuterIterator->Last()], we will effectively be on the next node [or
428 // the previous node] _with respect to_ the search range.
429 mOuterIterator->Init(outerRange);
430 }
431 }
433 nsresult
434 NS_NewFindContentIterator(bool aFindBackward,
435 nsIContentIterator** aResult)
436 {
437 NS_ENSURE_ARG_POINTER(aResult);
438 if (!aResult) {
439 return NS_ERROR_NULL_POINTER;
440 }
442 nsFindContentIterator* it = new nsFindContentIterator(aFindBackward);
443 if (!it) {
444 return NS_ERROR_OUT_OF_MEMORY;
445 }
446 return it->QueryInterface(NS_GET_IID(nsIContentIterator), (void **)aResult);
447 }
448 // --------------------------------------------------------------------
450 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsFind)
451 NS_INTERFACE_MAP_ENTRY(nsIFind)
452 NS_INTERFACE_MAP_ENTRY(nsISupports)
453 NS_INTERFACE_MAP_END
455 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsFind)
456 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsFind)
458 NS_IMPL_CYCLE_COLLECTION(nsFind, mLastBlockParent, mIterNode, mIterator)
460 nsFind::nsFind()
461 : mFindBackward(false)
462 , mCaseSensitive(false)
463 , mIterOffset(0)
464 {
465 }
467 nsFind::~nsFind()
468 {
469 }
471 #ifdef DEBUG_FIND
472 static void DumpNode(nsIDOMNode* aNode)
473 {
474 if (!aNode)
475 {
476 printf(">>>> Node: NULL\n");
477 return;
478 }
479 nsAutoString nodeName;
480 aNode->GetNodeName(nodeName);
481 nsCOMPtr<nsIContent> textContent (do_QueryInterface(aNode));
482 if (textContent && textContent->IsNodeOfType(nsINode::eTEXT))
483 {
484 nsAutoString newText;
485 textContent->AppendTextTo(newText);
486 printf(">>>> Text node (node name %s): '%s'\n",
487 NS_LossyConvertUTF16toASCII(nodeName).get(),
488 NS_LossyConvertUTF16toASCII(newText).get());
489 }
490 else
491 printf(">>>> Node: %s\n", NS_LossyConvertUTF16toASCII(nodeName).get());
492 }
493 #endif
495 nsresult
496 nsFind::InitIterator(nsIDOMNode* aStartNode, int32_t aStartOffset,
497 nsIDOMNode* aEndNode, int32_t aEndOffset)
498 {
499 if (!mIterator)
500 {
501 mIterator = new nsFindContentIterator(mFindBackward);
502 NS_ENSURE_TRUE(mIterator, NS_ERROR_OUT_OF_MEMORY);
503 }
505 NS_ENSURE_ARG_POINTER(aStartNode);
506 NS_ENSURE_ARG_POINTER(aEndNode);
508 #ifdef DEBUG_FIND
509 printf("InitIterator search range:\n");
510 printf(" -- start %d, ", aStartOffset); DumpNode(aStartNode);
511 printf(" -- end %d, ", aEndOffset); DumpNode(aEndNode);
512 #endif
514 nsresult rv =
515 mIterator->Init(aStartNode, aStartOffset, aEndNode, aEndOffset);
516 NS_ENSURE_SUCCESS(rv, rv);
517 if (mFindBackward) {
518 mIterator->Last();
519 }
520 else {
521 mIterator->First();
522 }
523 return NS_OK;
524 }
526 /* attribute boolean findBackward; */
527 NS_IMETHODIMP
528 nsFind::GetFindBackwards(bool *aFindBackward)
529 {
530 if (!aFindBackward)
531 return NS_ERROR_NULL_POINTER;
533 *aFindBackward = mFindBackward;
534 return NS_OK;
535 }
536 NS_IMETHODIMP
537 nsFind::SetFindBackwards(bool aFindBackward)
538 {
539 mFindBackward = aFindBackward;
540 return NS_OK;
541 }
543 /* attribute boolean caseSensitive; */
544 NS_IMETHODIMP
545 nsFind::GetCaseSensitive(bool *aCaseSensitive)
546 {
547 if (!aCaseSensitive)
548 return NS_ERROR_NULL_POINTER;
550 *aCaseSensitive = mCaseSensitive;
551 return NS_OK;
552 }
553 NS_IMETHODIMP
554 nsFind::SetCaseSensitive(bool aCaseSensitive)
555 {
556 mCaseSensitive = aCaseSensitive;
557 return NS_OK;
558 }
560 NS_IMETHODIMP
561 nsFind::GetWordBreaker(nsIWordBreaker** aWordBreaker)
562 {
563 *aWordBreaker = mWordBreaker;
564 NS_IF_ADDREF(*aWordBreaker);
565 return NS_OK;
566 }
568 NS_IMETHODIMP
569 nsFind::SetWordBreaker(nsIWordBreaker* aWordBreaker)
570 {
571 mWordBreaker = aWordBreaker;
572 return NS_OK;
573 }
575 //
576 // Here begins the find code.
577 // A ten-thousand-foot view of how it works:
578 // Find needs to be able to compare across inline (but not block) nodes,
579 // e.g. find for "abc" should match a<b>b</b>c.
580 // So after we've searched a node, we're not done with it;
581 // in the case of a partial match we may need to reset the
582 // iterator to go back to a previously visited node,
583 // so we always save the "match anchor" node and offset.
584 //
585 // Text nodes store their text in an nsTextFragment, which is
586 // effectively a union of a one-byte string or a two-byte string.
587 // Single and double strings are intermixed in the dom.
588 // We don't have string classes which can deal with intermixed strings,
589 // so all the handling is done explicitly here.
590 //
592 nsresult
593 nsFind::NextNode(nsIDOMRange* aSearchRange,
594 nsIDOMRange* aStartPoint, nsIDOMRange* aEndPoint,
595 bool aContinueOk)
596 {
597 nsresult rv;
599 nsCOMPtr<nsIContent> content;
601 if (!mIterator || aContinueOk)
602 {
603 // If we are continuing, that means we have a match in progress.
604 // In that case, we want to continue from the end point
605 // (where we are now) to the beginning/end of the search range.
606 nsCOMPtr<nsIDOMNode> startNode;
607 nsCOMPtr<nsIDOMNode> endNode;
608 int32_t startOffset, endOffset;
609 if (aContinueOk)
610 {
611 #ifdef DEBUG_FIND
612 printf("Match in progress: continuing past endpoint\n");
613 #endif
614 if (mFindBackward) {
615 aSearchRange->GetStartContainer(getter_AddRefs(startNode));
616 aSearchRange->GetStartOffset(&startOffset);
617 aEndPoint->GetStartContainer(getter_AddRefs(endNode));
618 aEndPoint->GetStartOffset(&endOffset);
619 } else { // forward
620 aEndPoint->GetEndContainer(getter_AddRefs(startNode));
621 aEndPoint->GetEndOffset(&startOffset);
622 aSearchRange->GetEndContainer(getter_AddRefs(endNode));
623 aSearchRange->GetEndOffset(&endOffset);
624 }
625 }
626 else // Normal, not continuing
627 {
628 if (mFindBackward) {
629 aSearchRange->GetStartContainer(getter_AddRefs(startNode));
630 aSearchRange->GetStartOffset(&startOffset);
631 aStartPoint->GetEndContainer(getter_AddRefs(endNode));
632 aStartPoint->GetEndOffset(&endOffset);
633 // XXX Needs work:
634 // Problem with this approach: if there is a match which starts
635 // just before the current selection and continues into the selection,
636 // we will miss it, because our search algorithm only starts
637 // searching from the end of the word, so we would have to
638 // search the current selection but discount any matches
639 // that fall entirely inside it.
640 } else { // forward
641 aStartPoint->GetStartContainer(getter_AddRefs(startNode));
642 aStartPoint->GetStartOffset(&startOffset);
643 aEndPoint->GetEndContainer(getter_AddRefs(endNode));
644 aEndPoint->GetEndOffset(&endOffset);
645 }
646 }
648 rv = InitIterator(startNode, startOffset, endNode, endOffset);
649 NS_ENSURE_SUCCESS(rv, rv);
650 if (!aStartPoint)
651 aStartPoint = aSearchRange;
653 content = do_QueryInterface(mIterator->GetCurrentNode());
654 #ifdef DEBUG_FIND
655 nsCOMPtr<nsIDOMNode> dnode (do_QueryInterface(content));
656 printf(":::::: Got the first node "); DumpNode(dnode);
657 #endif
658 if (content && content->IsNodeOfType(nsINode::eTEXT) &&
659 !SkipNode(content))
660 {
661 mIterNode = do_QueryInterface(content);
662 // Also set mIterOffset if appropriate:
663 nsCOMPtr<nsIDOMNode> node;
664 if (mFindBackward) {
665 aStartPoint->GetEndContainer(getter_AddRefs(node));
666 if (mIterNode.get() == node.get())
667 aStartPoint->GetEndOffset(&mIterOffset);
668 else
669 mIterOffset = -1; // sign to start from end
670 }
671 else
672 {
673 aStartPoint->GetStartContainer(getter_AddRefs(node));
674 if (mIterNode.get() == node.get())
675 aStartPoint->GetStartOffset(&mIterOffset);
676 else
677 mIterOffset = 0;
678 }
679 #ifdef DEBUG_FIND
680 printf("Setting initial offset to %d\n", mIterOffset);
681 #endif
682 return NS_OK;
683 }
684 }
686 while (1)
687 {
688 if (mFindBackward)
689 mIterator->Prev();
690 else
691 mIterator->Next();
693 content = do_QueryInterface(mIterator->GetCurrentNode());
694 if (!content)
695 break;
697 #ifdef DEBUG_FIND
698 nsCOMPtr<nsIDOMNode> dnode (do_QueryInterface(content));
699 printf(":::::: Got another node "); DumpNode(dnode);
700 #endif
702 // If we ever cross a block node, we might want to reset
703 // the match anchor:
704 // we don't match patterns extending across block boundaries.
705 // But we can't depend on this test here now, because the iterator
706 // doesn't give us the parent going in and going out, and we
707 // need it both times to depend on this.
708 //if (IsBlockNode(content))
710 // Now see if we need to skip this node --
711 // e.g. is it part of a script or other invisible node?
712 // Note that we don't ask for CSS information;
713 // a node can be invisible due to CSS, and we'd still find it.
714 if (SkipNode(content))
715 continue;
717 if (content->IsNodeOfType(nsINode::eTEXT))
718 break;
719 #ifdef DEBUG_FIND
720 dnode = do_QueryInterface(content);
721 printf("Not a text node: "); DumpNode(dnode);
722 #endif
723 }
725 if (content)
726 mIterNode = do_QueryInterface(content);
727 else
728 mIterNode = nullptr;
729 mIterOffset = -1;
731 #ifdef DEBUG_FIND
732 printf("Iterator gave: "); DumpNode(mIterNode);
733 #endif
734 return NS_OK;
735 }
737 bool nsFind::IsBlockNode(nsIContent* aContent)
738 {
739 if (!aContent->IsHTML()) {
740 return false;
741 }
743 nsIAtom *atom = aContent->Tag();
745 if (atom == nsGkAtoms::img ||
746 atom == nsGkAtoms::hr ||
747 atom == nsGkAtoms::th ||
748 atom == nsGkAtoms::td)
749 return true;
751 return nsContentUtils::IsHTMLBlock(atom);
752 }
754 bool nsFind::IsTextNode(nsIDOMNode* aNode)
755 {
756 uint16_t nodeType;
757 aNode->GetNodeType(&nodeType);
759 return nodeType == nsIDOMNode::TEXT_NODE ||
760 nodeType == nsIDOMNode::CDATA_SECTION_NODE;
761 }
763 bool nsFind::IsVisibleNode(nsIDOMNode *aDOMNode)
764 {
765 nsCOMPtr<nsIContent> content(do_QueryInterface(aDOMNode));
766 if (!content)
767 return false;
769 nsIFrame *frame = content->GetPrimaryFrame();
770 if (!frame) {
771 // No frame! Not visible then.
772 return false;
773 }
775 return frame->StyleVisibility()->IsVisible();
776 }
778 bool nsFind::SkipNode(nsIContent* aContent)
779 {
780 nsIAtom *atom;
782 #ifdef HAVE_BIDI_ITERATOR
783 atom = aContent->Tag();
785 // We may not need to skip comment nodes,
786 // now that IsTextNode distinguishes them from real text nodes.
787 return (aContent->IsNodeOfType(nsINode::eCOMMENT) ||
788 (aContent->IsHTML() &&
789 (atom == sScriptAtom ||
790 atom == sNoframesAtom ||
791 atom == sSelectAtom)));
793 #else /* HAVE_BIDI_ITERATOR */
794 // Temporary: eventually we will have an iterator to do this,
795 // but for now, we have to climb up the tree for each node
796 // and see whether any parent is a skipped node,
797 // and take the performance hit.
799 nsIContent *content = aContent;
800 while (content)
801 {
802 atom = content->Tag();
804 if (aContent->IsNodeOfType(nsINode::eCOMMENT) ||
805 (content->IsHTML() &&
806 (atom == nsGkAtoms::script ||
807 atom == nsGkAtoms::noframes ||
808 atom == nsGkAtoms::select)))
809 {
810 #ifdef DEBUG_FIND
811 printf("Skipping node: ");
812 nsCOMPtr<nsIDOMNode> node (do_QueryInterface(content));
813 DumpNode(node);
814 #endif
816 return true;
817 }
819 // Only climb to the nearest block node
820 if (IsBlockNode(content))
821 return false;
823 content = content->GetParent();
824 }
826 return false;
827 #endif /* HAVE_BIDI_ITERATOR */
828 }
830 nsresult nsFind::GetBlockParent(nsIDOMNode* aNode, nsIDOMNode** aParent)
831 {
832 while (aNode)
833 {
834 nsCOMPtr<nsIDOMNode> parent;
835 nsresult rv = aNode->GetParentNode(getter_AddRefs(parent));
836 NS_ENSURE_SUCCESS(rv, rv);
837 nsCOMPtr<nsIContent> content (do_QueryInterface(parent));
838 if (content && IsBlockNode(content))
839 {
840 *aParent = parent;
841 NS_ADDREF(*aParent);
842 return NS_OK;
843 }
844 aNode = parent;
845 }
846 return NS_ERROR_FAILURE;
847 }
849 // Call ResetAll before returning,
850 // to remove all references to external objects.
851 void nsFind::ResetAll()
852 {
853 mIterator = nullptr;
854 mLastBlockParent = nullptr;
855 }
857 #define NBSP_CHARCODE (CHAR_TO_UNICHAR(160))
858 #define IsSpace(c) (nsCRT::IsAsciiSpace(c) || (c) == NBSP_CHARCODE)
859 #define OVERFLOW_PINDEX (mFindBackward ? pindex < 0 : pindex > patLen)
860 #define DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen)
861 #define ALMOST_DONE_WITH_PINDEX (mFindBackward ? pindex <= 0 : pindex >= patLen-1)
863 //
864 // Find:
865 // Take nodes out of the tree with NextNode,
866 // until null (NextNode will return 0 at the end of our range).
867 //
868 NS_IMETHODIMP
869 nsFind::Find(const char16_t *aPatText, nsIDOMRange* aSearchRange,
870 nsIDOMRange* aStartPoint, nsIDOMRange* aEndPoint,
871 nsIDOMRange** aRangeRet)
872 {
873 #ifdef DEBUG_FIND
874 printf("============== nsFind::Find('%s'%s, %p, %p, %p)\n",
875 NS_LossyConvertUTF16toASCII(aPatText).get(),
876 mFindBackward ? " (backward)" : " (forward)",
877 (void*)aSearchRange, (void*)aStartPoint, (void*)aEndPoint);
878 #endif
880 NS_ENSURE_ARG(aSearchRange);
881 NS_ENSURE_ARG(aStartPoint);
882 NS_ENSURE_ARG(aEndPoint);
883 NS_ENSURE_ARG_POINTER(aRangeRet);
884 *aRangeRet = 0;
886 if (!aPatText)
887 return NS_ERROR_NULL_POINTER;
889 ResetAll();
891 nsAutoString patAutoStr(aPatText);
892 if (!mCaseSensitive)
893 ToLowerCase(patAutoStr);
895 // Ignore soft hyphens in the pattern
896 static const char kShy[] = { char(CH_SHY), 0 };
897 patAutoStr.StripChars(kShy);
899 const char16_t* patStr = patAutoStr.get();
900 int32_t patLen = patAutoStr.Length() - 1;
902 // current offset into the pattern -- reset to beginning/end:
903 int32_t pindex = (mFindBackward ? patLen : 0);
905 // Current offset into the fragment
906 int32_t findex = 0;
908 // Direction to move pindex and ptr*
909 int incr = (mFindBackward ? -1 : 1);
911 nsCOMPtr<nsIContent> tc;
912 const nsTextFragment *frag = nullptr;
913 int32_t fragLen = 0;
915 // Pointers into the current fragment:
916 const char16_t *t2b = nullptr;
917 const char *t1b = nullptr;
919 // Keep track of when we're in whitespace:
920 // (only matters when we're matching)
921 bool inWhitespace = false;
923 // Place to save the range start point in case we find a match:
924 nsCOMPtr<nsIDOMNode> matchAnchorNode;
925 int32_t matchAnchorOffset = 0;
927 // Get the end point, so we know when to end searches:
928 nsCOMPtr<nsIDOMNode> endNode;
929 int32_t endOffset;
930 aEndPoint->GetEndContainer(getter_AddRefs(endNode));
931 aEndPoint->GetEndOffset(&endOffset);
933 char16_t prevChar = 0;
934 while (1)
935 {
936 #ifdef DEBUG_FIND
937 printf("Loop ...\n");
938 #endif
940 // If this is our first time on a new node, reset the pointers:
941 if (!frag)
942 {
944 tc = nullptr;
945 NextNode(aSearchRange, aStartPoint, aEndPoint, false);
946 if (!mIterNode) // Out of nodes
947 {
948 // Are we in the middle of a match?
949 // If so, try again with continuation.
950 if (matchAnchorNode)
951 NextNode(aSearchRange, aStartPoint, aEndPoint, true);
953 // Reset the iterator, so this nsFind will be usable if
954 // the user wants to search again (from beginning/end).
955 ResetAll();
956 return NS_OK;
957 }
959 // We have a new text content. If its block parent is different
960 // from the block parent of the last text content, then we
961 // need to clear the match since we don't want to find
962 // across block boundaries.
963 nsCOMPtr<nsIDOMNode> blockParent;
964 GetBlockParent(mIterNode, getter_AddRefs(blockParent));
965 #ifdef DEBUG_FIND
966 printf("New node: old blockparent = %p, new = %p\n",
967 (void*)mLastBlockParent.get(), (void*)blockParent.get());
968 #endif
969 if (blockParent != mLastBlockParent)
970 {
971 #ifdef DEBUG_FIND
972 printf("Different block parent!\n");
973 #endif
974 mLastBlockParent = blockParent;
975 // End any pending match:
976 matchAnchorNode = nullptr;
977 matchAnchorOffset = 0;
978 pindex = (mFindBackward ? patLen : 0);
979 inWhitespace = false;
980 }
982 // Get the text content:
983 tc = do_QueryInterface(mIterNode);
984 if (!tc || !(frag = tc->GetText())) // Out of nodes
985 {
986 mIterator = nullptr;
987 mLastBlockParent = 0;
988 ResetAll();
989 return NS_OK;
990 }
992 fragLen = frag->GetLength();
994 // Set our starting point in this node.
995 // If we're going back to the anchor node, which means that we
996 // just ended a partial match, use the saved offset:
997 if (mIterNode == matchAnchorNode)
998 findex = matchAnchorOffset + (mFindBackward ? 1 : 0);
1000 // mIterOffset, if set, is the range's idea of an offset,
1001 // and points between characters. But when translated
1002 // to a string index, it points to a character. If we're
1003 // going backward, this is one character too late and
1004 // we'll match part of our previous pattern.
1005 else if (mIterOffset >= 0)
1006 findex = mIterOffset - (mFindBackward ? 1 : 0);
1008 // Otherwise, just start at the appropriate end of the fragment:
1009 else if (mFindBackward)
1010 findex = fragLen - 1;
1011 else
1012 findex = 0;
1014 // Offset can only apply to the first node:
1015 mIterOffset = -1;
1017 // If this is outside the bounds of the string, then skip this node:
1018 if (findex < 0 || findex > fragLen-1)
1019 {
1020 #ifdef DEBUG_FIND
1021 printf("At the end of a text node -- skipping to the next\n");
1022 #endif
1023 frag = 0;
1024 continue;
1025 }
1027 #ifdef DEBUG_FIND
1028 printf("Starting from offset %d\n", findex);
1029 #endif
1030 if (frag->Is2b())
1031 {
1032 t2b = frag->Get2b();
1033 t1b = nullptr;
1034 #ifdef DEBUG_FIND
1035 nsAutoString str2(t2b, fragLen);
1036 printf("2 byte, '%s'\n", NS_LossyConvertUTF16toASCII(str2).get());
1037 #endif
1038 }
1039 else
1040 {
1041 t1b = frag->Get1b();
1042 t2b = nullptr;
1043 #ifdef DEBUG_FIND
1044 nsAutoCString str1(t1b, fragLen);
1045 printf("1 byte, '%s'\n", str1.get());
1046 #endif
1047 }
1048 }
1049 else // still on the old node
1050 {
1051 // Still on the old node. Advance the pointers,
1052 // then see if we need to pull a new node.
1053 findex += incr;
1054 #ifdef DEBUG_FIND
1055 printf("Same node -- (%d, %d)\n", pindex, findex);
1056 #endif
1057 if (mFindBackward ? (findex < 0) : (findex >= fragLen))
1058 {
1059 #ifdef DEBUG_FIND
1060 printf("Will need to pull a new node: mAO = %d, frag len=%d\n",
1061 matchAnchorOffset, fragLen);
1062 #endif
1063 // Done with this node. Pull a new one.
1064 frag = nullptr;
1065 continue;
1066 }
1067 }
1069 // Have we gone past the endpoint yet?
1070 // If we have, and we're not in the middle of a match, return.
1071 if (mIterNode == endNode &&
1072 ((mFindBackward && (findex < endOffset)) ||
1073 (!mFindBackward && (findex > endOffset))))
1074 {
1075 ResetAll();
1076 return NS_OK;
1077 }
1079 // The two characters we'll be comparing:
1080 char16_t c = (t2b ? t2b[findex] : CHAR_TO_UNICHAR(t1b[findex]));
1081 char16_t patc = patStr[pindex];
1083 #ifdef DEBUG_FIND
1084 printf("Comparing '%c'=%x to '%c' (%d of %d), findex=%d%s\n",
1085 (char)c, (int)c, patc, pindex, patLen, findex,
1086 inWhitespace ? " (inWhitespace)" : "");
1087 #endif
1089 // Do we need to go back to non-whitespace mode?
1090 // If inWhitespace, then this space in the pat str
1091 // has already matched at least one space in the document.
1092 if (inWhitespace && !IsSpace(c))
1093 {
1094 inWhitespace = false;
1095 pindex += incr;
1096 #ifdef DEBUG
1097 // This shouldn't happen -- if we were still matching, and we
1098 // were at the end of the pat string, then we should have
1099 // caught it in the last iteration and returned success.
1100 if (OVERFLOW_PINDEX)
1101 NS_ASSERTION(false, "Missed a whitespace match");
1102 #endif
1103 patc = patStr[pindex];
1104 }
1105 if (!inWhitespace && IsSpace(patc))
1106 inWhitespace = true;
1108 // convert to lower case if necessary
1109 else if (!inWhitespace && !mCaseSensitive && IsUpperCase(c))
1110 c = ToLowerCase(c);
1112 switch (c) {
1113 // ignore soft hyphens in the document
1114 case CH_SHY:
1115 continue;
1116 // treat curly and straight quotes as identical
1117 case CH_LEFT_SINGLE_QUOTE:
1118 case CH_RIGHT_SINGLE_QUOTE:
1119 c = CH_APOSTROPHE;
1120 break;
1121 case CH_LEFT_DOUBLE_QUOTE:
1122 case CH_RIGHT_DOUBLE_QUOTE:
1123 c = CH_QUOTE;
1124 break;
1125 }
1127 switch (patc) {
1128 // treat curly and straight quotes as identical
1129 case CH_LEFT_SINGLE_QUOTE:
1130 case CH_RIGHT_SINGLE_QUOTE:
1131 patc = CH_APOSTROPHE;
1132 break;
1133 case CH_LEFT_DOUBLE_QUOTE:
1134 case CH_RIGHT_DOUBLE_QUOTE:
1135 patc = CH_QUOTE;
1136 break;
1137 }
1139 // a '\n' between CJ characters is ignored
1140 if (pindex != (mFindBackward ? patLen : 0) && c != patc && !inWhitespace) {
1141 if (c == '\n' && t2b && IS_CJ_CHAR(prevChar)) {
1142 int32_t nindex = findex + incr;
1143 if (mFindBackward ? (nindex >= 0) : (nindex < fragLen)) {
1144 if (IS_CJ_CHAR(t2b[nindex]))
1145 continue;
1146 }
1147 }
1148 }
1150 // Compare
1151 if ((c == patc) || (inWhitespace && IsSpace(c)))
1152 {
1153 prevChar = c;
1154 #ifdef DEBUG_FIND
1155 if (inWhitespace)
1156 printf("YES (whitespace)(%d of %d)\n", pindex, patLen);
1157 else
1158 printf("YES! '%c' == '%c' (%d of %d)\n", c, patc, pindex, patLen);
1159 #endif
1161 // Save the range anchors if we haven't already:
1162 if (!matchAnchorNode) {
1163 matchAnchorNode = mIterNode;
1164 matchAnchorOffset = findex;
1165 }
1167 // Are we done?
1168 if (DONE_WITH_PINDEX)
1169 // Matched the whole string!
1170 {
1171 #ifdef DEBUG_FIND
1172 printf("Found a match!\n");
1173 #endif
1175 // Make the range:
1176 nsCOMPtr<nsIDOMNode> startParent;
1177 nsCOMPtr<nsIDOMNode> endParent;
1178 nsCOMPtr<nsIDOMRange> range = CreateRange(tc);
1179 if (range)
1180 {
1181 int32_t matchStartOffset, matchEndOffset;
1182 // convert char index to range point:
1183 int32_t mao = matchAnchorOffset + (mFindBackward ? 1 : 0);
1184 if (mFindBackward)
1185 {
1186 startParent = do_QueryInterface(tc);
1187 endParent = matchAnchorNode;
1188 matchStartOffset = findex;
1189 matchEndOffset = mao;
1190 }
1191 else
1192 {
1193 startParent = matchAnchorNode;
1194 endParent = do_QueryInterface(tc);
1195 matchStartOffset = mao;
1196 matchEndOffset = findex+1;
1197 }
1198 if (startParent && endParent &&
1199 IsVisibleNode(startParent) && IsVisibleNode(endParent))
1200 {
1201 range->SetStart(startParent, matchStartOffset);
1202 range->SetEnd(endParent, matchEndOffset);
1203 *aRangeRet = range.get();
1204 NS_ADDREF(*aRangeRet);
1205 }
1206 else {
1207 startParent = nullptr; // This match is no good -- invisible or bad range
1208 }
1209 }
1211 if (startParent) {
1212 // If startParent == nullptr, we didn't successfully make range
1213 // or, we didn't make a range because the start or end node were invisible
1214 // Reset the offset to the other end of the found string:
1215 mIterOffset = findex + (mFindBackward ? 1 : 0);
1216 #ifdef DEBUG_FIND
1217 printf("mIterOffset = %d, mIterNode = ", mIterOffset);
1218 DumpNode(mIterNode);
1219 #endif
1221 ResetAll();
1222 return NS_OK;
1223 }
1224 matchAnchorNode = nullptr; // This match is no good, continue on in document
1225 }
1227 if (matchAnchorNode) {
1228 // Not done, but still matching.
1229 // Advance and loop around for the next characters.
1230 // But don't advance from a space to a non-space:
1231 if (!inWhitespace || DONE_WITH_PINDEX || IsSpace(patStr[pindex+incr]))
1232 {
1233 pindex += incr;
1234 inWhitespace = false;
1235 #ifdef DEBUG_FIND
1236 printf("Advancing pindex to %d\n", pindex);
1237 #endif
1238 }
1240 continue;
1241 }
1242 }
1244 #ifdef DEBUG_FIND
1245 printf("NOT: %c == %c\n", c, patc);
1246 #endif
1248 // If we didn't match, go back to the beginning of patStr,
1249 // and set findex back to the next char after
1250 // we started the current match.
1251 if (matchAnchorNode) // we're ending a partial match
1252 {
1253 findex = matchAnchorOffset;
1254 mIterOffset = matchAnchorOffset;
1255 // +incr will be added to findex when we continue
1257 // Are we going back to a previous node?
1258 if (matchAnchorNode != mIterNode)
1259 {
1260 nsCOMPtr<nsIContent> content (do_QueryInterface(matchAnchorNode));
1261 DebugOnly<nsresult> rv = NS_ERROR_UNEXPECTED;
1262 if (content)
1263 rv = mIterator->PositionAt(content);
1264 frag = 0;
1265 NS_ASSERTION(NS_SUCCEEDED(rv), "Text content wasn't nsIContent!");
1266 #ifdef DEBUG_FIND
1267 printf("Repositioned anchor node\n");
1268 #endif
1269 }
1270 #ifdef DEBUG_FIND
1271 printf("Ending a partial match; findex -> %d, mIterOffset -> %d\n",
1272 findex, mIterOffset);
1273 #endif
1274 }
1275 matchAnchorNode = nullptr;
1276 matchAnchorOffset = 0;
1277 inWhitespace = false;
1278 pindex = (mFindBackward ? patLen : 0);
1279 #ifdef DEBUG_FIND
1280 printf("Setting findex back to %d, pindex to %d\n", findex, pindex);
1282 #endif
1283 } // end while loop
1285 // Out of nodes, and didn't match.
1286 ResetAll();
1287 return NS_OK;
1288 }
1290 /* static */
1291 already_AddRefed<nsIDOMRange>
1292 nsFind::CreateRange(nsINode* aNode)
1293 {
1294 nsRefPtr<nsRange> range = new nsRange(aNode);
1295 range->SetMaySpanAnonymousSubtrees(true);
1296 return range.forget();
1297 }