parser/htmlparser/public/nsScannerString.h

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:2d9a87d0ede8
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
9
10 #include "nsString.h"
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
13 #include <algorithm>
14
15
16 /**
17 * NOTE: nsScannerString (and the other classes defined in this file) are
18 * not related to nsAString or any of the other xpcom/string classes.
19 *
20 * nsScannerString is based on the nsSlidingString implementation that used
21 * to live in xpcom/string. Now that nsAString is limited to representing
22 * only single fragment strings, nsSlidingString can no longer be used.
23 *
24 * An advantage to this design is that it does not employ any virtual
25 * functions.
26 *
27 * This file uses SCC-style indenting in deference to the nsSlidingString
28 * code from which this code is derived ;-)
29 */
30
31 class nsScannerIterator;
32 class nsScannerSubstring;
33 class nsScannerString;
34
35
36 /**
37 * nsScannerBufferList
38 *
39 * This class maintains a list of heap-allocated Buffer objects. The buffers
40 * are maintained in a circular linked list. Each buffer has a usage count
41 * that is decremented by the owning nsScannerSubstring.
42 *
43 * The buffer list itself is reference counted. This allows the buffer list
44 * to be shared by multiple nsScannerSubstring objects. The reference
45 * counting is not threadsafe, which is not at all a requirement.
46 *
47 * When a nsScannerSubstring releases its reference to a buffer list, it
48 * decrements the usage count of the first buffer in the buffer list that it
49 * was referencing. It informs the buffer list that it can discard buffers
50 * starting at that prefix. The buffer list will do so if the usage count of
51 * that buffer is 0 and if it is the first buffer in the list. It will
52 * continue to prune buffers starting from the front of the buffer list until
53 * it finds a buffer that has a usage count that is non-zero.
54 */
55 class nsScannerBufferList
56 {
57 public:
58
59 /**
60 * Buffer objects are directly followed by a data segment. The start
61 * of the data segment is determined by increment the |this| pointer
62 * by 1 unit.
63 */
64 class Buffer : public mozilla::LinkedListElement<Buffer>
65 {
66 public:
67
68 void IncrementUsageCount() { ++mUsageCount; }
69 void DecrementUsageCount() { --mUsageCount; }
70
71 bool IsInUse() const { return mUsageCount != 0; }
72
73 const char16_t* DataStart() const { return (const char16_t*) (this+1); }
74 char16_t* DataStart() { return ( char16_t*) (this+1); }
75
76 const char16_t* DataEnd() const { return mDataEnd; }
77 char16_t* DataEnd() { return mDataEnd; }
78
79 const Buffer* Next() const { return getNext(); }
80 Buffer* Next() { return getNext(); }
81
82 const Buffer* Prev() const { return getPrevious(); }
83 Buffer* Prev() { return getPrevious(); }
84
85 uint32_t DataLength() const { return mDataEnd - DataStart(); }
86 void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
87
88 private:
89
90 friend class nsScannerBufferList;
91
92 int32_t mUsageCount;
93 char16_t* mDataEnd;
94 };
95
96 /**
97 * Position objects serve as lightweight pointers into a buffer list.
98 * The mPosition member must be contained with mBuffer->DataStart()
99 * and mBuffer->DataEnd().
100 */
101 class Position
102 {
103 public:
104
105 Position() {}
106
107 Position( Buffer* buffer, char16_t* position )
108 : mBuffer(buffer)
109 , mPosition(position)
110 {}
111
112 inline
113 Position( const nsScannerIterator& aIter );
114
115 inline
116 Position& operator=( const nsScannerIterator& aIter );
117
118 static size_t Distance( const Position& p1, const Position& p2 );
119
120 Buffer* mBuffer;
121 char16_t* mPosition;
122 };
123
124 static Buffer* AllocBufferFromString( const nsAString& );
125 static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
126
127 nsScannerBufferList( Buffer* buf )
128 : mRefCnt(0)
129 {
130 mBuffers.insertBack(buf);
131 }
132
133 void AddRef() { ++mRefCnt; }
134 void Release() { if (--mRefCnt == 0) delete this; }
135
136 void Append( Buffer* buf ) { mBuffers.insertBack(buf); }
137 void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
138 void SplitBuffer( const Position& );
139 void DiscardUnreferencedPrefix( Buffer* );
140
141 Buffer* Head() { return mBuffers.getFirst(); }
142 const Buffer* Head() const { return mBuffers.getFirst(); }
143
144 Buffer* Tail() { return mBuffers.getLast(); }
145 const Buffer* Tail() const { return mBuffers.getLast(); }
146
147 private:
148
149 friend class nsScannerSubstring;
150
151 ~nsScannerBufferList() { ReleaseAll(); }
152 void ReleaseAll();
153
154 int32_t mRefCnt;
155 mozilla::LinkedList<Buffer> mBuffers;
156 };
157
158
159 /**
160 * nsScannerFragment represents a "slice" of a Buffer object.
161 */
162 struct nsScannerFragment
163 {
164 typedef nsScannerBufferList::Buffer Buffer;
165
166 const Buffer* mBuffer;
167 const char16_t* mFragmentStart;
168 const char16_t* mFragmentEnd;
169 };
170
171
172 /**
173 * nsScannerSubstring is the base class for nsScannerString. It provides
174 * access to iterators and methods to bind the substring to another
175 * substring or nsAString instance.
176 *
177 * This class owns the buffer list.
178 */
179 class nsScannerSubstring
180 {
181 public:
182 typedef nsScannerBufferList::Buffer Buffer;
183 typedef nsScannerBufferList::Position Position;
184 typedef uint32_t size_type;
185
186 nsScannerSubstring();
187 nsScannerSubstring( const nsAString& s );
188
189 ~nsScannerSubstring();
190
191 nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
192 nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
193
194 size_type Length() const { return mLength; }
195
196 int32_t CountChar( char16_t ) const;
197
198 void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
199 void Rebind( const nsAString& );
200
201 const nsSubstring& AsString() const;
202
203 bool GetNextFragment( nsScannerFragment& ) const;
204 bool GetPrevFragment( nsScannerFragment& ) const;
205
206 static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
207 static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); }
208
209 protected:
210
211 void acquire_ownership_of_buffer_list() const
212 {
213 mBufferList->AddRef();
214 mStart.mBuffer->IncrementUsageCount();
215 }
216
217 void release_ownership_of_buffer_list()
218 {
219 if (mBufferList)
220 {
221 mStart.mBuffer->DecrementUsageCount();
222 mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
223 mBufferList->Release();
224 }
225 }
226
227 void init_range_from_buffer_list()
228 {
229 mStart.mBuffer = mBufferList->Head();
230 mStart.mPosition = mStart.mBuffer->DataStart();
231
232 mEnd.mBuffer = mBufferList->Tail();
233 mEnd.mPosition = mEnd.mBuffer->DataEnd();
234
235 mLength = Position::Distance(mStart, mEnd);
236 }
237
238 Position mStart;
239 Position mEnd;
240 nsScannerBufferList *mBufferList;
241 size_type mLength;
242
243 // these fields are used to implement AsString
244 nsDependentSubstring mFlattenedRep;
245 bool mIsDirty;
246
247 friend class nsScannerSharedSubstring;
248 };
249
250
251 /**
252 * nsScannerString provides methods to grow and modify a buffer list.
253 */
254 class nsScannerString : public nsScannerSubstring
255 {
256 public:
257
258 nsScannerString( Buffer* );
259
260 // you are giving ownership to the string, it takes and keeps your
261 // buffer, deleting it when done.
262 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
263 // for use with this function.
264 void AppendBuffer( Buffer* );
265
266 void DiscardPrefix( const nsScannerIterator& );
267 // any other way you want to do this?
268
269 void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
270 void ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar);
271 };
272
273
274 /**
275 * nsScannerSharedSubstring implements copy-on-write semantics for
276 * nsScannerSubstring. When you call .writable(), it will copy the data
277 * and return a mutable string object. This class also manages releasing
278 * the reference to the scanner buffer when it is no longer needed.
279 */
280
281 class nsScannerSharedSubstring
282 {
283 public:
284 nsScannerSharedSubstring()
285 : mBuffer(nullptr), mBufferList(nullptr) { }
286
287 ~nsScannerSharedSubstring()
288 {
289 if (mBufferList)
290 ReleaseBuffer();
291 }
292
293 // Acquire a copy-on-write reference to the given substring.
294 NS_HIDDEN_(void) Rebind(const nsScannerIterator& aStart,
295 const nsScannerIterator& aEnd);
296
297 // Get a mutable reference to this string
298 nsSubstring& writable()
299 {
300 if (mBufferList)
301 MakeMutable();
302
303 return mString;
304 }
305
306 // Get a const reference to this string
307 const nsSubstring& str() const { return mString; }
308
309 private:
310 typedef nsScannerBufferList::Buffer Buffer;
311
312 NS_HIDDEN_(void) ReleaseBuffer();
313 NS_HIDDEN_(void) MakeMutable();
314
315 nsDependentSubstring mString;
316 Buffer *mBuffer;
317 nsScannerBufferList *mBufferList;
318 };
319
320 /**
321 * nsScannerIterator works just like nsReadingIterator<CharT> except that
322 * it knows how to iterate over a list of scanner buffers.
323 */
324 class nsScannerIterator
325 {
326 public:
327 typedef nsScannerIterator self_type;
328 typedef ptrdiff_t difference_type;
329 typedef char16_t value_type;
330 typedef const char16_t* pointer;
331 typedef const char16_t& reference;
332 typedef nsScannerSubstring::Buffer Buffer;
333
334 protected:
335
336 nsScannerFragment mFragment;
337 const char16_t* mPosition;
338 const nsScannerSubstring* mOwner;
339
340 friend class nsScannerSubstring;
341 friend class nsScannerSharedSubstring;
342
343 public:
344 nsScannerIterator() {}
345 // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK
346 // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK
347
348 inline void normalize_forward();
349 inline void normalize_backward();
350
351 pointer get() const
352 {
353 return mPosition;
354 }
355
356 char16_t operator*() const
357 {
358 return *get();
359 }
360
361 const nsScannerFragment& fragment() const
362 {
363 return mFragment;
364 }
365
366 const Buffer* buffer() const
367 {
368 return mFragment.mBuffer;
369 }
370
371 self_type& operator++()
372 {
373 ++mPosition;
374 normalize_forward();
375 return *this;
376 }
377
378 self_type operator++( int )
379 {
380 self_type result(*this);
381 ++mPosition;
382 normalize_forward();
383 return result;
384 }
385
386 self_type& operator--()
387 {
388 normalize_backward();
389 --mPosition;
390 return *this;
391 }
392
393 self_type operator--( int )
394 {
395 self_type result(*this);
396 normalize_backward();
397 --mPosition;
398 return result;
399 }
400
401 difference_type size_forward() const
402 {
403 return mFragment.mFragmentEnd - mPosition;
404 }
405
406 difference_type size_backward() const
407 {
408 return mPosition - mFragment.mFragmentStart;
409 }
410
411 self_type& advance( difference_type n )
412 {
413 while ( n > 0 )
414 {
415 difference_type one_hop = std::min(n, size_forward());
416
417 NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
418 // perhaps I should |break| if |!one_hop|?
419
420 mPosition += one_hop;
421 normalize_forward();
422 n -= one_hop;
423 }
424
425 while ( n < 0 )
426 {
427 normalize_backward();
428 difference_type one_hop = std::max(n, -size_backward());
429
430 NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
431 // perhaps I should |break| if |!one_hop|?
432
433 mPosition += one_hop;
434 n -= one_hop;
435 }
436
437 return *this;
438 }
439 };
440
441
442 inline
443 bool
444 SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
445 {
446 return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
447 }
448
449
450 /**
451 * this class is needed in order to make use of the methods in nsAlgorithm.h
452 */
453 template <>
454 struct nsCharSourceTraits<nsScannerIterator>
455 {
456 typedef nsScannerIterator::difference_type difference_type;
457
458 static
459 uint32_t
460 readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
461 {
462 return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
463 }
464
465 static
466 const nsScannerIterator::value_type*
467 read( const nsScannerIterator& iter )
468 {
469 return iter.get();
470 }
471
472 static
473 void
474 advance( nsScannerIterator& s, difference_type n )
475 {
476 s.advance(n);
477 }
478 };
479
480
481 /**
482 * inline methods follow
483 */
484
485 inline
486 void
487 nsScannerIterator::normalize_forward()
488 {
489 while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
490 mPosition = mFragment.mFragmentStart;
491 }
492
493 inline
494 void
495 nsScannerIterator::normalize_backward()
496 {
497 while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
498 mPosition = mFragment.mFragmentEnd;
499 }
500
501 inline
502 bool
503 operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
504 {
505 return lhs.get() == rhs.get();
506 }
507
508 inline
509 bool
510 operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
511 {
512 return lhs.get() != rhs.get();
513 }
514
515
516 inline
517 nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
518 : mBuffer(const_cast<Buffer*>(aIter.buffer()))
519 , mPosition(const_cast<char16_t*>(aIter.get()))
520 {}
521
522 inline
523 nsScannerBufferList::Position&
524 nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
525 {
526 mBuffer = const_cast<Buffer*>(aIter.buffer());
527 mPosition = const_cast<char16_t*>(aIter.get());
528 return *this;
529 }
530
531
532 /**
533 * scanner string utils
534 *
535 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
536 * Here we provide only the methods that the htmlparser module needs.
537 */
538
539 inline
540 size_t
541 Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
542 {
543 typedef nsScannerBufferList::Position Position;
544 return Position::Distance(Position(aStart), Position(aEnd));
545 }
546
547 void
548 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
549 const nsScannerIterator& aSrcEnd,
550 nsAString& aDest );
551
552 inline
553 void
554 CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
555 {
556 nsScannerIterator begin, end;
557 CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
558 }
559
560 void
561 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
562 const nsScannerIterator& aSrcEnd,
563 nsAString& aDest );
564
565 inline
566 void
567 AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
568 {
569 nsScannerIterator begin, end;
570 AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
571 }
572
573 void
574 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
575 const nsScannerIterator& aSrcEnd,
576 nsScannerSharedSubstring& aDest );
577
578 bool
579 FindCharInReadable( char16_t aChar,
580 nsScannerIterator& aStart,
581 const nsScannerIterator& aEnd );
582
583 bool
584 FindInReadable( const nsAString& aPattern,
585 nsScannerIterator& aStart,
586 nsScannerIterator& aEnd,
587 const nsStringComparator& = nsDefaultStringComparator() );
588
589 bool
590 RFindInReadable( const nsAString& aPattern,
591 nsScannerIterator& aStart,
592 nsScannerIterator& aEnd,
593 const nsStringComparator& = nsDefaultStringComparator() );
594
595 inline
596 bool
597 CaseInsensitiveFindInReadable( const nsAString& aPattern,
598 nsScannerIterator& aStart,
599 nsScannerIterator& aEnd )
600 {
601 return FindInReadable(aPattern, aStart, aEnd,
602 nsCaseInsensitiveStringComparator());
603 }
604
605 #endif // !defined(nsScannerString_h___)

mercurial