|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #ifndef nsScannerString_h___ |
|
8 #define nsScannerString_h___ |
|
9 |
|
10 #include "nsString.h" |
|
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator |
|
12 #include "mozilla/LinkedList.h" |
|
13 #include <algorithm> |
|
14 |
|
15 |
|
16 /** |
|
17 * NOTE: nsScannerString (and the other classes defined in this file) are |
|
18 * not related to nsAString or any of the other xpcom/string classes. |
|
19 * |
|
20 * nsScannerString is based on the nsSlidingString implementation that used |
|
21 * to live in xpcom/string. Now that nsAString is limited to representing |
|
22 * only single fragment strings, nsSlidingString can no longer be used. |
|
23 * |
|
24 * An advantage to this design is that it does not employ any virtual |
|
25 * functions. |
|
26 * |
|
27 * This file uses SCC-style indenting in deference to the nsSlidingString |
|
28 * code from which this code is derived ;-) |
|
29 */ |
|
30 |
|
31 class nsScannerIterator; |
|
32 class nsScannerSubstring; |
|
33 class nsScannerString; |
|
34 |
|
35 |
|
36 /** |
|
37 * nsScannerBufferList |
|
38 * |
|
39 * This class maintains a list of heap-allocated Buffer objects. The buffers |
|
40 * are maintained in a circular linked list. Each buffer has a usage count |
|
41 * that is decremented by the owning nsScannerSubstring. |
|
42 * |
|
43 * The buffer list itself is reference counted. This allows the buffer list |
|
44 * to be shared by multiple nsScannerSubstring objects. The reference |
|
45 * counting is not threadsafe, which is not at all a requirement. |
|
46 * |
|
47 * When a nsScannerSubstring releases its reference to a buffer list, it |
|
48 * decrements the usage count of the first buffer in the buffer list that it |
|
49 * was referencing. It informs the buffer list that it can discard buffers |
|
50 * starting at that prefix. The buffer list will do so if the usage count of |
|
51 * that buffer is 0 and if it is the first buffer in the list. It will |
|
52 * continue to prune buffers starting from the front of the buffer list until |
|
53 * it finds a buffer that has a usage count that is non-zero. |
|
54 */ |
|
55 class nsScannerBufferList |
|
56 { |
|
57 public: |
|
58 |
|
59 /** |
|
60 * Buffer objects are directly followed by a data segment. The start |
|
61 * of the data segment is determined by increment the |this| pointer |
|
62 * by 1 unit. |
|
63 */ |
|
64 class Buffer : public mozilla::LinkedListElement<Buffer> |
|
65 { |
|
66 public: |
|
67 |
|
68 void IncrementUsageCount() { ++mUsageCount; } |
|
69 void DecrementUsageCount() { --mUsageCount; } |
|
70 |
|
71 bool IsInUse() const { return mUsageCount != 0; } |
|
72 |
|
73 const char16_t* DataStart() const { return (const char16_t*) (this+1); } |
|
74 char16_t* DataStart() { return ( char16_t*) (this+1); } |
|
75 |
|
76 const char16_t* DataEnd() const { return mDataEnd; } |
|
77 char16_t* DataEnd() { return mDataEnd; } |
|
78 |
|
79 const Buffer* Next() const { return getNext(); } |
|
80 Buffer* Next() { return getNext(); } |
|
81 |
|
82 const Buffer* Prev() const { return getPrevious(); } |
|
83 Buffer* Prev() { return getPrevious(); } |
|
84 |
|
85 uint32_t DataLength() const { return mDataEnd - DataStart(); } |
|
86 void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } |
|
87 |
|
88 private: |
|
89 |
|
90 friend class nsScannerBufferList; |
|
91 |
|
92 int32_t mUsageCount; |
|
93 char16_t* mDataEnd; |
|
94 }; |
|
95 |
|
96 /** |
|
97 * Position objects serve as lightweight pointers into a buffer list. |
|
98 * The mPosition member must be contained with mBuffer->DataStart() |
|
99 * and mBuffer->DataEnd(). |
|
100 */ |
|
101 class Position |
|
102 { |
|
103 public: |
|
104 |
|
105 Position() {} |
|
106 |
|
107 Position( Buffer* buffer, char16_t* position ) |
|
108 : mBuffer(buffer) |
|
109 , mPosition(position) |
|
110 {} |
|
111 |
|
112 inline |
|
113 Position( const nsScannerIterator& aIter ); |
|
114 |
|
115 inline |
|
116 Position& operator=( const nsScannerIterator& aIter ); |
|
117 |
|
118 static size_t Distance( const Position& p1, const Position& p2 ); |
|
119 |
|
120 Buffer* mBuffer; |
|
121 char16_t* mPosition; |
|
122 }; |
|
123 |
|
124 static Buffer* AllocBufferFromString( const nsAString& ); |
|
125 static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars |
|
126 |
|
127 nsScannerBufferList( Buffer* buf ) |
|
128 : mRefCnt(0) |
|
129 { |
|
130 mBuffers.insertBack(buf); |
|
131 } |
|
132 |
|
133 void AddRef() { ++mRefCnt; } |
|
134 void Release() { if (--mRefCnt == 0) delete this; } |
|
135 |
|
136 void Append( Buffer* buf ) { mBuffers.insertBack(buf); } |
|
137 void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); } |
|
138 void SplitBuffer( const Position& ); |
|
139 void DiscardUnreferencedPrefix( Buffer* ); |
|
140 |
|
141 Buffer* Head() { return mBuffers.getFirst(); } |
|
142 const Buffer* Head() const { return mBuffers.getFirst(); } |
|
143 |
|
144 Buffer* Tail() { return mBuffers.getLast(); } |
|
145 const Buffer* Tail() const { return mBuffers.getLast(); } |
|
146 |
|
147 private: |
|
148 |
|
149 friend class nsScannerSubstring; |
|
150 |
|
151 ~nsScannerBufferList() { ReleaseAll(); } |
|
152 void ReleaseAll(); |
|
153 |
|
154 int32_t mRefCnt; |
|
155 mozilla::LinkedList<Buffer> mBuffers; |
|
156 }; |
|
157 |
|
158 |
|
159 /** |
|
160 * nsScannerFragment represents a "slice" of a Buffer object. |
|
161 */ |
|
162 struct nsScannerFragment |
|
163 { |
|
164 typedef nsScannerBufferList::Buffer Buffer; |
|
165 |
|
166 const Buffer* mBuffer; |
|
167 const char16_t* mFragmentStart; |
|
168 const char16_t* mFragmentEnd; |
|
169 }; |
|
170 |
|
171 |
|
172 /** |
|
173 * nsScannerSubstring is the base class for nsScannerString. It provides |
|
174 * access to iterators and methods to bind the substring to another |
|
175 * substring or nsAString instance. |
|
176 * |
|
177 * This class owns the buffer list. |
|
178 */ |
|
179 class nsScannerSubstring |
|
180 { |
|
181 public: |
|
182 typedef nsScannerBufferList::Buffer Buffer; |
|
183 typedef nsScannerBufferList::Position Position; |
|
184 typedef uint32_t size_type; |
|
185 |
|
186 nsScannerSubstring(); |
|
187 nsScannerSubstring( const nsAString& s ); |
|
188 |
|
189 ~nsScannerSubstring(); |
|
190 |
|
191 nsScannerIterator& BeginReading( nsScannerIterator& iter ) const; |
|
192 nsScannerIterator& EndReading( nsScannerIterator& iter ) const; |
|
193 |
|
194 size_type Length() const { return mLength; } |
|
195 |
|
196 int32_t CountChar( char16_t ) const; |
|
197 |
|
198 void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& ); |
|
199 void Rebind( const nsAString& ); |
|
200 |
|
201 const nsSubstring& AsString() const; |
|
202 |
|
203 bool GetNextFragment( nsScannerFragment& ) const; |
|
204 bool GetPrevFragment( nsScannerFragment& ) const; |
|
205 |
|
206 static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); } |
|
207 static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); } |
|
208 |
|
209 protected: |
|
210 |
|
211 void acquire_ownership_of_buffer_list() const |
|
212 { |
|
213 mBufferList->AddRef(); |
|
214 mStart.mBuffer->IncrementUsageCount(); |
|
215 } |
|
216 |
|
217 void release_ownership_of_buffer_list() |
|
218 { |
|
219 if (mBufferList) |
|
220 { |
|
221 mStart.mBuffer->DecrementUsageCount(); |
|
222 mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); |
|
223 mBufferList->Release(); |
|
224 } |
|
225 } |
|
226 |
|
227 void init_range_from_buffer_list() |
|
228 { |
|
229 mStart.mBuffer = mBufferList->Head(); |
|
230 mStart.mPosition = mStart.mBuffer->DataStart(); |
|
231 |
|
232 mEnd.mBuffer = mBufferList->Tail(); |
|
233 mEnd.mPosition = mEnd.mBuffer->DataEnd(); |
|
234 |
|
235 mLength = Position::Distance(mStart, mEnd); |
|
236 } |
|
237 |
|
238 Position mStart; |
|
239 Position mEnd; |
|
240 nsScannerBufferList *mBufferList; |
|
241 size_type mLength; |
|
242 |
|
243 // these fields are used to implement AsString |
|
244 nsDependentSubstring mFlattenedRep; |
|
245 bool mIsDirty; |
|
246 |
|
247 friend class nsScannerSharedSubstring; |
|
248 }; |
|
249 |
|
250 |
|
251 /** |
|
252 * nsScannerString provides methods to grow and modify a buffer list. |
|
253 */ |
|
254 class nsScannerString : public nsScannerSubstring |
|
255 { |
|
256 public: |
|
257 |
|
258 nsScannerString( Buffer* ); |
|
259 |
|
260 // you are giving ownership to the string, it takes and keeps your |
|
261 // buffer, deleting it when done. |
|
262 // Use AllocBuffer or AllocBufferFromString to create a Buffer object |
|
263 // for use with this function. |
|
264 void AppendBuffer( Buffer* ); |
|
265 |
|
266 void DiscardPrefix( const nsScannerIterator& ); |
|
267 // any other way you want to do this? |
|
268 |
|
269 void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); |
|
270 void ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar); |
|
271 }; |
|
272 |
|
273 |
|
274 /** |
|
275 * nsScannerSharedSubstring implements copy-on-write semantics for |
|
276 * nsScannerSubstring. When you call .writable(), it will copy the data |
|
277 * and return a mutable string object. This class also manages releasing |
|
278 * the reference to the scanner buffer when it is no longer needed. |
|
279 */ |
|
280 |
|
281 class nsScannerSharedSubstring |
|
282 { |
|
283 public: |
|
284 nsScannerSharedSubstring() |
|
285 : mBuffer(nullptr), mBufferList(nullptr) { } |
|
286 |
|
287 ~nsScannerSharedSubstring() |
|
288 { |
|
289 if (mBufferList) |
|
290 ReleaseBuffer(); |
|
291 } |
|
292 |
|
293 // Acquire a copy-on-write reference to the given substring. |
|
294 NS_HIDDEN_(void) Rebind(const nsScannerIterator& aStart, |
|
295 const nsScannerIterator& aEnd); |
|
296 |
|
297 // Get a mutable reference to this string |
|
298 nsSubstring& writable() |
|
299 { |
|
300 if (mBufferList) |
|
301 MakeMutable(); |
|
302 |
|
303 return mString; |
|
304 } |
|
305 |
|
306 // Get a const reference to this string |
|
307 const nsSubstring& str() const { return mString; } |
|
308 |
|
309 private: |
|
310 typedef nsScannerBufferList::Buffer Buffer; |
|
311 |
|
312 NS_HIDDEN_(void) ReleaseBuffer(); |
|
313 NS_HIDDEN_(void) MakeMutable(); |
|
314 |
|
315 nsDependentSubstring mString; |
|
316 Buffer *mBuffer; |
|
317 nsScannerBufferList *mBufferList; |
|
318 }; |
|
319 |
|
320 /** |
|
321 * nsScannerIterator works just like nsReadingIterator<CharT> except that |
|
322 * it knows how to iterate over a list of scanner buffers. |
|
323 */ |
|
324 class nsScannerIterator |
|
325 { |
|
326 public: |
|
327 typedef nsScannerIterator self_type; |
|
328 typedef ptrdiff_t difference_type; |
|
329 typedef char16_t value_type; |
|
330 typedef const char16_t* pointer; |
|
331 typedef const char16_t& reference; |
|
332 typedef nsScannerSubstring::Buffer Buffer; |
|
333 |
|
334 protected: |
|
335 |
|
336 nsScannerFragment mFragment; |
|
337 const char16_t* mPosition; |
|
338 const nsScannerSubstring* mOwner; |
|
339 |
|
340 friend class nsScannerSubstring; |
|
341 friend class nsScannerSharedSubstring; |
|
342 |
|
343 public: |
|
344 nsScannerIterator() {} |
|
345 // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK |
|
346 // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK |
|
347 |
|
348 inline void normalize_forward(); |
|
349 inline void normalize_backward(); |
|
350 |
|
351 pointer get() const |
|
352 { |
|
353 return mPosition; |
|
354 } |
|
355 |
|
356 char16_t operator*() const |
|
357 { |
|
358 return *get(); |
|
359 } |
|
360 |
|
361 const nsScannerFragment& fragment() const |
|
362 { |
|
363 return mFragment; |
|
364 } |
|
365 |
|
366 const Buffer* buffer() const |
|
367 { |
|
368 return mFragment.mBuffer; |
|
369 } |
|
370 |
|
371 self_type& operator++() |
|
372 { |
|
373 ++mPosition; |
|
374 normalize_forward(); |
|
375 return *this; |
|
376 } |
|
377 |
|
378 self_type operator++( int ) |
|
379 { |
|
380 self_type result(*this); |
|
381 ++mPosition; |
|
382 normalize_forward(); |
|
383 return result; |
|
384 } |
|
385 |
|
386 self_type& operator--() |
|
387 { |
|
388 normalize_backward(); |
|
389 --mPosition; |
|
390 return *this; |
|
391 } |
|
392 |
|
393 self_type operator--( int ) |
|
394 { |
|
395 self_type result(*this); |
|
396 normalize_backward(); |
|
397 --mPosition; |
|
398 return result; |
|
399 } |
|
400 |
|
401 difference_type size_forward() const |
|
402 { |
|
403 return mFragment.mFragmentEnd - mPosition; |
|
404 } |
|
405 |
|
406 difference_type size_backward() const |
|
407 { |
|
408 return mPosition - mFragment.mFragmentStart; |
|
409 } |
|
410 |
|
411 self_type& advance( difference_type n ) |
|
412 { |
|
413 while ( n > 0 ) |
|
414 { |
|
415 difference_type one_hop = std::min(n, size_forward()); |
|
416 |
|
417 NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string"); |
|
418 // perhaps I should |break| if |!one_hop|? |
|
419 |
|
420 mPosition += one_hop; |
|
421 normalize_forward(); |
|
422 n -= one_hop; |
|
423 } |
|
424 |
|
425 while ( n < 0 ) |
|
426 { |
|
427 normalize_backward(); |
|
428 difference_type one_hop = std::max(n, -size_backward()); |
|
429 |
|
430 NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string"); |
|
431 // perhaps I should |break| if |!one_hop|? |
|
432 |
|
433 mPosition += one_hop; |
|
434 n -= one_hop; |
|
435 } |
|
436 |
|
437 return *this; |
|
438 } |
|
439 }; |
|
440 |
|
441 |
|
442 inline |
|
443 bool |
|
444 SameFragment( const nsScannerIterator& a, const nsScannerIterator& b ) |
|
445 { |
|
446 return a.fragment().mFragmentStart == b.fragment().mFragmentStart; |
|
447 } |
|
448 |
|
449 |
|
450 /** |
|
451 * this class is needed in order to make use of the methods in nsAlgorithm.h |
|
452 */ |
|
453 template <> |
|
454 struct nsCharSourceTraits<nsScannerIterator> |
|
455 { |
|
456 typedef nsScannerIterator::difference_type difference_type; |
|
457 |
|
458 static |
|
459 uint32_t |
|
460 readable_distance( const nsScannerIterator& first, const nsScannerIterator& last ) |
|
461 { |
|
462 return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward()); |
|
463 } |
|
464 |
|
465 static |
|
466 const nsScannerIterator::value_type* |
|
467 read( const nsScannerIterator& iter ) |
|
468 { |
|
469 return iter.get(); |
|
470 } |
|
471 |
|
472 static |
|
473 void |
|
474 advance( nsScannerIterator& s, difference_type n ) |
|
475 { |
|
476 s.advance(n); |
|
477 } |
|
478 }; |
|
479 |
|
480 |
|
481 /** |
|
482 * inline methods follow |
|
483 */ |
|
484 |
|
485 inline |
|
486 void |
|
487 nsScannerIterator::normalize_forward() |
|
488 { |
|
489 while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment)) |
|
490 mPosition = mFragment.mFragmentStart; |
|
491 } |
|
492 |
|
493 inline |
|
494 void |
|
495 nsScannerIterator::normalize_backward() |
|
496 { |
|
497 while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment)) |
|
498 mPosition = mFragment.mFragmentEnd; |
|
499 } |
|
500 |
|
501 inline |
|
502 bool |
|
503 operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) |
|
504 { |
|
505 return lhs.get() == rhs.get(); |
|
506 } |
|
507 |
|
508 inline |
|
509 bool |
|
510 operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) |
|
511 { |
|
512 return lhs.get() != rhs.get(); |
|
513 } |
|
514 |
|
515 |
|
516 inline |
|
517 nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) |
|
518 : mBuffer(const_cast<Buffer*>(aIter.buffer())) |
|
519 , mPosition(const_cast<char16_t*>(aIter.get())) |
|
520 {} |
|
521 |
|
522 inline |
|
523 nsScannerBufferList::Position& |
|
524 nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter) |
|
525 { |
|
526 mBuffer = const_cast<Buffer*>(aIter.buffer()); |
|
527 mPosition = const_cast<char16_t*>(aIter.get()); |
|
528 return *this; |
|
529 } |
|
530 |
|
531 |
|
532 /** |
|
533 * scanner string utils |
|
534 * |
|
535 * These methods mimic the API provided by nsReadableUtils in xpcom/string. |
|
536 * Here we provide only the methods that the htmlparser module needs. |
|
537 */ |
|
538 |
|
539 inline |
|
540 size_t |
|
541 Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd ) |
|
542 { |
|
543 typedef nsScannerBufferList::Position Position; |
|
544 return Position::Distance(Position(aStart), Position(aEnd)); |
|
545 } |
|
546 |
|
547 void |
|
548 CopyUnicodeTo( const nsScannerIterator& aSrcStart, |
|
549 const nsScannerIterator& aSrcEnd, |
|
550 nsAString& aDest ); |
|
551 |
|
552 inline |
|
553 void |
|
554 CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) |
|
555 { |
|
556 nsScannerIterator begin, end; |
|
557 CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); |
|
558 } |
|
559 |
|
560 void |
|
561 AppendUnicodeTo( const nsScannerIterator& aSrcStart, |
|
562 const nsScannerIterator& aSrcEnd, |
|
563 nsAString& aDest ); |
|
564 |
|
565 inline |
|
566 void |
|
567 AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) |
|
568 { |
|
569 nsScannerIterator begin, end; |
|
570 AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); |
|
571 } |
|
572 |
|
573 void |
|
574 AppendUnicodeTo( const nsScannerIterator& aSrcStart, |
|
575 const nsScannerIterator& aSrcEnd, |
|
576 nsScannerSharedSubstring& aDest ); |
|
577 |
|
578 bool |
|
579 FindCharInReadable( char16_t aChar, |
|
580 nsScannerIterator& aStart, |
|
581 const nsScannerIterator& aEnd ); |
|
582 |
|
583 bool |
|
584 FindInReadable( const nsAString& aPattern, |
|
585 nsScannerIterator& aStart, |
|
586 nsScannerIterator& aEnd, |
|
587 const nsStringComparator& = nsDefaultStringComparator() ); |
|
588 |
|
589 bool |
|
590 RFindInReadable( const nsAString& aPattern, |
|
591 nsScannerIterator& aStart, |
|
592 nsScannerIterator& aEnd, |
|
593 const nsStringComparator& = nsDefaultStringComparator() ); |
|
594 |
|
595 inline |
|
596 bool |
|
597 CaseInsensitiveFindInReadable( const nsAString& aPattern, |
|
598 nsScannerIterator& aStart, |
|
599 nsScannerIterator& aEnd ) |
|
600 { |
|
601 return FindInReadable(aPattern, aStart, aEnd, |
|
602 nsCaseInsensitiveStringComparator()); |
|
603 } |
|
604 |
|
605 #endif // !defined(nsScannerString_h___) |