1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/htmlparser/public/nsScannerString.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,605 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim:set ts=2 sw=2 sts=2 et cindent: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#ifndef nsScannerString_h___ 1.11 +#define nsScannerString_h___ 1.12 + 1.13 +#include "nsString.h" 1.14 +#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator 1.15 +#include "mozilla/LinkedList.h" 1.16 +#include <algorithm> 1.17 + 1.18 + 1.19 + /** 1.20 + * NOTE: nsScannerString (and the other classes defined in this file) are 1.21 + * not related to nsAString or any of the other xpcom/string classes. 1.22 + * 1.23 + * nsScannerString is based on the nsSlidingString implementation that used 1.24 + * to live in xpcom/string. Now that nsAString is limited to representing 1.25 + * only single fragment strings, nsSlidingString can no longer be used. 1.26 + * 1.27 + * An advantage to this design is that it does not employ any virtual 1.28 + * functions. 1.29 + * 1.30 + * This file uses SCC-style indenting in deference to the nsSlidingString 1.31 + * code from which this code is derived ;-) 1.32 + */ 1.33 + 1.34 +class nsScannerIterator; 1.35 +class nsScannerSubstring; 1.36 +class nsScannerString; 1.37 + 1.38 + 1.39 + /** 1.40 + * nsScannerBufferList 1.41 + * 1.42 + * This class maintains a list of heap-allocated Buffer objects. The buffers 1.43 + * are maintained in a circular linked list. Each buffer has a usage count 1.44 + * that is decremented by the owning nsScannerSubstring. 1.45 + * 1.46 + * The buffer list itself is reference counted. This allows the buffer list 1.47 + * to be shared by multiple nsScannerSubstring objects. The reference 1.48 + * counting is not threadsafe, which is not at all a requirement. 1.49 + * 1.50 + * When a nsScannerSubstring releases its reference to a buffer list, it 1.51 + * decrements the usage count of the first buffer in the buffer list that it 1.52 + * was referencing. It informs the buffer list that it can discard buffers 1.53 + * starting at that prefix. The buffer list will do so if the usage count of 1.54 + * that buffer is 0 and if it is the first buffer in the list. It will 1.55 + * continue to prune buffers starting from the front of the buffer list until 1.56 + * it finds a buffer that has a usage count that is non-zero. 1.57 + */ 1.58 +class nsScannerBufferList 1.59 + { 1.60 + public: 1.61 + 1.62 + /** 1.63 + * Buffer objects are directly followed by a data segment. The start 1.64 + * of the data segment is determined by increment the |this| pointer 1.65 + * by 1 unit. 1.66 + */ 1.67 + class Buffer : public mozilla::LinkedListElement<Buffer> 1.68 + { 1.69 + public: 1.70 + 1.71 + void IncrementUsageCount() { ++mUsageCount; } 1.72 + void DecrementUsageCount() { --mUsageCount; } 1.73 + 1.74 + bool IsInUse() const { return mUsageCount != 0; } 1.75 + 1.76 + const char16_t* DataStart() const { return (const char16_t*) (this+1); } 1.77 + char16_t* DataStart() { return ( char16_t*) (this+1); } 1.78 + 1.79 + const char16_t* DataEnd() const { return mDataEnd; } 1.80 + char16_t* DataEnd() { return mDataEnd; } 1.81 + 1.82 + const Buffer* Next() const { return getNext(); } 1.83 + Buffer* Next() { return getNext(); } 1.84 + 1.85 + const Buffer* Prev() const { return getPrevious(); } 1.86 + Buffer* Prev() { return getPrevious(); } 1.87 + 1.88 + uint32_t DataLength() const { return mDataEnd - DataStart(); } 1.89 + void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } 1.90 + 1.91 + private: 1.92 + 1.93 + friend class nsScannerBufferList; 1.94 + 1.95 + int32_t mUsageCount; 1.96 + char16_t* mDataEnd; 1.97 + }; 1.98 + 1.99 + /** 1.100 + * Position objects serve as lightweight pointers into a buffer list. 1.101 + * The mPosition member must be contained with mBuffer->DataStart() 1.102 + * and mBuffer->DataEnd(). 1.103 + */ 1.104 + class Position 1.105 + { 1.106 + public: 1.107 + 1.108 + Position() {} 1.109 + 1.110 + Position( Buffer* buffer, char16_t* position ) 1.111 + : mBuffer(buffer) 1.112 + , mPosition(position) 1.113 + {} 1.114 + 1.115 + inline 1.116 + Position( const nsScannerIterator& aIter ); 1.117 + 1.118 + inline 1.119 + Position& operator=( const nsScannerIterator& aIter ); 1.120 + 1.121 + static size_t Distance( const Position& p1, const Position& p2 ); 1.122 + 1.123 + Buffer* mBuffer; 1.124 + char16_t* mPosition; 1.125 + }; 1.126 + 1.127 + static Buffer* AllocBufferFromString( const nsAString& ); 1.128 + static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars 1.129 + 1.130 + nsScannerBufferList( Buffer* buf ) 1.131 + : mRefCnt(0) 1.132 + { 1.133 + mBuffers.insertBack(buf); 1.134 + } 1.135 + 1.136 + void AddRef() { ++mRefCnt; } 1.137 + void Release() { if (--mRefCnt == 0) delete this; } 1.138 + 1.139 + void Append( Buffer* buf ) { mBuffers.insertBack(buf); } 1.140 + void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); } 1.141 + void SplitBuffer( const Position& ); 1.142 + void DiscardUnreferencedPrefix( Buffer* ); 1.143 + 1.144 + Buffer* Head() { return mBuffers.getFirst(); } 1.145 + const Buffer* Head() const { return mBuffers.getFirst(); } 1.146 + 1.147 + Buffer* Tail() { return mBuffers.getLast(); } 1.148 + const Buffer* Tail() const { return mBuffers.getLast(); } 1.149 + 1.150 + private: 1.151 + 1.152 + friend class nsScannerSubstring; 1.153 + 1.154 + ~nsScannerBufferList() { ReleaseAll(); } 1.155 + void ReleaseAll(); 1.156 + 1.157 + int32_t mRefCnt; 1.158 + mozilla::LinkedList<Buffer> mBuffers; 1.159 + }; 1.160 + 1.161 + 1.162 + /** 1.163 + * nsScannerFragment represents a "slice" of a Buffer object. 1.164 + */ 1.165 +struct nsScannerFragment 1.166 + { 1.167 + typedef nsScannerBufferList::Buffer Buffer; 1.168 + 1.169 + const Buffer* mBuffer; 1.170 + const char16_t* mFragmentStart; 1.171 + const char16_t* mFragmentEnd; 1.172 + }; 1.173 + 1.174 + 1.175 + /** 1.176 + * nsScannerSubstring is the base class for nsScannerString. It provides 1.177 + * access to iterators and methods to bind the substring to another 1.178 + * substring or nsAString instance. 1.179 + * 1.180 + * This class owns the buffer list. 1.181 + */ 1.182 +class nsScannerSubstring 1.183 + { 1.184 + public: 1.185 + typedef nsScannerBufferList::Buffer Buffer; 1.186 + typedef nsScannerBufferList::Position Position; 1.187 + typedef uint32_t size_type; 1.188 + 1.189 + nsScannerSubstring(); 1.190 + nsScannerSubstring( const nsAString& s ); 1.191 + 1.192 + ~nsScannerSubstring(); 1.193 + 1.194 + nsScannerIterator& BeginReading( nsScannerIterator& iter ) const; 1.195 + nsScannerIterator& EndReading( nsScannerIterator& iter ) const; 1.196 + 1.197 + size_type Length() const { return mLength; } 1.198 + 1.199 + int32_t CountChar( char16_t ) const; 1.200 + 1.201 + void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& ); 1.202 + void Rebind( const nsAString& ); 1.203 + 1.204 + const nsSubstring& AsString() const; 1.205 + 1.206 + bool GetNextFragment( nsScannerFragment& ) const; 1.207 + bool GetPrevFragment( nsScannerFragment& ) const; 1.208 + 1.209 + static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); } 1.210 + static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); } 1.211 + 1.212 + protected: 1.213 + 1.214 + void acquire_ownership_of_buffer_list() const 1.215 + { 1.216 + mBufferList->AddRef(); 1.217 + mStart.mBuffer->IncrementUsageCount(); 1.218 + } 1.219 + 1.220 + void release_ownership_of_buffer_list() 1.221 + { 1.222 + if (mBufferList) 1.223 + { 1.224 + mStart.mBuffer->DecrementUsageCount(); 1.225 + mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); 1.226 + mBufferList->Release(); 1.227 + } 1.228 + } 1.229 + 1.230 + void init_range_from_buffer_list() 1.231 + { 1.232 + mStart.mBuffer = mBufferList->Head(); 1.233 + mStart.mPosition = mStart.mBuffer->DataStart(); 1.234 + 1.235 + mEnd.mBuffer = mBufferList->Tail(); 1.236 + mEnd.mPosition = mEnd.mBuffer->DataEnd(); 1.237 + 1.238 + mLength = Position::Distance(mStart, mEnd); 1.239 + } 1.240 + 1.241 + Position mStart; 1.242 + Position mEnd; 1.243 + nsScannerBufferList *mBufferList; 1.244 + size_type mLength; 1.245 + 1.246 + // these fields are used to implement AsString 1.247 + nsDependentSubstring mFlattenedRep; 1.248 + bool mIsDirty; 1.249 + 1.250 + friend class nsScannerSharedSubstring; 1.251 + }; 1.252 + 1.253 + 1.254 + /** 1.255 + * nsScannerString provides methods to grow and modify a buffer list. 1.256 + */ 1.257 +class nsScannerString : public nsScannerSubstring 1.258 + { 1.259 + public: 1.260 + 1.261 + nsScannerString( Buffer* ); 1.262 + 1.263 + // you are giving ownership to the string, it takes and keeps your 1.264 + // buffer, deleting it when done. 1.265 + // Use AllocBuffer or AllocBufferFromString to create a Buffer object 1.266 + // for use with this function. 1.267 + void AppendBuffer( Buffer* ); 1.268 + 1.269 + void DiscardPrefix( const nsScannerIterator& ); 1.270 + // any other way you want to do this? 1.271 + 1.272 + void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); 1.273 + void ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar); 1.274 + }; 1.275 + 1.276 + 1.277 + /** 1.278 + * nsScannerSharedSubstring implements copy-on-write semantics for 1.279 + * nsScannerSubstring. When you call .writable(), it will copy the data 1.280 + * and return a mutable string object. This class also manages releasing 1.281 + * the reference to the scanner buffer when it is no longer needed. 1.282 + */ 1.283 + 1.284 +class nsScannerSharedSubstring 1.285 + { 1.286 + public: 1.287 + nsScannerSharedSubstring() 1.288 + : mBuffer(nullptr), mBufferList(nullptr) { } 1.289 + 1.290 + ~nsScannerSharedSubstring() 1.291 + { 1.292 + if (mBufferList) 1.293 + ReleaseBuffer(); 1.294 + } 1.295 + 1.296 + // Acquire a copy-on-write reference to the given substring. 1.297 + NS_HIDDEN_(void) Rebind(const nsScannerIterator& aStart, 1.298 + const nsScannerIterator& aEnd); 1.299 + 1.300 + // Get a mutable reference to this string 1.301 + nsSubstring& writable() 1.302 + { 1.303 + if (mBufferList) 1.304 + MakeMutable(); 1.305 + 1.306 + return mString; 1.307 + } 1.308 + 1.309 + // Get a const reference to this string 1.310 + const nsSubstring& str() const { return mString; } 1.311 + 1.312 + private: 1.313 + typedef nsScannerBufferList::Buffer Buffer; 1.314 + 1.315 + NS_HIDDEN_(void) ReleaseBuffer(); 1.316 + NS_HIDDEN_(void) MakeMutable(); 1.317 + 1.318 + nsDependentSubstring mString; 1.319 + Buffer *mBuffer; 1.320 + nsScannerBufferList *mBufferList; 1.321 + }; 1.322 + 1.323 + /** 1.324 + * nsScannerIterator works just like nsReadingIterator<CharT> except that 1.325 + * it knows how to iterate over a list of scanner buffers. 1.326 + */ 1.327 +class nsScannerIterator 1.328 + { 1.329 + public: 1.330 + typedef nsScannerIterator self_type; 1.331 + typedef ptrdiff_t difference_type; 1.332 + typedef char16_t value_type; 1.333 + typedef const char16_t* pointer; 1.334 + typedef const char16_t& reference; 1.335 + typedef nsScannerSubstring::Buffer Buffer; 1.336 + 1.337 + protected: 1.338 + 1.339 + nsScannerFragment mFragment; 1.340 + const char16_t* mPosition; 1.341 + const nsScannerSubstring* mOwner; 1.342 + 1.343 + friend class nsScannerSubstring; 1.344 + friend class nsScannerSharedSubstring; 1.345 + 1.346 + public: 1.347 + nsScannerIterator() {} 1.348 + // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK 1.349 + // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK 1.350 + 1.351 + inline void normalize_forward(); 1.352 + inline void normalize_backward(); 1.353 + 1.354 + pointer get() const 1.355 + { 1.356 + return mPosition; 1.357 + } 1.358 + 1.359 + char16_t operator*() const 1.360 + { 1.361 + return *get(); 1.362 + } 1.363 + 1.364 + const nsScannerFragment& fragment() const 1.365 + { 1.366 + return mFragment; 1.367 + } 1.368 + 1.369 + const Buffer* buffer() const 1.370 + { 1.371 + return mFragment.mBuffer; 1.372 + } 1.373 + 1.374 + self_type& operator++() 1.375 + { 1.376 + ++mPosition; 1.377 + normalize_forward(); 1.378 + return *this; 1.379 + } 1.380 + 1.381 + self_type operator++( int ) 1.382 + { 1.383 + self_type result(*this); 1.384 + ++mPosition; 1.385 + normalize_forward(); 1.386 + return result; 1.387 + } 1.388 + 1.389 + self_type& operator--() 1.390 + { 1.391 + normalize_backward(); 1.392 + --mPosition; 1.393 + return *this; 1.394 + } 1.395 + 1.396 + self_type operator--( int ) 1.397 + { 1.398 + self_type result(*this); 1.399 + normalize_backward(); 1.400 + --mPosition; 1.401 + return result; 1.402 + } 1.403 + 1.404 + difference_type size_forward() const 1.405 + { 1.406 + return mFragment.mFragmentEnd - mPosition; 1.407 + } 1.408 + 1.409 + difference_type size_backward() const 1.410 + { 1.411 + return mPosition - mFragment.mFragmentStart; 1.412 + } 1.413 + 1.414 + self_type& advance( difference_type n ) 1.415 + { 1.416 + while ( n > 0 ) 1.417 + { 1.418 + difference_type one_hop = std::min(n, size_forward()); 1.419 + 1.420 + NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string"); 1.421 + // perhaps I should |break| if |!one_hop|? 1.422 + 1.423 + mPosition += one_hop; 1.424 + normalize_forward(); 1.425 + n -= one_hop; 1.426 + } 1.427 + 1.428 + while ( n < 0 ) 1.429 + { 1.430 + normalize_backward(); 1.431 + difference_type one_hop = std::max(n, -size_backward()); 1.432 + 1.433 + NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string"); 1.434 + // perhaps I should |break| if |!one_hop|? 1.435 + 1.436 + mPosition += one_hop; 1.437 + n -= one_hop; 1.438 + } 1.439 + 1.440 + return *this; 1.441 + } 1.442 + }; 1.443 + 1.444 + 1.445 +inline 1.446 +bool 1.447 +SameFragment( const nsScannerIterator& a, const nsScannerIterator& b ) 1.448 + { 1.449 + return a.fragment().mFragmentStart == b.fragment().mFragmentStart; 1.450 + } 1.451 + 1.452 + 1.453 + /** 1.454 + * this class is needed in order to make use of the methods in nsAlgorithm.h 1.455 + */ 1.456 +template <> 1.457 +struct nsCharSourceTraits<nsScannerIterator> 1.458 + { 1.459 + typedef nsScannerIterator::difference_type difference_type; 1.460 + 1.461 + static 1.462 + uint32_t 1.463 + readable_distance( const nsScannerIterator& first, const nsScannerIterator& last ) 1.464 + { 1.465 + return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward()); 1.466 + } 1.467 + 1.468 + static 1.469 + const nsScannerIterator::value_type* 1.470 + read( const nsScannerIterator& iter ) 1.471 + { 1.472 + return iter.get(); 1.473 + } 1.474 + 1.475 + static 1.476 + void 1.477 + advance( nsScannerIterator& s, difference_type n ) 1.478 + { 1.479 + s.advance(n); 1.480 + } 1.481 + }; 1.482 + 1.483 + 1.484 + /** 1.485 + * inline methods follow 1.486 + */ 1.487 + 1.488 +inline 1.489 +void 1.490 +nsScannerIterator::normalize_forward() 1.491 + { 1.492 + while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment)) 1.493 + mPosition = mFragment.mFragmentStart; 1.494 + } 1.495 + 1.496 +inline 1.497 +void 1.498 +nsScannerIterator::normalize_backward() 1.499 + { 1.500 + while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment)) 1.501 + mPosition = mFragment.mFragmentEnd; 1.502 + } 1.503 + 1.504 +inline 1.505 +bool 1.506 +operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) 1.507 + { 1.508 + return lhs.get() == rhs.get(); 1.509 + } 1.510 + 1.511 +inline 1.512 +bool 1.513 +operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) 1.514 + { 1.515 + return lhs.get() != rhs.get(); 1.516 + } 1.517 + 1.518 + 1.519 +inline 1.520 +nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) 1.521 + : mBuffer(const_cast<Buffer*>(aIter.buffer())) 1.522 + , mPosition(const_cast<char16_t*>(aIter.get())) 1.523 + {} 1.524 + 1.525 +inline 1.526 +nsScannerBufferList::Position& 1.527 +nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter) 1.528 + { 1.529 + mBuffer = const_cast<Buffer*>(aIter.buffer()); 1.530 + mPosition = const_cast<char16_t*>(aIter.get()); 1.531 + return *this; 1.532 + } 1.533 + 1.534 + 1.535 + /** 1.536 + * scanner string utils 1.537 + * 1.538 + * These methods mimic the API provided by nsReadableUtils in xpcom/string. 1.539 + * Here we provide only the methods that the htmlparser module needs. 1.540 + */ 1.541 + 1.542 +inline 1.543 +size_t 1.544 +Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd ) 1.545 + { 1.546 + typedef nsScannerBufferList::Position Position; 1.547 + return Position::Distance(Position(aStart), Position(aEnd)); 1.548 + } 1.549 + 1.550 +void 1.551 +CopyUnicodeTo( const nsScannerIterator& aSrcStart, 1.552 + const nsScannerIterator& aSrcEnd, 1.553 + nsAString& aDest ); 1.554 + 1.555 +inline 1.556 +void 1.557 +CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) 1.558 + { 1.559 + nsScannerIterator begin, end; 1.560 + CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); 1.561 + } 1.562 + 1.563 +void 1.564 +AppendUnicodeTo( const nsScannerIterator& aSrcStart, 1.565 + const nsScannerIterator& aSrcEnd, 1.566 + nsAString& aDest ); 1.567 + 1.568 +inline 1.569 +void 1.570 +AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) 1.571 + { 1.572 + nsScannerIterator begin, end; 1.573 + AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); 1.574 + } 1.575 + 1.576 +void 1.577 +AppendUnicodeTo( const nsScannerIterator& aSrcStart, 1.578 + const nsScannerIterator& aSrcEnd, 1.579 + nsScannerSharedSubstring& aDest ); 1.580 + 1.581 +bool 1.582 +FindCharInReadable( char16_t aChar, 1.583 + nsScannerIterator& aStart, 1.584 + const nsScannerIterator& aEnd ); 1.585 + 1.586 +bool 1.587 +FindInReadable( const nsAString& aPattern, 1.588 + nsScannerIterator& aStart, 1.589 + nsScannerIterator& aEnd, 1.590 + const nsStringComparator& = nsDefaultStringComparator() ); 1.591 + 1.592 +bool 1.593 +RFindInReadable( const nsAString& aPattern, 1.594 + nsScannerIterator& aStart, 1.595 + nsScannerIterator& aEnd, 1.596 + const nsStringComparator& = nsDefaultStringComparator() ); 1.597 + 1.598 +inline 1.599 +bool 1.600 +CaseInsensitiveFindInReadable( const nsAString& aPattern, 1.601 + nsScannerIterator& aStart, 1.602 + nsScannerIterator& aEnd ) 1.603 + { 1.604 + return FindInReadable(aPattern, aStart, aEnd, 1.605 + nsCaseInsensitiveStringComparator()); 1.606 + } 1.607 + 1.608 +#endif // !defined(nsScannerString_h___)