/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsScannerString_h___
#define nsScannerString_h___

#include "nsString.h"
#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
#include "mozilla/LinkedList.h"
#include <algorithm>        // for std::min / std::max


/**
 * NOTE: nsScannerString (and the other classes defined in this file) are
 * not related to nsAString or any of the other xpcom/string classes.
 *
 * nsScannerString is based on the nsSlidingString implementation that used
 * to live in xpcom/string.  Now that nsAString is limited to representing
 * only single fragment strings, nsSlidingString can no longer be used.
 *
 * An advantage to this design is that it does not employ any virtual
 * functions.
 *
 * This file uses SCC-style indenting in deference to the nsSlidingString
 * code from which this code is derived ;-)
 */

class nsScannerIterator;
class nsScannerSubstring;
class nsScannerString;


/**
 * nsScannerBufferList
 *
 * This class maintains a list of heap-allocated Buffer objects.  The buffers
 * are maintained in a circular linked list.  Each buffer has a usage count
 * that is decremented by the owning nsScannerSubstring.
 *
 * The buffer list itself is reference counted.
This allows the buffer list michael@0: * to be shared by multiple nsScannerSubstring objects. The reference michael@0: * counting is not threadsafe, which is not at all a requirement. michael@0: * michael@0: * When a nsScannerSubstring releases its reference to a buffer list, it michael@0: * decrements the usage count of the first buffer in the buffer list that it michael@0: * was referencing. It informs the buffer list that it can discard buffers michael@0: * starting at that prefix. The buffer list will do so if the usage count of michael@0: * that buffer is 0 and if it is the first buffer in the list. It will michael@0: * continue to prune buffers starting from the front of the buffer list until michael@0: * it finds a buffer that has a usage count that is non-zero. michael@0: */ michael@0: class nsScannerBufferList michael@0: { michael@0: public: michael@0: michael@0: /** michael@0: * Buffer objects are directly followed by a data segment. The start michael@0: * of the data segment is determined by increment the |this| pointer michael@0: * by 1 unit. 
michael@0: */ michael@0: class Buffer : public mozilla::LinkedListElement michael@0: { michael@0: public: michael@0: michael@0: void IncrementUsageCount() { ++mUsageCount; } michael@0: void DecrementUsageCount() { --mUsageCount; } michael@0: michael@0: bool IsInUse() const { return mUsageCount != 0; } michael@0: michael@0: const char16_t* DataStart() const { return (const char16_t*) (this+1); } michael@0: char16_t* DataStart() { return ( char16_t*) (this+1); } michael@0: michael@0: const char16_t* DataEnd() const { return mDataEnd; } michael@0: char16_t* DataEnd() { return mDataEnd; } michael@0: michael@0: const Buffer* Next() const { return getNext(); } michael@0: Buffer* Next() { return getNext(); } michael@0: michael@0: const Buffer* Prev() const { return getPrevious(); } michael@0: Buffer* Prev() { return getPrevious(); } michael@0: michael@0: uint32_t DataLength() const { return mDataEnd - DataStart(); } michael@0: void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } michael@0: michael@0: private: michael@0: michael@0: friend class nsScannerBufferList; michael@0: michael@0: int32_t mUsageCount; michael@0: char16_t* mDataEnd; michael@0: }; michael@0: michael@0: /** michael@0: * Position objects serve as lightweight pointers into a buffer list. michael@0: * The mPosition member must be contained with mBuffer->DataStart() michael@0: * and mBuffer->DataEnd(). 
michael@0: */ michael@0: class Position michael@0: { michael@0: public: michael@0: michael@0: Position() {} michael@0: michael@0: Position( Buffer* buffer, char16_t* position ) michael@0: : mBuffer(buffer) michael@0: , mPosition(position) michael@0: {} michael@0: michael@0: inline michael@0: Position( const nsScannerIterator& aIter ); michael@0: michael@0: inline michael@0: Position& operator=( const nsScannerIterator& aIter ); michael@0: michael@0: static size_t Distance( const Position& p1, const Position& p2 ); michael@0: michael@0: Buffer* mBuffer; michael@0: char16_t* mPosition; michael@0: }; michael@0: michael@0: static Buffer* AllocBufferFromString( const nsAString& ); michael@0: static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars michael@0: michael@0: nsScannerBufferList( Buffer* buf ) michael@0: : mRefCnt(0) michael@0: { michael@0: mBuffers.insertBack(buf); michael@0: } michael@0: michael@0: void AddRef() { ++mRefCnt; } michael@0: void Release() { if (--mRefCnt == 0) delete this; } michael@0: michael@0: void Append( Buffer* buf ) { mBuffers.insertBack(buf); } michael@0: void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); } michael@0: void SplitBuffer( const Position& ); michael@0: void DiscardUnreferencedPrefix( Buffer* ); michael@0: michael@0: Buffer* Head() { return mBuffers.getFirst(); } michael@0: const Buffer* Head() const { return mBuffers.getFirst(); } michael@0: michael@0: Buffer* Tail() { return mBuffers.getLast(); } michael@0: const Buffer* Tail() const { return mBuffers.getLast(); } michael@0: michael@0: private: michael@0: michael@0: friend class nsScannerSubstring; michael@0: michael@0: ~nsScannerBufferList() { ReleaseAll(); } michael@0: void ReleaseAll(); michael@0: michael@0: int32_t mRefCnt; michael@0: mozilla::LinkedList mBuffers; michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * nsScannerFragment represents a "slice" of a Buffer object. 
michael@0: */ michael@0: struct nsScannerFragment michael@0: { michael@0: typedef nsScannerBufferList::Buffer Buffer; michael@0: michael@0: const Buffer* mBuffer; michael@0: const char16_t* mFragmentStart; michael@0: const char16_t* mFragmentEnd; michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * nsScannerSubstring is the base class for nsScannerString. It provides michael@0: * access to iterators and methods to bind the substring to another michael@0: * substring or nsAString instance. michael@0: * michael@0: * This class owns the buffer list. michael@0: */ michael@0: class nsScannerSubstring michael@0: { michael@0: public: michael@0: typedef nsScannerBufferList::Buffer Buffer; michael@0: typedef nsScannerBufferList::Position Position; michael@0: typedef uint32_t size_type; michael@0: michael@0: nsScannerSubstring(); michael@0: nsScannerSubstring( const nsAString& s ); michael@0: michael@0: ~nsScannerSubstring(); michael@0: michael@0: nsScannerIterator& BeginReading( nsScannerIterator& iter ) const; michael@0: nsScannerIterator& EndReading( nsScannerIterator& iter ) const; michael@0: michael@0: size_type Length() const { return mLength; } michael@0: michael@0: int32_t CountChar( char16_t ) const; michael@0: michael@0: void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& ); michael@0: void Rebind( const nsAString& ); michael@0: michael@0: const nsSubstring& AsString() const; michael@0: michael@0: bool GetNextFragment( nsScannerFragment& ) const; michael@0: bool GetPrevFragment( nsScannerFragment& ) const; michael@0: michael@0: static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); } michael@0: static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); } michael@0: michael@0: protected: michael@0: michael@0: void acquire_ownership_of_buffer_list() const michael@0: { michael@0: 
mBufferList->AddRef(); michael@0: mStart.mBuffer->IncrementUsageCount(); michael@0: } michael@0: michael@0: void release_ownership_of_buffer_list() michael@0: { michael@0: if (mBufferList) michael@0: { michael@0: mStart.mBuffer->DecrementUsageCount(); michael@0: mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); michael@0: mBufferList->Release(); michael@0: } michael@0: } michael@0: michael@0: void init_range_from_buffer_list() michael@0: { michael@0: mStart.mBuffer = mBufferList->Head(); michael@0: mStart.mPosition = mStart.mBuffer->DataStart(); michael@0: michael@0: mEnd.mBuffer = mBufferList->Tail(); michael@0: mEnd.mPosition = mEnd.mBuffer->DataEnd(); michael@0: michael@0: mLength = Position::Distance(mStart, mEnd); michael@0: } michael@0: michael@0: Position mStart; michael@0: Position mEnd; michael@0: nsScannerBufferList *mBufferList; michael@0: size_type mLength; michael@0: michael@0: // these fields are used to implement AsString michael@0: nsDependentSubstring mFlattenedRep; michael@0: bool mIsDirty; michael@0: michael@0: friend class nsScannerSharedSubstring; michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * nsScannerString provides methods to grow and modify a buffer list. michael@0: */ michael@0: class nsScannerString : public nsScannerSubstring michael@0: { michael@0: public: michael@0: michael@0: nsScannerString( Buffer* ); michael@0: michael@0: // you are giving ownership to the string, it takes and keeps your michael@0: // buffer, deleting it when done. michael@0: // Use AllocBuffer or AllocBufferFromString to create a Buffer object michael@0: // for use with this function. michael@0: void AppendBuffer( Buffer* ); michael@0: michael@0: void DiscardPrefix( const nsScannerIterator& ); michael@0: // any other way you want to do this? 
michael@0: michael@0: void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); michael@0: void ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar); michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * nsScannerSharedSubstring implements copy-on-write semantics for michael@0: * nsScannerSubstring. When you call .writable(), it will copy the data michael@0: * and return a mutable string object. This class also manages releasing michael@0: * the reference to the scanner buffer when it is no longer needed. michael@0: */ michael@0: michael@0: class nsScannerSharedSubstring michael@0: { michael@0: public: michael@0: nsScannerSharedSubstring() michael@0: : mBuffer(nullptr), mBufferList(nullptr) { } michael@0: michael@0: ~nsScannerSharedSubstring() michael@0: { michael@0: if (mBufferList) michael@0: ReleaseBuffer(); michael@0: } michael@0: michael@0: // Acquire a copy-on-write reference to the given substring. michael@0: NS_HIDDEN_(void) Rebind(const nsScannerIterator& aStart, michael@0: const nsScannerIterator& aEnd); michael@0: michael@0: // Get a mutable reference to this string michael@0: nsSubstring& writable() michael@0: { michael@0: if (mBufferList) michael@0: MakeMutable(); michael@0: michael@0: return mString; michael@0: } michael@0: michael@0: // Get a const reference to this string michael@0: const nsSubstring& str() const { return mString; } michael@0: michael@0: private: michael@0: typedef nsScannerBufferList::Buffer Buffer; michael@0: michael@0: NS_HIDDEN_(void) ReleaseBuffer(); michael@0: NS_HIDDEN_(void) MakeMutable(); michael@0: michael@0: nsDependentSubstring mString; michael@0: Buffer *mBuffer; michael@0: nsScannerBufferList *mBufferList; michael@0: }; michael@0: michael@0: /** michael@0: * nsScannerIterator works just like nsReadingIterator except that michael@0: * it knows how to iterate over a list of scanner buffers. 
michael@0: */ michael@0: class nsScannerIterator michael@0: { michael@0: public: michael@0: typedef nsScannerIterator self_type; michael@0: typedef ptrdiff_t difference_type; michael@0: typedef char16_t value_type; michael@0: typedef const char16_t* pointer; michael@0: typedef const char16_t& reference; michael@0: typedef nsScannerSubstring::Buffer Buffer; michael@0: michael@0: protected: michael@0: michael@0: nsScannerFragment mFragment; michael@0: const char16_t* mPosition; michael@0: const nsScannerSubstring* mOwner; michael@0: michael@0: friend class nsScannerSubstring; michael@0: friend class nsScannerSharedSubstring; michael@0: michael@0: public: michael@0: nsScannerIterator() {} michael@0: // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK michael@0: // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK michael@0: michael@0: inline void normalize_forward(); michael@0: inline void normalize_backward(); michael@0: michael@0: pointer get() const michael@0: { michael@0: return mPosition; michael@0: } michael@0: michael@0: char16_t operator*() const michael@0: { michael@0: return *get(); michael@0: } michael@0: michael@0: const nsScannerFragment& fragment() const michael@0: { michael@0: return mFragment; michael@0: } michael@0: michael@0: const Buffer* buffer() const michael@0: { michael@0: return mFragment.mBuffer; michael@0: } michael@0: michael@0: self_type& operator++() michael@0: { michael@0: ++mPosition; michael@0: normalize_forward(); michael@0: return *this; michael@0: } michael@0: michael@0: self_type operator++( int ) michael@0: { michael@0: self_type result(*this); michael@0: ++mPosition; michael@0: normalize_forward(); michael@0: return result; michael@0: } michael@0: michael@0: self_type& operator--() michael@0: { michael@0: normalize_backward(); michael@0: --mPosition; michael@0: return *this; michael@0: } michael@0: michael@0: self_type operator--( int ) 
michael@0: { michael@0: self_type result(*this); michael@0: normalize_backward(); michael@0: --mPosition; michael@0: return result; michael@0: } michael@0: michael@0: difference_type size_forward() const michael@0: { michael@0: return mFragment.mFragmentEnd - mPosition; michael@0: } michael@0: michael@0: difference_type size_backward() const michael@0: { michael@0: return mPosition - mFragment.mFragmentStart; michael@0: } michael@0: michael@0: self_type& advance( difference_type n ) michael@0: { michael@0: while ( n > 0 ) michael@0: { michael@0: difference_type one_hop = std::min(n, size_forward()); michael@0: michael@0: NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string"); michael@0: // perhaps I should |break| if |!one_hop|? michael@0: michael@0: mPosition += one_hop; michael@0: normalize_forward(); michael@0: n -= one_hop; michael@0: } michael@0: michael@0: while ( n < 0 ) michael@0: { michael@0: normalize_backward(); michael@0: difference_type one_hop = std::max(n, -size_backward()); michael@0: michael@0: NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string"); michael@0: // perhaps I should |break| if |!one_hop|? 
michael@0: michael@0: mPosition += one_hop; michael@0: n -= one_hop; michael@0: } michael@0: michael@0: return *this; michael@0: } michael@0: }; michael@0: michael@0: michael@0: inline michael@0: bool michael@0: SameFragment( const nsScannerIterator& a, const nsScannerIterator& b ) michael@0: { michael@0: return a.fragment().mFragmentStart == b.fragment().mFragmentStart; michael@0: } michael@0: michael@0: michael@0: /** michael@0: * this class is needed in order to make use of the methods in nsAlgorithm.h michael@0: */ michael@0: template <> michael@0: struct nsCharSourceTraits michael@0: { michael@0: typedef nsScannerIterator::difference_type difference_type; michael@0: michael@0: static michael@0: uint32_t michael@0: readable_distance( const nsScannerIterator& first, const nsScannerIterator& last ) michael@0: { michael@0: return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward()); michael@0: } michael@0: michael@0: static michael@0: const nsScannerIterator::value_type* michael@0: read( const nsScannerIterator& iter ) michael@0: { michael@0: return iter.get(); michael@0: } michael@0: michael@0: static michael@0: void michael@0: advance( nsScannerIterator& s, difference_type n ) michael@0: { michael@0: s.advance(n); michael@0: } michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * inline methods follow michael@0: */ michael@0: michael@0: inline michael@0: void michael@0: nsScannerIterator::normalize_forward() michael@0: { michael@0: while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment)) michael@0: mPosition = mFragment.mFragmentStart; michael@0: } michael@0: michael@0: inline michael@0: void michael@0: nsScannerIterator::normalize_backward() michael@0: { michael@0: while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment)) michael@0: mPosition = mFragment.mFragmentEnd; michael@0: } michael@0: michael@0: inline michael@0: bool michael@0: operator==( const 
nsScannerIterator& lhs, const nsScannerIterator& rhs ) michael@0: { michael@0: return lhs.get() == rhs.get(); michael@0: } michael@0: michael@0: inline michael@0: bool michael@0: operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) michael@0: { michael@0: return lhs.get() != rhs.get(); michael@0: } michael@0: michael@0: michael@0: inline michael@0: nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) michael@0: : mBuffer(const_cast(aIter.buffer())) michael@0: , mPosition(const_cast(aIter.get())) michael@0: {} michael@0: michael@0: inline michael@0: nsScannerBufferList::Position& michael@0: nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter) michael@0: { michael@0: mBuffer = const_cast(aIter.buffer()); michael@0: mPosition = const_cast(aIter.get()); michael@0: return *this; michael@0: } michael@0: michael@0: michael@0: /** michael@0: * scanner string utils michael@0: * michael@0: * These methods mimic the API provided by nsReadableUtils in xpcom/string. michael@0: * Here we provide only the methods that the htmlparser module needs. 
michael@0: */ michael@0: michael@0: inline michael@0: size_t michael@0: Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd ) michael@0: { michael@0: typedef nsScannerBufferList::Position Position; michael@0: return Position::Distance(Position(aStart), Position(aEnd)); michael@0: } michael@0: michael@0: void michael@0: CopyUnicodeTo( const nsScannerIterator& aSrcStart, michael@0: const nsScannerIterator& aSrcEnd, michael@0: nsAString& aDest ); michael@0: michael@0: inline michael@0: void michael@0: CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) michael@0: { michael@0: nsScannerIterator begin, end; michael@0: CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); michael@0: } michael@0: michael@0: void michael@0: AppendUnicodeTo( const nsScannerIterator& aSrcStart, michael@0: const nsScannerIterator& aSrcEnd, michael@0: nsAString& aDest ); michael@0: michael@0: inline michael@0: void michael@0: AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) michael@0: { michael@0: nsScannerIterator begin, end; michael@0: AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); michael@0: } michael@0: michael@0: void michael@0: AppendUnicodeTo( const nsScannerIterator& aSrcStart, michael@0: const nsScannerIterator& aSrcEnd, michael@0: nsScannerSharedSubstring& aDest ); michael@0: michael@0: bool michael@0: FindCharInReadable( char16_t aChar, michael@0: nsScannerIterator& aStart, michael@0: const nsScannerIterator& aEnd ); michael@0: michael@0: bool michael@0: FindInReadable( const nsAString& aPattern, michael@0: nsScannerIterator& aStart, michael@0: nsScannerIterator& aEnd, michael@0: const nsStringComparator& = nsDefaultStringComparator() ); michael@0: michael@0: bool michael@0: RFindInReadable( const nsAString& aPattern, michael@0: nsScannerIterator& aStart, michael@0: nsScannerIterator& aEnd, michael@0: const nsStringComparator& = nsDefaultStringComparator() ); michael@0: michael@0: inline 
michael@0: bool michael@0: CaseInsensitiveFindInReadable( const nsAString& aPattern, michael@0: nsScannerIterator& aStart, michael@0: nsScannerIterator& aEnd ) michael@0: { michael@0: return FindInReadable(aPattern, aStart, aEnd, michael@0: nsCaseInsensitiveStringComparator()); michael@0: } michael@0: michael@0: #endif // !defined(nsScannerString_h___)