parser/htmlparser/public/nsScannerString.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #ifndef nsScannerString_h___
     8 #define nsScannerString_h___
    10 #include "nsString.h"
    11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
    12 #include "mozilla/LinkedList.h"
    13 #include <algorithm>
    16   /**
    17    * NOTE: nsScannerString (and the other classes defined in this file) are
    18    * not related to nsAString or any of the other xpcom/string classes.
    19    *
    20    * nsScannerString is based on the nsSlidingString implementation that used
    21    * to live in xpcom/string.  Now that nsAString is limited to representing
    22    * only single fragment strings, nsSlidingString can no longer be used.
    23    *
    24    * An advantage to this design is that it does not employ any virtual 
    25    * functions.
    26    *
    27    * This file uses SCC-style indenting in deference to the nsSlidingString
    28    * code from which this code is derived ;-)
    29    */
    31 class nsScannerIterator;
    32 class nsScannerSubstring;
    33 class nsScannerString;
    36   /**
    37    * nsScannerBufferList
    38    *
    39    * This class maintains a list of heap-allocated Buffer objects.  The buffers
    40    * are maintained in a circular linked list.  Each buffer has a usage count
    41    * that is decremented by the owning nsScannerSubstring.
    42    *
    43    * The buffer list itself is reference counted.  This allows the buffer list
    44    * to be shared by multiple nsScannerSubstring objects.  The reference
    45    * counting is not threadsafe, which is not at all a requirement.
    46    *
    47    * When a nsScannerSubstring releases its reference to a buffer list, it
    48    * decrements the usage count of the first buffer in the buffer list that it
    49    * was referencing.  It informs the buffer list that it can discard buffers
    50    * starting at that prefix.  The buffer list will do so if the usage count of
    51    * that buffer is 0 and if it is the first buffer in the list.  It will
    52    * continue to prune buffers starting from the front of the buffer list until
    53    * it finds a buffer that has a usage count that is non-zero.
    54    */
    55 class nsScannerBufferList
    56   {
    57     public:
    59         /**
    60          * Buffer objects are directly followed by a data segment.  The start
    61          * of the data segment is determined by increment the |this| pointer
    62          * by 1 unit.
    63          */
    64       class Buffer : public mozilla::LinkedListElement<Buffer>
    65         {
    66           public:
    68             void IncrementUsageCount() { ++mUsageCount; }
    69             void DecrementUsageCount() { --mUsageCount; }
    71             bool IsInUse() const { return mUsageCount != 0; }
    73             const char16_t* DataStart() const { return (const char16_t*) (this+1); }
    74                   char16_t* DataStart()       { return (      char16_t*) (this+1); }
    76             const char16_t* DataEnd() const { return mDataEnd; }
    77                   char16_t* DataEnd()       { return mDataEnd; }
    79             const Buffer* Next() const { return getNext(); }
    80                   Buffer* Next()       { return getNext(); }
    82             const Buffer* Prev() const { return getPrevious(); }
    83                   Buffer* Prev()       { return getPrevious(); }
    85             uint32_t DataLength() const { return mDataEnd - DataStart(); }
    86             void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
    88           private:
    90             friend class nsScannerBufferList;
    92             int32_t    mUsageCount;
    93             char16_t* mDataEnd;
    94         };
    96         /**
    97          * Position objects serve as lightweight pointers into a buffer list.
    98          * The mPosition member must be contained with mBuffer->DataStart()
    99          * and mBuffer->DataEnd().
   100          */
   101       class Position
   102         {
   103           public:
   105             Position() {}
   107             Position( Buffer* buffer, char16_t* position )
   108               : mBuffer(buffer)
   109               , mPosition(position)
   110               {}
   112             inline
   113             Position( const nsScannerIterator& aIter );
   115             inline
   116             Position& operator=( const nsScannerIterator& aIter );
   118             static size_t Distance( const Position& p1, const Position& p2 );
   120             Buffer*    mBuffer;
   121             char16_t* mPosition;
   122         };
   124       static Buffer* AllocBufferFromString( const nsAString& );
   125       static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
   127       nsScannerBufferList( Buffer* buf )
   128         : mRefCnt(0)
   129         {
   130           mBuffers.insertBack(buf);
   131         }
   133       void  AddRef()  { ++mRefCnt; }
   134       void  Release() { if (--mRefCnt == 0) delete this; }
   136       void  Append( Buffer* buf ) { mBuffers.insertBack(buf); } 
   137       void  InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
   138       void  SplitBuffer( const Position& );
   139       void  DiscardUnreferencedPrefix( Buffer* );
   141             Buffer* Head()       { return mBuffers.getFirst(); }
   142       const Buffer* Head() const { return mBuffers.getFirst(); }
   144             Buffer* Tail()       { return mBuffers.getLast(); }
   145       const Buffer* Tail() const { return mBuffers.getLast(); }
   147     private:
   149       friend class nsScannerSubstring;
   151       ~nsScannerBufferList() { ReleaseAll(); }
   152       void ReleaseAll();
   154       int32_t mRefCnt;
   155       mozilla::LinkedList<Buffer> mBuffers;
   156   };
   159   /**
   160    * nsScannerFragment represents a "slice" of a Buffer object.
   161    */
   162 struct nsScannerFragment
   163   {
   164     typedef nsScannerBufferList::Buffer Buffer;
   166     const Buffer*    mBuffer;
   167     const char16_t* mFragmentStart;
   168     const char16_t* mFragmentEnd;
   169   };
   172   /**
   173    * nsScannerSubstring is the base class for nsScannerString.  It provides
   174    * access to iterators and methods to bind the substring to another
   175    * substring or nsAString instance.
   176    *
   177    * This class owns the buffer list.
   178    */
   179 class nsScannerSubstring
   180   {
   181     public:
   182       typedef nsScannerBufferList::Buffer      Buffer;
   183       typedef nsScannerBufferList::Position    Position;
   184       typedef uint32_t                         size_type;
   186       nsScannerSubstring();
   187       nsScannerSubstring( const nsAString& s );
   189       ~nsScannerSubstring();
   191       nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
   192       nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
   194       size_type Length() const { return mLength; }
   196       int32_t CountChar( char16_t ) const;
   198       void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
   199       void Rebind( const nsAString& );
   201       const nsSubstring& AsString() const;
   203       bool GetNextFragment( nsScannerFragment& ) const;
   204       bool GetPrevFragment( nsScannerFragment& ) const;
   206       static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
   207       static inline Buffer* AllocBuffer( size_type aCapacity )             { return nsScannerBufferList::AllocBuffer(aCapacity); }
   209     protected:
   211       void acquire_ownership_of_buffer_list() const
   212         {
   213           mBufferList->AddRef();
   214           mStart.mBuffer->IncrementUsageCount();
   215         }
   217       void release_ownership_of_buffer_list()
   218         {
   219           if (mBufferList)
   220             {
   221               mStart.mBuffer->DecrementUsageCount();
   222               mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
   223               mBufferList->Release();
   224             }
   225         }
   227       void init_range_from_buffer_list()
   228         {
   229           mStart.mBuffer = mBufferList->Head();
   230           mStart.mPosition = mStart.mBuffer->DataStart();
   232           mEnd.mBuffer = mBufferList->Tail();
   233           mEnd.mPosition = mEnd.mBuffer->DataEnd();
   235           mLength = Position::Distance(mStart, mEnd);
   236         }
   238       Position             mStart;
   239       Position             mEnd;
   240       nsScannerBufferList *mBufferList;
   241       size_type            mLength;
   243       // these fields are used to implement AsString
   244       nsDependentSubstring mFlattenedRep;
   245       bool                 mIsDirty;
   247       friend class nsScannerSharedSubstring;
   248   };
   251   /**
   252    * nsScannerString provides methods to grow and modify a buffer list.
   253    */
   254 class nsScannerString : public nsScannerSubstring
   255   {
   256     public:
   258       nsScannerString( Buffer* );
   260         // you are giving ownership to the string, it takes and keeps your
   261         // buffer, deleting it when done.
   262         // Use AllocBuffer or AllocBufferFromString to create a Buffer object
   263         // for use with this function.
   264       void AppendBuffer( Buffer* );
   266       void DiscardPrefix( const nsScannerIterator& );
   267         // any other way you want to do this?
   269       void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
   270       void ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar);
   271   };
   274   /**
   275    * nsScannerSharedSubstring implements copy-on-write semantics for
   276    * nsScannerSubstring.  When you call .writable(), it will copy the data
   277    * and return a mutable string object.  This class also manages releasing
   278    * the reference to the scanner buffer when it is no longer needed.
   279    */
   281 class nsScannerSharedSubstring
   282   {
   283     public:
   284       nsScannerSharedSubstring()
   285         : mBuffer(nullptr), mBufferList(nullptr) { }
   287       ~nsScannerSharedSubstring()
   288         {
   289           if (mBufferList)
   290             ReleaseBuffer();
   291         }
   293         // Acquire a copy-on-write reference to the given substring.
   294       NS_HIDDEN_(void) Rebind(const nsScannerIterator& aStart,
   295                               const nsScannerIterator& aEnd);
   297        // Get a mutable reference to this string
   298       nsSubstring& writable()
   299         {
   300           if (mBufferList)
   301             MakeMutable();
   303           return mString;
   304         }
   306         // Get a const reference to this string
   307       const nsSubstring& str() const { return mString; }
   309     private:
   310       typedef nsScannerBufferList::Buffer Buffer;
   312       NS_HIDDEN_(void) ReleaseBuffer();
   313       NS_HIDDEN_(void) MakeMutable();
   315       nsDependentSubstring  mString;
   316       Buffer               *mBuffer;
   317       nsScannerBufferList  *mBufferList;
   318   };
   320   /**
   321    * nsScannerIterator works just like nsReadingIterator<CharT> except that
   322    * it knows how to iterate over a list of scanner buffers.
   323    */
   324 class nsScannerIterator
   325   {
   326     public:
   327       typedef nsScannerIterator             self_type;
   328       typedef ptrdiff_t                     difference_type;
   329       typedef char16_t                     value_type;
   330       typedef const char16_t*              pointer;
   331       typedef const char16_t&              reference;
   332       typedef nsScannerSubstring::Buffer    Buffer;
   334     protected:
   336       nsScannerFragment         mFragment;
   337       const char16_t*          mPosition;
   338       const nsScannerSubstring* mOwner;
   340       friend class nsScannerSubstring;
   341       friend class nsScannerSharedSubstring;
   343     public:
   344       nsScannerIterator() {}
   345       // nsScannerIterator( const nsScannerIterator& );             // auto-generated copy-constructor OK
   346       // nsScannerIterator& operator=( const nsScannerIterator& );  // auto-generated copy-assignment operator OK
   348       inline void normalize_forward();
   349       inline void normalize_backward();
   351       pointer get() const
   352         {
   353           return mPosition;
   354         }
   356       char16_t operator*() const
   357         {
   358           return *get();
   359         }
   361       const nsScannerFragment& fragment() const
   362         {
   363           return mFragment;
   364         }
   366       const Buffer* buffer() const
   367         {
   368           return mFragment.mBuffer;
   369         }
   371       self_type& operator++()
   372         {
   373           ++mPosition;
   374           normalize_forward();
   375           return *this;
   376         }
   378       self_type operator++( int )
   379         {
   380           self_type result(*this);
   381           ++mPosition;
   382           normalize_forward();
   383           return result;
   384         }
   386       self_type& operator--()
   387         {
   388           normalize_backward();
   389           --mPosition;
   390           return *this;
   391         }
   393       self_type operator--( int )
   394         {
   395           self_type result(*this);
   396           normalize_backward();
   397           --mPosition;
   398           return result;
   399         }
   401       difference_type size_forward() const
   402         {
   403           return mFragment.mFragmentEnd - mPosition;
   404         }
   406       difference_type size_backward() const
   407         {
   408           return mPosition - mFragment.mFragmentStart;
   409         }
   411       self_type& advance( difference_type n )
   412         {
   413           while ( n > 0 )
   414             {
   415               difference_type one_hop = std::min(n, size_forward());
   417               NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
   418                 // perhaps I should |break| if |!one_hop|?
   420               mPosition += one_hop;
   421               normalize_forward();
   422               n -= one_hop;
   423             }
   425           while ( n < 0 )
   426             {
   427               normalize_backward();
   428               difference_type one_hop = std::max(n, -size_backward());
   430               NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
   431                 // perhaps I should |break| if |!one_hop|?
   433               mPosition += one_hop;
   434               n -= one_hop;
   435             }
   437           return *this;
   438         }
   439   };
   442 inline
   443 bool
   444 SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
   445   {
   446     return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
   447   }
   450   /**
   451    * this class is needed in order to make use of the methods in nsAlgorithm.h
   452    */
   453 template <>
   454 struct nsCharSourceTraits<nsScannerIterator>
   455   {
   456     typedef nsScannerIterator::difference_type difference_type;
   458     static
   459     uint32_t
   460     readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
   461       {
   462         return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
   463       }
   465     static
   466     const nsScannerIterator::value_type*
   467     read( const nsScannerIterator& iter )
   468       {
   469         return iter.get();
   470       }
   472     static
   473     void
   474     advance( nsScannerIterator& s, difference_type n )
   475       {
   476         s.advance(n);
   477       }
   478   };
   481   /**
   482    * inline methods follow
   483    */
   485 inline
   486 void
   487 nsScannerIterator::normalize_forward()
   488   {
   489     while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
   490       mPosition = mFragment.mFragmentStart;
   491   }
   493 inline
   494 void
   495 nsScannerIterator::normalize_backward()
   496   {
   497     while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
   498       mPosition = mFragment.mFragmentEnd;
   499   }
   501 inline
   502 bool
   503 operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
   504   {
   505     return lhs.get() == rhs.get();
   506   }
   508 inline
   509 bool
   510 operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
   511   {
   512     return lhs.get() != rhs.get();
   513   }
   516 inline
   517 nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
   518   : mBuffer(const_cast<Buffer*>(aIter.buffer()))
   519   , mPosition(const_cast<char16_t*>(aIter.get()))
   520   {}
   522 inline
   523 nsScannerBufferList::Position&
   524 nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
   525   {
   526     mBuffer   = const_cast<Buffer*>(aIter.buffer());
   527     mPosition = const_cast<char16_t*>(aIter.get());
   528     return *this;
   529   }
   532   /**
   533    * scanner string utils
   534    *
   535    * These methods mimic the API provided by nsReadableUtils in xpcom/string.
   536    * Here we provide only the methods that the htmlparser module needs.
   537    */
   539 inline
   540 size_t
   541 Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
   542   {
   543     typedef nsScannerBufferList::Position Position;
   544     return Position::Distance(Position(aStart), Position(aEnd));
   545   }
   547 void
   548 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
   549                const nsScannerIterator& aSrcEnd,
   550                nsAString& aDest );
   552 inline
   553 void
   554 CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
   555   {
   556     nsScannerIterator begin, end;
   557     CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
   558   }
   560 void
   561 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
   562                  const nsScannerIterator& aSrcEnd,
   563                  nsAString& aDest );
   565 inline
   566 void
   567 AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
   568   {
   569     nsScannerIterator begin, end;
   570     AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
   571   }
   573 void
   574 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
   575                  const nsScannerIterator& aSrcEnd,
   576                  nsScannerSharedSubstring& aDest );
   578 bool
   579 FindCharInReadable( char16_t aChar,
   580                     nsScannerIterator& aStart,
   581                     const nsScannerIterator& aEnd );
   583 bool
   584 FindInReadable( const nsAString& aPattern,
   585                 nsScannerIterator& aStart,
   586                 nsScannerIterator& aEnd,
   587                 const nsStringComparator& = nsDefaultStringComparator() );
   589 bool
   590 RFindInReadable( const nsAString& aPattern,
   591                  nsScannerIterator& aStart,
   592                  nsScannerIterator& aEnd,
   593                  const nsStringComparator& = nsDefaultStringComparator() );
   595 inline
   596 bool
   597 CaseInsensitiveFindInReadable( const nsAString& aPattern, 
   598                                nsScannerIterator& aStart,
   599                                nsScannerIterator& aEnd )
   600   {
   601     return FindInReadable(aPattern, aStart, aEnd,
   602                           nsCaseInsensitiveStringComparator());
   603   }
   605 #endif // !defined(nsScannerString_h___)

mercurial