parser/htmlparser/src/nsScannerString.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include <stdlib.h>
     8 #include "nsScannerString.h"
    11   /**
    12    * nsScannerBufferList
    13    */
    15 #define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
    16                       (sizeof(Buffer) + sizeof(char16_t)))
    18 nsScannerBufferList::Buffer*
    19 nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
    20   {
    21     uint32_t len = aString.Length();
    22     Buffer* buf = AllocBuffer(len);
    24     if (buf)
    25       {
    26         nsAString::const_iterator source;
    27         aString.BeginReading(source);
    28         nsCharTraits<char16_t>::copy(buf->DataStart(), source.get(), len);
    29       }
    30     return buf;
    31   }
    33 nsScannerBufferList::Buffer*
    34 nsScannerBufferList::AllocBuffer( uint32_t capacity )
    35   {
    36     if (capacity > MAX_CAPACITY)
    37       return nullptr;
    39     void* ptr = malloc(sizeof(Buffer) + (capacity + 1) * sizeof(char16_t));
    40     if (!ptr)
    41       return nullptr;
    43     Buffer* buf = new (ptr) Buffer();
    45     buf->mUsageCount = 0;
    46     buf->mDataEnd = buf->DataStart() + capacity;
    48     // XXX null terminate.  this shouldn't be required, but we do it because
    49     // nsScanner erroneously thinks it can dereference DataEnd :-(
    50     *buf->mDataEnd = char16_t(0);
    51     return buf;
    52   }
    54 void
    55 nsScannerBufferList::ReleaseAll()
    56   {
    57     while (!mBuffers.isEmpty())
    58       {
    59         Buffer* node = mBuffers.popFirst();
    60         //printf(">>> freeing buffer @%p\n", node);
    61         free(node);
    62       }
    63   }
    65 void
    66 nsScannerBufferList::SplitBuffer( const Position& pos )
    67   {
    68     // splitting to the right keeps the work string and any extant token
    69     // pointing to and holding a reference count on the same buffer.
    71     Buffer* bufferToSplit = pos.mBuffer;
    72     NS_ASSERTION(bufferToSplit, "null pointer");
    74     uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
    75     NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
    76                  splitOffset <= bufferToSplit->DataLength(),
    77                  "split offset is outside buffer");
    79     uint32_t len = bufferToSplit->DataLength() - splitOffset;
    80     Buffer* new_buffer = AllocBuffer(len);
    81     if (new_buffer)
    82       {
    83         nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
    84                                       bufferToSplit->DataStart() + splitOffset,
    85                                       len);
    86         InsertAfter(new_buffer, bufferToSplit);
    87         bufferToSplit->SetDataLength(splitOffset);
    88       }
    89   }
    91 void
    92 nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
    93   {
    94     if (aBuf == Head())
    95       {
    96         while (!mBuffers.isEmpty() && !Head()->IsInUse())
    97           {
    98             Buffer* buffer = Head();
    99             buffer->remove();
   100             free(buffer);
   101           }
   102       }
   103   }
   105 size_t
   106 nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
   107   {
   108     size_t result = 0;
   109     if (aStart.mBuffer == aEnd.mBuffer)
   110       {
   111         result = aEnd.mPosition - aStart.mPosition;
   112       }
   113     else
   114       {
   115         result = aStart.mBuffer->DataEnd() - aStart.mPosition;
   116         for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
   117           result += b->DataLength();
   118         result += aEnd.mPosition - aEnd.mBuffer->DataStart();
   119       }
   120     return result;
   121   }
   124 /**
   125  * nsScannerSubstring
   126  */
   128 nsScannerSubstring::nsScannerSubstring()
   129   : mStart(nullptr, nullptr)
   130   , mEnd(nullptr, nullptr)
   131   , mBufferList(nullptr)
   132   , mLength(0)
   133   , mIsDirty(true)
   134   {
   135   }
   137 nsScannerSubstring::nsScannerSubstring( const nsAString& s )
   138   : mBufferList(nullptr)
   139   , mIsDirty(true)
   140   {
   141     Rebind(s);
   142   }
   144 nsScannerSubstring::~nsScannerSubstring()
   145   {
   146     release_ownership_of_buffer_list();
   147   }
   149 int32_t
   150 nsScannerSubstring::CountChar( char16_t c ) const
   151   {
   152       /*
   153         re-write this to use a counting sink
   154        */
   156     size_type result = 0;
   157     size_type lengthToExamine = Length();
   159     nsScannerIterator iter;
   160     for ( BeginReading(iter); ; )
   161       {
   162         int32_t lengthToExamineInThisFragment = iter.size_forward();
   163         const char16_t* fromBegin = iter.get();
   164         result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
   165         if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
   166           return result;
   167         iter.advance(lengthToExamineInThisFragment);
   168       }
   169       // never reached; quiets warnings
   170     return 0;
   171   }
   173 void
   174 nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
   175                             const nsScannerIterator& aStart, 
   176                             const nsScannerIterator& aEnd )
   177   {
   178     // allow for the case where &aString == this
   180     aString.acquire_ownership_of_buffer_list();
   181     release_ownership_of_buffer_list();
   183     mStart      = aStart;
   184     mEnd        = aEnd;
   185     mBufferList = aString.mBufferList;
   186     mLength     = Distance(aStart, aEnd);
   187     mIsDirty    = true;
   188   }
   190 void
   191 nsScannerSubstring::Rebind( const nsAString& aString )
   192   {
   193     release_ownership_of_buffer_list();
   195     mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
   196     mIsDirty    = true;
   198     init_range_from_buffer_list();
   199     acquire_ownership_of_buffer_list();
   200   }
   202 const nsSubstring&
   203 nsScannerSubstring::AsString() const
   204   {
   205     if (mIsDirty)
   206       {
   207         nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
   209         if (mStart.mBuffer == mEnd.mBuffer) {
   210           // We only have a single fragment to deal with, so just return it
   211           // as a substring.
   212           mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
   213         } else {
   214           // Otherwise, we need to copy the data into a flattened buffer.
   215           nsScannerIterator start, end;
   216           CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep);
   217         }
   219         mutable_this->mIsDirty = false;
   220       }
   222     return mFlattenedRep;
   223   }
   225 nsScannerIterator&
   226 nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
   227   {
   228     iter.mOwner = this;
   230     iter.mFragment.mBuffer = mStart.mBuffer;
   231     iter.mFragment.mFragmentStart = mStart.mPosition;
   232     if (mStart.mBuffer == mEnd.mBuffer)
   233       iter.mFragment.mFragmentEnd = mEnd.mPosition;
   234     else
   235       iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
   237     iter.mPosition = mStart.mPosition;
   238     iter.normalize_forward();
   239     return iter;
   240   }
   242 nsScannerIterator&
   243 nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
   244   {
   245     iter.mOwner = this;
   247     iter.mFragment.mBuffer = mEnd.mBuffer;
   248     iter.mFragment.mFragmentEnd = mEnd.mPosition;
   249     if (mStart.mBuffer == mEnd.mBuffer)
   250       iter.mFragment.mFragmentStart = mStart.mPosition;
   251     else
   252       iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
   254     iter.mPosition = mEnd.mPosition;
   255     // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
   256     return iter;
   257   }
   259 bool
   260 nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
   261   {
   262     // check to see if we are at the end of the buffer list
   263     if (frag.mBuffer == mEnd.mBuffer)
   264       return false;
   266     frag.mBuffer = frag.mBuffer->getNext();
   268     if (frag.mBuffer == mStart.mBuffer)
   269       frag.mFragmentStart = mStart.mPosition;
   270     else
   271       frag.mFragmentStart = frag.mBuffer->DataStart();
   273     if (frag.mBuffer == mEnd.mBuffer)
   274       frag.mFragmentEnd = mEnd.mPosition;
   275     else
   276       frag.mFragmentEnd = frag.mBuffer->DataEnd();
   278     return true;
   279   }
   281 bool
   282 nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
   283   {
   284     // check to see if we are at the beginning of the buffer list
   285     if (frag.mBuffer == mStart.mBuffer)
   286       return false;
   288     frag.mBuffer = frag.mBuffer->getPrevious();
   290     if (frag.mBuffer == mStart.mBuffer)
   291       frag.mFragmentStart = mStart.mPosition;
   292     else
   293       frag.mFragmentStart = frag.mBuffer->DataStart();
   295     if (frag.mBuffer == mEnd.mBuffer)
   296       frag.mFragmentEnd = mEnd.mPosition;
   297     else
   298       frag.mFragmentEnd = frag.mBuffer->DataEnd();
   300     return true;
   301   }
   304   /**
   305    * nsScannerString
   306    */
   308 nsScannerString::nsScannerString( Buffer* aBuf )
   309   {
   310     mBufferList = new nsScannerBufferList(aBuf);
   312     init_range_from_buffer_list();
   313     acquire_ownership_of_buffer_list();
   314   }
   316 void
   317 nsScannerString::AppendBuffer( Buffer* aBuf )
   318   {
   319     mBufferList->Append(aBuf);
   320     mLength += aBuf->DataLength();
   322     mEnd.mBuffer = aBuf;
   323     mEnd.mPosition = aBuf->DataEnd();
   325     mIsDirty = true;
   326   }
   328 void
   329 nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
   330   {
   331     Position old_start(mStart);
   332     mStart = aIter;
   333     mLength -= Position::Distance(old_start, mStart);
   335     mStart.mBuffer->IncrementUsageCount();
   336     old_start.mBuffer->DecrementUsageCount();
   338     mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
   340     mIsDirty = true;
   341   }
   343 void
   344 nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
   345     /*
   346      * Warning: this routine manipulates the shared buffer list in an unexpected way.
   347      *  The original design did not really allow for insertions, but this call promises
   348      *  that if called for a point after the end of all extant token strings, that no token string
   349      *  or the work string will be invalidated.
   350      *
   351      *  This routine is protected because it is the responsibility of the derived class to keep those promises.
   352      */
   353   {
   354     Position insertPos(aInsertPoint);
   356     mBufferList->SplitBuffer(insertPos);
   357       // splitting to the right keeps the work string and any extant token pointing to and
   358       //  holding a reference count on the same buffer
   360     Buffer* new_buffer = AllocBufferFromString(aReadable);
   361       // make a new buffer with all the data to insert...
   362       //  BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
   363       //  of this and decide if we should do the work to fill it
   365     Buffer* buffer_to_split = insertPos.mBuffer;
   366     mBufferList->InsertAfter(new_buffer, buffer_to_split);
   367     mLength += aReadable.Length();
   369     mEnd.mBuffer = mBufferList->Tail();
   370     mEnd.mPosition = mEnd.mBuffer->DataEnd();
   372     mIsDirty = true;
   373   }
   375 void
   376 nsScannerString::ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar)
   377   {
   378     // XXX Casting a const to non-const. Unless the base class
   379     // provides support for writing iterators, this is the best
   380     // that can be done.
   381     char16_t* pos = const_cast<char16_t*>(aPosition.get());
   382     *pos = aChar;
   384     mIsDirty = true;
   385   }
   388   /**
   389    * nsScannerSharedSubstring
   390    */
   392 void
   393 nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
   394                               const nsScannerIterator &aEnd)
   395 {
   396   // If the start and end positions are inside the same buffer, we must
   397   // acquire ownership of the buffer.  If not, we can optimize by not holding
   398   // onto it.
   400   Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
   401   bool sameBuffer = buffer == aEnd.buffer();
   403   nsScannerBufferList *bufferList;
   405   if (sameBuffer) {
   406     bufferList = aStart.mOwner->mBufferList;
   407     bufferList->AddRef();
   408     buffer->IncrementUsageCount();
   409   }
   411   if (mBufferList)
   412     ReleaseBuffer();
   414   if (sameBuffer) {
   415     mBuffer = buffer;
   416     mBufferList = bufferList;
   417     mString.Rebind(aStart.mPosition, aEnd.mPosition);
   418   } else {
   419     mBuffer = nullptr;
   420     mBufferList = nullptr;
   421     CopyUnicodeTo(aStart, aEnd, mString);
   422   }
   423 }
   425 void
   426 nsScannerSharedSubstring::ReleaseBuffer()
   427 {
   428   NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
   429   mBuffer->DecrementUsageCount();
   430   mBufferList->DiscardUnreferencedPrefix(mBuffer);
   431   mBufferList->Release();
   432 }
   434 void
   435 nsScannerSharedSubstring::MakeMutable()
   436 {
   437   nsString temp(mString); // this will force a copy of the data
   438   mString.Assign(temp);   // mString will now share the just-allocated buffer
   440   ReleaseBuffer();
   442   mBuffer = nullptr;
   443   mBufferList = nullptr;
   444 }
   446   /**
   447    * utils -- based on code from nsReadableUtils.cpp
   448    */
   450 // private helper function
   451 static inline
   452 nsAString::iterator&
   453 copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
   454   {
   455     typedef nsCharSourceTraits<nsScannerIterator> source_traits;
   456     typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
   458     while ( first != last )
   459       {
   460         uint32_t distance = source_traits::readable_distance(first, last);
   461         sink_traits::write(result, source_traits::read(first), distance);
   462         NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");
   463         source_traits::advance(first, distance);
   464       }
   466     return result;
   467   }
   469 void
   470 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
   471                const nsScannerIterator& aSrcEnd,
   472                nsAString& aDest )
   473   {
   474     nsAString::iterator writer;
   475     if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible_t())) {
   476       aDest.Truncate();
   477       return; // out of memory
   478     }
   479     aDest.BeginWriting(writer);
   480     nsScannerIterator fromBegin(aSrcStart);
   482     copy_multifragment_string(fromBegin, aSrcEnd, writer);
   483   }
   485 void
   486 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
   487                  const nsScannerIterator& aSrcEnd,
   488                  nsScannerSharedSubstring& aDest )
   489   {
   490     // Check whether we can just create a dependent string.
   491     if (aDest.str().IsEmpty()) {
   492       // We can just make |aDest| point to the buffer.
   493       // This will take care of copying if the buffer spans fragments.
   494       aDest.Rebind(aSrcStart, aSrcEnd);
   495     } else {
   496       // The dest string is not empty, so it can't be a dependent substring.
   497       AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
   498     }
   499   }
   501 void
   502 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
   503                  const nsScannerIterator& aSrcEnd,
   504                  nsAString& aDest )
   505   {
   506     nsAString::iterator writer;
   507     uint32_t oldLength = aDest.Length();
   508     if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible_t()))
   509       return; // out of memory
   510     aDest.BeginWriting(writer).advance(oldLength);
   511     nsScannerIterator fromBegin(aSrcStart);
   513     copy_multifragment_string(fromBegin, aSrcEnd, writer);
   514   }
   516 bool
   517 FindCharInReadable( char16_t aChar,
   518                     nsScannerIterator& aSearchStart,
   519                     const nsScannerIterator& aSearchEnd )
   520   {
   521     while ( aSearchStart != aSearchEnd )
   522       {
   523         int32_t fragmentLength;
   524         if ( SameFragment(aSearchStart, aSearchEnd) ) 
   525           fragmentLength = aSearchEnd.get() - aSearchStart.get();
   526         else
   527           fragmentLength = aSearchStart.size_forward();
   529         const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
   530         if ( charFoundAt ) {
   531           aSearchStart.advance( charFoundAt - aSearchStart.get() );
   532           return true;
   533         }
   535         aSearchStart.advance(fragmentLength);
   536       }
   538     return false;
   539   }
   541 bool
   542 FindInReadable( const nsAString& aPattern,
   543                 nsScannerIterator& aSearchStart,
   544                 nsScannerIterator& aSearchEnd,
   545                 const nsStringComparator& compare )
   546   {
   547     bool found_it = false;
   549       // only bother searching at all if we're given a non-empty range to search
   550     if ( aSearchStart != aSearchEnd )
   551       {
   552         nsAString::const_iterator aPatternStart, aPatternEnd;
   553         aPattern.BeginReading(aPatternStart);
   554         aPattern.EndReading(aPatternEnd);
   556           // outer loop keeps searching till we find it or run out of string to search
   557         while ( !found_it )
   558           {
   559               // fast inner loop (that's what it's called, not what it is) looks for a potential match
   560             while ( aSearchStart != aSearchEnd &&
   561                     compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
   562               ++aSearchStart;
   564               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
   565             if ( aSearchStart == aSearchEnd )
   566               break;
   568               // otherwise, we're at a potential match, let's see if we really hit one
   569             nsAString::const_iterator testPattern(aPatternStart);
   570             nsScannerIterator testSearch(aSearchStart);
   572               // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
   573             for(;;)
   574               {
   575                   // we already compared the first character in the outer loop,
   576                   //  so we'll advance before the next comparison
   577                 ++testPattern;
   578                 ++testSearch;
   580                   // if we verified all the way to the end of the pattern, then we found it!
   581                 if ( testPattern == aPatternEnd )
   582                   {
   583                     found_it = true;
   584                     aSearchEnd = testSearch; // return the exact found range through the parameters
   585                     break;
   586                   }
   588                   // if we got to end of the string we're searching before we hit the end of the
   589                   //  pattern, we'll never find what we're looking for
   590                 if ( testSearch == aSearchEnd )
   591                   {
   592                     aSearchStart = aSearchEnd;
   593                     break;
   594                   }
   596                   // else if we mismatched ... it's time to advance to the next search position
   597                   //  and get back into the `fast' loop
   598                 if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
   599                   {
   600                     ++aSearchStart;
   601                     break;
   602                   }
   603               }
   604           }
   605       }
   607     return found_it;
   608   }
   610   /**
   611    * This implementation is simple, but does too much work.
   612    * It searches the entire string from left to right, and returns the last match found, if any.
   613    * This implementation will be replaced when I get |reverse_iterator|s working.
   614    */
   615 bool
   616 RFindInReadable( const nsAString& aPattern,
   617                  nsScannerIterator& aSearchStart,
   618                  nsScannerIterator& aSearchEnd,
   619                  const nsStringComparator& aComparator )
   620   {
   621     bool found_it = false;
   623     nsScannerIterator savedSearchEnd(aSearchEnd);
   624     nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
   626     while ( searchStart != searchEnd )
   627       {
   628         if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
   629           {
   630             found_it = true;
   632               // this is the best match so far, so remember it
   633             aSearchStart = searchStart;
   634             aSearchEnd = searchEnd;
   636               // ...and get ready to search some more
   637               //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
   638             ++searchStart;
   639             searchEnd = savedSearchEnd;
   640           }
   641       }
   643       // if we never found it, return an empty range
   644     if ( !found_it )
   645       aSearchStart = aSearchEnd;
   647     return found_it;
   648   }

mercurial