The Tor Browser: parser/htmlparser/src/nsScannerString.cpp@6474c204b198 (annotated)

parser/htmlparser/src/nsScannerString.cpp@6474c204b198 (annotated)

parser/htmlparser/src/nsScannerString.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Wed, 31 Dec 2014 06:09:35 +0100
changeset 0: 6474c204b198
permissions: -rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include <stdlib.h>
 #include "nsScannerString.h"
   /**
    * nsScannerBufferList
    */
 // Largest character capacity that AllocBuffer() accepts.  It is chosen so that
 // the byte count AllocBuffer() requests from malloc(),
 //   sizeof(Buffer) + (capacity + 1) * sizeof(char16_t),
 // stays below UINT32_MAX.
 #define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
                       (sizeof(Buffer) + sizeof(char16_t)))
 // Allocates a Buffer sized for |aString| and copies the string's characters
 // into it.  Returns whatever AllocBuffer() produced, i.e. nullptr when the
 // buffer could not be allocated.
 nsScannerBufferList::Buffer*
 nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
   {
     const uint32_t charCount = aString.Length();

     Buffer* result = AllocBuffer(charCount);
     if (!result)
       return nullptr;

     // Copy straight out of the string's storage into the new buffer.
     nsAString::const_iterator reader;
     aString.BeginReading(reader);
     nsCharTraits<char16_t>::copy(result->DataStart(), reader.get(), charCount);
     return result;
   }
 // Allocates a Buffer with room for |capacity| characters plus one trailing
 // terminator.  Returns nullptr when |capacity| exceeds MAX_CAPACITY or when
 // malloc() fails.  The new buffer has a usage count of zero and its data end
 // set |capacity| characters past its data start.
 nsScannerBufferList::Buffer*
 nsScannerBufferList::AllocBuffer( uint32_t capacity )
   {
     if (capacity > MAX_CAPACITY)
       return nullptr;

     // Header, payload, and one extra char16_t for the terminator below.
     const size_t byteCount = sizeof(Buffer) + (capacity + 1) * sizeof(char16_t);
     void* storage = malloc(byteCount);
     if (!storage)
       return nullptr;

     Buffer* result = new (storage) Buffer();
     result->mUsageCount = 0;
     result->mDataEnd = result->DataStart() + capacity;

     // XXX null terminate.  this shouldn't be required, but we do it because
     // nsScanner erroneously thinks it can dereference DataEnd :-(
     *result->mDataEnd = char16_t(0);
     return result;
   }
 // Unlinks every buffer from the list and frees its storage.
 void
 nsScannerBufferList::ReleaseAll()
   {
     while (!mBuffers.isEmpty())
       {
         // popFirst() detaches the head; the storage came from malloc().
         free(mBuffers.popFirst());
       }
   }
 void
 nsScannerBufferList::SplitBuffer( const Position& pos )
   {
     // splitting to the right keeps the work string and any extant token
     // pointing to and holding a reference count on the same buffer.
     Buffer* bufferToSplit = pos.mBuffer;
     NS_ASSERTION(bufferToSplit, "null pointer");
     uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
     NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
                  splitOffset <= bufferToSplit->DataLength(),
                  "split offset is outside buffer");
     uint32_t len = bufferToSplit->DataLength() - splitOffset;
     Buffer* new_buffer = AllocBuffer(len);
     if (new_buffer)
       {
         nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
                                       bufferToSplit->DataStart() + splitOffset,
                                       len);
         InsertAfter(new_buffer, bufferToSplit);
         bufferToSplit->SetDataLength(splitOffset);
       }
   }
 // When |aBuf| is the head of the list, removes and frees buffers from the
 // front of the list for as long as the head buffer is not in use.  Does
 // nothing when |aBuf| is not the head.
 void
 nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
   {
     if (aBuf != Head())
       return;

     while (!mBuffers.isEmpty() && !Head()->IsInUse())
       {
         Buffer* unused = Head();
         unused->remove();
         free(unused);
       }
   }
 size_t
 nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
   {
     size_t result = 0;
     if (aStart.mBuffer == aEnd.mBuffer)
       {
         result = aEnd.mPosition - aStart.mPosition;
       }
     else
       {
         result = aStart.mBuffer->DataEnd() - aStart.mPosition;
         for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
           result += b->DataLength();
         result += aEnd.mPosition - aEnd.mBuffer->DataStart();
       }
     return result;
   }
 /**
  * nsScannerSubstring
  */
 // Constructs an empty substring: null start and end positions, no buffer
 // list, zero length, and the dirty flag set so that AsString() rebuilds the
 // flattened representation on first use.
 nsScannerSubstring::nsScannerSubstring()
   : mStart(nullptr, nullptr)
   , mEnd(nullptr, nullptr)
   , mBufferList(nullptr)
   , mLength(0)
   , mIsDirty(true)
   {
   }
 // Constructs a substring over a copy of |s|.  mBufferList starts out null;
 // Rebind() then allocates a new buffer list from the string and initializes
 // the range from it.
 nsScannerSubstring::nsScannerSubstring( const nsAString& s )
   : mBufferList(nullptr)
   , mIsDirty(true)
   {
     Rebind(s);
   }
 // Destructor: all cleanup is delegated to release_ownership_of_buffer_list().
 nsScannerSubstring::~nsScannerSubstring()
   {
     release_ownership_of_buffer_list();
   }
 int32_t
 nsScannerSubstring::CountChar( char16_t c ) const
   {
       /*
         re-write this to use a counting sink
        */
     size_type result = 0;
     size_type lengthToExamine = Length();
     nsScannerIterator iter;
     for ( BeginReading(iter); ; )
       {
         int32_t lengthToExamineInThisFragment = iter.size_forward();
         const char16_t* fromBegin = iter.get();
         result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
         if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
           return result;
         iter.advance(lengthToExamineInThisFragment);
       }
       // never reached; quiets warnings
     return 0;
   }
 // Re-points this substring at the range of |aString| delimited by |aStart|
 // and |aEnd|, sharing aString's buffer list.  The cached length is recomputed
 // and the flattened representation is marked stale.
 void
 nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
                             const nsScannerIterator& aStart,
                             const nsScannerIterator& aEnd )
   {
     // allow for the case where &aString == this
     // (the new ownership is acquired before the old one is released)
     aString.acquire_ownership_of_buffer_list();
     release_ownership_of_buffer_list();
     mStart      = aStart;
     mEnd        = aEnd;
     mBufferList = aString.mBufferList;
     mLength     = Distance(aStart, aEnd);
     // force AsString() to rebuild mFlattenedRep
     mIsDirty    = true;
   }
 void
 nsScannerSubstring::Rebind( const nsAString& aString )
   {
     release_ownership_of_buffer_list();
     mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
     mIsDirty    = true;
     init_range_from_buffer_list();
     acquire_ownership_of_buffer_list();
   }
 // Returns the substring as a flat string.  The flattened representation is
 // cached in mFlattenedRep and only rebuilt while mIsDirty is set: a substring
 // living in a single buffer is rebound in place, one spanning several buffers
 // is copied.
 const nsSubstring&
 nsScannerSubstring::AsString() const
   {
     if (!mIsDirty)
       return mFlattenedRep;

     // The cache is updated from a const method, hence the cast.
     nsScannerSubstring* self = const_cast<nsScannerSubstring*>(this);
     if (mStart.mBuffer != mEnd.mBuffer) {
       // Multiple fragments: copy the data into a flattened buffer.
       nsScannerIterator first, last;
       CopyUnicodeTo(BeginReading(first), EndReading(last), self->mFlattenedRep);
     } else {
       // Single fragment: just expose it as a substring.
       self->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
     }
     self->mIsDirty = false;
     return mFlattenedRep;
   }
 // Positions |iter| at the first character of this substring and returns it.
 // The iterator's fragment covers the start buffer from mStart up to either
 // mEnd (single-buffer substring) or the end of that buffer's data.
 nsScannerIterator&
 nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
   {
     const bool singleBuffer = (mStart.mBuffer == mEnd.mBuffer);

     iter.mOwner = this;
     iter.mPosition = mStart.mPosition;
     iter.mFragment.mBuffer = mStart.mBuffer;
     iter.mFragment.mFragmentStart = mStart.mPosition;
     if (singleBuffer)
       iter.mFragment.mFragmentEnd = mEnd.mPosition;
     else
       iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();

     iter.normalize_forward();
     return iter;
   }
 // Positions |iter| at the end of this substring and returns it.  The
 // iterator's fragment covers the end buffer from either mStart (single-buffer
 // substring) or the start of that buffer's data, up to mEnd.
 nsScannerIterator&
 nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
   {
     const bool singleBuffer = (mStart.mBuffer == mEnd.mBuffer);

     iter.mOwner = this;
     iter.mPosition = mEnd.mPosition;
     iter.mFragment.mBuffer = mEnd.mBuffer;
     iter.mFragment.mFragmentEnd = mEnd.mPosition;
     if (singleBuffer)
       iter.mFragment.mFragmentStart = mStart.mPosition;
     else
       iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();

     // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
     return iter;
   }
 // Advances |frag| to the following buffer of this substring, clamping the
 // fragment bounds to mStart/mEnd when the new buffer is the first or last
 // one.  Returns false, leaving |frag| untouched, when |frag| is already on
 // the end buffer.
 bool
 nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
   {
     // Already on the last buffer of the substring: nothing follows.
     if (frag.mBuffer == mEnd.mBuffer)
       return false;

     frag.mBuffer = frag.mBuffer->getNext();

     const bool onFirstBuffer = (frag.mBuffer == mStart.mBuffer);
     const bool onLastBuffer = (frag.mBuffer == mEnd.mBuffer);

     if (onFirstBuffer)
       frag.mFragmentStart = mStart.mPosition;
     else
       frag.mFragmentStart = frag.mBuffer->DataStart();

     if (onLastBuffer)
       frag.mFragmentEnd = mEnd.mPosition;
     else
       frag.mFragmentEnd = frag.mBuffer->DataEnd();

     return true;
   }
 // Moves |frag| back to the preceding buffer of this substring, clamping the
 // fragment bounds to mStart/mEnd when the new buffer is the first or last
 // one.  Returns false, leaving |frag| untouched, when |frag| is already on
 // the start buffer.
 bool
 nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
   {
     // Already on the first buffer of the substring: nothing precedes it.
     if (frag.mBuffer == mStart.mBuffer)
       return false;

     frag.mBuffer = frag.mBuffer->getPrevious();

     const bool onFirstBuffer = (frag.mBuffer == mStart.mBuffer);
     const bool onLastBuffer = (frag.mBuffer == mEnd.mBuffer);

     if (onFirstBuffer)
       frag.mFragmentStart = mStart.mPosition;
     else
       frag.mFragmentStart = frag.mBuffer->DataStart();

     if (onLastBuffer)
       frag.mFragmentEnd = mEnd.mPosition;
     else
       frag.mFragmentEnd = frag.mBuffer->DataEnd();

     return true;
   }
   /**
    * nsScannerString
    */
 // Constructs a scanner string backed by a new buffer list created from
 // |aBuf|, initializes the string's range from that list, and then acquires
 // ownership of the list.
 nsScannerString::nsScannerString( Buffer* aBuf )
   {
     mBufferList = new nsScannerBufferList(aBuf);
     init_range_from_buffer_list();
     acquire_ownership_of_buffer_list();
   }
 // Appends |aBuf| to the buffer list and extends the string to cover it: the
 // length grows by the buffer's data length and the end position moves to the
 // buffer's data end.  |aBuf| is dereferenced unconditionally, so it must not
 // be null.
 void
 nsScannerString::AppendBuffer( Buffer* aBuf )
   {
     mBufferList->Append(aBuf);
     mLength += aBuf->DataLength();
     mEnd.mBuffer = aBuf;
     mEnd.mPosition = aBuf->DataEnd();
     // force AsString() to rebuild the flattened representation
     mIsDirty = true;
   }
 // Moves the start of the string forward to |aIter|, dropping everything in
 // front of it.  The length shrinks by the distance between the old and new
 // start, usage counts are transferred from the old start buffer to the new
 // one, and the buffer list is asked to discard any unreferenced prefix.
 void
 nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
   {
     Position old_start(mStart);
     mStart = aIter;
     mLength -= Position::Distance(old_start, mStart);
     // Increment the new start buffer before decrementing the old one.
     // NOTE(review): presumably so the count never transiently drops when both
     // positions share a buffer -- confirm.
     mStart.mBuffer->IncrementUsageCount();
     old_start.mBuffer->DecrementUsageCount();
     mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
     // force AsString() to rebuild the flattened representation
     mIsDirty = true;
   }
 // Inserts a copy of |aReadable| into the string at |aInsertPoint|.  The buffer
 // holding the insertion point is split there and a new buffer with the
 // inserted characters is linked in right after it.  If that new buffer cannot
 // be allocated, nothing is inserted and the length is left unchanged; the end
 // position is still refreshed because the split may already have changed the
 // tail of the list.
 void
 nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
     /*
      * Warning: this routine manipulates the shared buffer list in an unexpected way.
      *  The original design did not really allow for insertions, but this call promises
      *  that if called for a point after the end of all extant token strings, that no token string
      *  or the work string will be invalidated.
      *
      *  This routine is protected because it is the responsibility of the derived class to keep those promises.
      */
   {
     Position insertPos(aInsertPoint);
     mBufferList->SplitBuffer(insertPos);
       // splitting to the right keeps the work string and any extant token pointing to and
       //  holding a reference count on the same buffer

     Buffer* new_buffer = AllocBufferFromString(aReadable);
       // make a new buffer with all the data to insert...
       //  NOTE: we may have empty space to re-use in the split buffer, measure the cost
       //  of this and decide if we should do the work to fill it

     if (new_buffer)
       {
           // AllocBufferFromString() yields null when allocation fails; a null
           //  buffer must neither be linked into the list nor counted in mLength
         Buffer* buffer_to_split = insertPos.mBuffer;
         mBufferList->InsertAfter(new_buffer, buffer_to_split);
         mLength += aReadable.Length();
       }

       // the split and/or the insertion may have changed which buffer is last
     mEnd.mBuffer = mBufferList->Tail();
     mEnd.mPosition = mEnd.mBuffer->DataEnd();
     mIsDirty = true;
   }
 // Overwrites the character at |aPosition| with |aChar| and marks the
 // flattened representation as stale.
 void
 nsScannerString::ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar)
   {
     // XXX Casting a const to non-const. Unless the base class
     // provides support for writing iterators, this is the best
     // that can be done.
     *const_cast<char16_t*>(aPosition.get()) = aChar;
     mIsDirty = true;
   }
   /**
    * nsScannerSharedSubstring
    */
 void
 nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
                               const nsScannerIterator &aEnd)
 {
   // If the start and end positions are inside the same buffer, we must
   // acquire ownership of the buffer.  If not, we can optimize by not holding
   // onto it.
   Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
   bool sameBuffer = buffer == aEnd.buffer();
   nsScannerBufferList *bufferList;
   if (sameBuffer) {
     bufferList = aStart.mOwner->mBufferList;
     bufferList->AddRef();
     buffer->IncrementUsageCount();
   }
   if (mBufferList)
     ReleaseBuffer();
   if (sameBuffer) {
     mBuffer = buffer;
     mBufferList = bufferList;
     mString.Rebind(aStart.mPosition, aEnd.mPosition);
   } else {
     mBuffer = nullptr;
     mBufferList = nullptr;
     CopyUnicodeTo(aStart, aEnd, mString);
   }
 }
 // Gives up this substring's hold on its buffer: decrements the buffer's usage
 // count, lets the buffer list discard any unreferenced prefix starting at
 // that buffer, and releases the reference on the buffer list.  mBuffer and
 // mBufferList are not reset here; callers do that themselves.
 void
 nsScannerSharedSubstring::ReleaseBuffer()
 {
   NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
   mBuffer->DecrementUsageCount();
   mBufferList->DiscardUnreferencedPrefix(mBuffer);
   mBufferList->Release();
 }
 // Detaches mString from the scanner buffer by giving it its own copy of the
 // data, then releases the buffer and clears mBuffer/mBufferList.
 // ReleaseBuffer() is invoked unconditionally, so this must only be called
 // while mBufferList is non-null.
 void
 nsScannerSharedSubstring::MakeMutable()
 {
   nsString temp(mString); // this will force a copy of the data
   mString.Assign(temp);   // mString will now share the just-allocated buffer
   ReleaseBuffer();
   mBuffer = nullptr;
   mBufferList = nullptr;
 }
   /**
    * utils -- based on code from nsReadableUtils.cpp
    */
 // private helper function
 // Copies the characters in [first, last) to |result|, one readable run at a
 // time, advancing |first| as it goes.  Returns |result|.
 static inline
 nsAString::iterator&
 copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
   {
     typedef nsCharSourceTraits<nsScannerIterator> SourceTraits;
     typedef nsCharSinkTraits<nsAString::iterator> SinkTraits;

     for ( ; first != last; )
       {
         // Number of characters readable from |first| in one contiguous run.
         const uint32_t chunk = SourceTraits::readable_distance(first, last);
         SinkTraits::write(result, SourceTraits::read(first), chunk);
         NS_ASSERTION(chunk > 0, "|copy_multifragment_string| will never terminate");
         SourceTraits::advance(first, chunk);
       }

     return result;
   }
 void
 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
                const nsScannerIterator& aSrcEnd,
                nsAString& aDest )
   {
     nsAString::iterator writer;
     if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible_t())) {
       aDest.Truncate();
       return; // out of memory
     }
     aDest.BeginWriting(writer);
     nsScannerIterator fromBegin(aSrcStart);
     copy_multifragment_string(fromBegin, aSrcEnd, writer);
   }
 // Appends the characters between |aSrcStart| and |aSrcEnd| to the shared
 // substring |aDest|.  An empty destination is simply rebound to the source
 // range; otherwise the characters are appended to its writable string.
 void
 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
                  const nsScannerIterator& aSrcEnd,
                  nsScannerSharedSubstring& aDest )
   {
     if (!aDest.str().IsEmpty()) {
       // The dest string is not empty, so it can't be a dependent substring.
       AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
       return;
     }

     // We can just make |aDest| point to the buffer.
     // This will take care of copying if the buffer spans fragments.
     aDest.Rebind(aSrcStart, aSrcEnd);
   }
 void
 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
                  const nsScannerIterator& aSrcEnd,
                  nsAString& aDest )
   {
     nsAString::iterator writer;
     uint32_t oldLength = aDest.Length();
     if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible_t()))
       return; // out of memory
     aDest.BeginWriting(writer).advance(oldLength);
     nsScannerIterator fromBegin(aSrcStart);
     copy_multifragment_string(fromBegin, aSrcEnd, writer);
   }
 // Searches [aSearchStart, aSearchEnd) for |aChar|, one fragment at a time.
 // Returns true with |aSearchStart| advanced to the match, or false with
 // |aSearchStart| advanced to |aSearchEnd|.
 bool
 FindCharInReadable( char16_t aChar,
                     nsScannerIterator& aSearchStart,
                     const nsScannerIterator& aSearchEnd )
   {
     while ( aSearchStart != aSearchEnd )
       {
         // Characters that can be examined contiguously from the current position.
         int32_t span;
         if ( !SameFragment(aSearchStart, aSearchEnd) )
           span = aSearchStart.size_forward();
         else
           span = aSearchEnd.get() - aSearchStart.get();

         const char16_t* hit = nsCharTraits<char16_t>::find(aSearchStart.get(), span, aChar);
         if ( !hit ) {
           // Not in this run; move on to the next one.
           aSearchStart.advance(span);
           continue;
         }

         aSearchStart.advance( hit - aSearchStart.get() );
         return true;
       }

     return false;
   }
 // Searches [aSearchStart, aSearchEnd) for the first occurrence of |aPattern|.
 // |compare| is invoked on one character at a time and a non-zero result is
 // treated as a mismatch.
 //
 // On success returns true with |aSearchStart| at the first matched character
 // and |aSearchEnd| just past the last matched character.  On failure returns
 // false with |aSearchStart| equal to |aSearchEnd| (which is left unchanged).
 //
 // NOTE(review): the first character of |aPattern| is read without checking
 // that the pattern is non-empty -- confirm callers never pass an empty
 // pattern.
 bool
 FindInReadable( const nsAString& aPattern,
                 nsScannerIterator& aSearchStart,
                 nsScannerIterator& aSearchEnd,
                 const nsStringComparator& compare )
   {
     bool found_it = false;
       // only bother searching at all if we're given a non-empty range to search
     if ( aSearchStart != aSearchEnd )
       {
         nsAString::const_iterator aPatternStart, aPatternEnd;
         aPattern.BeginReading(aPatternStart);
         aPattern.EndReading(aPatternEnd);
           // outer loop keeps searching till we find it or run out of string to search
         while ( !found_it )
           {
               // fast inner loop (that's what it's called, not what it is) looks for a potential match
             while ( aSearchStart != aSearchEnd &&
                     compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
               ++aSearchStart;
               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
             if ( aSearchStart == aSearchEnd )
               break;
               // otherwise, we're at a potential match, let's see if we really hit one
             nsAString::const_iterator testPattern(aPatternStart);
             nsScannerIterator testSearch(aSearchStart);
               // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
             for(;;)
               {
                   // we already compared the first character in the outer loop,
                   //  so we'll advance before the next comparison
                 ++testPattern;
                 ++testSearch;
                   // if we verified all the way to the end of the pattern, then we found it!
                 if ( testPattern == aPatternEnd )
                   {
                     found_it = true;
                     aSearchEnd = testSearch; // return the exact found range through the parameters
                     break;
                   }
                   // if we got to end of the string we're searching before we hit the end of the
                   //  pattern, we'll never find what we're looking for
                 if ( testSearch == aSearchEnd )
                   {
                       // collapsing the range makes the outer loop exit with no match
                     aSearchStart = aSearchEnd;
                     break;
                   }
                   // else if we mismatched ... it's time to advance to the next search position
                   //  and get back into the `fast' loop
                 if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
                   {
                     ++aSearchStart;
                     break;
                   }
               }
           }
       }
     return found_it;
   }
   /**
    * This implementation is simple, but does too much work.
    * It searches the entire string from left to right, and returns the last match found, if any.
    * This implementation will be replaced when I get |reverse_iterator|s working.
    */
 bool
 RFindInReadable( const nsAString& aPattern,
                  nsScannerIterator& aSearchStart,
                  nsScannerIterator& aSearchEnd,
                  const nsStringComparator& aComparator )
   {
     bool found_it = false;
     nsScannerIterator savedSearchEnd(aSearchEnd);
     nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
     while ( searchStart != searchEnd )
       {
         if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
           {
             found_it = true;
               // this is the best match so far, so remember it
             aSearchStart = searchStart;
             aSearchEnd = searchEnd;
               // ...and get ready to search some more
               //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
             ++searchStart;
             searchEnd = savedSearchEnd;
           }
       }
       // if we never found it, return an empty range
     if ( !found_it )
       aSearchStart = aSearchEnd;
     return found_it;
   }

The Tor Browser / annotate

parser/htmlparser/src/nsScannerString.cpp@6474c204b198 (annotated)

parser/htmlparser/src/nsScannerString.cpp