The Tor Browser: diff parser/htmlparser/src/nsScannerString.cpp

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/htmlparser/src/nsScannerString.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,648 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim:set ts=2 sw=2 sts=2 et cindent: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include <stdlib.h>
    1.11 +#include "nsScannerString.h"
    1.12 +
    1.13 +
    1.14 +  /**
    1.15 +   * nsScannerBufferList
    1.16 +   */
    1.17 +
// Largest character count that AllocBuffer() accepts.  With |capacity| at or
// below this bound, the allocation size computed there,
// sizeof(Buffer) + (capacity + 1) * sizeof(char16_t), stays below UINT32_MAX.
#define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
                      (sizeof(Buffer) + sizeof(char16_t)))
    1.20 +
    1.21 +nsScannerBufferList::Buffer*
    1.22 +nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
    1.23 +  {
    1.24 +    uint32_t len = aString.Length();
    1.25 +    Buffer* buf = AllocBuffer(len);
    1.26 +
    1.27 +    if (buf)
    1.28 +      {
    1.29 +        nsAString::const_iterator source;
    1.30 +        aString.BeginReading(source);
    1.31 +        nsCharTraits<char16_t>::copy(buf->DataStart(), source.get(), len);
    1.32 +      }
    1.33 +    return buf;
    1.34 +  }
    1.35 +
    1.36 +nsScannerBufferList::Buffer*
    1.37 +nsScannerBufferList::AllocBuffer( uint32_t capacity )
    1.38 +  {
    1.39 +    if (capacity > MAX_CAPACITY)
    1.40 +      return nullptr;
    1.41 +
    1.42 +    void* ptr = malloc(sizeof(Buffer) + (capacity + 1) * sizeof(char16_t));
    1.43 +    if (!ptr)
    1.44 +      return nullptr;
    1.45 +
    1.46 +    Buffer* buf = new (ptr) Buffer();
    1.47 +
    1.48 +    buf->mUsageCount = 0;
    1.49 +    buf->mDataEnd = buf->DataStart() + capacity;
    1.50 +
    1.51 +    // XXX null terminate.  this shouldn't be required, but we do it because
    1.52 +    // nsScanner erroneously thinks it can dereference DataEnd :-(
    1.53 +    *buf->mDataEnd = char16_t(0);
    1.54 +    return buf;
    1.55 +  }
    1.56 +
    1.57 +void
    1.58 +nsScannerBufferList::ReleaseAll()
    1.59 +  {
    1.60 +    while (!mBuffers.isEmpty())
    1.61 +      {
    1.62 +        Buffer* node = mBuffers.popFirst();
    1.63 +        //printf(">>> freeing buffer @%p\n", node);
    1.64 +        free(node);
    1.65 +      }
    1.66 +  }
    1.67 +
    1.68 +void
    1.69 +nsScannerBufferList::SplitBuffer( const Position& pos )
    1.70 +  {
    1.71 +    // splitting to the right keeps the work string and any extant token
    1.72 +    // pointing to and holding a reference count on the same buffer.
    1.73 +
    1.74 +    Buffer* bufferToSplit = pos.mBuffer;
    1.75 +    NS_ASSERTION(bufferToSplit, "null pointer");
    1.76 +
    1.77 +    uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
    1.78 +    NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
    1.79 +                 splitOffset <= bufferToSplit->DataLength(),
    1.80 +                 "split offset is outside buffer");
    1.81 +    
    1.82 +    uint32_t len = bufferToSplit->DataLength() - splitOffset;
    1.83 +    Buffer* new_buffer = AllocBuffer(len);
    1.84 +    if (new_buffer)
    1.85 +      {
    1.86 +        nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
    1.87 +                                      bufferToSplit->DataStart() + splitOffset,
    1.88 +                                      len);
    1.89 +        InsertAfter(new_buffer, bufferToSplit);
    1.90 +        bufferToSplit->SetDataLength(splitOffset);
    1.91 +      }
    1.92 +  }
    1.93 +
    1.94 +void
    1.95 +nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
    1.96 +  {
    1.97 +    if (aBuf == Head())
    1.98 +      {
    1.99 +        while (!mBuffers.isEmpty() && !Head()->IsInUse())
   1.100 +          {
   1.101 +            Buffer* buffer = Head();
   1.102 +            buffer->remove();
   1.103 +            free(buffer);
   1.104 +          }
   1.105 +      }
   1.106 +  }
   1.107 +
   1.108 +size_t
   1.109 +nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
   1.110 +  {
   1.111 +    size_t result = 0;
   1.112 +    if (aStart.mBuffer == aEnd.mBuffer)
   1.113 +      {
   1.114 +        result = aEnd.mPosition - aStart.mPosition;
   1.115 +      }
   1.116 +    else
   1.117 +      {
   1.118 +        result = aStart.mBuffer->DataEnd() - aStart.mPosition;
   1.119 +        for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
   1.120 +          result += b->DataLength();
   1.121 +        result += aEnd.mPosition - aEnd.mBuffer->DataStart();
   1.122 +      }
   1.123 +    return result;
   1.124 +  }
   1.125 +
   1.126 +
   1.127 +/**
   1.128 + * nsScannerSubstring
   1.129 + */
   1.130 +
   1.131 +nsScannerSubstring::nsScannerSubstring()
   1.132 +  : mStart(nullptr, nullptr)
   1.133 +  , mEnd(nullptr, nullptr)
   1.134 +  , mBufferList(nullptr)
   1.135 +  , mLength(0)
   1.136 +  , mIsDirty(true)
   1.137 +  {
   1.138 +  }
   1.139 +
   1.140 +nsScannerSubstring::nsScannerSubstring( const nsAString& s )
   1.141 +  : mBufferList(nullptr)
   1.142 +  , mIsDirty(true)
   1.143 +  {
   1.144 +    Rebind(s);
   1.145 +  }
   1.146 +
   1.147 +nsScannerSubstring::~nsScannerSubstring()
   1.148 +  {
   1.149 +    release_ownership_of_buffer_list();
   1.150 +  }
   1.151 +
   1.152 +int32_t
   1.153 +nsScannerSubstring::CountChar( char16_t c ) const
   1.154 +  {
   1.155 +      /*
   1.156 +        re-write this to use a counting sink
   1.157 +       */
   1.158 +
   1.159 +    size_type result = 0;
   1.160 +    size_type lengthToExamine = Length();
   1.161 +
   1.162 +    nsScannerIterator iter;
   1.163 +    for ( BeginReading(iter); ; )
   1.164 +      {
   1.165 +        int32_t lengthToExamineInThisFragment = iter.size_forward();
   1.166 +        const char16_t* fromBegin = iter.get();
   1.167 +        result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
   1.168 +        if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
   1.169 +          return result;
   1.170 +        iter.advance(lengthToExamineInThisFragment);
   1.171 +      }
   1.172 +      // never reached; quiets warnings
   1.173 +    return 0;
   1.174 +  }
   1.175 +
   1.176 +void
   1.177 +nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
   1.178 +                            const nsScannerIterator& aStart, 
   1.179 +                            const nsScannerIterator& aEnd )
   1.180 +  {
   1.181 +    // allow for the case where &aString == this
   1.182 +
   1.183 +    aString.acquire_ownership_of_buffer_list();
   1.184 +    release_ownership_of_buffer_list();
   1.185 +
   1.186 +    mStart      = aStart;
   1.187 +    mEnd        = aEnd;
   1.188 +    mBufferList = aString.mBufferList;
   1.189 +    mLength     = Distance(aStart, aEnd);
   1.190 +    mIsDirty    = true;
   1.191 +  }
   1.192 +
   1.193 +void
   1.194 +nsScannerSubstring::Rebind( const nsAString& aString )
   1.195 +  {
   1.196 +    release_ownership_of_buffer_list();
   1.197 +
   1.198 +    mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
   1.199 +    mIsDirty    = true;
   1.200 +
   1.201 +    init_range_from_buffer_list();
   1.202 +    acquire_ownership_of_buffer_list();
   1.203 +  }
   1.204 +
   1.205 +const nsSubstring&
   1.206 +nsScannerSubstring::AsString() const
   1.207 +  {
   1.208 +    if (mIsDirty)
   1.209 +      {
   1.210 +        nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
   1.211 +
   1.212 +        if (mStart.mBuffer == mEnd.mBuffer) {
   1.213 +          // We only have a single fragment to deal with, so just return it
   1.214 +          // as a substring.
   1.215 +          mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
   1.216 +        } else {
   1.217 +          // Otherwise, we need to copy the data into a flattened buffer.
   1.218 +          nsScannerIterator start, end;
   1.219 +          CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep);
   1.220 +        }
   1.221 +
   1.222 +        mutable_this->mIsDirty = false;
   1.223 +      }
   1.224 +
   1.225 +    return mFlattenedRep;
   1.226 +  }
   1.227 +
   1.228 +nsScannerIterator&
   1.229 +nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
   1.230 +  {
   1.231 +    iter.mOwner = this;
   1.232 +
   1.233 +    iter.mFragment.mBuffer = mStart.mBuffer;
   1.234 +    iter.mFragment.mFragmentStart = mStart.mPosition;
   1.235 +    if (mStart.mBuffer == mEnd.mBuffer)
   1.236 +      iter.mFragment.mFragmentEnd = mEnd.mPosition;
   1.237 +    else
   1.238 +      iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
   1.239 +
   1.240 +    iter.mPosition = mStart.mPosition;
   1.241 +    iter.normalize_forward();
   1.242 +    return iter;
   1.243 +  }
   1.244 +
   1.245 +nsScannerIterator&
   1.246 +nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
   1.247 +  {
   1.248 +    iter.mOwner = this;
   1.249 +
   1.250 +    iter.mFragment.mBuffer = mEnd.mBuffer;
   1.251 +    iter.mFragment.mFragmentEnd = mEnd.mPosition;
   1.252 +    if (mStart.mBuffer == mEnd.mBuffer)
   1.253 +      iter.mFragment.mFragmentStart = mStart.mPosition;
   1.254 +    else
   1.255 +      iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
   1.256 +
   1.257 +    iter.mPosition = mEnd.mPosition;
   1.258 +    // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
   1.259 +    return iter;
   1.260 +  }
   1.261 +
   1.262 +bool
   1.263 +nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
   1.264 +  {
   1.265 +    // check to see if we are at the end of the buffer list
   1.266 +    if (frag.mBuffer == mEnd.mBuffer)
   1.267 +      return false;
   1.268 +
   1.269 +    frag.mBuffer = frag.mBuffer->getNext();
   1.270 +
   1.271 +    if (frag.mBuffer == mStart.mBuffer)
   1.272 +      frag.mFragmentStart = mStart.mPosition;
   1.273 +    else
   1.274 +      frag.mFragmentStart = frag.mBuffer->DataStart();
   1.275 +
   1.276 +    if (frag.mBuffer == mEnd.mBuffer)
   1.277 +      frag.mFragmentEnd = mEnd.mPosition;
   1.278 +    else
   1.279 +      frag.mFragmentEnd = frag.mBuffer->DataEnd();
   1.280 +
   1.281 +    return true;
   1.282 +  }
   1.283 +
   1.284 +bool
   1.285 +nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
   1.286 +  {
   1.287 +    // check to see if we are at the beginning of the buffer list
   1.288 +    if (frag.mBuffer == mStart.mBuffer)
   1.289 +      return false;
   1.290 +
   1.291 +    frag.mBuffer = frag.mBuffer->getPrevious();
   1.292 +
   1.293 +    if (frag.mBuffer == mStart.mBuffer)
   1.294 +      frag.mFragmentStart = mStart.mPosition;
   1.295 +    else
   1.296 +      frag.mFragmentStart = frag.mBuffer->DataStart();
   1.297 +
   1.298 +    if (frag.mBuffer == mEnd.mBuffer)
   1.299 +      frag.mFragmentEnd = mEnd.mPosition;
   1.300 +    else
   1.301 +      frag.mFragmentEnd = frag.mBuffer->DataEnd();
   1.302 +
   1.303 +    return true;
   1.304 +  }
   1.305 +
   1.306 +
   1.307 +  /**
   1.308 +   * nsScannerString
   1.309 +   */
   1.310 +
   1.311 +nsScannerString::nsScannerString( Buffer* aBuf )
   1.312 +  {
   1.313 +    mBufferList = new nsScannerBufferList(aBuf);
   1.314 +
   1.315 +    init_range_from_buffer_list();
   1.316 +    acquire_ownership_of_buffer_list();
   1.317 +  }
   1.318 +
   1.319 +void
   1.320 +nsScannerString::AppendBuffer( Buffer* aBuf )
   1.321 +  {
   1.322 +    mBufferList->Append(aBuf);
   1.323 +    mLength += aBuf->DataLength();
   1.324 +
   1.325 +    mEnd.mBuffer = aBuf;
   1.326 +    mEnd.mPosition = aBuf->DataEnd();
   1.327 +
   1.328 +    mIsDirty = true;
   1.329 +  }
   1.330 +
   1.331 +void
   1.332 +nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
   1.333 +  {
   1.334 +    Position old_start(mStart);
   1.335 +    mStart = aIter;
   1.336 +    mLength -= Position::Distance(old_start, mStart);
   1.337 +    
   1.338 +    mStart.mBuffer->IncrementUsageCount();
   1.339 +    old_start.mBuffer->DecrementUsageCount();
   1.340 +
   1.341 +    mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
   1.342 +
   1.343 +    mIsDirty = true;
   1.344 +  }
   1.345 +
   1.346 +void
   1.347 +nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
   1.348 +    /*
   1.349 +     * Warning: this routine manipulates the shared buffer list in an unexpected way.
   1.350 +     *  The original design did not really allow for insertions, but this call promises
   1.351 +     *  that if called for a point after the end of all extant token strings, that no token string
   1.352 +     *  or the work string will be invalidated.
   1.353 +     *
   1.354 +     *  This routine is protected because it is the responsibility of the derived class to keep those promises.
   1.355 +     */
   1.356 +  {
   1.357 +    Position insertPos(aInsertPoint);
   1.358 +
   1.359 +    mBufferList->SplitBuffer(insertPos);
   1.360 +      // splitting to the right keeps the work string and any extant token pointing to and
   1.361 +      //  holding a reference count on the same buffer
   1.362 +
   1.363 +    Buffer* new_buffer = AllocBufferFromString(aReadable);
   1.364 +      // make a new buffer with all the data to insert...
   1.365 +      //  BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
   1.366 +      //  of this and decide if we should do the work to fill it
   1.367 +
   1.368 +    Buffer* buffer_to_split = insertPos.mBuffer;
   1.369 +    mBufferList->InsertAfter(new_buffer, buffer_to_split);
   1.370 +    mLength += aReadable.Length();
   1.371 +
   1.372 +    mEnd.mBuffer = mBufferList->Tail();
   1.373 +    mEnd.mPosition = mEnd.mBuffer->DataEnd();
   1.374 +
   1.375 +    mIsDirty = true;
   1.376 +  }
   1.377 +
   1.378 +void
   1.379 +nsScannerString::ReplaceCharacter(nsScannerIterator& aPosition, char16_t aChar)
   1.380 +  {
   1.381 +    // XXX Casting a const to non-const. Unless the base class
   1.382 +    // provides support for writing iterators, this is the best
   1.383 +    // that can be done.
   1.384 +    char16_t* pos = const_cast<char16_t*>(aPosition.get());
   1.385 +    *pos = aChar;
   1.386 +
   1.387 +    mIsDirty = true;
   1.388 +  }
   1.389 +
   1.390 +
   1.391 +  /**
   1.392 +   * nsScannerSharedSubstring
   1.393 +   */
   1.394 +
   1.395 +void
   1.396 +nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
   1.397 +                              const nsScannerIterator &aEnd)
   1.398 +{
   1.399 +  // If the start and end positions are inside the same buffer, we must
   1.400 +  // acquire ownership of the buffer.  If not, we can optimize by not holding
   1.401 +  // onto it.
   1.402 +
   1.403 +  Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
   1.404 +  bool sameBuffer = buffer == aEnd.buffer();
   1.405 +
   1.406 +  nsScannerBufferList *bufferList;
   1.407 +
   1.408 +  if (sameBuffer) {
   1.409 +    bufferList = aStart.mOwner->mBufferList;
   1.410 +    bufferList->AddRef();
   1.411 +    buffer->IncrementUsageCount();
   1.412 +  }
   1.413 +
   1.414 +  if (mBufferList)
   1.415 +    ReleaseBuffer();
   1.416 +
   1.417 +  if (sameBuffer) {
   1.418 +    mBuffer = buffer;
   1.419 +    mBufferList = bufferList;
   1.420 +    mString.Rebind(aStart.mPosition, aEnd.mPosition);
   1.421 +  } else {
   1.422 +    mBuffer = nullptr;
   1.423 +    mBufferList = nullptr;
   1.424 +    CopyUnicodeTo(aStart, aEnd, mString);
   1.425 +  }
   1.426 +}
   1.427 +
   1.428 +void
   1.429 +nsScannerSharedSubstring::ReleaseBuffer()
   1.430 +{
   1.431 +  NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
   1.432 +  mBuffer->DecrementUsageCount();
   1.433 +  mBufferList->DiscardUnreferencedPrefix(mBuffer);
   1.434 +  mBufferList->Release();
   1.435 +}
   1.436 +
   1.437 +void
   1.438 +nsScannerSharedSubstring::MakeMutable()
   1.439 +{
   1.440 +  nsString temp(mString); // this will force a copy of the data
   1.441 +  mString.Assign(temp);   // mString will now share the just-allocated buffer
   1.442 +
   1.443 +  ReleaseBuffer();
   1.444 +
   1.445 +  mBuffer = nullptr;
   1.446 +  mBufferList = nullptr;
   1.447 +}
   1.448 +
   1.449 +  /**
   1.450 +   * utils -- based on code from nsReadableUtils.cpp
   1.451 +   */
   1.452 +
   1.453 +// private helper function
   1.454 +static inline
   1.455 +nsAString::iterator&
   1.456 +copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
   1.457 +  {
   1.458 +    typedef nsCharSourceTraits<nsScannerIterator> source_traits;
   1.459 +    typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
   1.460 +
   1.461 +    while ( first != last )
   1.462 +      {
   1.463 +        uint32_t distance = source_traits::readable_distance(first, last);
   1.464 +        sink_traits::write(result, source_traits::read(first), distance);
   1.465 +        NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");
   1.466 +        source_traits::advance(first, distance);
   1.467 +      }
   1.468 +
   1.469 +    return result;
   1.470 +  }
   1.471 +
   1.472 +void
   1.473 +CopyUnicodeTo( const nsScannerIterator& aSrcStart,
   1.474 +               const nsScannerIterator& aSrcEnd,
   1.475 +               nsAString& aDest )
   1.476 +  {
   1.477 +    nsAString::iterator writer;
   1.478 +    if (!aDest.SetLength(Distance(aSrcStart, aSrcEnd), mozilla::fallible_t())) {
   1.479 +      aDest.Truncate();
   1.480 +      return; // out of memory
   1.481 +    }
   1.482 +    aDest.BeginWriting(writer);
   1.483 +    nsScannerIterator fromBegin(aSrcStart);
   1.484 +    
   1.485 +    copy_multifragment_string(fromBegin, aSrcEnd, writer);
   1.486 +  }
   1.487 +
   1.488 +void
   1.489 +AppendUnicodeTo( const nsScannerIterator& aSrcStart,
   1.490 +                 const nsScannerIterator& aSrcEnd,
   1.491 +                 nsScannerSharedSubstring& aDest )
   1.492 +  {
   1.493 +    // Check whether we can just create a dependent string.
   1.494 +    if (aDest.str().IsEmpty()) {
   1.495 +      // We can just make |aDest| point to the buffer.
   1.496 +      // This will take care of copying if the buffer spans fragments.
   1.497 +      aDest.Rebind(aSrcStart, aSrcEnd);
   1.498 +    } else {
   1.499 +      // The dest string is not empty, so it can't be a dependent substring.
   1.500 +      AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
   1.501 +    }
   1.502 +  }
   1.503 +
   1.504 +void
   1.505 +AppendUnicodeTo( const nsScannerIterator& aSrcStart,
   1.506 +                 const nsScannerIterator& aSrcEnd,
   1.507 +                 nsAString& aDest )
   1.508 +  {
   1.509 +    nsAString::iterator writer;
   1.510 +    uint32_t oldLength = aDest.Length();
   1.511 +    if (!aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd), mozilla::fallible_t()))
   1.512 +      return; // out of memory
   1.513 +    aDest.BeginWriting(writer).advance(oldLength);
   1.514 +    nsScannerIterator fromBegin(aSrcStart);
   1.515 +    
   1.516 +    copy_multifragment_string(fromBegin, aSrcEnd, writer);
   1.517 +  }
   1.518 +
   1.519 +bool
   1.520 +FindCharInReadable( char16_t aChar,
   1.521 +                    nsScannerIterator& aSearchStart,
   1.522 +                    const nsScannerIterator& aSearchEnd )
   1.523 +  {
   1.524 +    while ( aSearchStart != aSearchEnd )
   1.525 +      {
   1.526 +        int32_t fragmentLength;
   1.527 +        if ( SameFragment(aSearchStart, aSearchEnd) ) 
   1.528 +          fragmentLength = aSearchEnd.get() - aSearchStart.get();
   1.529 +        else
   1.530 +          fragmentLength = aSearchStart.size_forward();
   1.531 +
   1.532 +        const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
   1.533 +        if ( charFoundAt ) {
   1.534 +          aSearchStart.advance( charFoundAt - aSearchStart.get() );
   1.535 +          return true;
   1.536 +        }
   1.537 +
   1.538 +        aSearchStart.advance(fragmentLength);
   1.539 +      }
   1.540 +
   1.541 +    return false;
   1.542 +  }
   1.543 +
   1.544 +bool
   1.545 +FindInReadable( const nsAString& aPattern,
   1.546 +                nsScannerIterator& aSearchStart,
   1.547 +                nsScannerIterator& aSearchEnd,
   1.548 +                const nsStringComparator& compare )
   1.549 +  {
   1.550 +    bool found_it = false;
   1.551 +
   1.552 +      // only bother searching at all if we're given a non-empty range to search
   1.553 +    if ( aSearchStart != aSearchEnd )
   1.554 +      {
   1.555 +        nsAString::const_iterator aPatternStart, aPatternEnd;
   1.556 +        aPattern.BeginReading(aPatternStart);
   1.557 +        aPattern.EndReading(aPatternEnd);
   1.558 +
   1.559 +          // outer loop keeps searching till we find it or run out of string to search
   1.560 +        while ( !found_it )
   1.561 +          {
   1.562 +              // fast inner loop (that's what it's called, not what it is) looks for a potential match
   1.563 +            while ( aSearchStart != aSearchEnd &&
   1.564 +                    compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
   1.565 +              ++aSearchStart;
   1.566 +
   1.567 +              // if we broke out of the `fast' loop because we're out of string ... we're done: no match
   1.568 +            if ( aSearchStart == aSearchEnd )
   1.569 +              break;
   1.570 +
   1.571 +              // otherwise, we're at a potential match, let's see if we really hit one
   1.572 +            nsAString::const_iterator testPattern(aPatternStart);
   1.573 +            nsScannerIterator testSearch(aSearchStart);
   1.574 +
   1.575 +              // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
   1.576 +            for(;;)
   1.577 +              {
   1.578 +                  // we already compared the first character in the outer loop,
   1.579 +                  //  so we'll advance before the next comparison
   1.580 +                ++testPattern;
   1.581 +                ++testSearch;
   1.582 +
   1.583 +                  // if we verified all the way to the end of the pattern, then we found it!
   1.584 +                if ( testPattern == aPatternEnd )
   1.585 +                  {
   1.586 +                    found_it = true;
   1.587 +                    aSearchEnd = testSearch; // return the exact found range through the parameters
   1.588 +                    break;
   1.589 +                  }
   1.590 +
   1.591 +                  // if we got to end of the string we're searching before we hit the end of the
   1.592 +                  //  pattern, we'll never find what we're looking for
   1.593 +                if ( testSearch == aSearchEnd )
   1.594 +                  {
   1.595 +                    aSearchStart = aSearchEnd;
   1.596 +                    break;
   1.597 +                  }
   1.598 +
   1.599 +                  // else if we mismatched ... it's time to advance to the next search position
   1.600 +                  //  and get back into the `fast' loop
   1.601 +                if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
   1.602 +                  {
   1.603 +                    ++aSearchStart;
   1.604 +                    break;
   1.605 +                  }
   1.606 +              }
   1.607 +          }
   1.608 +      }
   1.609 +
   1.610 +    return found_it;
   1.611 +  }
   1.612 +
   1.613 +  /**
   1.614 +   * This implementation is simple, but does too much work.
   1.615 +   * It searches the entire string from left to right, and returns the last match found, if any.
   1.616 +   * This implementation will be replaced when I get |reverse_iterator|s working.
   1.617 +   */
   1.618 +bool
   1.619 +RFindInReadable( const nsAString& aPattern,
   1.620 +                 nsScannerIterator& aSearchStart,
   1.621 +                 nsScannerIterator& aSearchEnd,
   1.622 +                 const nsStringComparator& aComparator )
   1.623 +  {
   1.624 +    bool found_it = false;
   1.625 +
   1.626 +    nsScannerIterator savedSearchEnd(aSearchEnd);
   1.627 +    nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
   1.628 +
   1.629 +    while ( searchStart != searchEnd )
   1.630 +      {
   1.631 +        if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
   1.632 +          {
   1.633 +            found_it = true;
   1.634 +
   1.635 +              // this is the best match so far, so remember it
   1.636 +            aSearchStart = searchStart;
   1.637 +            aSearchEnd = searchEnd;
   1.638 +
   1.639 +              // ...and get ready to search some more
   1.640 +              //  (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
   1.641 +            ++searchStart;
   1.642 +            searchEnd = savedSearchEnd;
   1.643 +          }
   1.644 +      }
   1.645 +
   1.646 +      // if we never found it, return an empty range
   1.647 +    if ( !found_it )
   1.648 +      aSearchStart = aSearchEnd;
   1.649 +
   1.650 +    return found_it;
   1.651 +  }
The Tor Browser / file diff

diff: parser/htmlparser/src/nsScannerString.cpp

parser/htmlparser/src/nsScannerString.cpp