content/base/src/nsTextFragment.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/base/src/nsTextFragment.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,445 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/*
    1.10 + * A class which represents a fragment of text (eg inside a text
    1.11 + * node); if only codepoints below 256 are used, the text is stored as
    1.12 + * a char*; otherwise the text is stored as a char16_t*
    1.13 + */
    1.14 +
    1.15 +#include "nsTextFragment.h"
    1.16 +#include "nsCRT.h"
    1.17 +#include "nsReadableUtils.h"
    1.18 +#include "nsMemory.h"
    1.19 +#include "nsBidiUtils.h"
    1.20 +#include "nsUnicharUtils.h"
    1.21 +#include "nsUTF8Utils.h"
    1.22 +#include "mozilla/MemoryReporting.h"
    1.23 +#include "mozilla/SSE.h"
    1.24 +#include "nsTextFragmentImpl.h"
    1.25 +#include <algorithm>
    1.26 +
    1.27 +#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
    1.28 +#define TEXTFRAG_MAX_NEWLINES 7
    1.29 +
    1.30 +// Static buffer used for common fragments
    1.31 +static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
    1.32 +static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
    1.33 +static char sSingleCharSharedString[256];
    1.34 +
    1.35 +// static
    1.36 +nsresult
    1.37 +nsTextFragment::Init()
    1.38 +{
    1.39 +  // Create whitespace strings
    1.40 +  uint32_t i;
    1.41 +  for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
    1.42 +    sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
    1.43 +    sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
    1.44 +    NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],
    1.45 +                   NS_ERROR_OUT_OF_MEMORY);
    1.46 +    sSpaceSharedString[i][0] = ' ';
    1.47 +    sTabSharedString[i][0] = ' ';
    1.48 +    uint32_t j;
    1.49 +    for (j = 1; j < 1 + i; ++j) {
    1.50 +      sSpaceSharedString[i][j] = '\n';
    1.51 +      sTabSharedString[i][j] = '\n';
    1.52 +    }
    1.53 +    for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
    1.54 +      sSpaceSharedString[i][j] = ' ';
    1.55 +      sTabSharedString[i][j] = '\t';
    1.56 +    }
    1.57 +  }
    1.58 +
    1.59 +  // Create single-char strings
    1.60 +  for (i = 0; i < 256; ++i) {
    1.61 +    sSingleCharSharedString[i] = i;
    1.62 +  }
    1.63 +
    1.64 +  return NS_OK;
    1.65 +}
    1.66 +
    1.67 +// static
    1.68 +void
    1.69 +nsTextFragment::Shutdown()
    1.70 +{
    1.71 +  uint32_t  i;
    1.72 +  for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
    1.73 +    delete [] sSpaceSharedString[i];
    1.74 +    delete [] sTabSharedString[i];
    1.75 +    sSpaceSharedString[i] = nullptr;
    1.76 +    sTabSharedString[i] = nullptr;
    1.77 +  }
    1.78 +}
    1.79 +
    1.80 +nsTextFragment::~nsTextFragment()
    1.81 +{
    1.82 +  ReleaseText();
    1.83 +  MOZ_COUNT_DTOR(nsTextFragment);
    1.84 +}
    1.85 +
    1.86 +void
    1.87 +nsTextFragment::ReleaseText()
    1.88 +{
    1.89 +  if (mState.mLength && m1b && mState.mInHeap) {
    1.90 +    moz_free(m2b); // m1b == m2b as far as moz_free is concerned
    1.91 +  }
    1.92 +
    1.93 +  m1b = nullptr;
    1.94 +  mState.mIsBidi = false;
    1.95 +
    1.96 +  // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
    1.97 +  mAllBits = 0;
    1.98 +}
    1.99 +
   1.100 +nsTextFragment&
   1.101 +nsTextFragment::operator=(const nsTextFragment& aOther)
   1.102 +{
   1.103 +  ReleaseText();
   1.104 +
   1.105 +  if (aOther.mState.mLength) {
   1.106 +    if (!aOther.mState.mInHeap) {
   1.107 +      m1b = aOther.m1b; // This will work even if aOther is using m2b
   1.108 +    }
   1.109 +    else {
   1.110 +      size_t m2bSize = aOther.mState.mLength *
   1.111 +        (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char));
   1.112 +
   1.113 +      m2b = static_cast<char16_t*>(moz_malloc(m2bSize));
   1.114 +      if (m2b) {
   1.115 +        memcpy(m2b, aOther.m2b, m2bSize);
   1.116 +      } else {
   1.117 +        // allocate a buffer for a single REPLACEMENT CHARACTER
   1.118 +        m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));
   1.119 +        m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER
   1.120 +        mState.mIs2b = true;
   1.121 +        mState.mInHeap = true;
   1.122 +        mState.mLength = 1;
   1.123 +      }
   1.124 +    }
   1.125 +
   1.126 +    if (m1b) {
   1.127 +      mAllBits = aOther.mAllBits;
   1.128 +    }
   1.129 +  }
   1.130 +
   1.131 +  return *this;
   1.132 +}
   1.133 +
   1.134 +static inline int32_t
   1.135 +FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end)
   1.136 +{
   1.137 +  typedef Non8BitParameters<sizeof(size_t)> p;
   1.138 +  const size_t mask = p::mask();
   1.139 +  const uint32_t alignMask = p::alignMask();
   1.140 +  const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
   1.141 +  const int32_t len = end - str;
   1.142 +  int32_t i = 0;
   1.143 +
   1.144 +  // Align ourselves to a word boundary.
   1.145 +  int32_t alignLen =
   1.146 +    std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
   1.147 +  for (; i < alignLen; i++) {
   1.148 +    if (str[i] > 255)
   1.149 +      return i;
   1.150 +  }
   1.151 +
   1.152 +  // Check one word at a time.
   1.153 +  const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
   1.154 +  for (; i < wordWalkEnd; i += numUnicharsPerWord) {
   1.155 +    const size_t word = *reinterpret_cast<const size_t*>(str + i);
   1.156 +    if (word & mask)
   1.157 +      return i;
   1.158 +  }
   1.159 +
   1.160 +  // Take care of the remainder one character at a time.
   1.161 +  for (; i < len; i++) {
   1.162 +    if (str[i] > 255)
   1.163 +      return i;
   1.164 +  }
   1.165 +
   1.166 +  return -1;
   1.167 +}
   1.168 +
   1.169 +#ifdef MOZILLA_MAY_SUPPORT_SSE2
   1.170 +namespace mozilla {
   1.171 +  namespace SSE2 {
   1.172 +    int32_t FirstNon8Bit(const char16_t *str, const char16_t *end);
   1.173 +  }
   1.174 +}
   1.175 +#endif
   1.176 +
   1.177 +/*
   1.178 + * This function returns -1 if all characters in str are 8 bit characters.
   1.179 + * Otherwise, it returns a value less than or equal to the index of the first
   1.180 + * non-8bit character in str. For example, if first non-8bit character is at
   1.181 + * position 25, it may return 25, or for example 24, or 16. But it guarantees
   1.182 + * there is no non-8bit character before returned value.
   1.183 + */
   1.184 +static inline int32_t
   1.185 +FirstNon8Bit(const char16_t *str, const char16_t *end)
   1.186 +{
   1.187 +#ifdef MOZILLA_MAY_SUPPORT_SSE2
   1.188 +  if (mozilla::supports_sse2()) {
   1.189 +    return mozilla::SSE2::FirstNon8Bit(str, end);
   1.190 +  }
   1.191 +#endif
   1.192 +
   1.193 +  return FirstNon8BitUnvectorized(str, end);
   1.194 +}
   1.195 +
   1.196 +bool
   1.197 +nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi)
   1.198 +{
   1.199 +  ReleaseText();
   1.200 +
   1.201 +  if (aLength == 0) {
   1.202 +    return true;
   1.203 +  }
   1.204 +  
   1.205 +  char16_t firstChar = *aBuffer;
   1.206 +  if (aLength == 1 && firstChar < 256) {
   1.207 +    m1b = sSingleCharSharedString + firstChar;
   1.208 +    mState.mInHeap = false;
   1.209 +    mState.mIs2b = false;
   1.210 +    mState.mLength = 1;
   1.211 +
   1.212 +    return true;
   1.213 +  }
   1.214 +
   1.215 +  const char16_t *ucp = aBuffer;
   1.216 +  const char16_t *uend = aBuffer + aLength;
   1.217 +
   1.218 +  // Check if we can use a shared string
   1.219 +  if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
   1.220 +     (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
   1.221 +    if (firstChar == ' ') {
   1.222 +      ++ucp;
   1.223 +    }
   1.224 +
   1.225 +    const char16_t* start = ucp;
   1.226 +    while (ucp < uend && *ucp == '\n') {
   1.227 +      ++ucp;
   1.228 +    }
   1.229 +    const char16_t* endNewLine = ucp;
   1.230 +
   1.231 +    char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
   1.232 +    while (ucp < uend && *ucp == space) {
   1.233 +      ++ucp;
   1.234 +    }
   1.235 +
   1.236 +    if (ucp == uend &&
   1.237 +        endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
   1.238 +        ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
   1.239 +      char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
   1.240 +      m1b = strings[endNewLine - start];
   1.241 +
   1.242 +      // If we didn't find a space in the beginning, skip it now.
   1.243 +      if (firstChar != ' ') {
   1.244 +        ++m1b;
   1.245 +      }
   1.246 +
   1.247 +      mState.mInHeap = false;
   1.248 +      mState.mIs2b = false;
   1.249 +      mState.mLength = aLength;
   1.250 +
   1.251 +      return true;        
   1.252 +    }
   1.253 +  }
   1.254 +
   1.255 +  // See if we need to store the data in ucs2 or not
   1.256 +  int32_t first16bit = FirstNon8Bit(ucp, uend);
   1.257 +
   1.258 +  if (first16bit != -1) { // aBuffer contains no non-8bit character
   1.259 +    // Use ucs2 storage because we have to
   1.260 +    size_t m2bSize = aLength * sizeof(char16_t);
   1.261 +    m2b = (char16_t *)moz_malloc(m2bSize);
   1.262 +    if (!m2b) {
   1.263 +      return false;
   1.264 +    }
   1.265 +    memcpy(m2b, aBuffer, m2bSize);
   1.266 +
   1.267 +    mState.mIs2b = true;
   1.268 +    if (aUpdateBidi) {
   1.269 +      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
   1.270 +    }
   1.271 +
   1.272 +  } else {
   1.273 +    // Use 1 byte storage because we can
   1.274 +    char* buff = (char *)moz_malloc(aLength * sizeof(char));
   1.275 +    if (!buff) {
   1.276 +      return false;
   1.277 +    }
   1.278 +
   1.279 +    // Copy data
   1.280 +    LossyConvertEncoding16to8 converter(buff);
   1.281 +    copy_string(aBuffer, aBuffer+aLength, converter);
   1.282 +    m1b = buff;
   1.283 +    mState.mIs2b = false;
   1.284 +  }
   1.285 +
   1.286 +  // Setup our fields
   1.287 +  mState.mInHeap = true;
   1.288 +  mState.mLength = aLength;
   1.289 +
   1.290 +  return true;
   1.291 +}
   1.292 +
   1.293 +void
   1.294 +nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount)
   1.295 +{
   1.296 +  NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
   1.297 +  NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
   1.298 +
   1.299 +  if (aOffset < 0) {
   1.300 +    aOffset = 0;
   1.301 +  }
   1.302 +
   1.303 +  if (uint32_t(aOffset + aCount) > GetLength()) {
   1.304 +    aCount = mState.mLength - aOffset;
   1.305 +  }
   1.306 +
   1.307 +  if (aCount != 0) {
   1.308 +    if (mState.mIs2b) {
   1.309 +      memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount);
   1.310 +    } else {
   1.311 +      const char *cp = m1b + aOffset;
   1.312 +      const char *end = cp + aCount;
   1.313 +      LossyConvertEncoding8to16 converter(aDest);
   1.314 +      copy_string(cp, end, converter);
   1.315 +    }
   1.316 +  }
   1.317 +}
   1.318 +
   1.319 +bool
   1.320 +nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi)
   1.321 +{
   1.322 +  // This is a common case because some callsites create a textnode
   1.323 +  // with a value by creating the node and then calling AppendData.
   1.324 +  if (mState.mLength == 0) {
   1.325 +    return SetTo(aBuffer, aLength, aUpdateBidi);
   1.326 +  }
   1.327 +
   1.328 +  // Should we optimize for aData.Length() == 0?
   1.329 +
   1.330 +  if (mState.mIs2b) {
   1.331 +    // Already a 2-byte string so the result will be too
   1.332 +    char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t));
   1.333 +    if (!buff) {
   1.334 +      return false;
   1.335 +    }
   1.336 +
   1.337 +    memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
   1.338 +    mState.mLength += aLength;
   1.339 +    m2b = buff;
   1.340 +
   1.341 +    if (aUpdateBidi) {
   1.342 +      UpdateBidiFlag(aBuffer, aLength);
   1.343 +    }
   1.344 +
   1.345 +    return true;
   1.346 +  }
   1.347 +
   1.348 +  // Current string is a 1-byte string, check if the new data fits in one byte too.
   1.349 +  int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
   1.350 +
   1.351 +  if (first16bit != -1) { // aBuffer contains no non-8bit character
   1.352 +    // The old data was 1-byte, but the new is not so we have to expand it
   1.353 +    // all to 2-byte
   1.354 +    char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) *
   1.355 +                                                  sizeof(char16_t));
   1.356 +    if (!buff) {
   1.357 +      return false;
   1.358 +    }
   1.359 +
   1.360 +    // Copy data into buff
   1.361 +    LossyConvertEncoding8to16 converter(buff);
   1.362 +    copy_string(m1b, m1b+mState.mLength, converter);
   1.363 +
   1.364 +    memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
   1.365 +    mState.mLength += aLength;
   1.366 +    mState.mIs2b = true;
   1.367 +
   1.368 +    if (mState.mInHeap) {
   1.369 +      moz_free(m2b);
   1.370 +    }
   1.371 +    m2b = buff;
   1.372 +
   1.373 +    mState.mInHeap = true;
   1.374 +
   1.375 +    if (aUpdateBidi) {
   1.376 +      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
   1.377 +    }
   1.378 +
   1.379 +    return true;
   1.380 +  }
   1.381 +
   1.382 +  // The new and the old data is all 1-byte
   1.383 +  char* buff;
   1.384 +  if (mState.mInHeap) {
   1.385 +    buff = (char*)moz_realloc(const_cast<char*>(m1b),
   1.386 +                                    (mState.mLength + aLength) * sizeof(char));
   1.387 +    if (!buff) {
   1.388 +      return false;
   1.389 +    }
   1.390 +  }
   1.391 +  else {
   1.392 +    buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char));
   1.393 +    if (!buff) {
   1.394 +      return false;
   1.395 +    }
   1.396 +
   1.397 +    memcpy(buff, m1b, mState.mLength);
   1.398 +    mState.mInHeap = true;
   1.399 +  }
   1.400 +
   1.401 +  // Copy aBuffer into buff.
   1.402 +  LossyConvertEncoding16to8 converter(buff + mState.mLength);
   1.403 +  copy_string(aBuffer, aBuffer + aLength, converter);
   1.404 +
   1.405 +  m1b = buff;
   1.406 +  mState.mLength += aLength;
   1.407 +
   1.408 +  return true;
   1.409 +}
   1.410 +
   1.411 +/* virtual */ size_t
   1.412 +nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
   1.413 +{
   1.414 +  if (Is2b()) {
   1.415 +    return aMallocSizeOf(m2b);
   1.416 +  }
   1.417 +
   1.418 +  if (mState.mInHeap) {
   1.419 +    return aMallocSizeOf(m1b);
   1.420 +  }
   1.421 +
   1.422 +  return 0;
   1.423 +}
   1.424 +
   1.425 +// To save time we only do this when we really want to know, not during
   1.426 +// every allocation
   1.427 +void
   1.428 +nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)
   1.429 +{
   1.430 +  if (mState.mIs2b && !mState.mIsBidi) {
   1.431 +    const char16_t* cp = aBuffer;
   1.432 +    const char16_t* end = cp + aLength;
   1.433 +    while (cp < end) {
   1.434 +      char16_t ch1 = *cp++;
   1.435 +      uint32_t utf32Char = ch1;
   1.436 +      if (NS_IS_HIGH_SURROGATE(ch1) &&
   1.437 +          cp < end &&
   1.438 +          NS_IS_LOW_SURROGATE(*cp)) {
   1.439 +        char16_t ch2 = *cp++;
   1.440 +        utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
   1.441 +      }
   1.442 +      if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {
   1.443 +        mState.mIsBidi = true;
   1.444 +        break;
   1.445 +      }
   1.446 +    }
   1.447 +  }
   1.448 +}

mercurial