The Tor Browser: content/base/src/nsTextFragment.cpp@6474c204b198

Cloned from upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this

     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     6 /*

     7  * A class which represents a fragment of text (eg inside a text

     8  * node); if only codepoints below 256 are used, the text is stored as

     9  * a char*; otherwise the text is stored as a char16_t*

    10  */

    12 #include "nsTextFragment.h"

    13 #include "nsCRT.h"

    14 #include "nsReadableUtils.h"

    15 #include "nsMemory.h"

    16 #include "nsBidiUtils.h"

    17 #include "nsUnicharUtils.h"

    18 #include "nsUTF8Utils.h"

    19 #include "mozilla/MemoryReporting.h"

    20 #include "mozilla/SSE.h"

    21 #include "nsTextFragmentImpl.h"

    22 #include <algorithm>

    // Number of trailing whitespace characters (spaces or tabs) stored at the
    // end of every shared whitespace string built by nsTextFragment::Init().
    #define TEXTFRAG_WHITE_AFTER_NEWLINE 50

    // Largest run of newlines a shared whitespace string can represent; the
    // tables below therefore have TEXTFRAG_MAX_NEWLINES + 1 entries (0..7).
    #define TEXTFRAG_MAX_NEWLINES 7

    // Shared buffers for common fragments. They are filled in by
    // nsTextFragment::Init() and freed by nsTextFragment::Shutdown().
    //
    // Entry i of the two whitespace tables is: one ' ', then i '\n', then
    // TEXTFRAG_WHITE_AFTER_NEWLINE spaces (space table) or tabs (tab table).
    static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
    static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];

    // Identity table: element c holds the character value c, so a fragment
    // made of a single 8-bit character can point straight into it.
    static char sSingleCharSharedString[256];

    32 // static

    33 nsresult

    34 nsTextFragment::Init()

    35 {

    36   // Create whitespace strings

    37   uint32_t i;

    38   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {

    39     sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];

    40     sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];

    41     NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],

    42                    NS_ERROR_OUT_OF_MEMORY);

    43     sSpaceSharedString[i][0] = ' ';

    44     sTabSharedString[i][0] = ' ';

    45     uint32_t j;

    46     for (j = 1; j < 1 + i; ++j) {

    47       sSpaceSharedString[i][j] = '\n';

    48       sTabSharedString[i][j] = '\n';

    49     }

    50     for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {

    51       sSpaceSharedString[i][j] = ' ';

    52       sTabSharedString[i][j] = '\t';

    53     }

    54   }

    56   // Create single-char strings

    57   for (i = 0; i < 256; ++i) {

    58     sSingleCharSharedString[i] = i;

    59   }

    61   return NS_OK;

    62 }

    64 // static

    65 void

    66 nsTextFragment::Shutdown()

    67 {

    68   uint32_t  i;

    69   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {

    70     delete [] sSpaceSharedString[i];

    71     delete [] sTabSharedString[i];

    72     sSpaceSharedString[i] = nullptr;

    73     sTabSharedString[i] = nullptr;

    74   }

    75 }

    77 nsTextFragment::~nsTextFragment()

    78 {

    79   ReleaseText();

    80   MOZ_COUNT_DTOR(nsTextFragment);

    81 }

    83 void

    84 nsTextFragment::ReleaseText()

    85 {

    86   if (mState.mLength && m1b && mState.mInHeap) {

    87     moz_free(m2b); // m1b == m2b as far as moz_free is concerned

    88   }

    90   m1b = nullptr;

    91   mState.mIsBidi = false;

    93   // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;

    94   mAllBits = 0;

    95 }

    97 nsTextFragment&

    98 nsTextFragment::operator=(const nsTextFragment& aOther)

    99 {

   100   ReleaseText();

   102   if (aOther.mState.mLength) {

   103     if (!aOther.mState.mInHeap) {

   104       m1b = aOther.m1b; // This will work even if aOther is using m2b

   105     }

   106     else {

   107       size_t m2bSize = aOther.mState.mLength *

   108         (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char));

   110       m2b = static_cast<char16_t*>(moz_malloc(m2bSize));

   111       if (m2b) {

   112         memcpy(m2b, aOther.m2b, m2bSize);

   113       } else {

   114         // allocate a buffer for a single REPLACEMENT CHARACTER

   115         m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));

   116         m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER

   117         mState.mIs2b = true;

   118         mState.mInHeap = true;

   119         mState.mLength = 1;

   120       }

   121     }

   123     if (m1b) {

   124       mAllBits = aOther.mAllBits;

   125     }

   126   }

   128   return *this;

   129 }

   131 static inline int32_t

   132 FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end)

   133 {

   134   typedef Non8BitParameters<sizeof(size_t)> p;

   135   const size_t mask = p::mask();

   136   const uint32_t alignMask = p::alignMask();

   137   const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();

   138   const int32_t len = end - str;

   139   int32_t i = 0;

   141   // Align ourselves to a word boundary.

   142   int32_t alignLen =

   143     std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));

   144   for (; i < alignLen; i++) {

   145     if (str[i] > 255)

   146       return i;

   147   }

   149   // Check one word at a time.

   150   const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;

   151   for (; i < wordWalkEnd; i += numUnicharsPerWord) {

   152     const size_t word = *reinterpret_cast<const size_t*>(str + i);

   153     if (word & mask)

   154       return i;

   155   }

   157   // Take care of the remainder one character at a time.

   158   for (; i < len; i++) {

   159     if (str[i] > 255)

   160       return i;

   161   }

   163   return -1;

   164 }

   166 #ifdef MOZILLA_MAY_SUPPORT_SSE2

   167 namespace mozilla {

   168   namespace SSE2 {

   169     int32_t FirstNon8Bit(const char16_t *str, const char16_t *end);

   170   }

   171 }

   172 #endif

   174 /*

   175  * This function returns -1 if all characters in str are 8 bit characters.

   176  * Otherwise, it returns a value less than or equal to the index of the first

   177  * non-8bit character in str. For example, if first non-8bit character is at

   178  * position 25, it may return 25, or for example 24, or 16. But it guarantees

   179  * there is no non-8bit character before returned value.

   180  */

   181 static inline int32_t

   182 FirstNon8Bit(const char16_t *str, const char16_t *end)

   183 {

   184 #ifdef MOZILLA_MAY_SUPPORT_SSE2

   185   if (mozilla::supports_sse2()) {

   186     return mozilla::SSE2::FirstNon8Bit(str, end);

   187   }

   188 #endif

   190   return FirstNon8BitUnvectorized(str, end);

   191 }

   193 bool

   194 nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi)

   195 {

   196   ReleaseText();

   198   if (aLength == 0) {

   199     return true;

   200   }

   202   char16_t firstChar = *aBuffer;

   203   if (aLength == 1 && firstChar < 256) {

   204     m1b = sSingleCharSharedString + firstChar;

   205     mState.mInHeap = false;

   206     mState.mIs2b = false;

   207     mState.mLength = 1;

   209     return true;

   210   }

   212   const char16_t *ucp = aBuffer;

   213   const char16_t *uend = aBuffer + aLength;

   215   // Check if we can use a shared string

   216   if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&

   217      (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {

   218     if (firstChar == ' ') {

   219       ++ucp;

   220     }

   222     const char16_t* start = ucp;

   223     while (ucp < uend && *ucp == '\n') {

   224       ++ucp;

   225     }

   226     const char16_t* endNewLine = ucp;

   228     char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';

   229     while (ucp < uend && *ucp == space) {

   230       ++ucp;

   231     }

   233     if (ucp == uend &&

   234         endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&

   235         ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {

   236       char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;

   237       m1b = strings[endNewLine - start];

   239       // If we didn't find a space in the beginning, skip it now.

   240       if (firstChar != ' ') {

   241         ++m1b;

   242       }

   244       mState.mInHeap = false;

   245       mState.mIs2b = false;

   246       mState.mLength = aLength;

   248       return true;

   249     }

   250   }

   252   // See if we need to store the data in ucs2 or not

   253   int32_t first16bit = FirstNon8Bit(ucp, uend);

   255   if (first16bit != -1) { // aBuffer contains no non-8bit character

   256     // Use ucs2 storage because we have to

   257     size_t m2bSize = aLength * sizeof(char16_t);

   258     m2b = (char16_t *)moz_malloc(m2bSize);

   259     if (!m2b) {

   260       return false;

   261     }

   262     memcpy(m2b, aBuffer, m2bSize);

   264     mState.mIs2b = true;

   265     if (aUpdateBidi) {

   266       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);

   267     }

   269   } else {

   270     // Use 1 byte storage because we can

   271     char* buff = (char *)moz_malloc(aLength * sizeof(char));

   272     if (!buff) {

   273       return false;

   274     }

   276     // Copy data

   277     LossyConvertEncoding16to8 converter(buff);

   278     copy_string(aBuffer, aBuffer+aLength, converter);

   279     m1b = buff;

   280     mState.mIs2b = false;

   281   }

   283   // Setup our fields

   284   mState.mInHeap = true;

   285   mState.mLength = aLength;

   287   return true;

   288 }

   290 void

   291 nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount)

   292 {

   293   NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");

   294   NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");

   296   if (aOffset < 0) {

   297     aOffset = 0;

   298   }

   300   if (uint32_t(aOffset + aCount) > GetLength()) {

   301     aCount = mState.mLength - aOffset;

   302   }

   304   if (aCount != 0) {

   305     if (mState.mIs2b) {

   306       memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount);

   307     } else {

   308       const char *cp = m1b + aOffset;

   309       const char *end = cp + aCount;

   310       LossyConvertEncoding8to16 converter(aDest);

   311       copy_string(cp, end, converter);

   312     }

   313   }

   314 }

   316 bool

   317 nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi)

   318 {

   319   // This is a common case because some callsites create a textnode

   320   // with a value by creating the node and then calling AppendData.

   321   if (mState.mLength == 0) {

   322     return SetTo(aBuffer, aLength, aUpdateBidi);

   323   }

   325   // Should we optimize for aData.Length() == 0?

   327   if (mState.mIs2b) {

   328     // Already a 2-byte string so the result will be too

   329     char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t));

   330     if (!buff) {

   331       return false;

   332     }

   334     memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));

   335     mState.mLength += aLength;

   336     m2b = buff;

   338     if (aUpdateBidi) {

   339       UpdateBidiFlag(aBuffer, aLength);

   340     }

   342     return true;

   343   }

   345   // Current string is a 1-byte string, check if the new data fits in one byte too.

   346   int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);

   348   if (first16bit != -1) { // aBuffer contains no non-8bit character

   349     // The old data was 1-byte, but the new is not so we have to expand it

   350     // all to 2-byte

   351     char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) *

   352                                                   sizeof(char16_t));

   353     if (!buff) {

   354       return false;

   355     }

   357     // Copy data into buff

   358     LossyConvertEncoding8to16 converter(buff);

   359     copy_string(m1b, m1b+mState.mLength, converter);

   361     memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));

   362     mState.mLength += aLength;

   363     mState.mIs2b = true;

   365     if (mState.mInHeap) {

   366       moz_free(m2b);

   367     }

   368     m2b = buff;

   370     mState.mInHeap = true;

   372     if (aUpdateBidi) {

   373       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);

   374     }

   376     return true;

   377   }

   379   // The new and the old data is all 1-byte

   380   char* buff;

   381   if (mState.mInHeap) {

   382     buff = (char*)moz_realloc(const_cast<char*>(m1b),

   383                                     (mState.mLength + aLength) * sizeof(char));

   384     if (!buff) {

   385       return false;

   386     }

   387   }

   388   else {

   389     buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char));

   390     if (!buff) {

   391       return false;

   392     }

   394     memcpy(buff, m1b, mState.mLength);

   395     mState.mInHeap = true;

   396   }

   398   // Copy aBuffer into buff.

   399   LossyConvertEncoding16to8 converter(buff + mState.mLength);

   400   copy_string(aBuffer, aBuffer + aLength, converter);

   402   m1b = buff;

   403   mState.mLength += aLength;

   405   return true;

   406 }

   408 /* virtual */ size_t

   409 nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const

   410 {

   411   if (Is2b()) {

   412     return aMallocSizeOf(m2b);

   413   }

   415   if (mState.mInHeap) {

   416     return aMallocSizeOf(m1b);

   417   }

   419   return 0;

   420 }

   422 // To save time we only do this when we really want to know, not during

   423 // every allocation

   424 void

   425 nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)

   426 {

   427   if (mState.mIs2b && !mState.mIsBidi) {

   428     const char16_t* cp = aBuffer;

   429     const char16_t* end = cp + aLength;

   430     while (cp < end) {

   431       char16_t ch1 = *cp++;

   432       uint32_t utf32Char = ch1;

   433       if (NS_IS_HIGH_SURROGATE(ch1) &&

   434           cp < end &&

   435           NS_IS_LOW_SURROGATE(*cp)) {

   436         char16_t ch2 = *cp++;

   437         utf32Char = SURROGATE_TO_UCS4(ch1, ch2);

   438       }

   439       if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {

   440         mState.mIsBidi = true;

   441         break;

   442       }

   443     }

   444   }

   445 }

The Tor Browser / file revision

content/base/src/nsTextFragment.cpp@6474c204b198

content/base/src/nsTextFragment.cpp