content/base/src/nsTextFragment.cpp

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3.
This includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /*
     7  * A class which represents a fragment of text (eg inside a text
     8  * node); if only codepoints below 256 are used, the text is stored as
     9  * a char*; otherwise the text is stored as a char16_t*
    10  */
    12 #include "nsTextFragment.h"
    13 #include "nsCRT.h"
    14 #include "nsReadableUtils.h"
    15 #include "nsMemory.h"
    16 #include "nsBidiUtils.h"
    17 #include "nsUnicharUtils.h"
    18 #include "nsUTF8Utils.h"
    19 #include "mozilla/MemoryReporting.h"
    20 #include "mozilla/SSE.h"
    21 #include "nsTextFragmentImpl.h"
    22 #include <algorithm>
    24 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
    25 #define TEXTFRAG_MAX_NEWLINES 7
    27 // Static buffer used for common fragments
    28 static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
    29 static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
    30 static char sSingleCharSharedString[256];
    32 // static
    33 nsresult
    34 nsTextFragment::Init()
    35 {
    36   // Create whitespace strings
    37   uint32_t i;
    38   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
    39     sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
    40     sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
    41     NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],
    42                    NS_ERROR_OUT_OF_MEMORY);
    43     sSpaceSharedString[i][0] = ' ';
    44     sTabSharedString[i][0] = ' ';
    45     uint32_t j;
    46     for (j = 1; j < 1 + i; ++j) {
    47       sSpaceSharedString[i][j] = '\n';
    48       sTabSharedString[i][j] = '\n';
    49     }
    50     for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
    51       sSpaceSharedString[i][j] = ' ';
    52       sTabSharedString[i][j] = '\t';
    53     }
    54   }
    56   // Create single-char strings
    57   for (i = 0; i < 256; ++i) {
    58     sSingleCharSharedString[i] = i;
    59   }
    61   return NS_OK;
    62 }
    64 // static
    65 void
    66 nsTextFragment::Shutdown()
    67 {
    68   uint32_t  i;
    69   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
    70     delete [] sSpaceSharedString[i];
    71     delete [] sTabSharedString[i];
    72     sSpaceSharedString[i] = nullptr;
    73     sTabSharedString[i] = nullptr;
    74   }
    75 }
    77 nsTextFragment::~nsTextFragment()
    78 {
    79   ReleaseText();
    80   MOZ_COUNT_DTOR(nsTextFragment);
    81 }
    83 void
    84 nsTextFragment::ReleaseText()
    85 {
    86   if (mState.mLength && m1b && mState.mInHeap) {
    87     moz_free(m2b); // m1b == m2b as far as moz_free is concerned
    88   }
    90   m1b = nullptr;
    91   mState.mIsBidi = false;
    93   // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
    94   mAllBits = 0;
    95 }
    97 nsTextFragment&
    98 nsTextFragment::operator=(const nsTextFragment& aOther)
    99 {
   100   ReleaseText();
   102   if (aOther.mState.mLength) {
   103     if (!aOther.mState.mInHeap) {
   104       m1b = aOther.m1b; // This will work even if aOther is using m2b
   105     }
   106     else {
   107       size_t m2bSize = aOther.mState.mLength *
   108         (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char));
   110       m2b = static_cast<char16_t*>(moz_malloc(m2bSize));
   111       if (m2b) {
   112         memcpy(m2b, aOther.m2b, m2bSize);
   113       } else {
   114         // allocate a buffer for a single REPLACEMENT CHARACTER
   115         m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));
   116         m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER
   117         mState.mIs2b = true;
   118         mState.mInHeap = true;
   119         mState.mLength = 1;
   120       }
   121     }
   123     if (m1b) {
   124       mAllBits = aOther.mAllBits;
   125     }
   126   }
   128   return *this;
   129 }
   131 static inline int32_t
   132 FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end)
   133 {
   134   typedef Non8BitParameters<sizeof(size_t)> p;
   135   const size_t mask = p::mask();
   136   const uint32_t alignMask = p::alignMask();
   137   const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
   138   const int32_t len = end - str;
   139   int32_t i = 0;
   141   // Align ourselves to a word boundary.
   142   int32_t alignLen =
   143     std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
   144   for (; i < alignLen; i++) {
   145     if (str[i] > 255)
   146       return i;
   147   }
   149   // Check one word at a time.
   150   const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
   151   for (; i < wordWalkEnd; i += numUnicharsPerWord) {
   152     const size_t word = *reinterpret_cast<const size_t*>(str + i);
   153     if (word & mask)
   154       return i;
   155   }
   157   // Take care of the remainder one character at a time.
   158   for (; i < len; i++) {
   159     if (str[i] > 255)
   160       return i;
   161   }
   163   return -1;
   164 }
   166 #ifdef MOZILLA_MAY_SUPPORT_SSE2
   167 namespace mozilla {
   168   namespace SSE2 {
   169     int32_t FirstNon8Bit(const char16_t *str, const char16_t *end);
   170   }
   171 }
   172 #endif
   174 /*
   175  * This function returns -1 if all characters in str are 8 bit characters.
   176  * Otherwise, it returns a value less than or equal to the index of the first
   177  * non-8bit character in str. For example, if first non-8bit character is at
   178  * position 25, it may return 25, or for example 24, or 16. But it guarantees
   179  * there is no non-8bit character before returned value.
   180  */
   181 static inline int32_t
   182 FirstNon8Bit(const char16_t *str, const char16_t *end)
   183 {
   184 #ifdef MOZILLA_MAY_SUPPORT_SSE2
   185   if (mozilla::supports_sse2()) {
   186     return mozilla::SSE2::FirstNon8Bit(str, end);
   187   }
   188 #endif
   190   return FirstNon8BitUnvectorized(str, end);
   191 }
   193 bool
   194 nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi)
   195 {
   196   ReleaseText();
   198   if (aLength == 0) {
   199     return true;
   200   }
   202   char16_t firstChar = *aBuffer;
   203   if (aLength == 1 && firstChar < 256) {
   204     m1b = sSingleCharSharedString + firstChar;
   205     mState.mInHeap = false;
   206     mState.mIs2b = false;
   207     mState.mLength = 1;
   209     return true;
   210   }
   212   const char16_t *ucp = aBuffer;
   213   const char16_t *uend = aBuffer + aLength;
   215   // Check if we can use a shared string
   216   if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
   217      (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
   218     if (firstChar == ' ') {
   219       ++ucp;
   220     }
   222     const char16_t* start = ucp;
   223     while (ucp < uend && *ucp == '\n') {
   224       ++ucp;
   225     }
   226     const char16_t* endNewLine = ucp;
   228     char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
   229     while (ucp < uend && *ucp == space) {
   230       ++ucp;
   231     }
   233     if (ucp == uend &&
   234         endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
   235         ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
   236       char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
   237       m1b = strings[endNewLine - start];
   239       // If we didn't find a space in the beginning, skip it now.
   240       if (firstChar != ' ') {
   241         ++m1b;
   242       }
   244       mState.mInHeap = false;
   245       mState.mIs2b = false;
   246       mState.mLength = aLength;
   248       return true;        
   249     }
   250   }
   252   // See if we need to store the data in ucs2 or not
   253   int32_t first16bit = FirstNon8Bit(ucp, uend);
   255   if (first16bit != -1) { // aBuffer contains no non-8bit character
   256     // Use ucs2 storage because we have to
   257     size_t m2bSize = aLength * sizeof(char16_t);
   258     m2b = (char16_t *)moz_malloc(m2bSize);
   259     if (!m2b) {
   260       return false;
   261     }
   262     memcpy(m2b, aBuffer, m2bSize);
   264     mState.mIs2b = true;
   265     if (aUpdateBidi) {
   266       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
   267     }
   269   } else {
   270     // Use 1 byte storage because we can
   271     char* buff = (char *)moz_malloc(aLength * sizeof(char));
   272     if (!buff) {
   273       return false;
   274     }
   276     // Copy data
   277     LossyConvertEncoding16to8 converter(buff);
   278     copy_string(aBuffer, aBuffer+aLength, converter);
   279     m1b = buff;
   280     mState.mIs2b = false;
   281   }
   283   // Setup our fields
   284   mState.mInHeap = true;
   285   mState.mLength = aLength;
   287   return true;
   288 }
   290 void
   291 nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount)
   292 {
   293   NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
   294   NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
   296   if (aOffset < 0) {
   297     aOffset = 0;
   298   }
   300   if (uint32_t(aOffset + aCount) > GetLength()) {
   301     aCount = mState.mLength - aOffset;
   302   }
   304   if (aCount != 0) {
   305     if (mState.mIs2b) {
   306       memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount);
   307     } else {
   308       const char *cp = m1b + aOffset;
   309       const char *end = cp + aCount;
   310       LossyConvertEncoding8to16 converter(aDest);
   311       copy_string(cp, end, converter);
   312     }
   313   }
   314 }
   316 bool
   317 nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi)
   318 {
   319   // This is a common case because some callsites create a textnode
   320   // with a value by creating the node and then calling AppendData.
   321   if (mState.mLength == 0) {
   322     return SetTo(aBuffer, aLength, aUpdateBidi);
   323   }
   325   // Should we optimize for aData.Length() == 0?
   327   if (mState.mIs2b) {
   328     // Already a 2-byte string so the result will be too
   329     char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t));
   330     if (!buff) {
   331       return false;
   332     }
   334     memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
   335     mState.mLength += aLength;
   336     m2b = buff;
   338     if (aUpdateBidi) {
   339       UpdateBidiFlag(aBuffer, aLength);
   340     }
   342     return true;
   343   }
   345   // Current string is a 1-byte string, check if the new data fits in one byte too.
   346   int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
   348   if (first16bit != -1) { // aBuffer contains no non-8bit character
   349     // The old data was 1-byte, but the new is not so we have to expand it
   350     // all to 2-byte
   351     char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) *
   352                                                   sizeof(char16_t));
   353     if (!buff) {
   354       return false;
   355     }
   357     // Copy data into buff
   358     LossyConvertEncoding8to16 converter(buff);
   359     copy_string(m1b, m1b+mState.mLength, converter);
   361     memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
   362     mState.mLength += aLength;
   363     mState.mIs2b = true;
   365     if (mState.mInHeap) {
   366       moz_free(m2b);
   367     }
   368     m2b = buff;
   370     mState.mInHeap = true;
   372     if (aUpdateBidi) {
   373       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
   374     }
   376     return true;
   377   }
   379   // The new and the old data is all 1-byte
   380   char* buff;
   381   if (mState.mInHeap) {
   382     buff = (char*)moz_realloc(const_cast<char*>(m1b),
   383                                     (mState.mLength + aLength) * sizeof(char));
   384     if (!buff) {
   385       return false;
   386     }
   387   }
   388   else {
   389     buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char));
   390     if (!buff) {
   391       return false;
   392     }
   394     memcpy(buff, m1b, mState.mLength);
   395     mState.mInHeap = true;
   396   }
   398   // Copy aBuffer into buff.
   399   LossyConvertEncoding16to8 converter(buff + mState.mLength);
   400   copy_string(aBuffer, aBuffer + aLength, converter);
   402   m1b = buff;
   403   mState.mLength += aLength;
   405   return true;
   406 }
   408 /* virtual */ size_t
   409 nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
   410 {
   411   if (Is2b()) {
   412     return aMallocSizeOf(m2b);
   413   }
   415   if (mState.mInHeap) {
   416     return aMallocSizeOf(m1b);
   417   }
   419   return 0;
   420 }
   422 // To save time we only do this when we really want to know, not during
   423 // every allocation
   424 void
   425 nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)
   426 {
   427   if (mState.mIs2b && !mState.mIsBidi) {
   428     const char16_t* cp = aBuffer;
   429     const char16_t* end = cp + aLength;
   430     while (cp < end) {
   431       char16_t ch1 = *cp++;
   432       uint32_t utf32Char = ch1;
   433       if (NS_IS_HIGH_SURROGATE(ch1) &&
   434           cp < end &&
   435           NS_IS_LOW_SURROGATE(*cp)) {
   436         char16_t ch2 = *cp++;
   437         utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
   438       }
   439       if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {
   440         mState.mIsBidi = true;
   441         break;
   442       }
   443     }
   444   }
   445 }

mercurial