1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/base/src/nsTextFragment.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,445 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* 1.10 + * A class which represents a fragment of text (eg inside a text 1.11 + * node); if only codepoints below 256 are used, the text is stored as 1.12 + * a char*; otherwise the text is stored as a char16_t* 1.13 + */ 1.14 + 1.15 +#include "nsTextFragment.h" 1.16 +#include "nsCRT.h" 1.17 +#include "nsReadableUtils.h" 1.18 +#include "nsMemory.h" 1.19 +#include "nsBidiUtils.h" 1.20 +#include "nsUnicharUtils.h" 1.21 +#include "nsUTF8Utils.h" 1.22 +#include "mozilla/MemoryReporting.h" 1.23 +#include "mozilla/SSE.h" 1.24 +#include "nsTextFragmentImpl.h" 1.25 +#include <algorithm> 1.26 + 1.27 +#define TEXTFRAG_WHITE_AFTER_NEWLINE 50 1.28 +#define TEXTFRAG_MAX_NEWLINES 7 1.29 + 1.30 +// Static buffer used for common fragments 1.31 +static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1]; 1.32 +static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1]; 1.33 +static char sSingleCharSharedString[256]; 1.34 + 1.35 +// static 1.36 +nsresult 1.37 +nsTextFragment::Init() 1.38 +{ 1.39 + // Create whitespace strings 1.40 + uint32_t i; 1.41 + for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { 1.42 + sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; 1.43 + sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; 1.44 + NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i], 1.45 + NS_ERROR_OUT_OF_MEMORY); 1.46 + sSpaceSharedString[i][0] = ' '; 1.47 + sTabSharedString[i][0] = ' '; 1.48 + uint32_t j; 1.49 + for (j = 1; j < 1 + i; ++j) { 1.50 + sSpaceSharedString[i][j] = '\n'; 1.51 + sTabSharedString[i][j] = '\n'; 1.52 + } 1.53 + for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) { 1.54 + sSpaceSharedString[i][j] = ' '; 1.55 + sTabSharedString[i][j] = '\t'; 1.56 + } 1.57 + } 1.58 + 1.59 + // Create single-char strings 1.60 + for (i = 0; i < 256; ++i) { 1.61 + sSingleCharSharedString[i] = i; 1.62 + } 1.63 + 1.64 + return NS_OK; 1.65 +} 1.66 + 1.67 +// static 1.68 +void 1.69 +nsTextFragment::Shutdown() 1.70 +{ 1.71 + uint32_t i; 1.72 + for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { 1.73 + delete [] sSpaceSharedString[i]; 1.74 + delete [] sTabSharedString[i]; 1.75 + sSpaceSharedString[i] = nullptr; 1.76 + sTabSharedString[i] = nullptr; 1.77 + } 1.78 +} 1.79 + 1.80 +nsTextFragment::~nsTextFragment() 1.81 +{ 1.82 + ReleaseText(); 1.83 + MOZ_COUNT_DTOR(nsTextFragment); 1.84 +} 1.85 + 1.86 +void 1.87 +nsTextFragment::ReleaseText() 1.88 +{ 1.89 + if (mState.mLength && m1b && mState.mInHeap) { 1.90 + moz_free(m2b); // m1b == m2b as far as moz_free is concerned 1.91 + } 1.92 + 1.93 + m1b = nullptr; 1.94 + mState.mIsBidi = false; 1.95 + 1.96 + // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits; 1.97 + mAllBits = 0; 1.98 +} 1.99 + 1.100 +nsTextFragment& 1.101 +nsTextFragment::operator=(const nsTextFragment& aOther) 1.102 +{ 1.103 + ReleaseText(); 1.104 + 1.105 + if (aOther.mState.mLength) { 1.106 + if (!aOther.mState.mInHeap) { 1.107 + m1b = aOther.m1b; // This will work even if aOther is using m2b 1.108 + } 1.109 + else { 1.110 + size_t m2bSize = aOther.mState.mLength * 1.111 + (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char)); 1.112 + 1.113 + m2b = static_cast<char16_t*>(moz_malloc(m2bSize)); 1.114 + if (m2b) { 1.115 + memcpy(m2b, aOther.m2b, m2bSize); 1.116 + } else { 1.117 + // allocate a buffer for a single REPLACEMENT CHARACTER 1.118 + m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t))); 1.119 + m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER 1.120 + mState.mIs2b = true; 1.121 + mState.mInHeap = true; 1.122 + mState.mLength = 1; 1.123 + } 1.124 + } 1.125 + 1.126 + if (m1b) { 1.127 + mAllBits = aOther.mAllBits; 1.128 + } 1.129 + } 1.130 + 1.131 + return *this; 1.132 +} 1.133 + 1.134 +static inline int32_t 1.135 +FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end) 1.136 +{ 1.137 + typedef Non8BitParameters<sizeof(size_t)> p; 1.138 + const size_t mask = p::mask(); 1.139 + const uint32_t alignMask = p::alignMask(); 1.140 + const uint32_t numUnicharsPerWord = p::numUnicharsPerWord(); 1.141 + const int32_t len = end - str; 1.142 + int32_t i = 0; 1.143 + 1.144 + // Align ourselves to a word boundary. 1.145 + int32_t alignLen = 1.146 + std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t))); 1.147 + for (; i < alignLen; i++) { 1.148 + if (str[i] > 255) 1.149 + return i; 1.150 + } 1.151 + 1.152 + // Check one word at a time. 1.153 + const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord; 1.154 + for (; i < wordWalkEnd; i += numUnicharsPerWord) { 1.155 + const size_t word = *reinterpret_cast<const size_t*>(str + i); 1.156 + if (word & mask) 1.157 + return i; 1.158 + } 1.159 + 1.160 + // Take care of the remainder one character at a time. 1.161 + for (; i < len; i++) { 1.162 + if (str[i] > 255) 1.163 + return i; 1.164 + } 1.165 + 1.166 + return -1; 1.167 +} 1.168 + 1.169 +#ifdef MOZILLA_MAY_SUPPORT_SSE2 1.170 +namespace mozilla { 1.171 + namespace SSE2 { 1.172 + int32_t FirstNon8Bit(const char16_t *str, const char16_t *end); 1.173 + } 1.174 +} 1.175 +#endif 1.176 + 1.177 +/* 1.178 + * This function returns -1 if all characters in str are 8 bit characters. 1.179 + * Otherwise, it returns a value less than or equal to the index of the first 1.180 + * non-8bit character in str. For example, if first non-8bit character is at 1.181 + * position 25, it may return 25, or for example 24, or 16. But it guarantees 1.182 + * there is no non-8bit character before returned value. 1.183 + */ 1.184 +static inline int32_t 1.185 +FirstNon8Bit(const char16_t *str, const char16_t *end) 1.186 +{ 1.187 +#ifdef MOZILLA_MAY_SUPPORT_SSE2 1.188 + if (mozilla::supports_sse2()) { 1.189 + return mozilla::SSE2::FirstNon8Bit(str, end); 1.190 + } 1.191 +#endif 1.192 + 1.193 + return FirstNon8BitUnvectorized(str, end); 1.194 +} 1.195 + 1.196 +bool 1.197 +nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi) 1.198 +{ 1.199 + ReleaseText(); 1.200 + 1.201 + if (aLength == 0) { 1.202 + return true; 1.203 + } 1.204 + 1.205 + char16_t firstChar = *aBuffer; 1.206 + if (aLength == 1 && firstChar < 256) { 1.207 + m1b = sSingleCharSharedString + firstChar; 1.208 + mState.mInHeap = false; 1.209 + mState.mIs2b = false; 1.210 + mState.mLength = 1; 1.211 + 1.212 + return true; 1.213 + } 1.214 + 1.215 + const char16_t *ucp = aBuffer; 1.216 + const char16_t *uend = aBuffer + aLength; 1.217 + 1.218 + // Check if we can use a shared string 1.219 + if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES && 1.220 + (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) { 1.221 + if (firstChar == ' ') { 1.222 + ++ucp; 1.223 + } 1.224 + 1.225 + const char16_t* start = ucp; 1.226 + while (ucp < uend && *ucp == '\n') { 1.227 + ++ucp; 1.228 + } 1.229 + const char16_t* endNewLine = ucp; 1.230 + 1.231 + char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' '; 1.232 + while (ucp < uend && *ucp == space) { 1.233 + ++ucp; 1.234 + } 1.235 + 1.236 + if (ucp == uend && 1.237 + endNewLine - start <= TEXTFRAG_MAX_NEWLINES && 1.238 + ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) { 1.239 + char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString; 1.240 + m1b = strings[endNewLine - start]; 1.241 + 1.242 + // If we didn't find a space in the beginning, skip it now. 1.243 + if (firstChar != ' ') { 1.244 + ++m1b; 1.245 + } 1.246 + 1.247 + mState.mInHeap = false; 1.248 + mState.mIs2b = false; 1.249 + mState.mLength = aLength; 1.250 + 1.251 + return true; 1.252 + } 1.253 + } 1.254 + 1.255 + // See if we need to store the data in ucs2 or not 1.256 + int32_t first16bit = FirstNon8Bit(ucp, uend); 1.257 + 1.258 + if (first16bit != -1) { // aBuffer contains no non-8bit character 1.259 + // Use ucs2 storage because we have to 1.260 + size_t m2bSize = aLength * sizeof(char16_t); 1.261 + m2b = (char16_t *)moz_malloc(m2bSize); 1.262 + if (!m2b) { 1.263 + return false; 1.264 + } 1.265 + memcpy(m2b, aBuffer, m2bSize); 1.266 + 1.267 + mState.mIs2b = true; 1.268 + if (aUpdateBidi) { 1.269 + UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); 1.270 + } 1.271 + 1.272 + } else { 1.273 + // Use 1 byte storage because we can 1.274 + char* buff = (char *)moz_malloc(aLength * sizeof(char)); 1.275 + if (!buff) { 1.276 + return false; 1.277 + } 1.278 + 1.279 + // Copy data 1.280 + LossyConvertEncoding16to8 converter(buff); 1.281 + copy_string(aBuffer, aBuffer+aLength, converter); 1.282 + m1b = buff; 1.283 + mState.mIs2b = false; 1.284 + } 1.285 + 1.286 + // Setup our fields 1.287 + mState.mInHeap = true; 1.288 + mState.mLength = aLength; 1.289 + 1.290 + return true; 1.291 +} 1.292 + 1.293 +void 1.294 +nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount) 1.295 +{ 1.296 + NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!"); 1.297 + NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!"); 1.298 + 1.299 + if (aOffset < 0) { 1.300 + aOffset = 0; 1.301 + } 1.302 + 1.303 + if (uint32_t(aOffset + aCount) > GetLength()) { 1.304 + aCount = mState.mLength - aOffset; 1.305 + } 1.306 + 1.307 + if (aCount != 0) { 1.308 + if (mState.mIs2b) { 1.309 + memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount); 1.310 + } else { 1.311 + const char *cp = m1b + aOffset; 1.312 + const char *end = cp + aCount; 1.313 + LossyConvertEncoding8to16 converter(aDest); 1.314 + copy_string(cp, end, converter); 1.315 + } 1.316 + } 1.317 +} 1.318 + 1.319 +bool 1.320 +nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi) 1.321 +{ 1.322 + // This is a common case because some callsites create a textnode 1.323 + // with a value by creating the node and then calling AppendData. 1.324 + if (mState.mLength == 0) { 1.325 + return SetTo(aBuffer, aLength, aUpdateBidi); 1.326 + } 1.327 + 1.328 + // Should we optimize for aData.Length() == 0? 1.329 + 1.330 + if (mState.mIs2b) { 1.331 + // Already a 2-byte string so the result will be too 1.332 + char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t)); 1.333 + if (!buff) { 1.334 + return false; 1.335 + } 1.336 + 1.337 + memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t)); 1.338 + mState.mLength += aLength; 1.339 + m2b = buff; 1.340 + 1.341 + if (aUpdateBidi) { 1.342 + UpdateBidiFlag(aBuffer, aLength); 1.343 + } 1.344 + 1.345 + return true; 1.346 + } 1.347 + 1.348 + // Current string is a 1-byte string, check if the new data fits in one byte too. 1.349 + int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength); 1.350 + 1.351 + if (first16bit != -1) { // aBuffer contains no non-8bit character 1.352 + // The old data was 1-byte, but the new is not so we have to expand it 1.353 + // all to 2-byte 1.354 + char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) * 1.355 + sizeof(char16_t)); 1.356 + if (!buff) { 1.357 + return false; 1.358 + } 1.359 + 1.360 + // Copy data into buff 1.361 + LossyConvertEncoding8to16 converter(buff); 1.362 + copy_string(m1b, m1b+mState.mLength, converter); 1.363 + 1.364 + memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t)); 1.365 + mState.mLength += aLength; 1.366 + mState.mIs2b = true; 1.367 + 1.368 + if (mState.mInHeap) { 1.369 + moz_free(m2b); 1.370 + } 1.371 + m2b = buff; 1.372 + 1.373 + mState.mInHeap = true; 1.374 + 1.375 + if (aUpdateBidi) { 1.376 + UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); 1.377 + } 1.378 + 1.379 + return true; 1.380 + } 1.381 + 1.382 + // The new and the old data is all 1-byte 1.383 + char* buff; 1.384 + if (mState.mInHeap) { 1.385 + buff = (char*)moz_realloc(const_cast<char*>(m1b), 1.386 + (mState.mLength + aLength) * sizeof(char)); 1.387 + if (!buff) { 1.388 + return false; 1.389 + } 1.390 + } 1.391 + else { 1.392 + buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char)); 1.393 + if (!buff) { 1.394 + return false; 1.395 + } 1.396 + 1.397 + memcpy(buff, m1b, mState.mLength); 1.398 + mState.mInHeap = true; 1.399 + } 1.400 + 1.401 + // Copy aBuffer into buff. 1.402 + LossyConvertEncoding16to8 converter(buff + mState.mLength); 1.403 + copy_string(aBuffer, aBuffer + aLength, converter); 1.404 + 1.405 + m1b = buff; 1.406 + mState.mLength += aLength; 1.407 + 1.408 + return true; 1.409 +} 1.410 + 1.411 +/* virtual */ size_t 1.412 +nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const 1.413 +{ 1.414 + if (Is2b()) { 1.415 + return aMallocSizeOf(m2b); 1.416 + } 1.417 + 1.418 + if (mState.mInHeap) { 1.419 + return aMallocSizeOf(m1b); 1.420 + } 1.421 + 1.422 + return 0; 1.423 +} 1.424 + 1.425 +// To save time we only do this when we really want to know, not during 1.426 +// every allocation 1.427 +void 1.428 +nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) 1.429 +{ 1.430 + if (mState.mIs2b && !mState.mIsBidi) { 1.431 + const char16_t* cp = aBuffer; 1.432 + const char16_t* end = cp + aLength; 1.433 + while (cp < end) { 1.434 + char16_t ch1 = *cp++; 1.435 + uint32_t utf32Char = ch1; 1.436 + if (NS_IS_HIGH_SURROGATE(ch1) && 1.437 + cp < end && 1.438 + NS_IS_LOW_SURROGATE(*cp)) { 1.439 + char16_t ch2 = *cp++; 1.440 + utf32Char = SURROGATE_TO_UCS4(ch1, ch2); 1.441 + } 1.442 + if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) { 1.443 + mState.mIsBidi = true; 1.444 + break; 1.445 + } 1.446 + } 1.447 + } 1.448 +}