content/base/src/nsTextFragment.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /*
michael@0 7 * A class which represents a fragment of text (eg inside a text
michael@0 8 * node); if only codepoints below 256 are used, the text is stored as
michael@0 9 * a char*; otherwise the text is stored as a char16_t*
michael@0 10 */
michael@0 11
michael@0 12 #include "nsTextFragment.h"
michael@0 13 #include "nsCRT.h"
michael@0 14 #include "nsReadableUtils.h"
michael@0 15 #include "nsMemory.h"
michael@0 16 #include "nsBidiUtils.h"
michael@0 17 #include "nsUnicharUtils.h"
michael@0 18 #include "nsUTF8Utils.h"
michael@0 19 #include "mozilla/MemoryReporting.h"
michael@0 20 #include "mozilla/SSE.h"
michael@0 21 #include "nsTextFragmentImpl.h"
michael@0 22 #include <algorithm>
michael@0 23
// Sizing of the shared whitespace strings built by nsTextFragment::Init():
// each one holds a single leading space, up to TEXTFRAG_MAX_NEWLINES
// newlines, and then TEXTFRAG_WHITE_AFTER_NEWLINE spaces or tabs.
#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
#define TEXTFRAG_MAX_NEWLINES 7

// Shared buffers handed out by SetTo() for common fragments so that no
// per-fragment allocation is needed.  Entry i of the two whitespace tables
// is the string containing exactly i newlines; sSingleCharSharedString[c]
// holds the single character whose value is c.
static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char sSingleCharSharedString[256];
michael@0 31
michael@0 32 // static
michael@0 33 nsresult
michael@0 34 nsTextFragment::Init()
michael@0 35 {
michael@0 36 // Create whitespace strings
michael@0 37 uint32_t i;
michael@0 38 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
michael@0 39 sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
michael@0 40 sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
michael@0 41 NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],
michael@0 42 NS_ERROR_OUT_OF_MEMORY);
michael@0 43 sSpaceSharedString[i][0] = ' ';
michael@0 44 sTabSharedString[i][0] = ' ';
michael@0 45 uint32_t j;
michael@0 46 for (j = 1; j < 1 + i; ++j) {
michael@0 47 sSpaceSharedString[i][j] = '\n';
michael@0 48 sTabSharedString[i][j] = '\n';
michael@0 49 }
michael@0 50 for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
michael@0 51 sSpaceSharedString[i][j] = ' ';
michael@0 52 sTabSharedString[i][j] = '\t';
michael@0 53 }
michael@0 54 }
michael@0 55
michael@0 56 // Create single-char strings
michael@0 57 for (i = 0; i < 256; ++i) {
michael@0 58 sSingleCharSharedString[i] = i;
michael@0 59 }
michael@0 60
michael@0 61 return NS_OK;
michael@0 62 }
michael@0 63
michael@0 64 // static
michael@0 65 void
michael@0 66 nsTextFragment::Shutdown()
michael@0 67 {
michael@0 68 uint32_t i;
michael@0 69 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
michael@0 70 delete [] sSpaceSharedString[i];
michael@0 71 delete [] sTabSharedString[i];
michael@0 72 sSpaceSharedString[i] = nullptr;
michael@0 73 sTabSharedString[i] = nullptr;
michael@0 74 }
michael@0 75 }
michael@0 76
michael@0 77 nsTextFragment::~nsTextFragment()
michael@0 78 {
michael@0 79 ReleaseText();
michael@0 80 MOZ_COUNT_DTOR(nsTextFragment);
michael@0 81 }
michael@0 82
michael@0 83 void
michael@0 84 nsTextFragment::ReleaseText()
michael@0 85 {
michael@0 86 if (mState.mLength && m1b && mState.mInHeap) {
michael@0 87 moz_free(m2b); // m1b == m2b as far as moz_free is concerned
michael@0 88 }
michael@0 89
michael@0 90 m1b = nullptr;
michael@0 91 mState.mIsBidi = false;
michael@0 92
michael@0 93 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
michael@0 94 mAllBits = 0;
michael@0 95 }
michael@0 96
michael@0 97 nsTextFragment&
michael@0 98 nsTextFragment::operator=(const nsTextFragment& aOther)
michael@0 99 {
michael@0 100 ReleaseText();
michael@0 101
michael@0 102 if (aOther.mState.mLength) {
michael@0 103 if (!aOther.mState.mInHeap) {
michael@0 104 m1b = aOther.m1b; // This will work even if aOther is using m2b
michael@0 105 }
michael@0 106 else {
michael@0 107 size_t m2bSize = aOther.mState.mLength *
michael@0 108 (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char));
michael@0 109
michael@0 110 m2b = static_cast<char16_t*>(moz_malloc(m2bSize));
michael@0 111 if (m2b) {
michael@0 112 memcpy(m2b, aOther.m2b, m2bSize);
michael@0 113 } else {
michael@0 114 // allocate a buffer for a single REPLACEMENT CHARACTER
michael@0 115 m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));
michael@0 116 m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER
michael@0 117 mState.mIs2b = true;
michael@0 118 mState.mInHeap = true;
michael@0 119 mState.mLength = 1;
michael@0 120 }
michael@0 121 }
michael@0 122
michael@0 123 if (m1b) {
michael@0 124 mAllBits = aOther.mAllBits;
michael@0 125 }
michael@0 126 }
michael@0 127
michael@0 128 return *this;
michael@0 129 }
michael@0 130
michael@0 131 static inline int32_t
michael@0 132 FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end)
michael@0 133 {
michael@0 134 typedef Non8BitParameters<sizeof(size_t)> p;
michael@0 135 const size_t mask = p::mask();
michael@0 136 const uint32_t alignMask = p::alignMask();
michael@0 137 const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
michael@0 138 const int32_t len = end - str;
michael@0 139 int32_t i = 0;
michael@0 140
michael@0 141 // Align ourselves to a word boundary.
michael@0 142 int32_t alignLen =
michael@0 143 std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
michael@0 144 for (; i < alignLen; i++) {
michael@0 145 if (str[i] > 255)
michael@0 146 return i;
michael@0 147 }
michael@0 148
michael@0 149 // Check one word at a time.
michael@0 150 const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
michael@0 151 for (; i < wordWalkEnd; i += numUnicharsPerWord) {
michael@0 152 const size_t word = *reinterpret_cast<const size_t*>(str + i);
michael@0 153 if (word & mask)
michael@0 154 return i;
michael@0 155 }
michael@0 156
michael@0 157 // Take care of the remainder one character at a time.
michael@0 158 for (; i < len; i++) {
michael@0 159 if (str[i] > 255)
michael@0 160 return i;
michael@0 161 }
michael@0 162
michael@0 163 return -1;
michael@0 164 }
michael@0 165
#ifdef MOZILLA_MAY_SUPPORT_SSE2
// Declaration only: the SSE2 variant of the scan is defined outside this
// file and is used by FirstNon8Bit() when SSE2 is available.
namespace mozilla {
namespace SSE2 {

int32_t FirstNon8Bit(const char16_t *str, const char16_t *end);

} // namespace SSE2
} // namespace mozilla
#endif
michael@0 173
michael@0 174 /*
michael@0 175 * This function returns -1 if all characters in str are 8 bit characters.
michael@0 176 * Otherwise, it returns a value less than or equal to the index of the first
michael@0 177 * non-8bit character in str. For example, if first non-8bit character is at
michael@0 178 * position 25, it may return 25, or for example 24, or 16. But it guarantees
michael@0 179 * there is no non-8bit character before returned value.
michael@0 180 */
michael@0 181 static inline int32_t
michael@0 182 FirstNon8Bit(const char16_t *str, const char16_t *end)
michael@0 183 {
michael@0 184 #ifdef MOZILLA_MAY_SUPPORT_SSE2
michael@0 185 if (mozilla::supports_sse2()) {
michael@0 186 return mozilla::SSE2::FirstNon8Bit(str, end);
michael@0 187 }
michael@0 188 #endif
michael@0 189
michael@0 190 return FirstNon8BitUnvectorized(str, end);
michael@0 191 }
michael@0 192
michael@0 193 bool
michael@0 194 nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi)
michael@0 195 {
michael@0 196 ReleaseText();
michael@0 197
michael@0 198 if (aLength == 0) {
michael@0 199 return true;
michael@0 200 }
michael@0 201
michael@0 202 char16_t firstChar = *aBuffer;
michael@0 203 if (aLength == 1 && firstChar < 256) {
michael@0 204 m1b = sSingleCharSharedString + firstChar;
michael@0 205 mState.mInHeap = false;
michael@0 206 mState.mIs2b = false;
michael@0 207 mState.mLength = 1;
michael@0 208
michael@0 209 return true;
michael@0 210 }
michael@0 211
michael@0 212 const char16_t *ucp = aBuffer;
michael@0 213 const char16_t *uend = aBuffer + aLength;
michael@0 214
michael@0 215 // Check if we can use a shared string
michael@0 216 if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
michael@0 217 (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
michael@0 218 if (firstChar == ' ') {
michael@0 219 ++ucp;
michael@0 220 }
michael@0 221
michael@0 222 const char16_t* start = ucp;
michael@0 223 while (ucp < uend && *ucp == '\n') {
michael@0 224 ++ucp;
michael@0 225 }
michael@0 226 const char16_t* endNewLine = ucp;
michael@0 227
michael@0 228 char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
michael@0 229 while (ucp < uend && *ucp == space) {
michael@0 230 ++ucp;
michael@0 231 }
michael@0 232
michael@0 233 if (ucp == uend &&
michael@0 234 endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
michael@0 235 ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
michael@0 236 char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
michael@0 237 m1b = strings[endNewLine - start];
michael@0 238
michael@0 239 // If we didn't find a space in the beginning, skip it now.
michael@0 240 if (firstChar != ' ') {
michael@0 241 ++m1b;
michael@0 242 }
michael@0 243
michael@0 244 mState.mInHeap = false;
michael@0 245 mState.mIs2b = false;
michael@0 246 mState.mLength = aLength;
michael@0 247
michael@0 248 return true;
michael@0 249 }
michael@0 250 }
michael@0 251
michael@0 252 // See if we need to store the data in ucs2 or not
michael@0 253 int32_t first16bit = FirstNon8Bit(ucp, uend);
michael@0 254
michael@0 255 if (first16bit != -1) { // aBuffer contains no non-8bit character
michael@0 256 // Use ucs2 storage because we have to
michael@0 257 size_t m2bSize = aLength * sizeof(char16_t);
michael@0 258 m2b = (char16_t *)moz_malloc(m2bSize);
michael@0 259 if (!m2b) {
michael@0 260 return false;
michael@0 261 }
michael@0 262 memcpy(m2b, aBuffer, m2bSize);
michael@0 263
michael@0 264 mState.mIs2b = true;
michael@0 265 if (aUpdateBidi) {
michael@0 266 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
michael@0 267 }
michael@0 268
michael@0 269 } else {
michael@0 270 // Use 1 byte storage because we can
michael@0 271 char* buff = (char *)moz_malloc(aLength * sizeof(char));
michael@0 272 if (!buff) {
michael@0 273 return false;
michael@0 274 }
michael@0 275
michael@0 276 // Copy data
michael@0 277 LossyConvertEncoding16to8 converter(buff);
michael@0 278 copy_string(aBuffer, aBuffer+aLength, converter);
michael@0 279 m1b = buff;
michael@0 280 mState.mIs2b = false;
michael@0 281 }
michael@0 282
michael@0 283 // Setup our fields
michael@0 284 mState.mInHeap = true;
michael@0 285 mState.mLength = aLength;
michael@0 286
michael@0 287 return true;
michael@0 288 }
michael@0 289
michael@0 290 void
michael@0 291 nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount)
michael@0 292 {
michael@0 293 NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
michael@0 294 NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
michael@0 295
michael@0 296 if (aOffset < 0) {
michael@0 297 aOffset = 0;
michael@0 298 }
michael@0 299
michael@0 300 if (uint32_t(aOffset + aCount) > GetLength()) {
michael@0 301 aCount = mState.mLength - aOffset;
michael@0 302 }
michael@0 303
michael@0 304 if (aCount != 0) {
michael@0 305 if (mState.mIs2b) {
michael@0 306 memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount);
michael@0 307 } else {
michael@0 308 const char *cp = m1b + aOffset;
michael@0 309 const char *end = cp + aCount;
michael@0 310 LossyConvertEncoding8to16 converter(aDest);
michael@0 311 copy_string(cp, end, converter);
michael@0 312 }
michael@0 313 }
michael@0 314 }
michael@0 315
michael@0 316 bool
michael@0 317 nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi)
michael@0 318 {
michael@0 319 // This is a common case because some callsites create a textnode
michael@0 320 // with a value by creating the node and then calling AppendData.
michael@0 321 if (mState.mLength == 0) {
michael@0 322 return SetTo(aBuffer, aLength, aUpdateBidi);
michael@0 323 }
michael@0 324
michael@0 325 // Should we optimize for aData.Length() == 0?
michael@0 326
michael@0 327 if (mState.mIs2b) {
michael@0 328 // Already a 2-byte string so the result will be too
michael@0 329 char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t));
michael@0 330 if (!buff) {
michael@0 331 return false;
michael@0 332 }
michael@0 333
michael@0 334 memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
michael@0 335 mState.mLength += aLength;
michael@0 336 m2b = buff;
michael@0 337
michael@0 338 if (aUpdateBidi) {
michael@0 339 UpdateBidiFlag(aBuffer, aLength);
michael@0 340 }
michael@0 341
michael@0 342 return true;
michael@0 343 }
michael@0 344
michael@0 345 // Current string is a 1-byte string, check if the new data fits in one byte too.
michael@0 346 int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
michael@0 347
michael@0 348 if (first16bit != -1) { // aBuffer contains no non-8bit character
michael@0 349 // The old data was 1-byte, but the new is not so we have to expand it
michael@0 350 // all to 2-byte
michael@0 351 char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) *
michael@0 352 sizeof(char16_t));
michael@0 353 if (!buff) {
michael@0 354 return false;
michael@0 355 }
michael@0 356
michael@0 357 // Copy data into buff
michael@0 358 LossyConvertEncoding8to16 converter(buff);
michael@0 359 copy_string(m1b, m1b+mState.mLength, converter);
michael@0 360
michael@0 361 memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
michael@0 362 mState.mLength += aLength;
michael@0 363 mState.mIs2b = true;
michael@0 364
michael@0 365 if (mState.mInHeap) {
michael@0 366 moz_free(m2b);
michael@0 367 }
michael@0 368 m2b = buff;
michael@0 369
michael@0 370 mState.mInHeap = true;
michael@0 371
michael@0 372 if (aUpdateBidi) {
michael@0 373 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
michael@0 374 }
michael@0 375
michael@0 376 return true;
michael@0 377 }
michael@0 378
michael@0 379 // The new and the old data is all 1-byte
michael@0 380 char* buff;
michael@0 381 if (mState.mInHeap) {
michael@0 382 buff = (char*)moz_realloc(const_cast<char*>(m1b),
michael@0 383 (mState.mLength + aLength) * sizeof(char));
michael@0 384 if (!buff) {
michael@0 385 return false;
michael@0 386 }
michael@0 387 }
michael@0 388 else {
michael@0 389 buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char));
michael@0 390 if (!buff) {
michael@0 391 return false;
michael@0 392 }
michael@0 393
michael@0 394 memcpy(buff, m1b, mState.mLength);
michael@0 395 mState.mInHeap = true;
michael@0 396 }
michael@0 397
michael@0 398 // Copy aBuffer into buff.
michael@0 399 LossyConvertEncoding16to8 converter(buff + mState.mLength);
michael@0 400 copy_string(aBuffer, aBuffer + aLength, converter);
michael@0 401
michael@0 402 m1b = buff;
michael@0 403 mState.mLength += aLength;
michael@0 404
michael@0 405 return true;
michael@0 406 }
michael@0 407
michael@0 408 /* virtual */ size_t
michael@0 409 nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
michael@0 410 {
michael@0 411 if (Is2b()) {
michael@0 412 return aMallocSizeOf(m2b);
michael@0 413 }
michael@0 414
michael@0 415 if (mState.mInHeap) {
michael@0 416 return aMallocSizeOf(m1b);
michael@0 417 }
michael@0 418
michael@0 419 return 0;
michael@0 420 }
michael@0 421
michael@0 422 // To save time we only do this when we really want to know, not during
michael@0 423 // every allocation
michael@0 424 void
michael@0 425 nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)
michael@0 426 {
michael@0 427 if (mState.mIs2b && !mState.mIsBidi) {
michael@0 428 const char16_t* cp = aBuffer;
michael@0 429 const char16_t* end = cp + aLength;
michael@0 430 while (cp < end) {
michael@0 431 char16_t ch1 = *cp++;
michael@0 432 uint32_t utf32Char = ch1;
michael@0 433 if (NS_IS_HIGH_SURROGATE(ch1) &&
michael@0 434 cp < end &&
michael@0 435 NS_IS_LOW_SURROGATE(*cp)) {
michael@0 436 char16_t ch2 = *cp++;
michael@0 437 utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
michael@0 438 }
michael@0 439 if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {
michael@0 440 mState.mIsBidi = true;
michael@0 441 break;
michael@0 442 }
michael@0 443 }
michael@0 444 }
michael@0 445 }

mercurial