michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * A class which represents a fragment of text (eg inside a text michael@0: * node); if only codepoints below 256 are used, the text is stored as michael@0: * a char*; otherwise the text is stored as a char16_t* michael@0: */ michael@0: michael@0: #include "nsTextFragment.h" michael@0: #include "nsCRT.h" michael@0: #include "nsReadableUtils.h" michael@0: #include "nsMemory.h" michael@0: #include "nsBidiUtils.h" michael@0: #include "nsUnicharUtils.h" michael@0: #include "nsUTF8Utils.h" michael@0: #include "mozilla/MemoryReporting.h" michael@0: #include "mozilla/SSE.h" michael@0: #include "nsTextFragmentImpl.h" michael@0: #include michael@0: michael@0: #define TEXTFRAG_WHITE_AFTER_NEWLINE 50 michael@0: #define TEXTFRAG_MAX_NEWLINES 7 michael@0: michael@0: // Static buffer used for common fragments michael@0: static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1]; michael@0: static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1]; michael@0: static char sSingleCharSharedString[256]; michael@0: michael@0: // static michael@0: nsresult michael@0: nsTextFragment::Init() michael@0: { michael@0: // Create whitespace strings michael@0: uint32_t i; michael@0: for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { michael@0: sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; michael@0: sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; michael@0: NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i], michael@0: NS_ERROR_OUT_OF_MEMORY); michael@0: sSpaceSharedString[i][0] = ' '; michael@0: sTabSharedString[i][0] = ' '; michael@0: uint32_t j; michael@0: for (j = 1; j < 1 + i; ++j) { michael@0: sSpaceSharedString[i][j] = '\n'; michael@0: sTabSharedString[i][j] = '\n'; michael@0: } michael@0: for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) { michael@0: sSpaceSharedString[i][j] = ' '; michael@0: sTabSharedString[i][j] = '\t'; michael@0: } michael@0: } michael@0: michael@0: // Create single-char strings michael@0: for (i = 0; i < 256; ++i) { michael@0: sSingleCharSharedString[i] = i; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: // static michael@0: void michael@0: nsTextFragment::Shutdown() michael@0: { michael@0: uint32_t i; michael@0: for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { michael@0: delete [] sSpaceSharedString[i]; michael@0: delete [] sTabSharedString[i]; michael@0: sSpaceSharedString[i] = nullptr; michael@0: sTabSharedString[i] = nullptr; michael@0: } michael@0: } michael@0: michael@0: nsTextFragment::~nsTextFragment() michael@0: { michael@0: ReleaseText(); michael@0: MOZ_COUNT_DTOR(nsTextFragment); michael@0: } michael@0: michael@0: void michael@0: nsTextFragment::ReleaseText() michael@0: { michael@0: if (mState.mLength && m1b && mState.mInHeap) { michael@0: moz_free(m2b); // m1b == m2b as far as moz_free is concerned michael@0: } michael@0: michael@0: m1b = nullptr; michael@0: mState.mIsBidi = false; michael@0: michael@0: // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits; michael@0: mAllBits = 0; michael@0: } michael@0: michael@0: nsTextFragment& michael@0: nsTextFragment::operator=(const nsTextFragment& aOther) michael@0: { michael@0: ReleaseText(); michael@0: michael@0: if (aOther.mState.mLength) { michael@0: if (!aOther.mState.mInHeap) { michael@0: m1b = aOther.m1b; // This will work even if aOther is using m2b michael@0: } michael@0: else { michael@0: size_t m2bSize = aOther.mState.mLength * michael@0: (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char)); michael@0: michael@0: m2b = static_cast(moz_malloc(m2bSize)); michael@0: if (m2b) { michael@0: memcpy(m2b, aOther.m2b, m2bSize); michael@0: } else { michael@0: // allocate a buffer for a single REPLACEMENT CHARACTER michael@0: m2b = static_cast(moz_xmalloc(sizeof(char16_t))); michael@0: m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER michael@0: mState.mIs2b = true; michael@0: mState.mInHeap = true; michael@0: mState.mLength = 1; michael@0: } michael@0: } michael@0: michael@0: if (m1b) { michael@0: mAllBits = aOther.mAllBits; michael@0: } michael@0: } michael@0: michael@0: return *this; michael@0: } michael@0: michael@0: static inline int32_t michael@0: FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end) michael@0: { michael@0: typedef Non8BitParameters p; michael@0: const size_t mask = p::mask(); michael@0: const uint32_t alignMask = p::alignMask(); michael@0: const uint32_t numUnicharsPerWord = p::numUnicharsPerWord(); michael@0: const int32_t len = end - str; michael@0: int32_t i = 0; michael@0: michael@0: // Align ourselves to a word boundary. michael@0: int32_t alignLen = michael@0: std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t))); michael@0: for (; i < alignLen; i++) { michael@0: if (str[i] > 255) michael@0: return i; michael@0: } michael@0: michael@0: // Check one word at a time. michael@0: const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord; michael@0: for (; i < wordWalkEnd; i += numUnicharsPerWord) { michael@0: const size_t word = *reinterpret_cast(str + i); michael@0: if (word & mask) michael@0: return i; michael@0: } michael@0: michael@0: // Take care of the remainder one character at a time. michael@0: for (; i < len; i++) { michael@0: if (str[i] > 255) michael@0: return i; michael@0: } michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: #ifdef MOZILLA_MAY_SUPPORT_SSE2 michael@0: namespace mozilla { michael@0: namespace SSE2 { michael@0: int32_t FirstNon8Bit(const char16_t *str, const char16_t *end); michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: /* michael@0: * This function returns -1 if all characters in str are 8 bit characters. michael@0: * Otherwise, it returns a value less than or equal to the index of the first michael@0: * non-8bit character in str. For example, if first non-8bit character is at michael@0: * position 25, it may return 25, or for example 24, or 16. But it guarantees michael@0: * there is no non-8bit character before returned value. michael@0: */ michael@0: static inline int32_t michael@0: FirstNon8Bit(const char16_t *str, const char16_t *end) michael@0: { michael@0: #ifdef MOZILLA_MAY_SUPPORT_SSE2 michael@0: if (mozilla::supports_sse2()) { michael@0: return mozilla::SSE2::FirstNon8Bit(str, end); michael@0: } michael@0: #endif michael@0: michael@0: return FirstNon8BitUnvectorized(str, end); michael@0: } michael@0: michael@0: bool michael@0: nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi) michael@0: { michael@0: ReleaseText(); michael@0: michael@0: if (aLength == 0) { michael@0: return true; michael@0: } michael@0: michael@0: char16_t firstChar = *aBuffer; michael@0: if (aLength == 1 && firstChar < 256) { michael@0: m1b = sSingleCharSharedString + firstChar; michael@0: mState.mInHeap = false; michael@0: mState.mIs2b = false; michael@0: mState.mLength = 1; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: const char16_t *ucp = aBuffer; michael@0: const char16_t *uend = aBuffer + aLength; michael@0: michael@0: // Check if we can use a shared string michael@0: if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES && michael@0: (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) { michael@0: if (firstChar == ' ') { michael@0: ++ucp; michael@0: } michael@0: michael@0: const char16_t* start = ucp; michael@0: while (ucp < uend && *ucp == '\n') { michael@0: ++ucp; michael@0: } michael@0: const char16_t* endNewLine = ucp; michael@0: michael@0: char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' '; michael@0: while (ucp < uend && *ucp == space) { michael@0: ++ucp; michael@0: } michael@0: michael@0: if (ucp == uend && michael@0: endNewLine - start <= TEXTFRAG_MAX_NEWLINES && michael@0: ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) { michael@0: char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString; michael@0: m1b = strings[endNewLine - start]; michael@0: michael@0: // If we didn't find a space in the beginning, skip it now. michael@0: if (firstChar != ' ') { michael@0: ++m1b; michael@0: } michael@0: michael@0: mState.mInHeap = false; michael@0: mState.mIs2b = false; michael@0: mState.mLength = aLength; michael@0: michael@0: return true; michael@0: } michael@0: } michael@0: michael@0: // See if we need to store the data in ucs2 or not michael@0: int32_t first16bit = FirstNon8Bit(ucp, uend); michael@0: michael@0: if (first16bit != -1) { // aBuffer contains no non-8bit character michael@0: // Use ucs2 storage because we have to michael@0: size_t m2bSize = aLength * sizeof(char16_t); michael@0: m2b = (char16_t *)moz_malloc(m2bSize); michael@0: if (!m2b) { michael@0: return false; michael@0: } michael@0: memcpy(m2b, aBuffer, m2bSize); michael@0: michael@0: mState.mIs2b = true; michael@0: if (aUpdateBidi) { michael@0: UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); michael@0: } michael@0: michael@0: } else { michael@0: // Use 1 byte storage because we can michael@0: char* buff = (char *)moz_malloc(aLength * sizeof(char)); michael@0: if (!buff) { michael@0: return false; michael@0: } michael@0: michael@0: // Copy data michael@0: LossyConvertEncoding16to8 converter(buff); michael@0: copy_string(aBuffer, aBuffer+aLength, converter); michael@0: m1b = buff; michael@0: mState.mIs2b = false; michael@0: } michael@0: michael@0: // Setup our fields michael@0: mState.mInHeap = true; michael@0: mState.mLength = aLength; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount) michael@0: { michael@0: NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!"); michael@0: NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!"); michael@0: michael@0: if (aOffset < 0) { michael@0: aOffset = 0; michael@0: } michael@0: michael@0: if (uint32_t(aOffset + aCount) > GetLength()) { michael@0: aCount = mState.mLength - aOffset; michael@0: } michael@0: michael@0: if (aCount != 0) { michael@0: if (mState.mIs2b) { michael@0: memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount); michael@0: } else { michael@0: const char *cp = m1b + aOffset; michael@0: const char *end = cp + aCount; michael@0: LossyConvertEncoding8to16 converter(aDest); michael@0: copy_string(cp, end, converter); michael@0: } michael@0: } michael@0: } michael@0: michael@0: bool michael@0: nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi) michael@0: { michael@0: // This is a common case because some callsites create a textnode michael@0: // with a value by creating the node and then calling AppendData. michael@0: if (mState.mLength == 0) { michael@0: return SetTo(aBuffer, aLength, aUpdateBidi); michael@0: } michael@0: michael@0: // Should we optimize for aData.Length() == 0? michael@0: michael@0: if (mState.mIs2b) { michael@0: // Already a 2-byte string so the result will be too michael@0: char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t)); michael@0: if (!buff) { michael@0: return false; michael@0: } michael@0: michael@0: memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t)); michael@0: mState.mLength += aLength; michael@0: m2b = buff; michael@0: michael@0: if (aUpdateBidi) { michael@0: UpdateBidiFlag(aBuffer, aLength); michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // Current string is a 1-byte string, check if the new data fits in one byte too. michael@0: int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength); michael@0: michael@0: if (first16bit != -1) { // aBuffer contains no non-8bit character michael@0: // The old data was 1-byte, but the new is not so we have to expand it michael@0: // all to 2-byte michael@0: char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) * michael@0: sizeof(char16_t)); michael@0: if (!buff) { michael@0: return false; michael@0: } michael@0: michael@0: // Copy data into buff michael@0: LossyConvertEncoding8to16 converter(buff); michael@0: copy_string(m1b, m1b+mState.mLength, converter); michael@0: michael@0: memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t)); michael@0: mState.mLength += aLength; michael@0: mState.mIs2b = true; michael@0: michael@0: if (mState.mInHeap) { michael@0: moz_free(m2b); michael@0: } michael@0: m2b = buff; michael@0: michael@0: mState.mInHeap = true; michael@0: michael@0: if (aUpdateBidi) { michael@0: UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // The new and the old data is all 1-byte michael@0: char* buff; michael@0: if (mState.mInHeap) { michael@0: buff = (char*)moz_realloc(const_cast(m1b), michael@0: (mState.mLength + aLength) * sizeof(char)); michael@0: if (!buff) { michael@0: return false; michael@0: } michael@0: } michael@0: else { michael@0: buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char)); michael@0: if (!buff) { michael@0: return false; michael@0: } michael@0: michael@0: memcpy(buff, m1b, mState.mLength); michael@0: mState.mInHeap = true; michael@0: } michael@0: michael@0: // Copy aBuffer into buff. michael@0: LossyConvertEncoding16to8 converter(buff + mState.mLength); michael@0: copy_string(aBuffer, aBuffer + aLength, converter); michael@0: michael@0: m1b = buff; michael@0: mState.mLength += aLength; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: /* virtual */ size_t michael@0: nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const michael@0: { michael@0: if (Is2b()) { michael@0: return aMallocSizeOf(m2b); michael@0: } michael@0: michael@0: if (mState.mInHeap) { michael@0: return aMallocSizeOf(m1b); michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: // To save time we only do this when we really want to know, not during michael@0: // every allocation michael@0: void michael@0: nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) michael@0: { michael@0: if (mState.mIs2b && !mState.mIsBidi) { michael@0: const char16_t* cp = aBuffer; michael@0: const char16_t* end = cp + aLength; michael@0: while (cp < end) { michael@0: char16_t ch1 = *cp++; michael@0: uint32_t utf32Char = ch1; michael@0: if (NS_IS_HIGH_SURROGATE(ch1) && michael@0: cp < end && michael@0: NS_IS_LOW_SURROGATE(*cp)) { michael@0: char16_t ch2 = *cp++; michael@0: utf32Char = SURROGATE_TO_UCS4(ch1, ch2); michael@0: } michael@0: if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) { michael@0: mState.mIsBidi = true; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: }