Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 /*
7 * A class which represents a fragment of text (eg inside a text
8 * node); if only codepoints below 256 are used, the text is stored as
9 * a char*; otherwise the text is stored as a char16_t*
10 */
12 #include "nsTextFragment.h"
13 #include "nsCRT.h"
14 #include "nsReadableUtils.h"
15 #include "nsMemory.h"
16 #include "nsBidiUtils.h"
17 #include "nsUnicharUtils.h"
18 #include "nsUTF8Utils.h"
19 #include "mozilla/MemoryReporting.h"
20 #include "mozilla/SSE.h"
21 #include "nsTextFragmentImpl.h"
22 #include <algorithm>
// Number of fill characters (spaces or tabs) that follow the newline run in
// each shared whitespace string.
#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
// Largest newline run for which a shared whitespace string exists.
#define TEXTFRAG_MAX_NEWLINES 7

// File-local buffers shared by fragments holding common text.  The two
// pointer tables are indexed by newline count (0..TEXTFRAG_MAX_NEWLINES);
// the char table holds one entry per 8-bit character value.
static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char sSingleCharSharedString[256];
32 // static
33 nsresult
34 nsTextFragment::Init()
35 {
36 // Create whitespace strings
37 uint32_t i;
38 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
39 sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
40 sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
41 NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i],
42 NS_ERROR_OUT_OF_MEMORY);
43 sSpaceSharedString[i][0] = ' ';
44 sTabSharedString[i][0] = ' ';
45 uint32_t j;
46 for (j = 1; j < 1 + i; ++j) {
47 sSpaceSharedString[i][j] = '\n';
48 sTabSharedString[i][j] = '\n';
49 }
50 for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
51 sSpaceSharedString[i][j] = ' ';
52 sTabSharedString[i][j] = '\t';
53 }
54 }
56 // Create single-char strings
57 for (i = 0; i < 256; ++i) {
58 sSingleCharSharedString[i] = i;
59 }
61 return NS_OK;
62 }
64 // static
65 void
66 nsTextFragment::Shutdown()
67 {
68 uint32_t i;
69 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
70 delete [] sSpaceSharedString[i];
71 delete [] sTabSharedString[i];
72 sSpaceSharedString[i] = nullptr;
73 sTabSharedString[i] = nullptr;
74 }
75 }
77 nsTextFragment::~nsTextFragment()
78 {
79 ReleaseText();
80 MOZ_COUNT_DTOR(nsTextFragment);
81 }
83 void
84 nsTextFragment::ReleaseText()
85 {
86 if (mState.mLength && m1b && mState.mInHeap) {
87 moz_free(m2b); // m1b == m2b as far as moz_free is concerned
88 }
90 m1b = nullptr;
91 mState.mIsBidi = false;
93 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
94 mAllBits = 0;
95 }
97 nsTextFragment&
98 nsTextFragment::operator=(const nsTextFragment& aOther)
99 {
100 ReleaseText();
102 if (aOther.mState.mLength) {
103 if (!aOther.mState.mInHeap) {
104 m1b = aOther.m1b; // This will work even if aOther is using m2b
105 }
106 else {
107 size_t m2bSize = aOther.mState.mLength *
108 (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char));
110 m2b = static_cast<char16_t*>(moz_malloc(m2bSize));
111 if (m2b) {
112 memcpy(m2b, aOther.m2b, m2bSize);
113 } else {
114 // allocate a buffer for a single REPLACEMENT CHARACTER
115 m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t)));
116 m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER
117 mState.mIs2b = true;
118 mState.mInHeap = true;
119 mState.mLength = 1;
120 }
121 }
123 if (m1b) {
124 mAllBits = aOther.mAllBits;
125 }
126 }
128 return *this;
129 }
131 static inline int32_t
132 FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end)
133 {
134 typedef Non8BitParameters<sizeof(size_t)> p;
135 const size_t mask = p::mask();
136 const uint32_t alignMask = p::alignMask();
137 const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
138 const int32_t len = end - str;
139 int32_t i = 0;
141 // Align ourselves to a word boundary.
142 int32_t alignLen =
143 std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
144 for (; i < alignLen; i++) {
145 if (str[i] > 255)
146 return i;
147 }
149 // Check one word at a time.
150 const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
151 for (; i < wordWalkEnd; i += numUnicharsPerWord) {
152 const size_t word = *reinterpret_cast<const size_t*>(str + i);
153 if (word & mask)
154 return i;
155 }
157 // Take care of the remainder one character at a time.
158 for (; i < len; i++) {
159 if (str[i] > 255)
160 return i;
161 }
163 return -1;
164 }
166 #ifdef MOZILLA_MAY_SUPPORT_SSE2
167 namespace mozilla {
168 namespace SSE2 {
169 int32_t FirstNon8Bit(const char16_t *str, const char16_t *end);
170 }
171 }
172 #endif
174 /*
175 * This function returns -1 if all characters in str are 8 bit characters.
176 * Otherwise, it returns a value less than or equal to the index of the first
177 * non-8bit character in str. For example, if first non-8bit character is at
178 * position 25, it may return 25, or for example 24, or 16. But it guarantees
179 * there is no non-8bit character before returned value.
180 */
181 static inline int32_t
182 FirstNon8Bit(const char16_t *str, const char16_t *end)
183 {
184 #ifdef MOZILLA_MAY_SUPPORT_SSE2
185 if (mozilla::supports_sse2()) {
186 return mozilla::SSE2::FirstNon8Bit(str, end);
187 }
188 #endif
190 return FirstNon8BitUnvectorized(str, end);
191 }
193 bool
194 nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi)
195 {
196 ReleaseText();
198 if (aLength == 0) {
199 return true;
200 }
202 char16_t firstChar = *aBuffer;
203 if (aLength == 1 && firstChar < 256) {
204 m1b = sSingleCharSharedString + firstChar;
205 mState.mInHeap = false;
206 mState.mIs2b = false;
207 mState.mLength = 1;
209 return true;
210 }
212 const char16_t *ucp = aBuffer;
213 const char16_t *uend = aBuffer + aLength;
215 // Check if we can use a shared string
216 if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
217 (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
218 if (firstChar == ' ') {
219 ++ucp;
220 }
222 const char16_t* start = ucp;
223 while (ucp < uend && *ucp == '\n') {
224 ++ucp;
225 }
226 const char16_t* endNewLine = ucp;
228 char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
229 while (ucp < uend && *ucp == space) {
230 ++ucp;
231 }
233 if (ucp == uend &&
234 endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
235 ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
236 char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
237 m1b = strings[endNewLine - start];
239 // If we didn't find a space in the beginning, skip it now.
240 if (firstChar != ' ') {
241 ++m1b;
242 }
244 mState.mInHeap = false;
245 mState.mIs2b = false;
246 mState.mLength = aLength;
248 return true;
249 }
250 }
252 // See if we need to store the data in ucs2 or not
253 int32_t first16bit = FirstNon8Bit(ucp, uend);
255 if (first16bit != -1) { // aBuffer contains no non-8bit character
256 // Use ucs2 storage because we have to
257 size_t m2bSize = aLength * sizeof(char16_t);
258 m2b = (char16_t *)moz_malloc(m2bSize);
259 if (!m2b) {
260 return false;
261 }
262 memcpy(m2b, aBuffer, m2bSize);
264 mState.mIs2b = true;
265 if (aUpdateBidi) {
266 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
267 }
269 } else {
270 // Use 1 byte storage because we can
271 char* buff = (char *)moz_malloc(aLength * sizeof(char));
272 if (!buff) {
273 return false;
274 }
276 // Copy data
277 LossyConvertEncoding16to8 converter(buff);
278 copy_string(aBuffer, aBuffer+aLength, converter);
279 m1b = buff;
280 mState.mIs2b = false;
281 }
283 // Setup our fields
284 mState.mInHeap = true;
285 mState.mLength = aLength;
287 return true;
288 }
290 void
291 nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount)
292 {
293 NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
294 NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
296 if (aOffset < 0) {
297 aOffset = 0;
298 }
300 if (uint32_t(aOffset + aCount) > GetLength()) {
301 aCount = mState.mLength - aOffset;
302 }
304 if (aCount != 0) {
305 if (mState.mIs2b) {
306 memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount);
307 } else {
308 const char *cp = m1b + aOffset;
309 const char *end = cp + aCount;
310 LossyConvertEncoding8to16 converter(aDest);
311 copy_string(cp, end, converter);
312 }
313 }
314 }
316 bool
317 nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi)
318 {
319 // This is a common case because some callsites create a textnode
320 // with a value by creating the node and then calling AppendData.
321 if (mState.mLength == 0) {
322 return SetTo(aBuffer, aLength, aUpdateBidi);
323 }
325 // Should we optimize for aData.Length() == 0?
327 if (mState.mIs2b) {
328 // Already a 2-byte string so the result will be too
329 char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t));
330 if (!buff) {
331 return false;
332 }
334 memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
335 mState.mLength += aLength;
336 m2b = buff;
338 if (aUpdateBidi) {
339 UpdateBidiFlag(aBuffer, aLength);
340 }
342 return true;
343 }
345 // Current string is a 1-byte string, check if the new data fits in one byte too.
346 int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength);
348 if (first16bit != -1) { // aBuffer contains no non-8bit character
349 // The old data was 1-byte, but the new is not so we have to expand it
350 // all to 2-byte
351 char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) *
352 sizeof(char16_t));
353 if (!buff) {
354 return false;
355 }
357 // Copy data into buff
358 LossyConvertEncoding8to16 converter(buff);
359 copy_string(m1b, m1b+mState.mLength, converter);
361 memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t));
362 mState.mLength += aLength;
363 mState.mIs2b = true;
365 if (mState.mInHeap) {
366 moz_free(m2b);
367 }
368 m2b = buff;
370 mState.mInHeap = true;
372 if (aUpdateBidi) {
373 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
374 }
376 return true;
377 }
379 // The new and the old data is all 1-byte
380 char* buff;
381 if (mState.mInHeap) {
382 buff = (char*)moz_realloc(const_cast<char*>(m1b),
383 (mState.mLength + aLength) * sizeof(char));
384 if (!buff) {
385 return false;
386 }
387 }
388 else {
389 buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char));
390 if (!buff) {
391 return false;
392 }
394 memcpy(buff, m1b, mState.mLength);
395 mState.mInHeap = true;
396 }
398 // Copy aBuffer into buff.
399 LossyConvertEncoding16to8 converter(buff + mState.mLength);
400 copy_string(aBuffer, aBuffer + aLength, converter);
402 m1b = buff;
403 mState.mLength += aLength;
405 return true;
406 }
408 /* virtual */ size_t
409 nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
410 {
411 if (Is2b()) {
412 return aMallocSizeOf(m2b);
413 }
415 if (mState.mInHeap) {
416 return aMallocSizeOf(m1b);
417 }
419 return 0;
420 }
422 // To save time we only do this when we really want to know, not during
423 // every allocation
424 void
425 nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength)
426 {
427 if (mState.mIs2b && !mState.mIsBidi) {
428 const char16_t* cp = aBuffer;
429 const char16_t* end = cp + aLength;
430 while (cp < end) {
431 char16_t ch1 = *cp++;
432 uint32_t utf32Char = ch1;
433 if (NS_IS_HIGH_SURROGATE(ch1) &&
434 cp < end &&
435 NS_IS_LOW_SURROGATE(*cp)) {
436 char16_t ch2 = *cp++;
437 utf32Char = SURROGATE_TO_UCS4(ch1, ch2);
438 }
439 if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) {
440 mState.mIsBidi = true;
441 break;
442 }
443 }
444 }
445 }