|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* |
|
7 * A class which represents a fragment of text (eg inside a text |
|
8 * node); if only codepoints below 256 are used, the text is stored as |
|
9 * a char*; otherwise the text is stored as a char16_t* |
|
10 */ |
|
11 |
|
12 #include "nsTextFragment.h" |
|
13 #include "nsCRT.h" |
|
14 #include "nsReadableUtils.h" |
|
15 #include "nsMemory.h" |
|
16 #include "nsBidiUtils.h" |
|
17 #include "nsUnicharUtils.h" |
|
18 #include "nsUTF8Utils.h" |
|
19 #include "mozilla/MemoryReporting.h" |
|
20 #include "mozilla/SSE.h" |
|
21 #include "nsTextFragmentImpl.h" |
|
22 #include <algorithm> |
|
23 |
|
// Number of trailing whitespace characters (spaces or tabs) stored in each
// shared whitespace string.
#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
// Largest number of newlines stored in a shared whitespace string.
#define TEXTFRAG_MAX_NEWLINES 7

// Static buffers used for common fragments; populated by
// nsTextFragment::Init().  Entry [i] of the two whitespace tables holds one
// space, i newlines, then TEXTFRAG_WHITE_AFTER_NEWLINE spaces (or tabs).
static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
// sSingleCharSharedString[c] holds the character c, so a one-character 8-bit
// fragment can point straight into this table.
static char sSingleCharSharedString[256];
|
31 |
|
32 // static |
|
33 nsresult |
|
34 nsTextFragment::Init() |
|
35 { |
|
36 // Create whitespace strings |
|
37 uint32_t i; |
|
38 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { |
|
39 sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; |
|
40 sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; |
|
41 NS_ENSURE_TRUE(sSpaceSharedString[i] && sTabSharedString[i], |
|
42 NS_ERROR_OUT_OF_MEMORY); |
|
43 sSpaceSharedString[i][0] = ' '; |
|
44 sTabSharedString[i][0] = ' '; |
|
45 uint32_t j; |
|
46 for (j = 1; j < 1 + i; ++j) { |
|
47 sSpaceSharedString[i][j] = '\n'; |
|
48 sTabSharedString[i][j] = '\n'; |
|
49 } |
|
50 for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) { |
|
51 sSpaceSharedString[i][j] = ' '; |
|
52 sTabSharedString[i][j] = '\t'; |
|
53 } |
|
54 } |
|
55 |
|
56 // Create single-char strings |
|
57 for (i = 0; i < 256; ++i) { |
|
58 sSingleCharSharedString[i] = i; |
|
59 } |
|
60 |
|
61 return NS_OK; |
|
62 } |
|
63 |
|
64 // static |
|
65 void |
|
66 nsTextFragment::Shutdown() |
|
67 { |
|
68 uint32_t i; |
|
69 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { |
|
70 delete [] sSpaceSharedString[i]; |
|
71 delete [] sTabSharedString[i]; |
|
72 sSpaceSharedString[i] = nullptr; |
|
73 sTabSharedString[i] = nullptr; |
|
74 } |
|
75 } |
|
76 |
|
77 nsTextFragment::~nsTextFragment() |
|
78 { |
|
79 ReleaseText(); |
|
80 MOZ_COUNT_DTOR(nsTextFragment); |
|
81 } |
|
82 |
|
83 void |
|
84 nsTextFragment::ReleaseText() |
|
85 { |
|
86 if (mState.mLength && m1b && mState.mInHeap) { |
|
87 moz_free(m2b); // m1b == m2b as far as moz_free is concerned |
|
88 } |
|
89 |
|
90 m1b = nullptr; |
|
91 mState.mIsBidi = false; |
|
92 |
|
93 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits; |
|
94 mAllBits = 0; |
|
95 } |
|
96 |
|
97 nsTextFragment& |
|
98 nsTextFragment::operator=(const nsTextFragment& aOther) |
|
99 { |
|
100 ReleaseText(); |
|
101 |
|
102 if (aOther.mState.mLength) { |
|
103 if (!aOther.mState.mInHeap) { |
|
104 m1b = aOther.m1b; // This will work even if aOther is using m2b |
|
105 } |
|
106 else { |
|
107 size_t m2bSize = aOther.mState.mLength * |
|
108 (aOther.mState.mIs2b ? sizeof(char16_t) : sizeof(char)); |
|
109 |
|
110 m2b = static_cast<char16_t*>(moz_malloc(m2bSize)); |
|
111 if (m2b) { |
|
112 memcpy(m2b, aOther.m2b, m2bSize); |
|
113 } else { |
|
114 // allocate a buffer for a single REPLACEMENT CHARACTER |
|
115 m2b = static_cast<char16_t*>(moz_xmalloc(sizeof(char16_t))); |
|
116 m2b[0] = 0xFFFD; // REPLACEMENT CHARACTER |
|
117 mState.mIs2b = true; |
|
118 mState.mInHeap = true; |
|
119 mState.mLength = 1; |
|
120 } |
|
121 } |
|
122 |
|
123 if (m1b) { |
|
124 mAllBits = aOther.mAllBits; |
|
125 } |
|
126 } |
|
127 |
|
128 return *this; |
|
129 } |
|
130 |
|
131 static inline int32_t |
|
132 FirstNon8BitUnvectorized(const char16_t *str, const char16_t *end) |
|
133 { |
|
134 typedef Non8BitParameters<sizeof(size_t)> p; |
|
135 const size_t mask = p::mask(); |
|
136 const uint32_t alignMask = p::alignMask(); |
|
137 const uint32_t numUnicharsPerWord = p::numUnicharsPerWord(); |
|
138 const int32_t len = end - str; |
|
139 int32_t i = 0; |
|
140 |
|
141 // Align ourselves to a word boundary. |
|
142 int32_t alignLen = |
|
143 std::min(len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t))); |
|
144 for (; i < alignLen; i++) { |
|
145 if (str[i] > 255) |
|
146 return i; |
|
147 } |
|
148 |
|
149 // Check one word at a time. |
|
150 const int32_t wordWalkEnd = ((len - i) / numUnicharsPerWord) * numUnicharsPerWord; |
|
151 for (; i < wordWalkEnd; i += numUnicharsPerWord) { |
|
152 const size_t word = *reinterpret_cast<const size_t*>(str + i); |
|
153 if (word & mask) |
|
154 return i; |
|
155 } |
|
156 |
|
157 // Take care of the remainder one character at a time. |
|
158 for (; i < len; i++) { |
|
159 if (str[i] > 255) |
|
160 return i; |
|
161 } |
|
162 |
|
163 return -1; |
|
164 } |
|
165 |
|
166 #ifdef MOZILLA_MAY_SUPPORT_SSE2 |
|
167 namespace mozilla { |
|
168 namespace SSE2 { |
|
169 int32_t FirstNon8Bit(const char16_t *str, const char16_t *end); |
|
170 } |
|
171 } |
|
172 #endif |
|
173 |
|
174 /* |
|
175 * This function returns -1 if all characters in str are 8 bit characters. |
|
176 * Otherwise, it returns a value less than or equal to the index of the first |
|
177 * non-8bit character in str. For example, if first non-8bit character is at |
|
178 * position 25, it may return 25, or for example 24, or 16. But it guarantees |
|
179 * there is no non-8bit character before returned value. |
|
180 */ |
|
181 static inline int32_t |
|
182 FirstNon8Bit(const char16_t *str, const char16_t *end) |
|
183 { |
|
184 #ifdef MOZILLA_MAY_SUPPORT_SSE2 |
|
185 if (mozilla::supports_sse2()) { |
|
186 return mozilla::SSE2::FirstNon8Bit(str, end); |
|
187 } |
|
188 #endif |
|
189 |
|
190 return FirstNon8BitUnvectorized(str, end); |
|
191 } |
|
192 |
|
193 bool |
|
194 nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi) |
|
195 { |
|
196 ReleaseText(); |
|
197 |
|
198 if (aLength == 0) { |
|
199 return true; |
|
200 } |
|
201 |
|
202 char16_t firstChar = *aBuffer; |
|
203 if (aLength == 1 && firstChar < 256) { |
|
204 m1b = sSingleCharSharedString + firstChar; |
|
205 mState.mInHeap = false; |
|
206 mState.mIs2b = false; |
|
207 mState.mLength = 1; |
|
208 |
|
209 return true; |
|
210 } |
|
211 |
|
212 const char16_t *ucp = aBuffer; |
|
213 const char16_t *uend = aBuffer + aLength; |
|
214 |
|
215 // Check if we can use a shared string |
|
216 if (aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES && |
|
217 (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) { |
|
218 if (firstChar == ' ') { |
|
219 ++ucp; |
|
220 } |
|
221 |
|
222 const char16_t* start = ucp; |
|
223 while (ucp < uend && *ucp == '\n') { |
|
224 ++ucp; |
|
225 } |
|
226 const char16_t* endNewLine = ucp; |
|
227 |
|
228 char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' '; |
|
229 while (ucp < uend && *ucp == space) { |
|
230 ++ucp; |
|
231 } |
|
232 |
|
233 if (ucp == uend && |
|
234 endNewLine - start <= TEXTFRAG_MAX_NEWLINES && |
|
235 ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) { |
|
236 char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString; |
|
237 m1b = strings[endNewLine - start]; |
|
238 |
|
239 // If we didn't find a space in the beginning, skip it now. |
|
240 if (firstChar != ' ') { |
|
241 ++m1b; |
|
242 } |
|
243 |
|
244 mState.mInHeap = false; |
|
245 mState.mIs2b = false; |
|
246 mState.mLength = aLength; |
|
247 |
|
248 return true; |
|
249 } |
|
250 } |
|
251 |
|
252 // See if we need to store the data in ucs2 or not |
|
253 int32_t first16bit = FirstNon8Bit(ucp, uend); |
|
254 |
|
255 if (first16bit != -1) { // aBuffer contains no non-8bit character |
|
256 // Use ucs2 storage because we have to |
|
257 size_t m2bSize = aLength * sizeof(char16_t); |
|
258 m2b = (char16_t *)moz_malloc(m2bSize); |
|
259 if (!m2b) { |
|
260 return false; |
|
261 } |
|
262 memcpy(m2b, aBuffer, m2bSize); |
|
263 |
|
264 mState.mIs2b = true; |
|
265 if (aUpdateBidi) { |
|
266 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); |
|
267 } |
|
268 |
|
269 } else { |
|
270 // Use 1 byte storage because we can |
|
271 char* buff = (char *)moz_malloc(aLength * sizeof(char)); |
|
272 if (!buff) { |
|
273 return false; |
|
274 } |
|
275 |
|
276 // Copy data |
|
277 LossyConvertEncoding16to8 converter(buff); |
|
278 copy_string(aBuffer, aBuffer+aLength, converter); |
|
279 m1b = buff; |
|
280 mState.mIs2b = false; |
|
281 } |
|
282 |
|
283 // Setup our fields |
|
284 mState.mInHeap = true; |
|
285 mState.mLength = aLength; |
|
286 |
|
287 return true; |
|
288 } |
|
289 |
|
290 void |
|
291 nsTextFragment::CopyTo(char16_t *aDest, int32_t aOffset, int32_t aCount) |
|
292 { |
|
293 NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!"); |
|
294 NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!"); |
|
295 |
|
296 if (aOffset < 0) { |
|
297 aOffset = 0; |
|
298 } |
|
299 |
|
300 if (uint32_t(aOffset + aCount) > GetLength()) { |
|
301 aCount = mState.mLength - aOffset; |
|
302 } |
|
303 |
|
304 if (aCount != 0) { |
|
305 if (mState.mIs2b) { |
|
306 memcpy(aDest, m2b + aOffset, sizeof(char16_t) * aCount); |
|
307 } else { |
|
308 const char *cp = m1b + aOffset; |
|
309 const char *end = cp + aCount; |
|
310 LossyConvertEncoding8to16 converter(aDest); |
|
311 copy_string(cp, end, converter); |
|
312 } |
|
313 } |
|
314 } |
|
315 |
|
316 bool |
|
317 nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi) |
|
318 { |
|
319 // This is a common case because some callsites create a textnode |
|
320 // with a value by creating the node and then calling AppendData. |
|
321 if (mState.mLength == 0) { |
|
322 return SetTo(aBuffer, aLength, aUpdateBidi); |
|
323 } |
|
324 |
|
325 // Should we optimize for aData.Length() == 0? |
|
326 |
|
327 if (mState.mIs2b) { |
|
328 // Already a 2-byte string so the result will be too |
|
329 char16_t* buff = (char16_t*)moz_realloc(m2b, (mState.mLength + aLength) * sizeof(char16_t)); |
|
330 if (!buff) { |
|
331 return false; |
|
332 } |
|
333 |
|
334 memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t)); |
|
335 mState.mLength += aLength; |
|
336 m2b = buff; |
|
337 |
|
338 if (aUpdateBidi) { |
|
339 UpdateBidiFlag(aBuffer, aLength); |
|
340 } |
|
341 |
|
342 return true; |
|
343 } |
|
344 |
|
345 // Current string is a 1-byte string, check if the new data fits in one byte too. |
|
346 int32_t first16bit = FirstNon8Bit(aBuffer, aBuffer + aLength); |
|
347 |
|
348 if (first16bit != -1) { // aBuffer contains no non-8bit character |
|
349 // The old data was 1-byte, but the new is not so we have to expand it |
|
350 // all to 2-byte |
|
351 char16_t* buff = (char16_t*)moz_malloc((mState.mLength + aLength) * |
|
352 sizeof(char16_t)); |
|
353 if (!buff) { |
|
354 return false; |
|
355 } |
|
356 |
|
357 // Copy data into buff |
|
358 LossyConvertEncoding8to16 converter(buff); |
|
359 copy_string(m1b, m1b+mState.mLength, converter); |
|
360 |
|
361 memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(char16_t)); |
|
362 mState.mLength += aLength; |
|
363 mState.mIs2b = true; |
|
364 |
|
365 if (mState.mInHeap) { |
|
366 moz_free(m2b); |
|
367 } |
|
368 m2b = buff; |
|
369 |
|
370 mState.mInHeap = true; |
|
371 |
|
372 if (aUpdateBidi) { |
|
373 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); |
|
374 } |
|
375 |
|
376 return true; |
|
377 } |
|
378 |
|
379 // The new and the old data is all 1-byte |
|
380 char* buff; |
|
381 if (mState.mInHeap) { |
|
382 buff = (char*)moz_realloc(const_cast<char*>(m1b), |
|
383 (mState.mLength + aLength) * sizeof(char)); |
|
384 if (!buff) { |
|
385 return false; |
|
386 } |
|
387 } |
|
388 else { |
|
389 buff = (char*)moz_malloc((mState.mLength + aLength) * sizeof(char)); |
|
390 if (!buff) { |
|
391 return false; |
|
392 } |
|
393 |
|
394 memcpy(buff, m1b, mState.mLength); |
|
395 mState.mInHeap = true; |
|
396 } |
|
397 |
|
398 // Copy aBuffer into buff. |
|
399 LossyConvertEncoding16to8 converter(buff + mState.mLength); |
|
400 copy_string(aBuffer, aBuffer + aLength, converter); |
|
401 |
|
402 m1b = buff; |
|
403 mState.mLength += aLength; |
|
404 |
|
405 return true; |
|
406 } |
|
407 |
|
408 /* virtual */ size_t |
|
409 nsTextFragment::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const |
|
410 { |
|
411 if (Is2b()) { |
|
412 return aMallocSizeOf(m2b); |
|
413 } |
|
414 |
|
415 if (mState.mInHeap) { |
|
416 return aMallocSizeOf(m1b); |
|
417 } |
|
418 |
|
419 return 0; |
|
420 } |
|
421 |
|
422 // To save time we only do this when we really want to know, not during |
|
423 // every allocation |
|
424 void |
|
425 nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) |
|
426 { |
|
427 if (mState.mIs2b && !mState.mIsBidi) { |
|
428 const char16_t* cp = aBuffer; |
|
429 const char16_t* end = cp + aLength; |
|
430 while (cp < end) { |
|
431 char16_t ch1 = *cp++; |
|
432 uint32_t utf32Char = ch1; |
|
433 if (NS_IS_HIGH_SURROGATE(ch1) && |
|
434 cp < end && |
|
435 NS_IS_LOW_SURROGATE(*cp)) { |
|
436 char16_t ch2 = *cp++; |
|
437 utf32Char = SURROGATE_TO_UCS4(ch1, ch2); |
|
438 } |
|
439 if (UTF32_CHAR_IS_BIDI(utf32Char) || IsBidiControl(utf32Char)) { |
|
440 mState.mIsBidi = true; |
|
441 break; |
|
442 } |
|
443 } |
|
444 } |
|
445 } |