|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsLineBreaker.h" |
|
7 #include "nsContentUtils.h" |
|
8 #include "nsILineBreaker.h" |
|
9 #include "gfxFont.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values |
|
10 #include "nsHyphenationManager.h" |
|
11 #include "nsHyphenator.h" |
|
12 #include "mozilla/gfx/2D.h" |
|
13 |
|
14 nsLineBreaker::nsLineBreaker() |
|
15 : mCurrentWordLanguage(nullptr), |
|
16 mCurrentWordContainsMixedLang(false), |
|
17 mCurrentWordContainsComplexChar(false), |
|
18 mAfterBreakableSpace(false), mBreakHere(false), |
|
19 mWordBreak(nsILineBreaker::kWordBreak_Normal) |
|
20 { |
|
21 } |
|
22 |
|
23 nsLineBreaker::~nsLineBreaker() |
|
24 { |
|
25 NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!"); |
|
26 } |
|
27 |
|
28 static void |
|
29 SetupCapitalization(const char16_t* aWord, uint32_t aLength, |
|
30 bool* aCapitalization) |
|
31 { |
|
32 // Capitalize the first alphanumeric character after a space or start |
|
33 // of the word. |
|
34 // The only space character a word can contain is NBSP. |
|
35 bool capitalizeNextChar = true; |
|
36 for (uint32_t i = 0; i < aLength; ++i) { |
|
37 uint32_t ch = aWord[i]; |
|
38 if (capitalizeNextChar) { |
|
39 if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength && |
|
40 NS_IS_LOW_SURROGATE(aWord[i + 1])) { |
|
41 ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); |
|
42 } |
|
43 if (nsContentUtils::IsAlphanumeric(ch)) { |
|
44 aCapitalization[i] = true; |
|
45 capitalizeNextChar = false; |
|
46 } |
|
47 if (!IS_IN_BMP(ch)) { |
|
48 ++i; |
|
49 } |
|
50 } |
|
51 if (ch == 0xA0 /*NBSP*/) { |
|
52 capitalizeNextChar = true; |
|
53 } |
|
54 } |
|
55 } |
|
56 |
|
57 nsresult |
|
58 nsLineBreaker::FlushCurrentWord() |
|
59 { |
|
60 uint32_t length = mCurrentWord.Length(); |
|
61 nsAutoTArray<uint8_t,4000> breakState; |
|
62 if (!breakState.AppendElements(length)) |
|
63 return NS_ERROR_OUT_OF_MEMORY; |
|
64 |
|
65 nsTArray<bool> capitalizationState; |
|
66 |
|
67 if (!mCurrentWordContainsComplexChar) { |
|
68 // For break-strict set everything internal to "break", otherwise |
|
69 // to "no break"! |
|
70 memset(breakState.Elements(), |
|
71 mWordBreak == nsILineBreaker::kWordBreak_BreakAll ? |
|
72 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : |
|
73 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, |
|
74 length*sizeof(uint8_t)); |
|
75 } else { |
|
76 nsContentUtils::LineBreaker()-> |
|
77 GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak, |
|
78 breakState.Elements()); |
|
79 } |
|
80 |
|
81 bool autoHyphenate = mCurrentWordLanguage && |
|
82 !mCurrentWordContainsMixedLang; |
|
83 uint32_t i; |
|
84 for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) { |
|
85 TextItem* ti = &mTextItems[i]; |
|
86 if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) { |
|
87 autoHyphenate = false; |
|
88 } |
|
89 } |
|
90 if (autoHyphenate) { |
|
91 nsRefPtr<nsHyphenator> hyphenator = |
|
92 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage); |
|
93 if (hyphenator) { |
|
94 FindHyphenationPoints(hyphenator, |
|
95 mCurrentWord.Elements(), |
|
96 mCurrentWord.Elements() + length, |
|
97 breakState.Elements()); |
|
98 } |
|
99 } |
|
100 |
|
101 uint32_t offset = 0; |
|
102 for (i = 0; i < mTextItems.Length(); ++i) { |
|
103 TextItem* ti = &mTextItems[i]; |
|
104 NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?"); |
|
105 |
|
106 if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) { |
|
107 breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; |
|
108 } |
|
109 if (ti->mFlags & BREAK_SUPPRESS_INSIDE) { |
|
110 uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0; |
|
111 memset(breakState.Elements() + offset + exclude, |
|
112 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, |
|
113 (ti->mLength - exclude)*sizeof(uint8_t)); |
|
114 } |
|
115 |
|
116 // Don't set the break state for the first character of the word, because |
|
117 // it was already set correctly earlier and we don't know what the true |
|
118 // value should be. |
|
119 uint32_t skipSet = i == 0 ? 1 : 0; |
|
120 if (ti->mSink) { |
|
121 ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet, |
|
122 breakState.Elements() + offset + skipSet); |
|
123 |
|
124 if (ti->mFlags & BREAK_NEED_CAPITALIZATION) { |
|
125 if (capitalizationState.Length() == 0) { |
|
126 if (!capitalizationState.AppendElements(length)) |
|
127 return NS_ERROR_OUT_OF_MEMORY; |
|
128 memset(capitalizationState.Elements(), false, length*sizeof(bool)); |
|
129 SetupCapitalization(mCurrentWord.Elements(), length, |
|
130 capitalizationState.Elements()); |
|
131 } |
|
132 ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength, |
|
133 capitalizationState.Elements() + offset); |
|
134 } |
|
135 } |
|
136 |
|
137 offset += ti->mLength; |
|
138 } |
|
139 |
|
140 mCurrentWord.Clear(); |
|
141 mTextItems.Clear(); |
|
142 mCurrentWordContainsComplexChar = false; |
|
143 mCurrentWordContainsMixedLang = false; |
|
144 mCurrentWordLanguage = nullptr; |
|
145 return NS_OK; |
|
146 } |
|
147 |
|
148 nsresult |
|
149 nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength, |
|
150 uint32_t aFlags, nsILineBreakSink* aSink) |
|
151 { |
|
152 NS_ASSERTION(aLength > 0, "Appending empty text..."); |
|
153 |
|
154 uint32_t offset = 0; |
|
155 |
|
156 // Continue the current word |
|
157 if (mCurrentWord.Length() > 0) { |
|
158 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set"); |
|
159 |
|
160 while (offset < aLength && !IsSpace(aText[offset])) { |
|
161 mCurrentWord.AppendElement(aText[offset]); |
|
162 if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) { |
|
163 mCurrentWordContainsComplexChar = true; |
|
164 } |
|
165 UpdateCurrentWordLanguage(aHyphenationLanguage); |
|
166 ++offset; |
|
167 } |
|
168 |
|
169 if (offset > 0) { |
|
170 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); |
|
171 } |
|
172 |
|
173 if (offset == aLength) |
|
174 return NS_OK; |
|
175 |
|
176 // We encountered whitespace, so we're done with this word |
|
177 nsresult rv = FlushCurrentWord(); |
|
178 if (NS_FAILED(rv)) |
|
179 return rv; |
|
180 } |
|
181 |
|
182 nsAutoTArray<uint8_t,4000> breakState; |
|
183 if (aSink) { |
|
184 if (!breakState.AppendElements(aLength)) |
|
185 return NS_ERROR_OUT_OF_MEMORY; |
|
186 } |
|
187 |
|
188 nsTArray<bool> capitalizationState; |
|
189 if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) { |
|
190 if (!capitalizationState.AppendElements(aLength)) |
|
191 return NS_ERROR_OUT_OF_MEMORY; |
|
192 memset(capitalizationState.Elements(), false, aLength*sizeof(bool)); |
|
193 } |
|
194 |
|
195 uint32_t start = offset; |
|
196 bool noBreaksNeeded = !aSink || |
|
197 (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) && |
|
198 !mBreakHere && !mAfterBreakableSpace); |
|
199 if (noBreaksNeeded) { |
|
200 // Skip to the space before the last word, since either the break data |
|
201 // here is not needed, or no breaks are set in the sink and there cannot |
|
202 // be any breaks in this chunk; all we need is the context for the next |
|
203 // chunk (if any) |
|
204 offset = aLength; |
|
205 while (offset > start) { |
|
206 --offset; |
|
207 if (IsSpace(aText[offset])) |
|
208 break; |
|
209 } |
|
210 } |
|
211 uint32_t wordStart = offset; |
|
212 bool wordHasComplexChar = false; |
|
213 |
|
214 nsRefPtr<nsHyphenator> hyphenator; |
|
215 if ((aFlags & BREAK_USE_AUTO_HYPHENATION) && |
|
216 !(aFlags & BREAK_SUPPRESS_INSIDE) && |
|
217 aHyphenationLanguage) { |
|
218 hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage); |
|
219 } |
|
220 |
|
221 for (;;) { |
|
222 char16_t ch = aText[offset]; |
|
223 bool isSpace = IsSpace(ch); |
|
224 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); |
|
225 |
|
226 if (aSink) { |
|
227 breakState[offset] = |
|
228 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || |
|
229 (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ? |
|
230 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : |
|
231 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; |
|
232 } |
|
233 mBreakHere = false; |
|
234 mAfterBreakableSpace = isBreakableSpace; |
|
235 |
|
236 if (isSpace) { |
|
237 if (offset > wordStart && aSink) { |
|
238 if (!(aFlags & BREAK_SUPPRESS_INSIDE)) { |
|
239 if (wordHasComplexChar) { |
|
240 // Save current start-of-word state because GetJISx4051Breaks will |
|
241 // set it to false |
|
242 uint8_t currentStart = breakState[wordStart]; |
|
243 nsContentUtils::LineBreaker()-> |
|
244 GetJISx4051Breaks(aText + wordStart, offset - wordStart, |
|
245 mWordBreak, |
|
246 breakState.Elements() + wordStart); |
|
247 breakState[wordStart] = currentStart; |
|
248 } |
|
249 if (hyphenator) { |
|
250 FindHyphenationPoints(hyphenator, |
|
251 aText + wordStart, aText + offset, |
|
252 breakState.Elements() + wordStart); |
|
253 } |
|
254 } |
|
255 if (aFlags & BREAK_NEED_CAPITALIZATION) { |
|
256 SetupCapitalization(aText + wordStart, offset - wordStart, |
|
257 capitalizationState.Elements() + wordStart); |
|
258 } |
|
259 } |
|
260 wordHasComplexChar = false; |
|
261 ++offset; |
|
262 if (offset >= aLength) |
|
263 break; |
|
264 wordStart = offset; |
|
265 } else { |
|
266 if (!wordHasComplexChar && IsComplexChar(ch)) { |
|
267 wordHasComplexChar = true; |
|
268 } |
|
269 ++offset; |
|
270 if (offset >= aLength) { |
|
271 // Save this word |
|
272 mCurrentWordContainsComplexChar = wordHasComplexChar; |
|
273 uint32_t len = offset - wordStart; |
|
274 char16_t* elems = mCurrentWord.AppendElements(len); |
|
275 if (!elems) |
|
276 return NS_ERROR_OUT_OF_MEMORY; |
|
277 memcpy(elems, aText + wordStart, sizeof(char16_t)*len); |
|
278 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); |
|
279 // Ensure that the break-before for this word is written out |
|
280 offset = wordStart + 1; |
|
281 UpdateCurrentWordLanguage(aHyphenationLanguage); |
|
282 break; |
|
283 } |
|
284 } |
|
285 } |
|
286 |
|
287 if (!noBreaksNeeded) { |
|
288 // aSink must not be null |
|
289 aSink->SetBreaks(start, offset - start, breakState.Elements() + start); |
|
290 if (aFlags & BREAK_NEED_CAPITALIZATION) { |
|
291 aSink->SetCapitalization(start, offset - start, |
|
292 capitalizationState.Elements() + start); |
|
293 } |
|
294 } |
|
295 return NS_OK; |
|
296 } |
|
297 |
|
298 void |
|
299 nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator, |
|
300 const char16_t *aTextStart, |
|
301 const char16_t *aTextLimit, |
|
302 uint8_t *aBreakState) |
|
303 { |
|
304 nsDependentSubstring string(aTextStart, aTextLimit); |
|
305 AutoFallibleTArray<bool,200> hyphens; |
|
306 if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) { |
|
307 for (uint32_t i = 0; i + 1 < string.Length(); ++i) { |
|
308 if (hyphens[i]) { |
|
309 aBreakState[i + 1] = |
|
310 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN; |
|
311 } |
|
312 } |
|
313 } |
|
314 } |
|
315 |
|
316 nsresult |
|
317 nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength, |
|
318 uint32_t aFlags, nsILineBreakSink* aSink) |
|
319 { |
|
320 NS_ASSERTION(aLength > 0, "Appending empty text..."); |
|
321 |
|
322 if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) { |
|
323 // Defer to the Unicode path if capitalization or hyphenation is required |
|
324 nsAutoString str; |
|
325 const char* cp = reinterpret_cast<const char*>(aText); |
|
326 CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str); |
|
327 return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink); |
|
328 } |
|
329 |
|
330 uint32_t offset = 0; |
|
331 |
|
332 // Continue the current word |
|
333 if (mCurrentWord.Length() > 0) { |
|
334 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set"); |
|
335 |
|
336 while (offset < aLength && !IsSpace(aText[offset])) { |
|
337 mCurrentWord.AppendElement(aText[offset]); |
|
338 if (!mCurrentWordContainsComplexChar && |
|
339 IsComplexASCIIChar(aText[offset])) { |
|
340 mCurrentWordContainsComplexChar = true; |
|
341 } |
|
342 ++offset; |
|
343 } |
|
344 |
|
345 if (offset > 0) { |
|
346 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); |
|
347 } |
|
348 |
|
349 if (offset == aLength) { |
|
350 // We did not encounter whitespace so the word hasn't finished yet. |
|
351 return NS_OK; |
|
352 } |
|
353 |
|
354 // We encountered whitespace, so we're done with this word |
|
355 nsresult rv = FlushCurrentWord(); |
|
356 if (NS_FAILED(rv)) |
|
357 return rv; |
|
358 } |
|
359 |
|
360 nsAutoTArray<uint8_t,4000> breakState; |
|
361 if (aSink) { |
|
362 if (!breakState.AppendElements(aLength)) |
|
363 return NS_ERROR_OUT_OF_MEMORY; |
|
364 } |
|
365 |
|
366 uint32_t start = offset; |
|
367 bool noBreaksNeeded = !aSink || |
|
368 (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) && |
|
369 !mBreakHere && !mAfterBreakableSpace); |
|
370 if (noBreaksNeeded) { |
|
371 // Skip to the space before the last word, since either the break data |
|
372 // here is not needed, or no breaks are set in the sink and there cannot |
|
373 // be any breaks in this chunk; all we need is the context for the next |
|
374 // chunk (if any) |
|
375 offset = aLength; |
|
376 while (offset > start) { |
|
377 --offset; |
|
378 if (IsSpace(aText[offset])) |
|
379 break; |
|
380 } |
|
381 } |
|
382 uint32_t wordStart = offset; |
|
383 bool wordHasComplexChar = false; |
|
384 |
|
385 for (;;) { |
|
386 uint8_t ch = aText[offset]; |
|
387 bool isSpace = IsSpace(ch); |
|
388 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); |
|
389 |
|
390 if (aSink) { |
|
391 // Consider word-break style. Since the break position of CJK scripts |
|
392 // will be set by nsILineBreaker, we don't consider CJK at this point. |
|
393 breakState[offset] = |
|
394 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || |
|
395 (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ? |
|
396 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : |
|
397 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; |
|
398 } |
|
399 mBreakHere = false; |
|
400 mAfterBreakableSpace = isBreakableSpace; |
|
401 |
|
402 if (isSpace) { |
|
403 if (offset > wordStart && wordHasComplexChar) { |
|
404 if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) { |
|
405 // Save current start-of-word state because GetJISx4051Breaks will |
|
406 // set it to false |
|
407 uint8_t currentStart = breakState[wordStart]; |
|
408 nsContentUtils::LineBreaker()-> |
|
409 GetJISx4051Breaks(aText + wordStart, offset - wordStart, |
|
410 mWordBreak, |
|
411 breakState.Elements() + wordStart); |
|
412 breakState[wordStart] = currentStart; |
|
413 } |
|
414 wordHasComplexChar = false; |
|
415 } |
|
416 |
|
417 ++offset; |
|
418 if (offset >= aLength) |
|
419 break; |
|
420 wordStart = offset; |
|
421 } else { |
|
422 if (!wordHasComplexChar && IsComplexASCIIChar(ch)) { |
|
423 wordHasComplexChar = true; |
|
424 } |
|
425 ++offset; |
|
426 if (offset >= aLength) { |
|
427 // Save this word |
|
428 mCurrentWordContainsComplexChar = wordHasComplexChar; |
|
429 uint32_t len = offset - wordStart; |
|
430 char16_t* elems = mCurrentWord.AppendElements(len); |
|
431 if (!elems) |
|
432 return NS_ERROR_OUT_OF_MEMORY; |
|
433 uint32_t i; |
|
434 for (i = wordStart; i < offset; ++i) { |
|
435 elems[i - wordStart] = aText[i]; |
|
436 } |
|
437 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); |
|
438 // Ensure that the break-before for this word is written out |
|
439 offset = wordStart + 1; |
|
440 break; |
|
441 } |
|
442 } |
|
443 } |
|
444 |
|
445 if (!noBreaksNeeded) { |
|
446 aSink->SetBreaks(start, offset - start, breakState.Elements() + start); |
|
447 } |
|
448 return NS_OK; |
|
449 } |
|
450 |
|
451 void |
|
452 nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage) |
|
453 { |
|
454 if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) { |
|
455 mCurrentWordContainsMixedLang = true; |
|
456 } else { |
|
457 mCurrentWordLanguage = aHyphenationLanguage; |
|
458 } |
|
459 } |
|
460 |
|
461 nsresult |
|
462 nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) |
|
463 { |
|
464 nsresult rv = FlushCurrentWord(); |
|
465 if (NS_FAILED(rv)) |
|
466 return rv; |
|
467 |
|
468 bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE); |
|
469 if (mAfterBreakableSpace && !isBreakableSpace) { |
|
470 mBreakHere = true; |
|
471 } |
|
472 mAfterBreakableSpace = isBreakableSpace; |
|
473 return NS_OK; |
|
474 } |
|
475 |
|
476 nsresult |
|
477 nsLineBreaker::Reset(bool* aTrailingBreak) |
|
478 { |
|
479 nsresult rv = FlushCurrentWord(); |
|
480 if (NS_FAILED(rv)) |
|
481 return rv; |
|
482 |
|
483 *aTrailingBreak = mBreakHere || mAfterBreakableSpace; |
|
484 mBreakHere = false; |
|
485 mAfterBreakableSpace = false; |
|
486 return NS_OK; |
|
487 } |