1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/base/src/nsLineBreaker.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,487 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsLineBreaker.h" 1.10 +#include "nsContentUtils.h" 1.11 +#include "nsILineBreaker.h" 1.12 +#include "gfxFont.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values 1.13 +#include "nsHyphenationManager.h" 1.14 +#include "nsHyphenator.h" 1.15 +#include "mozilla/gfx/2D.h" 1.16 + 1.17 +nsLineBreaker::nsLineBreaker() 1.18 + : mCurrentWordLanguage(nullptr), 1.19 + mCurrentWordContainsMixedLang(false), 1.20 + mCurrentWordContainsComplexChar(false), 1.21 + mAfterBreakableSpace(false), mBreakHere(false), 1.22 + mWordBreak(nsILineBreaker::kWordBreak_Normal) 1.23 +{ 1.24 +} 1.25 + 1.26 +nsLineBreaker::~nsLineBreaker() 1.27 +{ 1.28 + NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!"); 1.29 +} 1.30 + 1.31 +static void 1.32 +SetupCapitalization(const char16_t* aWord, uint32_t aLength, 1.33 + bool* aCapitalization) 1.34 +{ 1.35 + // Capitalize the first alphanumeric character after a space or start 1.36 + // of the word. 1.37 + // The only space character a word can contain is NBSP. 1.38 + bool capitalizeNextChar = true; 1.39 + for (uint32_t i = 0; i < aLength; ++i) { 1.40 + uint32_t ch = aWord[i]; 1.41 + if (capitalizeNextChar) { 1.42 + if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength && 1.43 + NS_IS_LOW_SURROGATE(aWord[i + 1])) { 1.44 + ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); 1.45 + } 1.46 + if (nsContentUtils::IsAlphanumeric(ch)) { 1.47 + aCapitalization[i] = true; 1.48 + capitalizeNextChar = false; 1.49 + } 1.50 + if (!IS_IN_BMP(ch)) { 1.51 + ++i; 1.52 + } 1.53 + } 1.54 + if (ch == 0xA0 /*NBSP*/) { 1.55 + capitalizeNextChar = true; 1.56 + } 1.57 + } 1.58 +} 1.59 + 1.60 +nsresult 1.61 +nsLineBreaker::FlushCurrentWord() 1.62 +{ 1.63 + uint32_t length = mCurrentWord.Length(); 1.64 + nsAutoTArray<uint8_t,4000> breakState; 1.65 + if (!breakState.AppendElements(length)) 1.66 + return NS_ERROR_OUT_OF_MEMORY; 1.67 + 1.68 + nsTArray<bool> capitalizationState; 1.69 + 1.70 + if (!mCurrentWordContainsComplexChar) { 1.71 + // For break-strict set everything internal to "break", otherwise 1.72 + // to "no break"! 1.73 + memset(breakState.Elements(), 1.74 + mWordBreak == nsILineBreaker::kWordBreak_BreakAll ? 1.75 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : 1.76 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, 1.77 + length*sizeof(uint8_t)); 1.78 + } else { 1.79 + nsContentUtils::LineBreaker()-> 1.80 + GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak, 1.81 + breakState.Elements()); 1.82 + } 1.83 + 1.84 + bool autoHyphenate = mCurrentWordLanguage && 1.85 + !mCurrentWordContainsMixedLang; 1.86 + uint32_t i; 1.87 + for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) { 1.88 + TextItem* ti = &mTextItems[i]; 1.89 + if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) { 1.90 + autoHyphenate = false; 1.91 + } 1.92 + } 1.93 + if (autoHyphenate) { 1.94 + nsRefPtr<nsHyphenator> hyphenator = 1.95 + nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage); 1.96 + if (hyphenator) { 1.97 + FindHyphenationPoints(hyphenator, 1.98 + mCurrentWord.Elements(), 1.99 + mCurrentWord.Elements() + length, 1.100 + breakState.Elements()); 1.101 + } 1.102 + } 1.103 + 1.104 + uint32_t offset = 0; 1.105 + for (i = 0; i < mTextItems.Length(); ++i) { 1.106 + TextItem* ti = &mTextItems[i]; 1.107 + NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?"); 1.108 + 1.109 + if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) { 1.110 + breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; 1.111 + } 1.112 + if (ti->mFlags & BREAK_SUPPRESS_INSIDE) { 1.113 + uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0; 1.114 + memset(breakState.Elements() + offset + exclude, 1.115 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, 1.116 + (ti->mLength - exclude)*sizeof(uint8_t)); 1.117 + } 1.118 + 1.119 + // Don't set the break state for the first character of the word, because 1.120 + // it was already set correctly earlier and we don't know what the true 1.121 + // value should be. 1.122 + uint32_t skipSet = i == 0 ? 1 : 0; 1.123 + if (ti->mSink) { 1.124 + ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet, 1.125 + breakState.Elements() + offset + skipSet); 1.126 + 1.127 + if (ti->mFlags & BREAK_NEED_CAPITALIZATION) { 1.128 + if (capitalizationState.Length() == 0) { 1.129 + if (!capitalizationState.AppendElements(length)) 1.130 + return NS_ERROR_OUT_OF_MEMORY; 1.131 + memset(capitalizationState.Elements(), false, length*sizeof(bool)); 1.132 + SetupCapitalization(mCurrentWord.Elements(), length, 1.133 + capitalizationState.Elements()); 1.134 + } 1.135 + ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength, 1.136 + capitalizationState.Elements() + offset); 1.137 + } 1.138 + } 1.139 + 1.140 + offset += ti->mLength; 1.141 + } 1.142 + 1.143 + mCurrentWord.Clear(); 1.144 + mTextItems.Clear(); 1.145 + mCurrentWordContainsComplexChar = false; 1.146 + mCurrentWordContainsMixedLang = false; 1.147 + mCurrentWordLanguage = nullptr; 1.148 + return NS_OK; 1.149 +} 1.150 + 1.151 +nsresult 1.152 +nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength, 1.153 + uint32_t aFlags, nsILineBreakSink* aSink) 1.154 +{ 1.155 + NS_ASSERTION(aLength > 0, "Appending empty text..."); 1.156 + 1.157 + uint32_t offset = 0; 1.158 + 1.159 + // Continue the current word 1.160 + if (mCurrentWord.Length() > 0) { 1.161 + NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set"); 1.162 + 1.163 + while (offset < aLength && !IsSpace(aText[offset])) { 1.164 + mCurrentWord.AppendElement(aText[offset]); 1.165 + if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) { 1.166 + mCurrentWordContainsComplexChar = true; 1.167 + } 1.168 + UpdateCurrentWordLanguage(aHyphenationLanguage); 1.169 + ++offset; 1.170 + } 1.171 + 1.172 + if (offset > 0) { 1.173 + mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); 1.174 + } 1.175 + 1.176 + if (offset == aLength) 1.177 + return NS_OK; 1.178 + 1.179 + // We encountered whitespace, so we're done with this word 1.180 + nsresult rv = FlushCurrentWord(); 1.181 + if (NS_FAILED(rv)) 1.182 + return rv; 1.183 + } 1.184 + 1.185 + nsAutoTArray<uint8_t,4000> breakState; 1.186 + if (aSink) { 1.187 + if (!breakState.AppendElements(aLength)) 1.188 + return NS_ERROR_OUT_OF_MEMORY; 1.189 + } 1.190 + 1.191 + nsTArray<bool> capitalizationState; 1.192 + if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) { 1.193 + if (!capitalizationState.AppendElements(aLength)) 1.194 + return NS_ERROR_OUT_OF_MEMORY; 1.195 + memset(capitalizationState.Elements(), false, aLength*sizeof(bool)); 1.196 + } 1.197 + 1.198 + uint32_t start = offset; 1.199 + bool noBreaksNeeded = !aSink || 1.200 + (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) && 1.201 + !mBreakHere && !mAfterBreakableSpace); 1.202 + if (noBreaksNeeded) { 1.203 + // Skip to the space before the last word, since either the break data 1.204 + // here is not needed, or no breaks are set in the sink and there cannot 1.205 + // be any breaks in this chunk; all we need is the context for the next 1.206 + // chunk (if any) 1.207 + offset = aLength; 1.208 + while (offset > start) { 1.209 + --offset; 1.210 + if (IsSpace(aText[offset])) 1.211 + break; 1.212 + } 1.213 + } 1.214 + uint32_t wordStart = offset; 1.215 + bool wordHasComplexChar = false; 1.216 + 1.217 + nsRefPtr<nsHyphenator> hyphenator; 1.218 + if ((aFlags & BREAK_USE_AUTO_HYPHENATION) && 1.219 + !(aFlags & BREAK_SUPPRESS_INSIDE) && 1.220 + aHyphenationLanguage) { 1.221 + hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage); 1.222 + } 1.223 + 1.224 + for (;;) { 1.225 + char16_t ch = aText[offset]; 1.226 + bool isSpace = IsSpace(ch); 1.227 + bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); 1.228 + 1.229 + if (aSink) { 1.230 + breakState[offset] = 1.231 + mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || 1.232 + (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ? 1.233 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : 1.234 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; 1.235 + } 1.236 + mBreakHere = false; 1.237 + mAfterBreakableSpace = isBreakableSpace; 1.238 + 1.239 + if (isSpace) { 1.240 + if (offset > wordStart && aSink) { 1.241 + if (!(aFlags & BREAK_SUPPRESS_INSIDE)) { 1.242 + if (wordHasComplexChar) { 1.243 + // Save current start-of-word state because GetJISx4051Breaks will 1.244 + // set it to false 1.245 + uint8_t currentStart = breakState[wordStart]; 1.246 + nsContentUtils::LineBreaker()-> 1.247 + GetJISx4051Breaks(aText + wordStart, offset - wordStart, 1.248 + mWordBreak, 1.249 + breakState.Elements() + wordStart); 1.250 + breakState[wordStart] = currentStart; 1.251 + } 1.252 + if (hyphenator) { 1.253 + FindHyphenationPoints(hyphenator, 1.254 + aText + wordStart, aText + offset, 1.255 + breakState.Elements() + wordStart); 1.256 + } 1.257 + } 1.258 + if (aFlags & BREAK_NEED_CAPITALIZATION) { 1.259 + SetupCapitalization(aText + wordStart, offset - wordStart, 1.260 + capitalizationState.Elements() + wordStart); 1.261 + } 1.262 + } 1.263 + wordHasComplexChar = false; 1.264 + ++offset; 1.265 + if (offset >= aLength) 1.266 + break; 1.267 + wordStart = offset; 1.268 + } else { 1.269 + if (!wordHasComplexChar && IsComplexChar(ch)) { 1.270 + wordHasComplexChar = true; 1.271 + } 1.272 + ++offset; 1.273 + if (offset >= aLength) { 1.274 + // Save this word 1.275 + mCurrentWordContainsComplexChar = wordHasComplexChar; 1.276 + uint32_t len = offset - wordStart; 1.277 + char16_t* elems = mCurrentWord.AppendElements(len); 1.278 + if (!elems) 1.279 + return NS_ERROR_OUT_OF_MEMORY; 1.280 + memcpy(elems, aText + wordStart, sizeof(char16_t)*len); 1.281 + mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); 1.282 + // Ensure that the break-before for this word is written out 1.283 + offset = wordStart + 1; 1.284 + UpdateCurrentWordLanguage(aHyphenationLanguage); 1.285 + break; 1.286 + } 1.287 + } 1.288 + } 1.289 + 1.290 + if (!noBreaksNeeded) { 1.291 + // aSink must not be null 1.292 + aSink->SetBreaks(start, offset - start, breakState.Elements() + start); 1.293 + if (aFlags & BREAK_NEED_CAPITALIZATION) { 1.294 + aSink->SetCapitalization(start, offset - start, 1.295 + capitalizationState.Elements() + start); 1.296 + } 1.297 + } 1.298 + return NS_OK; 1.299 +} 1.300 + 1.301 +void 1.302 +nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator, 1.303 + const char16_t *aTextStart, 1.304 + const char16_t *aTextLimit, 1.305 + uint8_t *aBreakState) 1.306 +{ 1.307 + nsDependentSubstring string(aTextStart, aTextLimit); 1.308 + AutoFallibleTArray<bool,200> hyphens; 1.309 + if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) { 1.310 + for (uint32_t i = 0; i + 1 < string.Length(); ++i) { 1.311 + if (hyphens[i]) { 1.312 + aBreakState[i + 1] = 1.313 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN; 1.314 + } 1.315 + } 1.316 + } 1.317 +} 1.318 + 1.319 +nsresult 1.320 +nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength, 1.321 + uint32_t aFlags, nsILineBreakSink* aSink) 1.322 +{ 1.323 + NS_ASSERTION(aLength > 0, "Appending empty text..."); 1.324 + 1.325 + if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) { 1.326 + // Defer to the Unicode path if capitalization or hyphenation is required 1.327 + nsAutoString str; 1.328 + const char* cp = reinterpret_cast<const char*>(aText); 1.329 + CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str); 1.330 + return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink); 1.331 + } 1.332 + 1.333 + uint32_t offset = 0; 1.334 + 1.335 + // Continue the current word 1.336 + if (mCurrentWord.Length() > 0) { 1.337 + NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set"); 1.338 + 1.339 + while (offset < aLength && !IsSpace(aText[offset])) { 1.340 + mCurrentWord.AppendElement(aText[offset]); 1.341 + if (!mCurrentWordContainsComplexChar && 1.342 + IsComplexASCIIChar(aText[offset])) { 1.343 + mCurrentWordContainsComplexChar = true; 1.344 + } 1.345 + ++offset; 1.346 + } 1.347 + 1.348 + if (offset > 0) { 1.349 + mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); 1.350 + } 1.351 + 1.352 + if (offset == aLength) { 1.353 + // We did not encounter whitespace so the word hasn't finished yet. 1.354 + return NS_OK; 1.355 + } 1.356 + 1.357 + // We encountered whitespace, so we're done with this word 1.358 + nsresult rv = FlushCurrentWord(); 1.359 + if (NS_FAILED(rv)) 1.360 + return rv; 1.361 + } 1.362 + 1.363 + nsAutoTArray<uint8_t,4000> breakState; 1.364 + if (aSink) { 1.365 + if (!breakState.AppendElements(aLength)) 1.366 + return NS_ERROR_OUT_OF_MEMORY; 1.367 + } 1.368 + 1.369 + uint32_t start = offset; 1.370 + bool noBreaksNeeded = !aSink || 1.371 + (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) && 1.372 + !mBreakHere && !mAfterBreakableSpace); 1.373 + if (noBreaksNeeded) { 1.374 + // Skip to the space before the last word, since either the break data 1.375 + // here is not needed, or no breaks are set in the sink and there cannot 1.376 + // be any breaks in this chunk; all we need is the context for the next 1.377 + // chunk (if any) 1.378 + offset = aLength; 1.379 + while (offset > start) { 1.380 + --offset; 1.381 + if (IsSpace(aText[offset])) 1.382 + break; 1.383 + } 1.384 + } 1.385 + uint32_t wordStart = offset; 1.386 + bool wordHasComplexChar = false; 1.387 + 1.388 + for (;;) { 1.389 + uint8_t ch = aText[offset]; 1.390 + bool isSpace = IsSpace(ch); 1.391 + bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); 1.392 + 1.393 + if (aSink) { 1.394 + // Consider word-break style. Since the break position of CJK scripts 1.395 + // will be set by nsILineBreaker, we don't consider CJK at this point. 1.396 + breakState[offset] = 1.397 + mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || 1.398 + (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ? 1.399 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL : 1.400 + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; 1.401 + } 1.402 + mBreakHere = false; 1.403 + mAfterBreakableSpace = isBreakableSpace; 1.404 + 1.405 + if (isSpace) { 1.406 + if (offset > wordStart && wordHasComplexChar) { 1.407 + if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) { 1.408 + // Save current start-of-word state because GetJISx4051Breaks will 1.409 + // set it to false 1.410 + uint8_t currentStart = breakState[wordStart]; 1.411 + nsContentUtils::LineBreaker()-> 1.412 + GetJISx4051Breaks(aText + wordStart, offset - wordStart, 1.413 + mWordBreak, 1.414 + breakState.Elements() + wordStart); 1.415 + breakState[wordStart] = currentStart; 1.416 + } 1.417 + wordHasComplexChar = false; 1.418 + } 1.419 + 1.420 + ++offset; 1.421 + if (offset >= aLength) 1.422 + break; 1.423 + wordStart = offset; 1.424 + } else { 1.425 + if (!wordHasComplexChar && IsComplexASCIIChar(ch)) { 1.426 + wordHasComplexChar = true; 1.427 + } 1.428 + ++offset; 1.429 + if (offset >= aLength) { 1.430 + // Save this word 1.431 + mCurrentWordContainsComplexChar = wordHasComplexChar; 1.432 + uint32_t len = offset - wordStart; 1.433 + char16_t* elems = mCurrentWord.AppendElements(len); 1.434 + if (!elems) 1.435 + return NS_ERROR_OUT_OF_MEMORY; 1.436 + uint32_t i; 1.437 + for (i = wordStart; i < offset; ++i) { 1.438 + elems[i - wordStart] = aText[i]; 1.439 + } 1.440 + mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); 1.441 + // Ensure that the break-before for this word is written out 1.442 + offset = wordStart + 1; 1.443 + break; 1.444 + } 1.445 + } 1.446 + } 1.447 + 1.448 + if (!noBreaksNeeded) { 1.449 + aSink->SetBreaks(start, offset - start, breakState.Elements() + start); 1.450 + } 1.451 + return NS_OK; 1.452 +} 1.453 + 1.454 +void 1.455 +nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage) 1.456 +{ 1.457 + if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) { 1.458 + mCurrentWordContainsMixedLang = true; 1.459 + } else { 1.460 + mCurrentWordLanguage = aHyphenationLanguage; 1.461 + } 1.462 +} 1.463 + 1.464 +nsresult 1.465 +nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) 1.466 +{ 1.467 + nsresult rv = FlushCurrentWord(); 1.468 + if (NS_FAILED(rv)) 1.469 + return rv; 1.470 + 1.471 + bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE); 1.472 + if (mAfterBreakableSpace && !isBreakableSpace) { 1.473 + mBreakHere = true; 1.474 + } 1.475 + mAfterBreakableSpace = isBreakableSpace; 1.476 + return NS_OK; 1.477 +} 1.478 + 1.479 +nsresult 1.480 +nsLineBreaker::Reset(bool* aTrailingBreak) 1.481 +{ 1.482 + nsresult rv = FlushCurrentWord(); 1.483 + if (NS_FAILED(rv)) 1.484 + return rv; 1.485 + 1.486 + *aTrailingBreak = mBreakHere || mAfterBreakableSpace; 1.487 + mBreakHere = false; 1.488 + mAfterBreakableSpace = false; 1.489 + return NS_OK; 1.490 +}