michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsHyphenator.h" michael@0: #include "nsIFile.h" michael@0: #include "nsUTF8Utils.h" michael@0: #include "nsUnicodeProperties.h" michael@0: #include "nsUnicharUtilCIID.h" michael@0: #include "nsIURI.h" michael@0: michael@0: #include "hyphen.h" michael@0: michael@0: nsHyphenator::nsHyphenator(nsIURI *aURI) michael@0: : mDict(nullptr) michael@0: { michael@0: nsCString uriSpec; michael@0: nsresult rv = aURI->GetSpec(uriSpec); michael@0: if (NS_FAILED(rv)) { michael@0: return; michael@0: } michael@0: mDict = hnj_hyphen_load(uriSpec.get()); michael@0: #ifdef DEBUG michael@0: if (mDict) { michael@0: printf("loaded hyphenation patterns from %s\n", uriSpec.get()); michael@0: } michael@0: #endif michael@0: } michael@0: michael@0: nsHyphenator::~nsHyphenator() michael@0: { michael@0: if (mDict != nullptr) { michael@0: hnj_hyphen_free((HyphenDict*)mDict); michael@0: mDict = nullptr; michael@0: } michael@0: } michael@0: michael@0: bool michael@0: nsHyphenator::IsValid() michael@0: { michael@0: return (mDict != nullptr); michael@0: } michael@0: michael@0: nsresult michael@0: nsHyphenator::Hyphenate(const nsAString& aString, michael@0: FallibleTArray& aHyphens) michael@0: { michael@0: if (!aHyphens.SetLength(aString.Length())) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: memset(aHyphens.Elements(), false, aHyphens.Length()); michael@0: michael@0: bool inWord = false; michael@0: uint32_t wordStart = 0, wordLimit = 0; michael@0: uint32_t chLen; michael@0: for (uint32_t i = 0; i < aString.Length(); i += chLen) { michael@0: uint32_t ch = aString[i]; michael@0: chLen = 1; michael@0: michael@0: if (NS_IS_HIGH_SURROGATE(ch)) { michael@0: if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) { michael@0: ch = SURROGATE_TO_UCS4(ch, aString[i+1]); michael@0: chLen = 2; michael@0: } else { michael@0: NS_WARNING("unpaired surrogate found during hyphenation"); michael@0: } michael@0: } michael@0: michael@0: nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch); michael@0: if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) { michael@0: if (!inWord) { michael@0: inWord = true; michael@0: wordStart = i; michael@0: } michael@0: wordLimit = i + chLen; michael@0: if (i + chLen < aString.Length()) { michael@0: continue; michael@0: } michael@0: } michael@0: michael@0: if (inWord) { michael@0: const char16_t *begin = aString.BeginReading(); michael@0: NS_ConvertUTF16toUTF8 utf8(begin + wordStart, michael@0: wordLimit - wordStart); michael@0: nsAutoTArray utf8hyphens; michael@0: utf8hyphens.SetLength(utf8.Length() + 5); michael@0: char **rep = nullptr; michael@0: int *pos = nullptr; michael@0: int *cut = nullptr; michael@0: int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict, michael@0: utf8.BeginReading(), utf8.Length(), michael@0: utf8hyphens.Elements(), nullptr, michael@0: &rep, &pos, &cut); michael@0: if (!err) { michael@0: // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer michael@0: // from utf8 code unit indexing (which would match the utf8 input michael@0: // string directly) to Unicode character indexing. michael@0: // We then need to convert this to utf16 code unit offsets for Gecko. michael@0: const char *hyphPtr = utf8hyphens.Elements(); michael@0: const char16_t *cur = begin + wordStart; michael@0: const char16_t *end = begin + wordLimit; michael@0: while (cur < end) { michael@0: if (*hyphPtr & 0x01) { michael@0: aHyphens[cur - begin] = true; michael@0: } michael@0: cur++; michael@0: if (cur < end && NS_IS_LOW_SURROGATE(*cur) && michael@0: NS_IS_HIGH_SURROGATE(*(cur-1))) michael@0: { michael@0: cur++; michael@0: } michael@0: hyphPtr++; michael@0: } michael@0: } michael@0: } michael@0: michael@0: inWord = false; michael@0: } michael@0: michael@0: return NS_OK; michael@0: }