1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/hyphenation/src/nsHyphenator.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,122 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsHyphenator.h" 1.10 +#include "nsIFile.h" 1.11 +#include "nsUTF8Utils.h" 1.12 +#include "nsUnicodeProperties.h" 1.13 +#include "nsUnicharUtilCIID.h" 1.14 +#include "nsIURI.h" 1.15 + 1.16 +#include "hyphen.h" 1.17 + 1.18 +nsHyphenator::nsHyphenator(nsIURI *aURI) 1.19 + : mDict(nullptr) 1.20 +{ 1.21 + nsCString uriSpec; 1.22 + nsresult rv = aURI->GetSpec(uriSpec); 1.23 + if (NS_FAILED(rv)) { 1.24 + return; 1.25 + } 1.26 + mDict = hnj_hyphen_load(uriSpec.get()); 1.27 +#ifdef DEBUG 1.28 + if (mDict) { 1.29 + printf("loaded hyphenation patterns from %s\n", uriSpec.get()); 1.30 + } 1.31 +#endif 1.32 +} 1.33 + 1.34 +nsHyphenator::~nsHyphenator() 1.35 +{ 1.36 + if (mDict != nullptr) { 1.37 + hnj_hyphen_free((HyphenDict*)mDict); 1.38 + mDict = nullptr; 1.39 + } 1.40 +} 1.41 + 1.42 +bool 1.43 +nsHyphenator::IsValid() 1.44 +{ 1.45 + return (mDict != nullptr); 1.46 +} 1.47 + 1.48 +nsresult 1.49 +nsHyphenator::Hyphenate(const nsAString& aString, 1.50 + FallibleTArray<bool>& aHyphens) 1.51 +{ 1.52 + if (!aHyphens.SetLength(aString.Length())) { 1.53 + return NS_ERROR_OUT_OF_MEMORY; 1.54 + } 1.55 + memset(aHyphens.Elements(), false, aHyphens.Length()); 1.56 + 1.57 + bool inWord = false; 1.58 + uint32_t wordStart = 0, wordLimit = 0; 1.59 + uint32_t chLen; 1.60 + for (uint32_t i = 0; i < aString.Length(); i += chLen) { 1.61 + uint32_t ch = aString[i]; 1.62 + chLen = 1; 1.63 + 1.64 + if (NS_IS_HIGH_SURROGATE(ch)) { 1.65 + if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) { 1.66 + ch = SURROGATE_TO_UCS4(ch, aString[i+1]); 1.67 + chLen = 2; 1.68 + } else { 1.69 + NS_WARNING("unpaired surrogate found during hyphenation"); 1.70 + } 1.71 + } 1.72 + 1.73 + nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch); 1.74 + if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) { 1.75 + if (!inWord) { 1.76 + inWord = true; 1.77 + wordStart = i; 1.78 + } 1.79 + wordLimit = i + chLen; 1.80 + if (i + chLen < aString.Length()) { 1.81 + continue; 1.82 + } 1.83 + } 1.84 + 1.85 + if (inWord) { 1.86 + const char16_t *begin = aString.BeginReading(); 1.87 + NS_ConvertUTF16toUTF8 utf8(begin + wordStart, 1.88 + wordLimit - wordStart); 1.89 + nsAutoTArray<char,200> utf8hyphens; 1.90 + utf8hyphens.SetLength(utf8.Length() + 5); 1.91 + char **rep = nullptr; 1.92 + int *pos = nullptr; 1.93 + int *cut = nullptr; 1.94 + int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict, 1.95 + utf8.BeginReading(), utf8.Length(), 1.96 + utf8hyphens.Elements(), nullptr, 1.97 + &rep, &pos, &cut); 1.98 + if (!err) { 1.99 + // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer 1.100 + // from utf8 code unit indexing (which would match the utf8 input 1.101 + // string directly) to Unicode character indexing. 1.102 + // We then need to convert this to utf16 code unit offsets for Gecko. 1.103 + const char *hyphPtr = utf8hyphens.Elements(); 1.104 + const char16_t *cur = begin + wordStart; 1.105 + const char16_t *end = begin + wordLimit; 1.106 + while (cur < end) { 1.107 + if (*hyphPtr & 0x01) { 1.108 + aHyphens[cur - begin] = true; 1.109 + } 1.110 + cur++; 1.111 + if (cur < end && NS_IS_LOW_SURROGATE(*cur) && 1.112 + NS_IS_HIGH_SURROGATE(*(cur-1))) 1.113 + { 1.114 + cur++; 1.115 + } 1.116 + hyphPtr++; 1.117 + } 1.118 + } 1.119 + } 1.120 + 1.121 + inWord = false; 1.122 + } 1.123 + 1.124 + return NS_OK; 1.125 +}