|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
#include "nsHyphenator.h"
#include "nsIFile.h"
#include "nsUTF8Utils.h"
#include "nsUnicodeProperties.h"
#include "nsUnicharUtilCIID.h"
#include "nsIURI.h"

#include "hyphen.h"

#include <stdlib.h>
|
14 |
|
15 nsHyphenator::nsHyphenator(nsIURI *aURI) |
|
16 : mDict(nullptr) |
|
17 { |
|
18 nsCString uriSpec; |
|
19 nsresult rv = aURI->GetSpec(uriSpec); |
|
20 if (NS_FAILED(rv)) { |
|
21 return; |
|
22 } |
|
23 mDict = hnj_hyphen_load(uriSpec.get()); |
|
24 #ifdef DEBUG |
|
25 if (mDict) { |
|
26 printf("loaded hyphenation patterns from %s\n", uriSpec.get()); |
|
27 } |
|
28 #endif |
|
29 } |
|
30 |
|
31 nsHyphenator::~nsHyphenator() |
|
32 { |
|
33 if (mDict != nullptr) { |
|
34 hnj_hyphen_free((HyphenDict*)mDict); |
|
35 mDict = nullptr; |
|
36 } |
|
37 } |
|
38 |
|
39 bool |
|
40 nsHyphenator::IsValid() |
|
41 { |
|
42 return (mDict != nullptr); |
|
43 } |
|
44 |
|
45 nsresult |
|
46 nsHyphenator::Hyphenate(const nsAString& aString, |
|
47 FallibleTArray<bool>& aHyphens) |
|
48 { |
|
49 if (!aHyphens.SetLength(aString.Length())) { |
|
50 return NS_ERROR_OUT_OF_MEMORY; |
|
51 } |
|
52 memset(aHyphens.Elements(), false, aHyphens.Length()); |
|
53 |
|
54 bool inWord = false; |
|
55 uint32_t wordStart = 0, wordLimit = 0; |
|
56 uint32_t chLen; |
|
57 for (uint32_t i = 0; i < aString.Length(); i += chLen) { |
|
58 uint32_t ch = aString[i]; |
|
59 chLen = 1; |
|
60 |
|
61 if (NS_IS_HIGH_SURROGATE(ch)) { |
|
62 if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) { |
|
63 ch = SURROGATE_TO_UCS4(ch, aString[i+1]); |
|
64 chLen = 2; |
|
65 } else { |
|
66 NS_WARNING("unpaired surrogate found during hyphenation"); |
|
67 } |
|
68 } |
|
69 |
|
70 nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch); |
|
71 if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) { |
|
72 if (!inWord) { |
|
73 inWord = true; |
|
74 wordStart = i; |
|
75 } |
|
76 wordLimit = i + chLen; |
|
77 if (i + chLen < aString.Length()) { |
|
78 continue; |
|
79 } |
|
80 } |
|
81 |
|
82 if (inWord) { |
|
83 const char16_t *begin = aString.BeginReading(); |
|
84 NS_ConvertUTF16toUTF8 utf8(begin + wordStart, |
|
85 wordLimit - wordStart); |
|
86 nsAutoTArray<char,200> utf8hyphens; |
|
87 utf8hyphens.SetLength(utf8.Length() + 5); |
|
88 char **rep = nullptr; |
|
89 int *pos = nullptr; |
|
90 int *cut = nullptr; |
|
91 int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict, |
|
92 utf8.BeginReading(), utf8.Length(), |
|
93 utf8hyphens.Elements(), nullptr, |
|
94 &rep, &pos, &cut); |
|
95 if (!err) { |
|
96 // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer |
|
97 // from utf8 code unit indexing (which would match the utf8 input |
|
98 // string directly) to Unicode character indexing. |
|
99 // We then need to convert this to utf16 code unit offsets for Gecko. |
|
100 const char *hyphPtr = utf8hyphens.Elements(); |
|
101 const char16_t *cur = begin + wordStart; |
|
102 const char16_t *end = begin + wordLimit; |
|
103 while (cur < end) { |
|
104 if (*hyphPtr & 0x01) { |
|
105 aHyphens[cur - begin] = true; |
|
106 } |
|
107 cur++; |
|
108 if (cur < end && NS_IS_LOW_SURROGATE(*cur) && |
|
109 NS_IS_HIGH_SURROGATE(*(cur-1))) |
|
110 { |
|
111 cur++; |
|
112 } |
|
113 hyphPtr++; |
|
114 } |
|
115 } |
|
116 } |
|
117 |
|
118 inWord = false; |
|
119 } |
|
120 |
|
121 return NS_OK; |
|
122 } |