Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "mozEnglishWordUtils.h"
7 #include "nsReadableUtils.h"
8 #include "nsIServiceManager.h"
9 #include "nsUnicharUtils.h"
10 #include "nsUnicharUtilCIID.h"
11 #include "nsUnicodeProperties.h"
12 #include "nsCRT.h"
13 #include "mozilla/Likely.h"
// XPCOM boilerplate for mozEnglishWordUtils.
// AddRef/Release are generated in their cycle-collecting variants.
15 NS_IMPL_CYCLE_COLLECTING_ADDREF(mozEnglishWordUtils)
16 NS_IMPL_CYCLE_COLLECTING_RELEASE(mozEnglishWordUtils)
// Interface map: the class answers for mozISpellI18NUtil, for nsISupports
// (disambiguated through mozISpellI18NUtil) and for the cycle-collection
// entries.
18 NS_INTERFACE_MAP_BEGIN(mozEnglishWordUtils)
19 NS_INTERFACE_MAP_ENTRY(mozISpellI18NUtil)
20 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellI18NUtil)
21 NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozEnglishWordUtils)
22 NS_INTERFACE_MAP_END
// mURLDetector is the only member handed to the cycle-collection macro.
24 NS_IMPL_CYCLE_COLLECTION(mozEnglishWordUtils,
25 mURLDetector)
27 mozEnglishWordUtils::mozEnglishWordUtils()
28 {
29 mLanguage.AssignLiteral("en");
31 nsresult rv;
32 mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv);
33 }
35 mozEnglishWordUtils::~mozEnglishWordUtils()
36 {
37 }
39 /* attribute wstring language; */
40 NS_IMETHODIMP mozEnglishWordUtils::GetLanguage(char16_t * *aLanguage)
41 {
42 nsresult rv = NS_OK;
43 NS_ENSURE_ARG_POINTER(aLanguage);
45 *aLanguage = ToNewUnicode(mLanguage);
46 if(!aLanguage) rv = NS_ERROR_OUT_OF_MEMORY;
47 return rv;
48 }
50 /* void GetRootForm (in wstring aWord, in uint32_t type, [array, size_is (count)] out wstring words, out uint32_t count); */
51 // return the possible root forms of aWord.
52 NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const char16_t *aWord, uint32_t type, char16_t ***words, uint32_t *count)
53 {
54 nsAutoString word(aWord);
55 char16_t **tmpPtr;
56 int32_t length = word.Length();
58 *count = 0;
60 mozEnglishWordUtils::myspCapitalization ct = captype(word);
61 switch (ct)
62 {
63 case HuhCap:
64 case NoCap:
65 tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *));
66 if (!tmpPtr)
67 return NS_ERROR_OUT_OF_MEMORY;
68 tmpPtr[0] = ToNewUnicode(word);
69 if (!tmpPtr[0]) {
70 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
71 return NS_ERROR_OUT_OF_MEMORY;
72 }
73 *words = tmpPtr;
74 *count = 1;
75 break;
78 case AllCap:
79 tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *) * 3);
80 if (!tmpPtr)
81 return NS_ERROR_OUT_OF_MEMORY;
82 tmpPtr[0] = ToNewUnicode(word);
83 if (!tmpPtr[0]) {
84 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
85 return NS_ERROR_OUT_OF_MEMORY;
86 }
87 ToLowerCase(tmpPtr[0], tmpPtr[0], length);
89 tmpPtr[1] = ToNewUnicode(word);
90 if (!tmpPtr[1]) {
91 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr);
92 return NS_ERROR_OUT_OF_MEMORY;
93 }
94 ToLowerCase(tmpPtr[1], tmpPtr[1], length);
95 ToUpperCase(tmpPtr[1], tmpPtr[1], 1);
97 tmpPtr[2] = ToNewUnicode(word);
98 if (!tmpPtr[2]) {
99 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr);
100 return NS_ERROR_OUT_OF_MEMORY;
101 }
103 *words = tmpPtr;
104 *count = 3;
105 break;
107 case InitCap:
108 tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *) * 2);
109 if (!tmpPtr)
110 return NS_ERROR_OUT_OF_MEMORY;
112 tmpPtr[0] = ToNewUnicode(word);
113 if (!tmpPtr[0]) {
114 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
115 return NS_ERROR_OUT_OF_MEMORY;
116 }
117 ToLowerCase(tmpPtr[0], tmpPtr[0], length);
119 tmpPtr[1] = ToNewUnicode(word);
120 if (!tmpPtr[1]) {
121 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr);
122 return NS_ERROR_OUT_OF_MEMORY;
123 }
125 *words = tmpPtr;
126 *count = 2;
127 break;
128 default:
129 return NS_ERROR_FAILURE; // should never get here;
130 }
131 return NS_OK;
132 }
134 // This needs vast improvement
135 bool mozEnglishWordUtils::ucIsAlpha(char16_t aChar)
136 {
137 // XXX we have to fix callers to handle the full Unicode range
138 return nsIUGenCategory::kLetter == mozilla::unicode::GetGenCategory(aChar);
139 }
141 /* void FindNextWord (in wstring word, in uint32_t length, in uint32_t offset, out uint32_t begin, out uint32_t end); */
142 NS_IMETHODIMP mozEnglishWordUtils::FindNextWord(const char16_t *word, uint32_t length, uint32_t offset, int32_t *begin, int32_t *end)
143 {
144 const char16_t *p = word + offset;
145 const char16_t *endbuf = word + length;
146 const char16_t *startWord=p;
147 if(p<endbuf){
148 // XXX These loops should be modified to handle non-BMP characters.
149 // if previous character is a word character, need to advance out of the word
150 if (offset > 0 && ucIsAlpha(*(p-1))) {
151 while (p < endbuf && ucIsAlpha(*p))
152 p++;
153 }
154 while((p < endbuf) && (!ucIsAlpha(*p)))
155 {
156 p++;
157 }
158 startWord=p;
159 while((p < endbuf) && ((ucIsAlpha(*p))||(*p=='\'')))
160 {
161 p++;
162 }
164 // we could be trying to break down a url, we don't want to break a url into parts,
165 // instead we want to find out if it really is a url and if so, skip it, advancing startWord
166 // to a point after the url.
168 // before we spend more time looking to see if the word is a url, look for a url identifer
169 // and make sure that identifer isn't the last character in the word fragment.
170 if ( (*p == ':' || *p == '@' || *p == '.') && p < endbuf - 1) {
172 // ok, we have a possible url...do more research to find out if we really have one
173 // and determine the length of the url so we can skip over it.
175 if (mURLDetector)
176 {
177 int32_t startPos = -1;
178 int32_t endPos = -1;
180 mURLDetector->FindURLInPlaintext(startWord, endbuf - startWord, p - startWord, &startPos, &endPos);
182 // ok, if we got a url, adjust the array bounds, skip the current url text and find the next word again
183 if (startPos != -1 && endPos != -1) {
184 startWord = p + endPos + 1; // skip over the url
185 p = startWord; // reset p
187 // now recursively call FindNextWord to search for the next word now that we have skipped the url
188 return FindNextWord(word, length, startWord - word, begin, end);
189 }
190 }
191 }
193 while((p > startWord)&&(*(p-1) == '\'')){ // trim trailing apostrophes
194 p--;
195 }
196 }
197 else{
198 startWord = endbuf;
199 }
200 if(startWord == endbuf){
201 *begin = -1;
202 *end = -1;
203 }
204 else{
205 *begin = startWord-word;
206 *end = p-word;
207 }
208 return NS_OK;
209 }
211 mozEnglishWordUtils::myspCapitalization
212 mozEnglishWordUtils::captype(const nsString &word)
213 {
214 char16_t* lword=ToNewUnicode(word);
215 ToUpperCase(lword,lword,word.Length());
216 if(word.Equals(lword)){
217 nsMemory::Free(lword);
218 return AllCap;
219 }
221 ToLowerCase(lword,lword,word.Length());
222 if(word.Equals(lword)){
223 nsMemory::Free(lword);
224 return NoCap;
225 }
226 int32_t length=word.Length();
227 if(Substring(word,1,length-1).Equals(lword+1)){
228 nsMemory::Free(lword);
229 return InitCap;
230 }
231 nsMemory::Free(lword);
232 return HuhCap;
233 }
235 // Convert the list of words in iwords to the same capitalization aWord and
236 // return them in owords.
237 NS_IMETHODIMP mozEnglishWordUtils::FromRootForm(const char16_t *aWord, const char16_t **iwords, uint32_t icount, char16_t ***owords, uint32_t *ocount)
238 {
239 nsAutoString word(aWord);
240 nsresult rv = NS_OK;
242 int32_t length;
243 char16_t **tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *)*icount);
244 if (!tmpPtr)
245 return NS_ERROR_OUT_OF_MEMORY;
247 mozEnglishWordUtils::myspCapitalization ct = captype(word);
248 for(uint32_t i = 0; i < icount; ++i) {
249 length = NS_strlen(iwords[i]);
250 tmpPtr[i] = (char16_t *) nsMemory::Alloc(sizeof(char16_t) * (length + 1));
251 if (MOZ_UNLIKELY(!tmpPtr[i])) {
252 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i, tmpPtr);
253 return NS_ERROR_OUT_OF_MEMORY;
254 }
255 memcpy(tmpPtr[i], iwords[i], (length + 1) * sizeof(char16_t));
257 nsAutoString capTest(tmpPtr[i]);
258 mozEnglishWordUtils::myspCapitalization newCt=captype(capTest);
259 if(newCt == NoCap){
260 switch(ct)
261 {
262 case HuhCap:
263 case NoCap:
264 break;
265 case AllCap:
266 ToUpperCase(tmpPtr[i],tmpPtr[i],length);
267 rv = NS_OK;
268 break;
269 case InitCap:
270 ToUpperCase(tmpPtr[i],tmpPtr[i],1);
271 rv = NS_OK;
272 break;
273 default:
274 rv = NS_ERROR_FAILURE; // should never get here;
275 break;
277 }
278 }
279 }
280 if (NS_SUCCEEDED(rv)){
281 *owords = tmpPtr;
282 *ocount = icount;
283 }
284 return rv;
285 }