extensions/spellcheck/src/mozEnglishWordUtils.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "mozEnglishWordUtils.h"
michael@0 7 #include "nsReadableUtils.h"
michael@0 8 #include "nsIServiceManager.h"
michael@0 9 #include "nsUnicharUtils.h"
michael@0 10 #include "nsUnicharUtilCIID.h"
michael@0 11 #include "nsUnicodeProperties.h"
michael@0 12 #include "nsCRT.h"
michael@0 13 #include "mozilla/Likely.h"
michael@0 14
michael@0 15 NS_IMPL_CYCLE_COLLECTING_ADDREF(mozEnglishWordUtils)
michael@0 16 NS_IMPL_CYCLE_COLLECTING_RELEASE(mozEnglishWordUtils)
michael@0 17
michael@0 18 NS_INTERFACE_MAP_BEGIN(mozEnglishWordUtils)
michael@0 19 NS_INTERFACE_MAP_ENTRY(mozISpellI18NUtil)
michael@0 20 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellI18NUtil)
michael@0 21 NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozEnglishWordUtils)
michael@0 22 NS_INTERFACE_MAP_END
michael@0 23
michael@0 24 NS_IMPL_CYCLE_COLLECTION(mozEnglishWordUtils,
michael@0 25 mURLDetector)
michael@0 26
michael@0 27 mozEnglishWordUtils::mozEnglishWordUtils()
michael@0 28 {
michael@0 29 mLanguage.AssignLiteral("en");
michael@0 30
michael@0 31 nsresult rv;
michael@0 32 mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv);
michael@0 33 }
michael@0 34
michael@0 35 mozEnglishWordUtils::~mozEnglishWordUtils()
michael@0 36 {
michael@0 37 }
michael@0 38
michael@0 39 /* attribute wstring language; */
michael@0 40 NS_IMETHODIMP mozEnglishWordUtils::GetLanguage(char16_t * *aLanguage)
michael@0 41 {
michael@0 42 nsresult rv = NS_OK;
michael@0 43 NS_ENSURE_ARG_POINTER(aLanguage);
michael@0 44
michael@0 45 *aLanguage = ToNewUnicode(mLanguage);
michael@0 46 if(!aLanguage) rv = NS_ERROR_OUT_OF_MEMORY;
michael@0 47 return rv;
michael@0 48 }
michael@0 49
michael@0 50 /* void GetRootForm (in wstring aWord, in uint32_t type, [array, size_is (count)] out wstring words, out uint32_t count); */
michael@0 51 // return the possible root forms of aWord.
michael@0 52 NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const char16_t *aWord, uint32_t type, char16_t ***words, uint32_t *count)
michael@0 53 {
michael@0 54 nsAutoString word(aWord);
michael@0 55 char16_t **tmpPtr;
michael@0 56 int32_t length = word.Length();
michael@0 57
michael@0 58 *count = 0;
michael@0 59
michael@0 60 mozEnglishWordUtils::myspCapitalization ct = captype(word);
michael@0 61 switch (ct)
michael@0 62 {
michael@0 63 case HuhCap:
michael@0 64 case NoCap:
michael@0 65 tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *));
michael@0 66 if (!tmpPtr)
michael@0 67 return NS_ERROR_OUT_OF_MEMORY;
michael@0 68 tmpPtr[0] = ToNewUnicode(word);
michael@0 69 if (!tmpPtr[0]) {
michael@0 70 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
michael@0 71 return NS_ERROR_OUT_OF_MEMORY;
michael@0 72 }
michael@0 73 *words = tmpPtr;
michael@0 74 *count = 1;
michael@0 75 break;
michael@0 76
michael@0 77
michael@0 78 case AllCap:
michael@0 79 tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *) * 3);
michael@0 80 if (!tmpPtr)
michael@0 81 return NS_ERROR_OUT_OF_MEMORY;
michael@0 82 tmpPtr[0] = ToNewUnicode(word);
michael@0 83 if (!tmpPtr[0]) {
michael@0 84 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
michael@0 85 return NS_ERROR_OUT_OF_MEMORY;
michael@0 86 }
michael@0 87 ToLowerCase(tmpPtr[0], tmpPtr[0], length);
michael@0 88
michael@0 89 tmpPtr[1] = ToNewUnicode(word);
michael@0 90 if (!tmpPtr[1]) {
michael@0 91 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr);
michael@0 92 return NS_ERROR_OUT_OF_MEMORY;
michael@0 93 }
michael@0 94 ToLowerCase(tmpPtr[1], tmpPtr[1], length);
michael@0 95 ToUpperCase(tmpPtr[1], tmpPtr[1], 1);
michael@0 96
michael@0 97 tmpPtr[2] = ToNewUnicode(word);
michael@0 98 if (!tmpPtr[2]) {
michael@0 99 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr);
michael@0 100 return NS_ERROR_OUT_OF_MEMORY;
michael@0 101 }
michael@0 102
michael@0 103 *words = tmpPtr;
michael@0 104 *count = 3;
michael@0 105 break;
michael@0 106
michael@0 107 case InitCap:
michael@0 108 tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *) * 2);
michael@0 109 if (!tmpPtr)
michael@0 110 return NS_ERROR_OUT_OF_MEMORY;
michael@0 111
michael@0 112 tmpPtr[0] = ToNewUnicode(word);
michael@0 113 if (!tmpPtr[0]) {
michael@0 114 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr);
michael@0 115 return NS_ERROR_OUT_OF_MEMORY;
michael@0 116 }
michael@0 117 ToLowerCase(tmpPtr[0], tmpPtr[0], length);
michael@0 118
michael@0 119 tmpPtr[1] = ToNewUnicode(word);
michael@0 120 if (!tmpPtr[1]) {
michael@0 121 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr);
michael@0 122 return NS_ERROR_OUT_OF_MEMORY;
michael@0 123 }
michael@0 124
michael@0 125 *words = tmpPtr;
michael@0 126 *count = 2;
michael@0 127 break;
michael@0 128 default:
michael@0 129 return NS_ERROR_FAILURE; // should never get here;
michael@0 130 }
michael@0 131 return NS_OK;
michael@0 132 }
michael@0 133
michael@0 134 // This needs vast improvement
michael@0 135 bool mozEnglishWordUtils::ucIsAlpha(char16_t aChar)
michael@0 136 {
michael@0 137 // XXX we have to fix callers to handle the full Unicode range
michael@0 138 return nsIUGenCategory::kLetter == mozilla::unicode::GetGenCategory(aChar);
michael@0 139 }
michael@0 140
michael@0 141 /* void FindNextWord (in wstring word, in uint32_t length, in uint32_t offset, out uint32_t begin, out uint32_t end); */
michael@0 142 NS_IMETHODIMP mozEnglishWordUtils::FindNextWord(const char16_t *word, uint32_t length, uint32_t offset, int32_t *begin, int32_t *end)
michael@0 143 {
michael@0 144 const char16_t *p = word + offset;
michael@0 145 const char16_t *endbuf = word + length;
michael@0 146 const char16_t *startWord=p;
michael@0 147 if(p<endbuf){
michael@0 148 // XXX These loops should be modified to handle non-BMP characters.
michael@0 149 // if previous character is a word character, need to advance out of the word
michael@0 150 if (offset > 0 && ucIsAlpha(*(p-1))) {
michael@0 151 while (p < endbuf && ucIsAlpha(*p))
michael@0 152 p++;
michael@0 153 }
michael@0 154 while((p < endbuf) && (!ucIsAlpha(*p)))
michael@0 155 {
michael@0 156 p++;
michael@0 157 }
michael@0 158 startWord=p;
michael@0 159 while((p < endbuf) && ((ucIsAlpha(*p))||(*p=='\'')))
michael@0 160 {
michael@0 161 p++;
michael@0 162 }
michael@0 163
michael@0 164 // we could be trying to break down a url, we don't want to break a url into parts,
michael@0 165 // instead we want to find out if it really is a url and if so, skip it, advancing startWord
michael@0 166 // to a point after the url.
michael@0 167
michael@0 168 // before we spend more time looking to see if the word is a url, look for a url identifer
michael@0 169 // and make sure that identifer isn't the last character in the word fragment.
michael@0 170 if ( (*p == ':' || *p == '@' || *p == '.') && p < endbuf - 1) {
michael@0 171
michael@0 172 // ok, we have a possible url...do more research to find out if we really have one
michael@0 173 // and determine the length of the url so we can skip over it.
michael@0 174
michael@0 175 if (mURLDetector)
michael@0 176 {
michael@0 177 int32_t startPos = -1;
michael@0 178 int32_t endPos = -1;
michael@0 179
michael@0 180 mURLDetector->FindURLInPlaintext(startWord, endbuf - startWord, p - startWord, &startPos, &endPos);
michael@0 181
michael@0 182 // ok, if we got a url, adjust the array bounds, skip the current url text and find the next word again
michael@0 183 if (startPos != -1 && endPos != -1) {
michael@0 184 startWord = p + endPos + 1; // skip over the url
michael@0 185 p = startWord; // reset p
michael@0 186
michael@0 187 // now recursively call FindNextWord to search for the next word now that we have skipped the url
michael@0 188 return FindNextWord(word, length, startWord - word, begin, end);
michael@0 189 }
michael@0 190 }
michael@0 191 }
michael@0 192
michael@0 193 while((p > startWord)&&(*(p-1) == '\'')){ // trim trailing apostrophes
michael@0 194 p--;
michael@0 195 }
michael@0 196 }
michael@0 197 else{
michael@0 198 startWord = endbuf;
michael@0 199 }
michael@0 200 if(startWord == endbuf){
michael@0 201 *begin = -1;
michael@0 202 *end = -1;
michael@0 203 }
michael@0 204 else{
michael@0 205 *begin = startWord-word;
michael@0 206 *end = p-word;
michael@0 207 }
michael@0 208 return NS_OK;
michael@0 209 }
michael@0 210
michael@0 211 mozEnglishWordUtils::myspCapitalization
michael@0 212 mozEnglishWordUtils::captype(const nsString &word)
michael@0 213 {
michael@0 214 char16_t* lword=ToNewUnicode(word);
michael@0 215 ToUpperCase(lword,lword,word.Length());
michael@0 216 if(word.Equals(lword)){
michael@0 217 nsMemory::Free(lword);
michael@0 218 return AllCap;
michael@0 219 }
michael@0 220
michael@0 221 ToLowerCase(lword,lword,word.Length());
michael@0 222 if(word.Equals(lword)){
michael@0 223 nsMemory::Free(lword);
michael@0 224 return NoCap;
michael@0 225 }
michael@0 226 int32_t length=word.Length();
michael@0 227 if(Substring(word,1,length-1).Equals(lword+1)){
michael@0 228 nsMemory::Free(lword);
michael@0 229 return InitCap;
michael@0 230 }
michael@0 231 nsMemory::Free(lword);
michael@0 232 return HuhCap;
michael@0 233 }
michael@0 234
michael@0 235 // Convert the list of words in iwords to the same capitalization aWord and
michael@0 236 // return them in owords.
michael@0 237 NS_IMETHODIMP mozEnglishWordUtils::FromRootForm(const char16_t *aWord, const char16_t **iwords, uint32_t icount, char16_t ***owords, uint32_t *ocount)
michael@0 238 {
michael@0 239 nsAutoString word(aWord);
michael@0 240 nsresult rv = NS_OK;
michael@0 241
michael@0 242 int32_t length;
michael@0 243 char16_t **tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *)*icount);
michael@0 244 if (!tmpPtr)
michael@0 245 return NS_ERROR_OUT_OF_MEMORY;
michael@0 246
michael@0 247 mozEnglishWordUtils::myspCapitalization ct = captype(word);
michael@0 248 for(uint32_t i = 0; i < icount; ++i) {
michael@0 249 length = NS_strlen(iwords[i]);
michael@0 250 tmpPtr[i] = (char16_t *) nsMemory::Alloc(sizeof(char16_t) * (length + 1));
michael@0 251 if (MOZ_UNLIKELY(!tmpPtr[i])) {
michael@0 252 NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i, tmpPtr);
michael@0 253 return NS_ERROR_OUT_OF_MEMORY;
michael@0 254 }
michael@0 255 memcpy(tmpPtr[i], iwords[i], (length + 1) * sizeof(char16_t));
michael@0 256
michael@0 257 nsAutoString capTest(tmpPtr[i]);
michael@0 258 mozEnglishWordUtils::myspCapitalization newCt=captype(capTest);
michael@0 259 if(newCt == NoCap){
michael@0 260 switch(ct)
michael@0 261 {
michael@0 262 case HuhCap:
michael@0 263 case NoCap:
michael@0 264 break;
michael@0 265 case AllCap:
michael@0 266 ToUpperCase(tmpPtr[i],tmpPtr[i],length);
michael@0 267 rv = NS_OK;
michael@0 268 break;
michael@0 269 case InitCap:
michael@0 270 ToUpperCase(tmpPtr[i],tmpPtr[i],1);
michael@0 271 rv = NS_OK;
michael@0 272 break;
michael@0 273 default:
michael@0 274 rv = NS_ERROR_FAILURE; // should never get here;
michael@0 275 break;
michael@0 276
michael@0 277 }
michael@0 278 }
michael@0 279 }
michael@0 280 if (NS_SUCCEEDED(rv)){
michael@0 281 *owords = tmpPtr;
michael@0 282 *ocount = icount;
michael@0 283 }
michael@0 284 return rv;
michael@0 285 }
michael@0 286

mercurial