michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozEnglishWordUtils.h" michael@0: #include "nsReadableUtils.h" michael@0: #include "nsIServiceManager.h" michael@0: #include "nsUnicharUtils.h" michael@0: #include "nsUnicharUtilCIID.h" michael@0: #include "nsUnicodeProperties.h" michael@0: #include "nsCRT.h" michael@0: #include "mozilla/Likely.h" michael@0: michael@0: NS_IMPL_CYCLE_COLLECTING_ADDREF(mozEnglishWordUtils) michael@0: NS_IMPL_CYCLE_COLLECTING_RELEASE(mozEnglishWordUtils) michael@0: michael@0: NS_INTERFACE_MAP_BEGIN(mozEnglishWordUtils) michael@0: NS_INTERFACE_MAP_ENTRY(mozISpellI18NUtil) michael@0: NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellI18NUtil) michael@0: NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozEnglishWordUtils) michael@0: NS_INTERFACE_MAP_END michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION(mozEnglishWordUtils, michael@0: mURLDetector) michael@0: michael@0: mozEnglishWordUtils::mozEnglishWordUtils() michael@0: { michael@0: mLanguage.AssignLiteral("en"); michael@0: michael@0: nsresult rv; michael@0: mURLDetector = do_CreateInstance(MOZ_TXTTOHTMLCONV_CONTRACTID, &rv); michael@0: } michael@0: michael@0: mozEnglishWordUtils::~mozEnglishWordUtils() michael@0: { michael@0: } michael@0: michael@0: /* attribute wstring language; */ michael@0: NS_IMETHODIMP mozEnglishWordUtils::GetLanguage(char16_t * *aLanguage) michael@0: { michael@0: nsresult rv = NS_OK; michael@0: NS_ENSURE_ARG_POINTER(aLanguage); michael@0: michael@0: *aLanguage = ToNewUnicode(mLanguage); michael@0: if(!aLanguage) rv = NS_ERROR_OUT_OF_MEMORY; michael@0: return rv; michael@0: } michael@0: michael@0: /* void GetRootForm (in wstring aWord, in uint32_t type, [array, size_is (count)] out wstring words, out uint32_t count); */ michael@0: // return the possible root forms of aWord. michael@0: NS_IMETHODIMP mozEnglishWordUtils::GetRootForm(const char16_t *aWord, uint32_t type, char16_t ***words, uint32_t *count) michael@0: { michael@0: nsAutoString word(aWord); michael@0: char16_t **tmpPtr; michael@0: int32_t length = word.Length(); michael@0: michael@0: *count = 0; michael@0: michael@0: mozEnglishWordUtils::myspCapitalization ct = captype(word); michael@0: switch (ct) michael@0: { michael@0: case HuhCap: michael@0: case NoCap: michael@0: tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *)); michael@0: if (!tmpPtr) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: tmpPtr[0] = ToNewUnicode(word); michael@0: if (!tmpPtr[0]) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: *words = tmpPtr; michael@0: *count = 1; michael@0: break; michael@0: michael@0: michael@0: case AllCap: michael@0: tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *) * 3); michael@0: if (!tmpPtr) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: tmpPtr[0] = ToNewUnicode(word); michael@0: if (!tmpPtr[0]) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: ToLowerCase(tmpPtr[0], tmpPtr[0], length); michael@0: michael@0: tmpPtr[1] = ToNewUnicode(word); michael@0: if (!tmpPtr[1]) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: ToLowerCase(tmpPtr[1], tmpPtr[1], length); michael@0: ToUpperCase(tmpPtr[1], tmpPtr[1], 1); michael@0: michael@0: tmpPtr[2] = ToNewUnicode(word); michael@0: if (!tmpPtr[2]) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(2, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: *words = tmpPtr; michael@0: *count = 3; michael@0: break; michael@0: michael@0: case InitCap: michael@0: tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *) * 2); michael@0: if (!tmpPtr) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: michael@0: tmpPtr[0] = ToNewUnicode(word); michael@0: if (!tmpPtr[0]) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(0, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: ToLowerCase(tmpPtr[0], tmpPtr[0], length); michael@0: michael@0: tmpPtr[1] = ToNewUnicode(word); michael@0: if (!tmpPtr[1]) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(1, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: *words = tmpPtr; michael@0: *count = 2; michael@0: break; michael@0: default: michael@0: return NS_ERROR_FAILURE; // should never get here; michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: // This needs vast improvement michael@0: bool mozEnglishWordUtils::ucIsAlpha(char16_t aChar) michael@0: { michael@0: // XXX we have to fix callers to handle the full Unicode range michael@0: return nsIUGenCategory::kLetter == mozilla::unicode::GetGenCategory(aChar); michael@0: } michael@0: michael@0: /* void FindNextWord (in wstring word, in uint32_t length, in uint32_t offset, out uint32_t begin, out uint32_t end); */ michael@0: NS_IMETHODIMP mozEnglishWordUtils::FindNextWord(const char16_t *word, uint32_t length, uint32_t offset, int32_t *begin, int32_t *end) michael@0: { michael@0: const char16_t *p = word + offset; michael@0: const char16_t *endbuf = word + length; michael@0: const char16_t *startWord=p; michael@0: if(p 0 && ucIsAlpha(*(p-1))) { michael@0: while (p < endbuf && ucIsAlpha(*p)) michael@0: p++; michael@0: } michael@0: while((p < endbuf) && (!ucIsAlpha(*p))) michael@0: { michael@0: p++; michael@0: } michael@0: startWord=p; michael@0: while((p < endbuf) && ((ucIsAlpha(*p))||(*p=='\''))) michael@0: { michael@0: p++; michael@0: } michael@0: michael@0: // we could be trying to break down a url, we don't want to break a url into parts, michael@0: // instead we want to find out if it really is a url and if so, skip it, advancing startWord michael@0: // to a point after the url. michael@0: michael@0: // before we spend more time looking to see if the word is a url, look for a url identifer michael@0: // and make sure that identifer isn't the last character in the word fragment. michael@0: if ( (*p == ':' || *p == '@' || *p == '.') && p < endbuf - 1) { michael@0: michael@0: // ok, we have a possible url...do more research to find out if we really have one michael@0: // and determine the length of the url so we can skip over it. michael@0: michael@0: if (mURLDetector) michael@0: { michael@0: int32_t startPos = -1; michael@0: int32_t endPos = -1; michael@0: michael@0: mURLDetector->FindURLInPlaintext(startWord, endbuf - startWord, p - startWord, &startPos, &endPos); michael@0: michael@0: // ok, if we got a url, adjust the array bounds, skip the current url text and find the next word again michael@0: if (startPos != -1 && endPos != -1) { michael@0: startWord = p + endPos + 1; // skip over the url michael@0: p = startWord; // reset p michael@0: michael@0: // now recursively call FindNextWord to search for the next word now that we have skipped the url michael@0: return FindNextWord(word, length, startWord - word, begin, end); michael@0: } michael@0: } michael@0: } michael@0: michael@0: while((p > startWord)&&(*(p-1) == '\'')){ // trim trailing apostrophes michael@0: p--; michael@0: } michael@0: } michael@0: else{ michael@0: startWord = endbuf; michael@0: } michael@0: if(startWord == endbuf){ michael@0: *begin = -1; michael@0: *end = -1; michael@0: } michael@0: else{ michael@0: *begin = startWord-word; michael@0: *end = p-word; michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: mozEnglishWordUtils::myspCapitalization michael@0: mozEnglishWordUtils::captype(const nsString &word) michael@0: { michael@0: char16_t* lword=ToNewUnicode(word); michael@0: ToUpperCase(lword,lword,word.Length()); michael@0: if(word.Equals(lword)){ michael@0: nsMemory::Free(lword); michael@0: return AllCap; michael@0: } michael@0: michael@0: ToLowerCase(lword,lword,word.Length()); michael@0: if(word.Equals(lword)){ michael@0: nsMemory::Free(lword); michael@0: return NoCap; michael@0: } michael@0: int32_t length=word.Length(); michael@0: if(Substring(word,1,length-1).Equals(lword+1)){ michael@0: nsMemory::Free(lword); michael@0: return InitCap; michael@0: } michael@0: nsMemory::Free(lword); michael@0: return HuhCap; michael@0: } michael@0: michael@0: // Convert the list of words in iwords to the same capitalization aWord and michael@0: // return them in owords. michael@0: NS_IMETHODIMP mozEnglishWordUtils::FromRootForm(const char16_t *aWord, const char16_t **iwords, uint32_t icount, char16_t ***owords, uint32_t *ocount) michael@0: { michael@0: nsAutoString word(aWord); michael@0: nsresult rv = NS_OK; michael@0: michael@0: int32_t length; michael@0: char16_t **tmpPtr = (char16_t **)nsMemory::Alloc(sizeof(char16_t *)*icount); michael@0: if (!tmpPtr) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: michael@0: mozEnglishWordUtils::myspCapitalization ct = captype(word); michael@0: for(uint32_t i = 0; i < icount; ++i) { michael@0: length = NS_strlen(iwords[i]); michael@0: tmpPtr[i] = (char16_t *) nsMemory::Alloc(sizeof(char16_t) * (length + 1)); michael@0: if (MOZ_UNLIKELY(!tmpPtr[i])) { michael@0: NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(i, tmpPtr); michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: memcpy(tmpPtr[i], iwords[i], (length + 1) * sizeof(char16_t)); michael@0: michael@0: nsAutoString capTest(tmpPtr[i]); michael@0: mozEnglishWordUtils::myspCapitalization newCt=captype(capTest); michael@0: if(newCt == NoCap){ michael@0: switch(ct) michael@0: { michael@0: case HuhCap: michael@0: case NoCap: michael@0: break; michael@0: case AllCap: michael@0: ToUpperCase(tmpPtr[i],tmpPtr[i],length); michael@0: rv = NS_OK; michael@0: break; michael@0: case InitCap: michael@0: ToUpperCase(tmpPtr[i],tmpPtr[i],1); michael@0: rv = NS_OK; michael@0: break; michael@0: default: michael@0: rv = NS_ERROR_FAILURE; // should never get here; michael@0: break; michael@0: michael@0: } michael@0: } michael@0: } michael@0: if (NS_SUCCEEDED(rv)){ michael@0: *owords = tmpPtr; michael@0: *ocount = icount; michael@0: } michael@0: return rv; michael@0: } michael@0: