1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/spellcheck/hunspell/src/suggestmgr.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2058 @@ 1.4 +/******* BEGIN LICENSE BLOCK ******* 1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 1.6 + * 1.7 + * The contents of this file are subject to the Mozilla Public License Version 1.8 + * 1.1 (the "License"); you may not use this file except in compliance with 1.9 + * the License. You may obtain a copy of the License at 1.10 + * http://www.mozilla.org/MPL/ 1.11 + * 1.12 + * Software distributed under the License is distributed on an "AS IS" basis, 1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 1.14 + * for the specific language governing rights and limitations under the 1.15 + * License. 1.16 + * 1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) 1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers 1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. 
1.20 + * 1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) 1.22 + * David Einstein (deinst@world.std.com) 1.23 + * László Németh (nemethl@gyorsposta.hu) 1.24 + * Caolan McNamara (caolanm@redhat.com) 1.25 + * Davide Prina 1.26 + * Giuseppe Modugno 1.27 + * Gianluca Turconi 1.28 + * Simon Brouwer 1.29 + * Noll Janos 1.30 + * Biro Arpad 1.31 + * Goldman Eleonora 1.32 + * Sarlos Tamas 1.33 + * Bencsath Boldizsar 1.34 + * Halacsy Peter 1.35 + * Dvornik Laszlo 1.36 + * Gefferth Andras 1.37 + * Nagy Viktor 1.38 + * Varga Daniel 1.39 + * Chris Halls 1.40 + * Rene Engelhard 1.41 + * Bram Moolenaar 1.42 + * Dafydd Jones 1.43 + * Harri Pitkanen 1.44 + * Andras Timar 1.45 + * Tor Lillqvist 1.46 + * 1.47 + * Alternatively, the contents of this file may be used under the terms of 1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or 1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead 1.51 + * of those above. If you wish to allow use of your version of this file only 1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to 1.53 + * use your version of this file under the terms of the MPL, indicate your 1.54 + * decision by deleting the provisions above and replace them with the notice 1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete 1.56 + * the provisions above, a recipient may use your version of this file under 1.57 + * the terms of any one of the MPL, the GPL or the LGPL. 
1.58 + * 1.59 + ******* END LICENSE BLOCK *******/ 1.60 + 1.61 +#include <stdlib.h> 1.62 +#include <string.h> 1.63 +#include <stdio.h> 1.64 +#include <ctype.h> 1.65 + 1.66 +#include "suggestmgr.hxx" 1.67 +#include "htypes.hxx" 1.68 +#include "csutil.hxx" 1.69 + 1.70 +const w_char W_VLINE = { '\0', '|' }; 1.71 + 1.72 +SuggestMgr::SuggestMgr(const char * tryme, int maxn, 1.73 + AffixMgr * aptr) 1.74 +{ 1.75 + 1.76 + // register affix manager and check in string of chars to 1.77 + // try when building candidate suggestions 1.78 + pAMgr = aptr; 1.79 + 1.80 + csconv = NULL; 1.81 + 1.82 + ckeyl = 0; 1.83 + ckey = NULL; 1.84 + ckey_utf = NULL; 1.85 + 1.86 + ctryl = 0; 1.87 + ctry = NULL; 1.88 + ctry_utf = NULL; 1.89 + 1.90 + utf8 = 0; 1.91 + langnum = 0; 1.92 + complexprefixes = 0; 1.93 + 1.94 + maxSug = maxn; 1.95 + nosplitsugs = 0; 1.96 + maxngramsugs = MAXNGRAMSUGS; 1.97 + maxcpdsugs = MAXCOMPOUNDSUGS; 1.98 + 1.99 + if (pAMgr) { 1.100 + langnum = pAMgr->get_langnum(); 1.101 + ckey = pAMgr->get_key_string(); 1.102 + nosplitsugs = pAMgr->get_nosplitsugs(); 1.103 + if (pAMgr->get_maxngramsugs() >= 0) 1.104 + maxngramsugs = pAMgr->get_maxngramsugs(); 1.105 + utf8 = pAMgr->get_utf8(); 1.106 + if (pAMgr->get_maxcpdsugs() >= 0) 1.107 + maxcpdsugs = pAMgr->get_maxcpdsugs(); 1.108 + if (!utf8) 1.109 + { 1.110 + char * enc = pAMgr->get_encoding(); 1.111 + csconv = get_current_cs(enc); 1.112 + free(enc); 1.113 + } 1.114 + complexprefixes = pAMgr->get_complexprefixes(); 1.115 + } 1.116 + 1.117 + if (ckey) { 1.118 + if (utf8) { 1.119 + w_char t[MAXSWL]; 1.120 + ckeyl = u8_u16(t, MAXSWL, ckey); 1.121 + ckey_utf = (w_char *) malloc(ckeyl * sizeof(w_char)); 1.122 + if (ckey_utf) memcpy(ckey_utf, t, ckeyl * sizeof(w_char)); 1.123 + else ckeyl = 0; 1.124 + } else { 1.125 + ckeyl = strlen(ckey); 1.126 + } 1.127 + } 1.128 + 1.129 + if (tryme) { 1.130 + ctry = mystrdup(tryme); 1.131 + if (ctry) ctryl = strlen(ctry); 1.132 + if (ctry && utf8) { 1.133 + w_char t[MAXSWL]; 1.134 + ctryl = 
u8_u16(t, MAXSWL, tryme); 1.135 + ctry_utf = (w_char *) malloc(ctryl * sizeof(w_char)); 1.136 + if (ctry_utf) memcpy(ctry_utf, t, ctryl * sizeof(w_char)); 1.137 + else ctryl = 0; 1.138 + } 1.139 + } 1.140 +} 1.141 + 1.142 + 1.143 +SuggestMgr::~SuggestMgr() 1.144 +{ 1.145 + pAMgr = NULL; 1.146 + if (ckey) free(ckey); 1.147 + ckey = NULL; 1.148 + if (ckey_utf) free(ckey_utf); 1.149 + ckey_utf = NULL; 1.150 + ckeyl = 0; 1.151 + if (ctry) free(ctry); 1.152 + ctry = NULL; 1.153 + if (ctry_utf) free(ctry_utf); 1.154 + ctry_utf = NULL; 1.155 + ctryl = 0; 1.156 + maxSug = 0; 1.157 +#ifdef MOZILLA_CLIENT 1.158 + delete [] csconv; 1.159 +#endif 1.160 +} 1.161 + 1.162 +int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest, 1.163 + int * timer, clock_t * timelimit) { 1.164 + int cwrd = 1; 1.165 + if (ns == maxSug) return maxSug; 1.166 + for (int k=0; k < ns; k++) { 1.167 + if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; 1.168 + } 1.169 + if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { 1.170 + wlst[ns] = mystrdup(candidate); 1.171 + if (wlst[ns] == NULL) { 1.172 + for (int j=0; j<ns; j++) free(wlst[j]); 1.173 + return -1; 1.174 + } 1.175 + ns++; 1.176 + } 1.177 + return ns; 1.178 +} 1.179 + 1.180 +// generate suggestions for a misspelled word 1.181 +// pass in address of array of char * pointers 1.182 +// onlycompoundsug: probably bad suggestions (need for ngram sugs, too) 1.183 + 1.184 +int SuggestMgr::suggest(char*** slst, const char * w, int nsug, 1.185 + int * onlycompoundsug) 1.186 +{ 1.187 + int nocompoundtwowords = 0; 1.188 + char ** wlst; 1.189 + w_char word_utf[MAXSWL]; 1.190 + int wl = 0; 1.191 + int nsugorig = nsug; 1.192 + char w2[MAXWORDUTF8LEN]; 1.193 + const char * word = w; 1.194 + int oldSug = 0; 1.195 + 1.196 + // word reversing wrapper for complex prefixes 1.197 + if (complexprefixes) { 1.198 + strcpy(w2, w); 1.199 + if (utf8) reverseword_utf(w2); else reverseword(w2); 1.200 + word = w2; 1.201 + } 
1.202 + 1.203 + if (*slst) { 1.204 + wlst = *slst; 1.205 + } else { 1.206 + wlst = (char **) malloc(maxSug * sizeof(char *)); 1.207 + if (wlst == NULL) return -1; 1.208 + for (int i = 0; i < maxSug; i++) { 1.209 + wlst[i] = NULL; 1.210 + } 1.211 + } 1.212 + 1.213 + if (utf8) { 1.214 + wl = u8_u16(word_utf, MAXSWL, word); 1.215 + if (wl == -1) { 1.216 + *slst = wlst; 1.217 + return nsug; 1.218 + } 1.219 + } 1.220 + 1.221 + for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) { 1.222 + 1.223 + // limit compound suggestion 1.224 + if (cpdsuggest > 0) oldSug = nsug; 1.225 + 1.226 + // suggestions for an uppercase word (html -> HTML) 1.227 + if ((nsug < maxSug) && (nsug > -1)) { 1.228 + nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.229 + capchars(wlst, word, nsug, cpdsuggest); 1.230 + } 1.231 + 1.232 + // perhaps we made a typical fault of spelling 1.233 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.234 + nsug = replchars(wlst, word, nsug, cpdsuggest); 1.235 + } 1.236 + 1.237 + // perhaps we made chose the wrong char from a related set 1.238 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.239 + nsug = mapchars(wlst, word, nsug, cpdsuggest); 1.240 + } 1.241 + 1.242 + // only suggest compound words when no other suggestion 1.243 + if ((cpdsuggest == 0) && (nsug > nsugorig)) nocompoundtwowords=1; 1.244 + 1.245 + // did we swap the order of chars by mistake 1.246 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.247 + nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.248 + swapchar(wlst, word, nsug, cpdsuggest); 1.249 + } 1.250 + 1.251 + // did we swap the order of non adjacent chars by mistake 1.252 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.253 + nsug = (utf8) ? 
longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.254 + longswapchar(wlst, word, nsug, cpdsuggest); 1.255 + } 1.256 + 1.257 + // did we just hit the wrong key in place of a good char (case and keyboard) 1.258 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.259 + nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.260 + badcharkey(wlst, word, nsug, cpdsuggest); 1.261 + } 1.262 + 1.263 + // did we add a char that should not be there 1.264 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.265 + nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.266 + extrachar(wlst, word, nsug, cpdsuggest); 1.267 + } 1.268 + 1.269 + 1.270 + // did we forgot a char 1.271 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.272 + nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.273 + forgotchar(wlst, word, nsug, cpdsuggest); 1.274 + } 1.275 + 1.276 + // did we move a char 1.277 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.278 + nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.279 + movechar(wlst, word, nsug, cpdsuggest); 1.280 + } 1.281 + 1.282 + // did we just hit the wrong key in place of a good char 1.283 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.284 + nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.285 + badchar(wlst, word, nsug, cpdsuggest); 1.286 + } 1.287 + 1.288 + // did we double two characters 1.289 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.290 + nsug = (utf8) ? 
doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest) : 1.291 + doubletwochars(wlst, word, nsug, cpdsuggest); 1.292 + } 1.293 + 1.294 + // perhaps we forgot to hit space and two words ran together 1.295 + if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) { 1.296 + nsug = twowords(wlst, word, nsug, cpdsuggest); 1.297 + } 1.298 + 1.299 + } // repeating ``for'' statement compounding support 1.300 + 1.301 + if (nsug < 0) { 1.302 + // we ran out of memory - we should free up as much as possible 1.303 + for (int i = 0; i < maxSug; i++) 1.304 + if (wlst[i] != NULL) free(wlst[i]); 1.305 + free(wlst); 1.306 + wlst = NULL; 1.307 + } 1.308 + 1.309 + if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug) *onlycompoundsug = 1; 1.310 + 1.311 + *slst = wlst; 1.312 + return nsug; 1.313 +} 1.314 + 1.315 +// generate suggestions for a word with typical mistake 1.316 +// pass in address of array of char * pointers 1.317 +#ifdef HUNSPELL_EXPERIMENTAL 1.318 +int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug) 1.319 +{ 1.320 + int nocompoundtwowords = 0; 1.321 + char ** wlst; 1.322 + int oldSug; 1.323 + 1.324 + char w2[MAXWORDUTF8LEN]; 1.325 + const char * word = w; 1.326 + 1.327 + // word reversing wrapper for complex prefixes 1.328 + if (complexprefixes) { 1.329 + strcpy(w2, w); 1.330 + if (utf8) reverseword_utf(w2); else reverseword(w2); 1.331 + word = w2; 1.332 + } 1.333 + 1.334 + if (*slst) { 1.335 + wlst = *slst; 1.336 + } else { 1.337 + wlst = (char **) malloc(maxSug * sizeof(char *)); 1.338 + if (wlst == NULL) return -1; 1.339 + } 1.340 + 1.341 + for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) { 1.342 + 1.343 + // limit compound suggestion 1.344 + if (cpdsuggest > 0) oldSug = nsug; 1.345 + 1.346 + // perhaps we made a typical fault of spelling 1.347 + if ((nsug < maxSug) && (nsug > -1)) 1.348 + nsug = replchars(wlst, word, nsug, cpdsuggest); 1.349 + 1.350 + // perhaps we 
made chose the wrong char from a related set 1.351 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) 1.352 + nsug = mapchars(wlst, word, nsug, cpdsuggest); 1.353 + 1.354 + if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1; 1.355 + 1.356 + // perhaps we forgot to hit space and two words ran together 1.357 + 1.358 + if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs)) && check_forbidden(word, strlen(word))) { 1.359 + nsug = twowords(wlst, word, nsug, cpdsuggest); 1.360 + } 1.361 + 1.362 + } // repeating ``for'' statement compounding support 1.363 + 1.364 + if (nsug < 0) { 1.365 + for (int i=0;i<maxSug; i++) 1.366 + if (wlst[i] != NULL) free(wlst[i]); 1.367 + free(wlst); 1.368 + return -1; 1.369 + } 1.370 + 1.371 + *slst = wlst; 1.372 + return nsug; 1.373 +} 1.374 +#endif // END OF HUNSPELL_EXPERIMENTAL CODE 1.375 + 1.376 +// suggestions for an uppercase word (html -> HTML) 1.377 +int SuggestMgr::capchars_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.378 +{ 1.379 + char candidate[MAXSWUTF8L]; 1.380 + w_char candidate_utf[MAXSWL]; 1.381 + memcpy(candidate_utf, word, wl * sizeof(w_char)); 1.382 + mkallcap_utf(candidate_utf, wl, langnum); 1.383 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.384 + return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.385 +} 1.386 + 1.387 +// suggestions for an uppercase word (html -> HTML) 1.388 +int SuggestMgr::capchars(char** wlst, const char * word, int ns, int cpdsuggest) 1.389 +{ 1.390 + char candidate[MAXSWUTF8L]; 1.391 + strcpy(candidate, word); 1.392 + mkallcap(candidate, csconv); 1.393 + return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.394 +} 1.395 + 1.396 +// suggestions for when chose the wrong char out of a related set 1.397 +int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest) 1.398 +{ 1.399 + char candidate[MAXSWUTF8L]; 1.400 + 
clock_t timelimit; 1.401 + int timer; 1.402 + candidate[0] = '\0'; 1.403 + 1.404 + int wl = strlen(word); 1.405 + if (wl < 2 || ! pAMgr) return ns; 1.406 + 1.407 + int nummap = pAMgr->get_nummap(); 1.408 + struct mapentry* maptable = pAMgr->get_maptable(); 1.409 + if (maptable==NULL) return ns; 1.410 + 1.411 + timelimit = clock(); 1.412 + timer = MINTIMER; 1.413 + return map_related(word, (char *) &candidate, 0, 0, wlst, cpdsuggest, ns, maptable, nummap, &timer, &timelimit); 1.414 +} 1.415 + 1.416 +int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn, 1.417 + char** wlst, int cpdsuggest, int ns, 1.418 + const mapentry* maptable, int nummap, int * timer, clock_t * timelimit) 1.419 +{ 1.420 + if (*(word + wn) == '\0') { 1.421 + int cwrd = 1; 1.422 + *(candidate + cn) = '\0'; 1.423 + int wl = strlen(candidate); 1.424 + for (int m=0; m < ns; m++) 1.425 + if (strcmp(candidate, wlst[m]) == 0) cwrd = 0; 1.426 + if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) { 1.427 + if (ns < maxSug) { 1.428 + wlst[ns] = mystrdup(candidate); 1.429 + if (wlst[ns] == NULL) return -1; 1.430 + ns++; 1.431 + } 1.432 + } 1.433 + return ns; 1.434 + } 1.435 + int in_map = 0; 1.436 + for (int j = 0; j < nummap; j++) { 1.437 + for (int k = 0; k < maptable[j].len; k++) { 1.438 + int len = strlen(maptable[j].set[k]); 1.439 + if (strncmp(maptable[j].set[k], word + wn, len) == 0) { 1.440 + in_map = 1; 1.441 + for (int l = 0; l < maptable[j].len; l++) { 1.442 + strcpy(candidate + cn, maptable[j].set[l]); 1.443 + ns = map_related(word, candidate, wn + len, strlen(candidate), wlst, 1.444 + cpdsuggest, ns, maptable, nummap, timer, timelimit); 1.445 + if (!(*timer)) return ns; 1.446 + } 1.447 + } 1.448 + } 1.449 + } 1.450 + if (!in_map) { 1.451 + *(candidate + cn) = *(word + wn); 1.452 + ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest, 1.453 + ns, maptable, nummap, timer, timelimit); 1.454 + } 1.455 + return ns; 1.456 +} 1.457 + 1.458 +// 
suggestions for a typical fault of spelling, that 1.459 +// differs with more, than 1 letter from the right form. 1.460 +int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest) 1.461 +{ 1.462 + char candidate[MAXSWUTF8L]; 1.463 + const char * r; 1.464 + int lenr, lenp; 1.465 + int wl = strlen(word); 1.466 + if (wl < 2 || ! pAMgr) return ns; 1.467 + int numrep = pAMgr->get_numrep(); 1.468 + struct replentry* reptable = pAMgr->get_reptable(); 1.469 + if (reptable==NULL) return ns; 1.470 + for (int i=0; i < numrep; i++ ) { 1.471 + r = word; 1.472 + lenr = strlen(reptable[i].pattern2); 1.473 + lenp = strlen(reptable[i].pattern); 1.474 + // search every occurence of the pattern in the word 1.475 + while ((r=strstr(r, reptable[i].pattern)) != NULL && (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) && 1.476 + (!reptable[i].start || r == word)) { 1.477 + strcpy(candidate, word); 1.478 + if (r-word + lenr + strlen(r+lenp) >= MAXSWUTF8L) break; 1.479 + strcpy(candidate+(r-word),reptable[i].pattern2); 1.480 + strcpy(candidate+(r-word)+lenr, r+lenp); 1.481 + ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL); 1.482 + if (ns == -1) return -1; 1.483 + // check REP suggestions with space 1.484 + char * sp = strchr(candidate, ' '); 1.485 + if (sp) { 1.486 + char * prev = candidate; 1.487 + while (sp) { 1.488 + *sp = '\0'; 1.489 + if (checkword(prev, strlen(prev), 0, NULL, NULL)) { 1.490 + int oldns = ns; 1.491 + *sp = ' '; 1.492 + ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, NULL); 1.493 + if (ns == -1) return -1; 1.494 + if (oldns < ns) { 1.495 + free(wlst[ns - 1]); 1.496 + wlst[ns - 1] = mystrdup(candidate); 1.497 + if (!wlst[ns - 1]) return -1; 1.498 + } 1.499 + } 1.500 + *sp = ' '; 1.501 + prev = sp + 1; 1.502 + sp = strchr(prev, ' '); 1.503 + } 1.504 + } 1.505 + r++; // search for the next letter 1.506 + } 1.507 + } 1.508 + return ns; 1.509 +} 1.510 + 1.511 +// perhaps we doubled two characters 
(pattern aba -> ababa, for example vacation -> vacacation) 1.512 +int SuggestMgr::doubletwochars(char** wlst, const char * word, int ns, int cpdsuggest) 1.513 +{ 1.514 + char candidate[MAXSWUTF8L]; 1.515 + int state=0; 1.516 + int wl = strlen(word); 1.517 + if (wl < 5 || ! pAMgr) return ns; 1.518 + for (int i=2; i < wl; i++ ) { 1.519 + if (word[i]==word[i-2]) { 1.520 + state++; 1.521 + if (state==3) { 1.522 + strcpy(candidate,word); 1.523 + strcpy(candidate+i-1,word+i+1); 1.524 + ns = testsug(wlst, candidate, wl-2, ns, cpdsuggest, NULL, NULL); 1.525 + if (ns == -1) return -1; 1.526 + state=0; 1.527 + } 1.528 + } else { 1.529 + state=0; 1.530 + } 1.531 + } 1.532 + return ns; 1.533 +} 1.534 + 1.535 +// perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation) 1.536 +int SuggestMgr::doubletwochars_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.537 +{ 1.538 + w_char candidate_utf[MAXSWL]; 1.539 + char candidate[MAXSWUTF8L]; 1.540 + int state=0; 1.541 + if (wl < 5 || ! 
pAMgr) return ns; 1.542 + for (int i=2; i < wl; i++) { 1.543 + if (w_char_eq(word[i], word[i-2])) { 1.544 + state++; 1.545 + if (state==3) { 1.546 + memcpy(candidate_utf, word, (i - 1) * sizeof(w_char)); 1.547 + memcpy(candidate_utf+i-1, word+i+1, (wl-i-1) * sizeof(w_char)); 1.548 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl-2); 1.549 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.550 + if (ns == -1) return -1; 1.551 + state=0; 1.552 + } 1.553 + } else { 1.554 + state=0; 1.555 + } 1.556 + } 1.557 + return ns; 1.558 +} 1.559 + 1.560 +// error is wrong char in place of correct one (case and keyboard related version) 1.561 +int SuggestMgr::badcharkey(char ** wlst, const char * word, int ns, int cpdsuggest) 1.562 +{ 1.563 + char tmpc; 1.564 + char candidate[MAXSWUTF8L]; 1.565 + int wl = strlen(word); 1.566 + strcpy(candidate, word); 1.567 + // swap out each char one by one and try uppercase and neighbor 1.568 + // keyboard chars in its place to see if that makes a good word 1.569 + 1.570 + for (int i=0; i < wl; i++) { 1.571 + tmpc = candidate[i]; 1.572 + // check with uppercase letters 1.573 + candidate[i] = csconv[((unsigned char)tmpc)].cupper; 1.574 + if (tmpc != candidate[i]) { 1.575 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.576 + if (ns == -1) return -1; 1.577 + candidate[i] = tmpc; 1.578 + } 1.579 + // check neighbor characters in keyboard string 1.580 + if (!ckey) continue; 1.581 + char * loc = strchr(ckey, tmpc); 1.582 + while (loc) { 1.583 + if ((loc > ckey) && (*(loc - 1) != '|')) { 1.584 + candidate[i] = *(loc - 1); 1.585 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.586 + if (ns == -1) return -1; 1.587 + } 1.588 + if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) { 1.589 + candidate[i] = *(loc + 1); 1.590 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.591 + if (ns == -1) return -1; 1.592 + } 1.593 + loc = strchr(loc + 1, tmpc); 1.594 + } 1.595 + 
candidate[i] = tmpc; 1.596 + } 1.597 + return ns; 1.598 +} 1.599 + 1.600 +// error is wrong char in place of correct one (case and keyboard related version) 1.601 +int SuggestMgr::badcharkey_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.602 +{ 1.603 + w_char tmpc; 1.604 + w_char candidate_utf[MAXSWL]; 1.605 + char candidate[MAXSWUTF8L]; 1.606 + memcpy(candidate_utf, word, wl * sizeof(w_char)); 1.607 + // swap out each char one by one and try all the tryme 1.608 + // chars in its place to see if that makes a good word 1.609 + for (int i=0; i < wl; i++) { 1.610 + tmpc = candidate_utf[i]; 1.611 + // check with uppercase letters 1.612 + mkallcap_utf(candidate_utf + i, 1, langnum); 1.613 + if (!w_char_eq(tmpc, candidate_utf[i])) { 1.614 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.615 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.616 + if (ns == -1) return -1; 1.617 + candidate_utf[i] = tmpc; 1.618 + } 1.619 + // check neighbor characters in keyboard string 1.620 + if (!ckey) continue; 1.621 + w_char * loc = ckey_utf; 1.622 + while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)) loc++; 1.623 + while (loc < (ckey_utf + ckeyl)) { 1.624 + if ((loc > ckey_utf) && !w_char_eq(*(loc - 1), W_VLINE)) { 1.625 + candidate_utf[i] = *(loc - 1); 1.626 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.627 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.628 + if (ns == -1) return -1; 1.629 + } 1.630 + if (((loc + 1) < (ckey_utf + ckeyl)) && !w_char_eq(*(loc + 1), W_VLINE)) { 1.631 + candidate_utf[i] = *(loc + 1); 1.632 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.633 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.634 + if (ns == -1) return -1; 1.635 + } 1.636 + do { loc++; } while ((loc < (ckey_utf + ckeyl)) && !w_char_eq(*loc, tmpc)); 1.637 + } 1.638 + candidate_utf[i] = tmpc; 1.639 + } 1.640 + return ns; 1.641 +} 1.642 + 
1.643 +// error is wrong char in place of correct one 1.644 +int SuggestMgr::badchar(char ** wlst, const char * word, int ns, int cpdsuggest) 1.645 +{ 1.646 + char tmpc; 1.647 + char candidate[MAXSWUTF8L]; 1.648 + clock_t timelimit = clock(); 1.649 + int timer = MINTIMER; 1.650 + int wl = strlen(word); 1.651 + strcpy(candidate, word); 1.652 + // swap out each char one by one and try all the tryme 1.653 + // chars in its place to see if that makes a good word 1.654 + for (int j=0; j < ctryl; j++) { 1.655 + for (int i=wl-1; i >= 0; i--) { 1.656 + tmpc = candidate[i]; 1.657 + if (ctry[j] == tmpc) continue; 1.658 + candidate[i] = ctry[j]; 1.659 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, &timer, &timelimit); 1.660 + if (ns == -1) return -1; 1.661 + if (!timer) return ns; 1.662 + candidate[i] = tmpc; 1.663 + } 1.664 + } 1.665 + return ns; 1.666 +} 1.667 + 1.668 +// error is wrong char in place of correct one 1.669 +int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.670 +{ 1.671 + w_char tmpc; 1.672 + w_char candidate_utf[MAXSWL]; 1.673 + char candidate[MAXSWUTF8L]; 1.674 + clock_t timelimit = clock(); 1.675 + int timer = MINTIMER; 1.676 + memcpy(candidate_utf, word, wl * sizeof(w_char)); 1.677 + // swap out each char one by one and try all the tryme 1.678 + // chars in its place to see if that makes a good word 1.679 + for (int j=0; j < ctryl; j++) { 1.680 + for (int i=wl-1; i >= 0; i--) { 1.681 + tmpc = candidate_utf[i]; 1.682 + if (w_char_eq(tmpc, ctry_utf[j])) continue; 1.683 + candidate_utf[i] = ctry_utf[j]; 1.684 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.685 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, &timelimit); 1.686 + if (ns == -1) return -1; 1.687 + if (!timer) return ns; 1.688 + candidate_utf[i] = tmpc; 1.689 + } 1.690 + } 1.691 + return ns; 1.692 +} 1.693 + 1.694 +// error is word has an extra letter it does not need 1.695 +int SuggestMgr::extrachar_utf(char** 
wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.696 +{ 1.697 + char candidate[MAXSWUTF8L]; 1.698 + w_char candidate_utf[MAXSWL]; 1.699 + w_char * p; 1.700 + w_char tmpc = W_VLINE; // not used value, only for VCC warning message 1.701 + if (wl < 2) return ns; 1.702 + // try omitting one char of word at a time 1.703 + memcpy(candidate_utf, word, wl * sizeof(w_char)); 1.704 + for (p = candidate_utf + wl - 1; p >= candidate_utf; p--) { 1.705 + w_char tmpc2 = *p; 1.706 + if (p < candidate_utf + wl - 1) *p = tmpc; 1.707 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl - 1); 1.708 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.709 + if (ns == -1) return -1; 1.710 + tmpc = tmpc2; 1.711 + } 1.712 + return ns; 1.713 +} 1.714 + 1.715 +// error is word has an extra letter it does not need 1.716 +int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest) 1.717 +{ 1.718 + char tmpc = '\0'; 1.719 + char candidate[MAXSWUTF8L]; 1.720 + char * p; 1.721 + int wl = strlen(word); 1.722 + if (wl < 2) return ns; 1.723 + // try omitting one char of word at a time 1.724 + strcpy (candidate, word); 1.725 + for (p = candidate + wl - 1; p >=candidate; p--) { 1.726 + char tmpc2 = *p; 1.727 + *p = tmpc; 1.728 + ns = testsug(wlst, candidate, wl-1, ns, cpdsuggest, NULL, NULL); 1.729 + if (ns == -1) return -1; 1.730 + tmpc = tmpc2; 1.731 + } 1.732 + return ns; 1.733 +} 1.734 + 1.735 +// error is missing a letter it needs 1.736 +int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest) 1.737 +{ 1.738 + char candidate[MAXSWUTF8L]; 1.739 + char * p; 1.740 + clock_t timelimit = clock(); 1.741 + int timer = MINTIMER; 1.742 + int wl = strlen(word); 1.743 + // try inserting a tryme character before every letter (and the null terminator) 1.744 + for (int i = 0; i < ctryl; i++) { 1.745 + strcpy(candidate, word); 1.746 + for (p = candidate + wl; p >= candidate; p--) { 1.747 + *(p+1) = *p; 1.748 + *p = ctry[i]; 
1.749 + ns = testsug(wlst, candidate, wl+1, ns, cpdsuggest, &timer, &timelimit); 1.750 + if (ns == -1) return -1; 1.751 + if (!timer) return ns; 1.752 + } 1.753 + } 1.754 + return ns; 1.755 +} 1.756 + 1.757 +// error is missing a letter it needs 1.758 +int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.759 +{ 1.760 + w_char candidate_utf[MAXSWL]; 1.761 + char candidate[MAXSWUTF8L]; 1.762 + w_char * p; 1.763 + clock_t timelimit = clock(); 1.764 + int timer = MINTIMER; 1.765 + // try inserting a tryme character at the end of the word and before every letter 1.766 + for (int i = 0; i < ctryl; i++) { 1.767 + memcpy (candidate_utf, word, wl * sizeof(w_char)); 1.768 + for (p = candidate_utf + wl; p >= candidate_utf; p--) { 1.769 + *(p + 1) = *p; 1.770 + *p = ctry_utf[i]; 1.771 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1); 1.772 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, &timer, &timelimit); 1.773 + if (ns == -1) return -1; 1.774 + if (!timer) return ns; 1.775 + } 1.776 + } 1.777 + return ns; 1.778 +} 1.779 + 1.780 + 1.781 +/* error is should have been two words */ 1.782 +int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest) 1.783 +{ 1.784 + char candidate[MAXSWUTF8L]; 1.785 + char * p; 1.786 + int c1, c2; 1.787 + int forbidden = 0; 1.788 + int cwrd; 1.789 + 1.790 + int wl=strlen(word); 1.791 + if (wl < 3) return ns; 1.792 + 1.793 + if (langnum == LANG_hu) forbidden = check_forbidden(word, wl); 1.794 + 1.795 + strcpy(candidate + 1, word); 1.796 + // split the string into two pieces after every char 1.797 + // if both pieces are good words make them a suggestion 1.798 + for (p = candidate + 1; p[1] != '\0'; p++) { 1.799 + p[-1] = *p; 1.800 + // go to end of the UTF-8 character 1.801 + while (utf8 && ((p[1] & 0xc0) == 0x80)) { 1.802 + *p = p[1]; 1.803 + p++; 1.804 + } 1.805 + if (utf8 && p[1] == '\0') break; // last UTF-8 character 1.806 + *p = '\0'; 1.807 + c1 = 
checkword(candidate,strlen(candidate), cpdsuggest, NULL, NULL); 1.808 + if (c1) { 1.809 + c2 = checkword((p+1),strlen(p+1), cpdsuggest, NULL, NULL); 1.810 + if (c2) { 1.811 + *p = ' '; 1.812 + 1.813 + // spec. Hungarian code (need a better compound word support) 1.814 + if ((langnum == LANG_hu) && !forbidden && 1.815 + // if 3 repeating letter, use - instead of space 1.816 + (((p[-1] == p[1]) && (((p>candidate+1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) || 1.817 + // or multiple compounding, with more, than 6 syllables 1.818 + ((c1 == 3) && (c2 >= 2)))) *p = '-'; 1.819 + 1.820 + cwrd = 1; 1.821 + for (int k=0; k < ns; k++) 1.822 + if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; 1.823 + if (ns < maxSug) { 1.824 + if (cwrd) { 1.825 + wlst[ns] = mystrdup(candidate); 1.826 + if (wlst[ns] == NULL) return -1; 1.827 + ns++; 1.828 + } 1.829 + } else return ns; 1.830 + // add two word suggestion with dash, if TRY string contains 1.831 + // "a" or "-" 1.832 + // NOTE: cwrd doesn't modified for REP twoword sugg. 
1.833 + if (ctry && (strchr(ctry, 'a') || strchr(ctry, '-')) && 1.834 + mystrlen(p + 1) > 1 && 1.835 + mystrlen(candidate) - mystrlen(p) > 1) { 1.836 + *p = '-'; 1.837 + for (int k=0; k < ns; k++) 1.838 + if (strcmp(candidate,wlst[k]) == 0) cwrd = 0; 1.839 + if (ns < maxSug) { 1.840 + if (cwrd) { 1.841 + wlst[ns] = mystrdup(candidate); 1.842 + if (wlst[ns] == NULL) return -1; 1.843 + ns++; 1.844 + } 1.845 + } else return ns; 1.846 + } 1.847 + } 1.848 + } 1.849 + } 1.850 + return ns; 1.851 +} 1.852 + 1.853 + 1.854 +// error is adjacent letter were swapped 1.855 +int SuggestMgr::swapchar(char ** wlst, const char * word, int ns, int cpdsuggest) 1.856 +{ 1.857 + char candidate[MAXSWUTF8L]; 1.858 + char * p; 1.859 + char tmpc; 1.860 + int wl=strlen(word); 1.861 + // try swapping adjacent chars one by one 1.862 + strcpy(candidate, word); 1.863 + for (p = candidate; p[1] != 0; p++) { 1.864 + tmpc = *p; 1.865 + *p = p[1]; 1.866 + p[1] = tmpc; 1.867 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.868 + if (ns == -1) return -1; 1.869 + p[1] = *p; 1.870 + *p = tmpc; 1.871 + } 1.872 + // try double swaps for short words 1.873 + // ahev -> have, owudl -> would 1.874 + if (wl == 4 || wl == 5) { 1.875 + candidate[0] = word[1]; 1.876 + candidate[1] = word[0]; 1.877 + candidate[2] = word[2]; 1.878 + candidate[wl - 2] = word[wl - 1]; 1.879 + candidate[wl - 1] = word[wl - 2]; 1.880 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.881 + if (ns == -1) return -1; 1.882 + if (wl == 5) { 1.883 + candidate[0] = word[0]; 1.884 + candidate[1] = word[2]; 1.885 + candidate[2] = word[1]; 1.886 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.887 + if (ns == -1) return -1; 1.888 + } 1.889 + } 1.890 + return ns; 1.891 +} 1.892 + 1.893 +// error is adjacent letter were swapped 1.894 +int SuggestMgr::swapchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.895 +{ 1.896 + w_char candidate_utf[MAXSWL]; 1.897 + char 
candidate[MAXSWUTF8L];   // UTF-8 rendering of candidate_utf, handed to testsug()
   w_char * p;
   w_char tmpc;
   int len = 0;             // byte length of the UTF-8 candidate; measured once on the first swap and reused
   // try swapping adjacent chars one by one
   memcpy (candidate_utf, word, wl * sizeof(w_char));
   for (p = candidate_utf;  p < (candidate_utf + wl - 1); p++) {
      tmpc = *p;
      *p = p[1];
      p[1] = tmpc;
      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
      if (len == 0) len = strlen(candidate);
      ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL);
      if (ns == -1) return -1;
      // undo the swap
      p[1] = *p;
      *p = tmpc;
   }
   // try double swaps for short words
   // ahev -> have, owudl -> would, suodn -> sound
   if (wl == 4 || wl == 5) {
     candidate_utf[0] = word[1];
     candidate_utf[1] = word[0];
     candidate_utf[2] = word[2];
     candidate_utf[wl - 2] = word[wl - 1];
     candidate_utf[wl - 1] = word[wl - 2];
     u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
     ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL);
     if (ns == -1) return -1;
     if (wl == 5) {
        // second variant for 5-letter words: swap positions 1/2 instead of 0/1
        candidate_utf[0] = word[0];
        candidate_utf[1] = word[2];
        candidate_utf[2] = word[1];
        u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
        ns = testsug(wlst, candidate, len, ns, cpdsuggest, NULL, NULL);
        if (ns == -1) return -1;
     }
   }
   return ns;
}

// Error: two letters that are NOT adjacent were swapped (8-bit variant).
// Every pair of positions more than one apart is exchanged in a scratch copy
// and the result is handed to testsug().
// Returns the updated suggestion count, or -1 as soon as testsug() reports -1.
int SuggestMgr::longswapchar(char ** wlst, const char * word, int ns, int cpdsuggest)
{
   char candidate[MAXSWUTF8L];
   char * p;
   char * q;
   char tmpc;
   int wl=strlen(word);
   // try swapping not adjacent chars one by one
   // NOTE(review): strcpy is unbounded; assumes callers never pass a word of MAXSWUTF8L bytes or more - confirm
   strcpy(candidate, word);
   for (p = candidate;  *p != 0; p++) {
     for (q = candidate;  *q != 0; q++) {
       if (abs((int)(p-q)) > 1) {
         tmpc = *p;
         *p = *q;
         *q = tmpc;
         ns = testsug(wlst,
candidate, wl, ns, cpdsuggest, NULL, NULL); 1.954 + if (ns == -1) return -1; 1.955 + *q = *p; 1.956 + *p = tmpc; 1.957 + } 1.958 + } 1.959 + } 1.960 + return ns; 1.961 +} 1.962 + 1.963 + 1.964 +// error is adjacent letter were swapped 1.965 +int SuggestMgr::longswapchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.966 +{ 1.967 + w_char candidate_utf[MAXSWL]; 1.968 + char candidate[MAXSWUTF8L]; 1.969 + w_char * p; 1.970 + w_char * q; 1.971 + w_char tmpc; 1.972 + // try swapping not adjacent chars 1.973 + memcpy (candidate_utf, word, wl * sizeof(w_char)); 1.974 + for (p = candidate_utf; p < (candidate_utf + wl); p++) { 1.975 + for (q = candidate_utf; q < (candidate_utf + wl); q++) { 1.976 + if (abs((int)(p-q)) > 1) { 1.977 + tmpc = *p; 1.978 + *p = *q; 1.979 + *q = tmpc; 1.980 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.981 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.982 + if (ns == -1) return -1; 1.983 + *q = *p; 1.984 + *p = tmpc; 1.985 + } 1.986 + } 1.987 + } 1.988 + return ns; 1.989 +} 1.990 + 1.991 +// error is a letter was moved 1.992 +int SuggestMgr::movechar(char ** wlst, const char * word, int ns, int cpdsuggest) 1.993 +{ 1.994 + char candidate[MAXSWUTF8L]; 1.995 + char * p; 1.996 + char * q; 1.997 + char tmpc; 1.998 + 1.999 + int wl=strlen(word); 1.1000 + // try moving a char 1.1001 + strcpy(candidate, word); 1.1002 + for (p = candidate; *p != 0; p++) { 1.1003 + for (q = p + 1; (*q != 0) && ((q - p) < 10); q++) { 1.1004 + tmpc = *(q-1); 1.1005 + *(q-1) = *q; 1.1006 + *q = tmpc; 1.1007 + if ((q-p) < 2) continue; // omit swap char 1.1008 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.1009 + if (ns == -1) return -1; 1.1010 + } 1.1011 + strcpy(candidate, word); 1.1012 + } 1.1013 + for (p = candidate + wl - 1; p > candidate; p--) { 1.1014 + for (q = p - 1; (q >= candidate) && ((p - q) < 10); q--) { 1.1015 + tmpc = *(q+1); 1.1016 + *(q+1) = *q; 1.1017 + *q = tmpc; 
1.1018 + if ((p-q) < 2) continue; // omit swap char 1.1019 + ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL); 1.1020 + if (ns == -1) return -1; 1.1021 + } 1.1022 + strcpy(candidate, word); 1.1023 + } 1.1024 + return ns; 1.1025 +} 1.1026 + 1.1027 +// error is a letter was moved 1.1028 +int SuggestMgr::movechar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest) 1.1029 +{ 1.1030 + w_char candidate_utf[MAXSWL]; 1.1031 + char candidate[MAXSWUTF8L]; 1.1032 + w_char * p; 1.1033 + w_char * q; 1.1034 + w_char tmpc; 1.1035 + // try moving a char 1.1036 + memcpy (candidate_utf, word, wl * sizeof(w_char)); 1.1037 + for (p = candidate_utf; p < (candidate_utf + wl); p++) { 1.1038 + for (q = p + 1; (q < (candidate_utf + wl)) && ((q - p) < 10); q++) { 1.1039 + tmpc = *(q-1); 1.1040 + *(q-1) = *q; 1.1041 + *q = tmpc; 1.1042 + if ((q-p) < 2) continue; // omit swap char 1.1043 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.1044 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.1045 + if (ns == -1) return -1; 1.1046 + } 1.1047 + memcpy (candidate_utf, word, wl * sizeof(w_char)); 1.1048 + } 1.1049 + for (p = candidate_utf + wl - 1; p > candidate_utf; p--) { 1.1050 + for (q = p - 1; (q >= candidate_utf) && ((p - q) < 10); q--) { 1.1051 + tmpc = *(q+1); 1.1052 + *(q+1) = *q; 1.1053 + *q = tmpc; 1.1054 + if ((p-q) < 2) continue; // omit swap char 1.1055 + u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl); 1.1056 + ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL); 1.1057 + if (ns == -1) return -1; 1.1058 + } 1.1059 + memcpy (candidate_utf, word, wl * sizeof(w_char)); 1.1060 + } 1.1061 + return ns; 1.1062 +} 1.1063 + 1.1064 +// generate a set of suggestions for very poorly spelled words 1.1065 +int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md) 1.1066 +{ 1.1067 + 1.1068 + int i, j; 1.1069 + int lval; 1.1070 + int sc, scphon; 1.1071 + int lp, lpphon; 1.1072 + int 
nonbmp = 0;   // set to 1 below when the UTF-8 -> UTF-16 conversion of the word fails

  // exhaustively search through all root words
  // keeping track of the MAX_ROOTS most similar root words
  struct hentry * roots[MAX_ROOTS];
  char * rootsphon[MAX_ROOTS];
  int scores[MAX_ROOTS];
  int scoresphon[MAX_ROOTS];
  for (i = 0; i < MAX_ROOTS; i++) {
    roots[i] = NULL;
    scores[i] = -100 * i;
    rootsphon[i] = NULL;
    scoresphon[i] = -100 * i;
  }
  // lp / lpphon index the currently lowest-scored slot of each table
  lp = MAX_ROOTS - 1;
  lpphon = MAX_ROOTS - 1;
  scphon = -20000;
  int low = NGRAM_LOWERING;

  char w2[MAXWORDUTF8LEN];
  char f[MAXSWUTF8L];
  char * word = w;

  // word reversing wrapper for complex prefixes
  if (complexprefixes) {
    // NOTE(review): unbounded copy; assumes w is shorter than MAXWORDUTF8LEN - confirm against callers
    strcpy(w2, w);
    if (utf8) reverseword_utf(w2); else reverseword(w2);
    word = w2;
  }

  char mw[MAXSWUTF8L];
  w_char u8[MAXSWL];
  int nc = strlen(word);                                   // length in bytes
  int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc;          // length in characters (or -1, see below)

  // set character based ngram suggestion for words with non-BMP Unicode characters
  if (n == -1) {
    utf8 = 0; // XXX not state-free: the member is switched back to 1 on the return paths that test nonbmp
    n = nc;
    nonbmp = 1;
    low = 0;
  }

  struct hentry* hp = NULL;
  int col = -1;
  phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
  char target[MAXSWUTF8L];
  char candidate[MAXSWUTF8L];
  // with a phonetic table: upper-case the word and compute its phonetic code into target
  if (ph) {
    if (utf8) {
      w_char _w[MAXSWL];
      int _wl = u8_u16(_w, MAXSWL, word);
      mkallcap_utf(_w, _wl, langnum);
      u16_u8(candidate, MAXSWUTF8L, _w, _wl);
    } else {
      strcpy(candidate, word);
      if (!nonbmp) mkallcap(candidate, csconv);
    }
    phonet(candidate, target, nc, *ph); // XXX phonet() is 8-bit (nc, not n)
  }

  // flags that exclude a dictionary entry from n-gram suggestion
  FLAG forbiddenword = pAMgr ? 
pAMgr->get_forbiddenword() : FLAG_NULL;
  FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL;
  FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
  FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;

  // walk every entry of each of the md hash managers
  for (i = 0; i < md; i++) {
  while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) {
    // skip entries carrying one of the excluding flags
    if ((hp->astr) && (pAMgr) &&
       (TESTAFF(hp->astr, forbiddenword, hp->alen) ||
          TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
          TESTAFF(hp->astr, nosuggest, hp->alen) ||
          TESTAFF(hp->astr, nongramsuggest, hp->alen) ||
          TESTAFF(hp->astr, onlyincompound, hp->alen))) continue;

    // similarity of the root: 3-gram score plus common-prefix bonus
    sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
        leftcommonsubstring(word, HENTRY_WORD(hp));

    // check special pronunciation
    if ((hp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
        // (the second '+' below is a unary plus; it does not change the value)
        int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
        + leftcommonsubstring(word, f);
        if (sc2 > sc) sc = sc2;
    }

    // phonetic score, computed only for roots that are already somewhat
    // similar and whose length is within 3 of the word's
    scphon = -20000;
    if (ph && (sc > 2) && (abs(n - (int) hp->clen) <= 3)) {
      char target2[MAXSWUTF8L];
      if (utf8) {
        w_char _w[MAXSWL];
        int _wl = u8_u16(_w, MAXSWL, HENTRY_WORD(hp));
        mkallcap_utf(_w, _wl, langnum);
        u16_u8(candidate, MAXSWUTF8L, _w, _wl);
      } else {
        strcpy(candidate, HENTRY_WORD(hp));
        mkallcap(candidate, csconv);
      }
      phonet(candidate, target2, -1, *ph);
      scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);
    }

    // replace the lowest-scored root, then locate the new lowest slot
    if (sc > scores[lp]) {
      scores[lp] = sc;
      roots[lp] = hp;
      lval = sc;
      for (j=0; j < MAX_ROOTS; j++)
        if (scores[j] < lval) {
          lp = j;
          lval = scores[j];
        }
    }


    // same bookkeeping for the phonetic table
    if (scphon > 
scoresphon[lpphon]) {
      scoresphon[lpphon] = scphon;
      rootsphon[lpphon] = HENTRY_WORD(hp);
      lval = scphon;
      for (j=0; j < MAX_ROOTS; j++)
        if (scoresphon[j] < lval) {
          lpphon = j;
          lval = scoresphon[j];
        }
    }
  }}

  // find minimum threshold for a passable suggestion
  // mangle original word three different ways
  // and score them to generate a minimum acceptable score
  int thresh = 0;
  for (int sp = 1; sp < 4; sp++) {
     if (utf8) {
       // NOTE(review): the '*' marks written into u8 accumulate over the three
       // passes, whereas the 8-bit branch restarts from a fresh copy of word
       // each time - confirm this asymmetry is intended.
       for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*';
       u16_u8(mw, MAXSWUTF8L, u8, n);
       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
     } else {
       strcpy(mw, word);
       for (int k=sp; k < n; k+=4) *(mw + k) = '*';
       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
     }
  }
  // average of the three mangled scores, minus one
  thresh = thresh / 3;
  thresh--;

  // now expand affixes on each of these root words and
  // and use length adjusted ngram scores to select
  // possible suggestions
  char * guess[MAX_GUESS];
  char * guessorig[MAX_GUESS];
  int gscore[MAX_GUESS];
  for(i=0;i<MAX_GUESS;i++) {
     guess[i] = NULL;
     guessorig[i] = NULL;
     gscore[i] = -100 * i;
  }

  // lp now indexes the lowest-scored guess slot
  lp = MAX_GUESS - 1;

  struct guessword * glst;
  glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
  if (! glst) {
    // restore the utf8 member that was cleared for non-BMP input
    if (nonbmp) utf8 = 1;
    return ns;
  }

  for (i = 0; i < MAX_ROOTS; i++) {
    if (roots[i]) {
      struct hentry * rp = roots[i];
      // NOTE(review): pAMgr is dereferenced here without the NULL test used
      // elsewhere in this function - confirm it cannot be NULL at this point.
      int nw = pAMgr->expand_rootword(glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen,
                    rp->astr, rp->alen, word, nc,
                    ((rp->var & H_OPT_PHON) ? 
copy_field(f, HENTRY_DATA(rp), MORPH_PHON) : NULL));

      // score each expanded form; keep it only if it beats the threshold
      // and the currently worst guess, otherwise free it
      for (int k = 0; k < nw ; k++) {
         sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) +
               leftcommonsubstring(word, glst[k].word);

         if (sc > thresh) {
            if (sc > gscore[lp]) {
               // evict the previous occupant of the worst slot
               if (guess[lp]) {
                  free (guess[lp]);
                  if (guessorig[lp]) {
                    free(guessorig[lp]);
                    guessorig[lp] = NULL;
                  }
               }
               // the guess table takes over the strings held by glst[k]
               gscore[lp] = sc;
               guess[lp] = glst[k].word;
               guessorig[lp] = glst[k].orig;
               lval = sc;
               for (j=0; j < MAX_GUESS; j++)
                  if (gscore[j] < lval) {
                     lp = j;
                     lval = gscore[j];
                  }
            } else {
              free(glst[k].word);
              if (glst[k].orig) free(glst[k].orig);
            }
         } else {
            free(glst[k].word);
            if (glst[k].orig) free(glst[k].orig);
         }
      }
    }
  }
  free(glst);

  // now we are done generating guesses
  // sort in order of decreasing score


  bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
  if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);

  // weight suggestions with a similarity index, based on
  // the longest common subsequence algorithm and resort

  int is_swap = 0;
  int re = 0;
  // fact scales the rejection limit used below; derived from the affix manager's maxdiff value when that is >= 0
  double fact = 1.0;
  if (pAMgr) {
        int maxd = pAMgr->get_maxdiff();
        if (maxd >= 0) fact = (10.0 - maxd)/5.0;
  }

  for (i=0; i < MAX_GUESS; i++) {
      if (guess[i]) {
        // lowering guess[i]
        char gl[MAXSWUTF8L];
        int len;
        if (utf8) {
          w_char _w[MAXSWL];
          len = u8_u16(_w, MAXSWL, guess[i]);
          mkallsmall_utf(_w, len, langnum);
          u16_u8(gl, MAXSWUTF8L, _w, len);
        } else {
          strcpy(gl, guess[i]);
          if (!nonbmp) 
mkallsmall(gl, csconv);
          len = strlen(guess[i]);
        }

        int _lcs = lcslen(word, gl);

        // same characters with different casing
        if ((n == len) && (n == _lcs)) {
            gscore[i] += 2000;
            // leaves the loop: the remaining guesses keep their earlier scores
            break;
        }
        // using 2-gram instead of 3, and other weighting

        // weighted 2-gram score, taken in both directions
        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
             ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);

        gscore[i] =
          // length of longest common subsequence minus length difference
          2 * _lcs - abs((int) (n - len)) +
          // weight length of the left common substring
          leftcommonsubstring(word, gl) +
          // weight equal character positions
          (!nonbmp && commoncharacterpositions(word, gl, &is_swap) ? 1: 0) +
          // swap character (not neighboring)
          ((is_swap) ? 10 : 0) +
          // ngram
          ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) +
          // weighted ngrams
          re +
          // different limit for dictionaries with PHONE rules
          (ph ? (re < len * fact ? -1000 : 0) : (re < (n + len)*fact? 
-1000 : 0));
      }
  }

  // re-sort the guesses by their re-weighted scores
  bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);

// phonetic version
  if (ph) for (i=0; i < MAX_ROOTS; i++) {
      if (rootsphon[i]) {
        // lowering rootphon[i]
        char gl[MAXSWUTF8L];
        int len;
        if (utf8) {
          w_char _w[MAXSWL];
          len = u8_u16(_w, MAXSWL, rootsphon[i]);
          mkallsmall_utf(_w, len, langnum);
          u16_u8(gl, MAXSWUTF8L, _w, len);
        } else {
          strcpy(gl, rootsphon[i]);
          if (!nonbmp) mkallsmall(gl, csconv);
          len = strlen(rootsphon[i]);
        }

        // heuristic weighting of ngram scores
        scoresphon[i] += 2 * lcslen(word, gl) - abs((int) (n - len)) +
          // weight length of the left common substring
          leftcommonsubstring(word, gl);
      }
  }

  if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);

  // copy over
  int oldns = ns;

  // once `same` is set, only guesses scoring above 1000 are still accepted
  int same = 0;
  for (i=0; i < MAX_GUESS; i++) {
    if (guess[i]) {
      if ((ns < oldns + maxngramsugs) && (ns < maxSug) && (!same || (gscore[i] > 1000))) {
        int unique = 1;
        // leave only excellent suggestions, if exists
        if (gscore[i] > 1000) same = 1; else if (gscore[i] < -100) {
            same = 1;
            // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
            if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) {
                free(guess[i]);
                if (guessorig[i]) free(guessorig[i]);
                continue;
            }
        }
        // NOTE(review): the forbidden-word test sits inside the loop over the
        // existing suggestions, so it is not evaluated when ns == 0 - confirm.
        for (j = 0; j < ns; j++) {
          // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
          if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
             (guessorig[i] && strstr(guessorig[i], wlst[j])) ||
            // check forbidden words
            !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique 
= 0; 1.1394 + } 1.1395 + if (unique) { 1.1396 + wlst[ns++] = guess[i]; 1.1397 + if (guessorig[i]) { 1.1398 + free(guess[i]); 1.1399 + wlst[ns-1] = guessorig[i]; 1.1400 + } 1.1401 + } else { 1.1402 + free(guess[i]); 1.1403 + if (guessorig[i]) free(guessorig[i]); 1.1404 + } 1.1405 + } else { 1.1406 + free(guess[i]); 1.1407 + if (guessorig[i]) free(guessorig[i]); 1.1408 + } 1.1409 + } 1.1410 + } 1.1411 + 1.1412 + oldns = ns; 1.1413 + if (ph) for (i=0; i < MAX_ROOTS; i++) { 1.1414 + if (rootsphon[i]) { 1.1415 + if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) { 1.1416 + int unique = 1; 1.1417 + for (j = 0; j < ns; j++) { 1.1418 + // don't suggest previous suggestions or a previous suggestion with prefixes or affixes 1.1419 + if (strstr(rootsphon[i], wlst[j]) || 1.1420 + // check forbidden words 1.1421 + !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0; 1.1422 + } 1.1423 + if (unique) { 1.1424 + wlst[ns++] = mystrdup(rootsphon[i]); 1.1425 + if (!wlst[ns - 1]) return ns - 1; 1.1426 + } 1.1427 + } 1.1428 + } 1.1429 + } 1.1430 + 1.1431 + if (nonbmp) utf8 = 1; 1.1432 + return ns; 1.1433 +} 1.1434 + 1.1435 + 1.1436 +// see if a candidate suggestion is spelled correctly 1.1437 +// needs to check both root words and words with affixes 1.1438 + 1.1439 +// obsolote MySpell-HU modifications: 1.1440 +// return value 2 and 3 marks compounding with hyphen (-) 1.1441 +// `3' marks roots without suffix 1.1442 +int SuggestMgr::checkword(const char * word, int len, int cpdsuggest, int * timer, clock_t * timelimit) 1.1443 +{ 1.1444 + struct hentry * rv=NULL; 1.1445 + struct hentry * rv2=NULL; 1.1446 + int nosuffix = 0; 1.1447 + 1.1448 + // check time limit 1.1449 + if (timer) { 1.1450 + (*timer)--; 1.1451 + if (!(*timer) && timelimit) { 1.1452 + if ((clock() - *timelimit) > TIMELIMIT) return 0; 1.1453 + *timer = MAXPLUSTIMER; 1.1454 + } 1.1455 + } 1.1456 + 1.1457 + if (pAMgr) { 1.1458 + if (cpdsuggest==1) { 1.1459 + if (pAMgr->get_compound()) { 1.1460 + rv = 
pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, 0); //EXT
        // accept the compound (result 3) unless a dictionary entry for the
        // whole word is flagged forbidden or no-suggest
        if (rv && (!(rv2 = pAMgr->lookup(word)) || !rv2->astr ||
            !(TESTAFF(rv2->astr,pAMgr->get_forbiddenword(),rv2->alen) ||
            TESTAFF(rv2->astr,pAMgr->get_nosuggest(),rv2->alen)))) return 3; // XXX obsolete categorisation + only ICONV needs affix flag check?
        }
        return 0;
    }

    rv = pAMgr->lookup(word);

    if (rv) {
        // a forbidden or no-suggest first homonym rejects the word outright
        if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
               || TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen))) return 0;
        // skip homonyms flagged need-affix, only-upcase or only-in-compound;
        // rv becomes NULL when every homonym is skipped
        while (rv) {
            if (rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) ||
                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
            TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
                rv = rv->next_homonym;
            } else break;
        }
    } else rv = pAMgr->prefix_check(word, len, 0); // only prefix, and prefix + suffix XXX

    if (rv) {
        nosuffix=1;
    } else {
        rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, NULL); // only suffix
    }

    // last resort: two-level suffix / prefix analysis
    if (!rv && pAMgr->have_contclass()) {
        rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL);
        if (!rv) rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL);
    }

    // check forbidden words
    if ((rv) && (rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen) ||
      TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
      TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen) ||
      TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) return 0;

    // result: 1 for an accepted word; 2 (or 3 when nosuffix is set) when the
    // entry carries the compound flag
    if (rv) { // XXX obsolete
      if ((pAMgr->get_compoundflag()) &&
          TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) return 2 + nosuffix;
      return 1;
    }
  }
  // no affix manager, or the word was not recognised
  return 0;
}

int 
SuggestMgr::check_forbidden(const char * word, int len) 1.1509 +{ 1.1510 + struct hentry * rv = NULL; 1.1511 + 1.1512 + if (pAMgr) { 1.1513 + rv = pAMgr->lookup(word); 1.1514 + if (rv && rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) || 1.1515 + TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) rv = NULL; 1.1516 + if (!(pAMgr->prefix_check(word,len,1))) 1.1517 + rv = pAMgr->suffix_check(word,len, 0, NULL, NULL, 0, NULL); // prefix+suffix, suffix 1.1518 + // check forbidden words 1.1519 + if ((rv) && (rv->astr) && TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)) return 1; 1.1520 + } 1.1521 + return 0; 1.1522 +} 1.1523 + 1.1524 +#ifdef HUNSPELL_EXPERIMENTAL 1.1525 +// suggest possible stems 1.1526 +int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug) 1.1527 +{ 1.1528 + char ** wlst; 1.1529 + 1.1530 + struct hentry * rv = NULL; 1.1531 + 1.1532 + char w2[MAXSWUTF8L]; 1.1533 + const char * word = w; 1.1534 + 1.1535 + // word reversing wrapper for complex prefixes 1.1536 + if (complexprefixes) { 1.1537 + strcpy(w2, w); 1.1538 + if (utf8) reverseword_utf(w2); else reverseword(w2); 1.1539 + word = w2; 1.1540 + } 1.1541 + 1.1542 + int wl = strlen(word); 1.1543 + 1.1544 + 1.1545 + if (*slst) { 1.1546 + wlst = *slst; 1.1547 + } else { 1.1548 + wlst = (char **) calloc(maxSug, sizeof(char *)); 1.1549 + if (wlst == NULL) return -1; 1.1550 + } 1.1551 + 1.1552 + rv = pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug); 1.1553 + 1.1554 + // delete dash from end of word 1.1555 + if (nsug > 0) { 1.1556 + for (int j=0; j < nsug; j++) { 1.1557 + if (wlst[j][strlen(wlst[j]) - 1] == '-') wlst[j][strlen(wlst[j]) - 1] = '\0'; 1.1558 + } 1.1559 + } 1.1560 + 1.1561 + *slst = wlst; 1.1562 + return nsug; 1.1563 +} 1.1564 +#endif // END OF HUNSPELL_EXPERIMENTAL CODE 1.1565 + 1.1566 + 1.1567 +char * SuggestMgr::suggest_morph(const char * w) 1.1568 +{ 1.1569 + char result[MAXLNLEN]; 1.1570 + char * r = (char *) result; 1.1571 + char * st; 
// ---- body of SuggestMgr::suggest_morph: collects the morphological
// ---- description of w into result, one line per analysis ----

    struct hentry * rv = NULL;

    *result = '\0';

    if (! pAMgr) return NULL;

    char w2[MAXSWUTF8L];
    const char * word = w;

    // word reversing wrapper for complex prefixes
    if (complexprefixes) {
      // NOTE(review): unbounded copy; assumes w is shorter than MAXSWUTF8L - confirm against callers
      strcpy(w2, w);
      if (utf8) reverseword_utf(w2); else reverseword(w2);
      word = w2;
    }

    rv = pAMgr->lookup(word);

    // one output line per homonym that is not forbidden, need-affix or
    // only-in-compound
    while (rv) {
        if ((!rv->astr) || !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
            TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
            TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
            // no stem field in the entry: emit the word itself as the stem
            if (!HENTRY_FIND(rv, MORPH_STEM)) {
                mystrcat(result, " ", MAXLNLEN);
                mystrcat(result, MORPH_STEM, MAXLNLEN);
                mystrcat(result, word, MAXLNLEN);
            }
            if (HENTRY_DATA(rv)) {
                mystrcat(result, " ", MAXLNLEN);
                mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
            }
            mystrcat(result, "\n", MAXLNLEN);
        }
        rv = rv->next_homonym;
    }

    // append the analyses of affixed forms; the returned buffer is freed here
    st = pAMgr->affix_check_morph(word,strlen(word));
    if (st) {
        mystrcat(result, st, MAXLNLEN);
        free(st);
    }

    // compound analysis is tried only when nothing has been found so far;
    // r points at result, and its address is passed to the call
    if (pAMgr->get_compound() && (*result == '\0'))
        pAMgr->compound_check_morph(word, strlen(word),
                     0, 0, 100, 0,NULL, 0, &r, NULL);

    // heap copy for the caller (after line_uniq), or NULL when result is empty
    return (*result) ? 
mystrdup(line_uniq(result, MSEP_REC)) : NULL; 1.1620 +} 1.1621 + 1.1622 +#ifdef HUNSPELL_EXPERIMENTAL 1.1623 +char * SuggestMgr::suggest_morph_for_spelling_error(const char * word) 1.1624 +{ 1.1625 + char * p = NULL; 1.1626 + char ** wlst = (char **) calloc(maxSug, sizeof(char *)); 1.1627 + if (!**wlst) return NULL; 1.1628 + // we will use only the first suggestion 1.1629 + for (int i = 0; i < maxSug - 1; i++) wlst[i] = ""; 1.1630 + int ns = suggest(&wlst, word, maxSug - 1, NULL); 1.1631 + if (ns == maxSug) { 1.1632 + p = suggest_morph(wlst[maxSug - 1]); 1.1633 + free(wlst[maxSug - 1]); 1.1634 + } 1.1635 + if (wlst) free(wlst); 1.1636 + return p; 1.1637 +} 1.1638 +#endif // END OF HUNSPELL_EXPERIMENTAL CODE 1.1639 + 1.1640 +/* affixation */ 1.1641 +char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern) 1.1642 +{ 1.1643 + char result[MAXLNLEN]; 1.1644 + *result = '\0'; 1.1645 + int sfxcount = get_sfxcount(pattern); 1.1646 + 1.1647 + if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL; 1.1648 + 1.1649 + if (HENTRY_DATA(rv)) { 1.1650 + char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen, 1.1651 + HENTRY_DATA(rv), pattern, 0); 1.1652 + if (aff) { 1.1653 + mystrcat(result, aff, MAXLNLEN); 1.1654 + mystrcat(result, "\n", MAXLNLEN); 1.1655 + free(aff); 1.1656 + } 1.1657 + } 1.1658 + 1.1659 + // check all allomorphs 1.1660 + char allomorph[MAXLNLEN]; 1.1661 + char * p = NULL; 1.1662 + if (HENTRY_DATA(rv)) p = (char *) strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); 1.1663 + while (p) { 1.1664 + struct hentry * rv2 = NULL; 1.1665 + p += MORPH_TAG_LEN; 1.1666 + int plen = fieldlen(p); 1.1667 + strncpy(allomorph, p, plen); 1.1668 + allomorph[plen] = '\0'; 1.1669 + rv2 = pAMgr->lookup(allomorph); 1.1670 + while (rv2) { 1.1671 +// if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= sfxcount) { 1.1672 + if (HENTRY_DATA(rv2)) { 1.1673 + char * st = (char *) strstr(HENTRY_DATA2(rv2), MORPH_STEM); 1.1674 + if (st && (strncmp(st + 
MORPH_TAG_LEN,
                   HENTRY_WORD(rv), fieldlen(st + MORPH_TAG_LEN)) == 0)) {
                    // the allomorph's stem field names rv's word: generate from the allomorph entry as well
                    char * aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr, rv2->alen,
                        HENTRY_DATA(rv2), pattern, 0);
                    if (aff) {
                        mystrcat(result, aff, MAXLNLEN);
                        mystrcat(result, "\n", MAXLNLEN);
                        free(aff);
                    }
                }
            }
            rv2 = rv2->next_homonym;
        }
        // continue with the next allomorph field
        p = strstr(p + plen, MORPH_ALLOMORPH);
    }

    // heap copy for the caller, or NULL when nothing was generated
    return (*result) ? mystrdup(result) : NULL;
}

// Generate word forms from the n morphological descriptions in desc so that
// they match `pattern`. Returns a heap string (records are prefixed with
// MSEP_REC) or NULL when nothing could be generated.
char * SuggestMgr::suggest_gen(char ** desc, int n, char * pattern) {
  char result[MAXLNLEN];
  char result2[MAXLNLEN];
  char newpattern[MAXLNLEN];
  *newpattern = '\0';
  if (n == 0) return 0;
  *result2 = '\0';
  struct hentry * rv = NULL;
  if (!pAMgr) return NULL;

// search affixed forms with and without derivational suffixes
  while(1) {

  for (int k = 0; k < n; k++) {
    *result = '\0';
    // add compound word parts (except the last one)
    char * s = (char *) desc[k];
    char * part = strstr(s, MORPH_PART);
    if (part) {
        char * nextpart = strstr(part + 1, MORPH_PART);
        while (nextpart) {
            // NOTE(review): appends into result with no visible bound check - confirm copy_field cannot overrun MAXLNLEN
            copy_field(result + strlen(result), part, MORPH_PART);
            part = nextpart;
            nextpart = strstr(part + 1, MORPH_PART);
        }
        s = part;
    }

    char **pl;
    char tok[MAXLNLEN];
    // NOTE(review): unbounded copy; assumes each description is shorter than MAXLNLEN - confirm against callers
    strcpy(tok, s);
    // turn every " | " separator into " <MSEP_ALT> " so that line_tok can split the alternatives
    char * alt = strstr(tok, " | ");
    while (alt) {
        alt[1] = MSEP_ALT;
        alt = strstr(alt, " | ");
    }
    int pln = line_tok(tok, &pl, MSEP_ALT);
    for (int i = 0; i < pln; i++) {
            // remove inflectional and terminal suffixes
            char * is = strstr(pl[i], MORPH_INFL_SFX);
            if (is) *is = '\0';
            char * ts = strstr(pl[i], MORPH_TERM_SFX);
// ---- SuggestMgr::suggest_gen, continued: per-alternative generation ----
            // overwrite the first byte of each terminal-suffix tag so it no longer matches
            while (ts) {
                *ts = '_';
                ts = strstr(pl[i], MORPH_TERM_SFX);
            }
            char * st = strstr(s, MORPH_STEM);
            if (st) {
                // tok is reused here as scratch space for the stem
                copy_field(tok, st, MORPH_STEM);
                rv = pAMgr->lookup(tok);
                while (rv) {
                    char newpat[MAXLNLEN];
                    // NOTE(review): strcpy + strcat into a MAXLNLEN buffer with no length check - confirm
                    // pl[i] and pattern together always fit
                    strcpy(newpat, pl[i]);
                    strcat(newpat, pattern);
                    // try the combined pattern first, then the bare pattern
                    char * sg = suggest_hentry_gen(rv, newpat);
                    if (!sg) sg = suggest_hentry_gen(rv, pattern);
                    if (sg) {
                        char ** gen;
                        int genl = line_tok(sg, &gen, MSEP_REC);
                        free(sg);
                        sg = NULL;
                        // NOTE(review): result2 grows with every generated form across all
                        // loops; the strcpy / copy_field / sprintf writes below carry no
                        // bound check (only mystrcat is given MAXLNLEN) - confirm it cannot overflow
                        for (int j = 0; j < genl; j++) {
                            if (strstr(pl[i], MORPH_SURF_PFX)) {
                                // record = MSEP_REC + compound parts + surface prefix + generated form
                                int r2l = strlen(result2);
                                result2[r2l] = MSEP_REC;
                                strcpy(result2 + r2l + 1, result);
                                copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX);
                                mystrcat(result2, gen[j], MAXLNLEN);
                            } else {
                                // record = MSEP_REC + compound parts + generated form
                                sprintf(result2 + strlen(result2), "%c%s%s",
                                    MSEP_REC, result, gen[j]);
                            }
                        }
                        freelist(&gen, genl);
                    }
                    rv = rv->next_homonym;
                }
            }
    }
    freelist(&pl, pln);
  }

  // stop when something was generated or the pattern has no derivational suffix left to relax
  if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) break;
  // retry with a private copy of the pattern in which every derivational
  // suffix tag is replaced by the terminal suffix tag
  strcpy(newpattern, pattern);
  pattern = newpattern;
  char * ds = strstr(pattern, MORPH_DERI_SFX);
  while (ds) {
    strncpy(ds, MORPH_TERM_SFX, MORPH_TAG_LEN);
    ds = strstr(pattern, MORPH_DERI_SFX);
  }
 }
  return (*result2 ? 
mystrdup(result2) : NULL); 1.1785 +} 1.1786 + 1.1787 + 1.1788 +// generate an n-gram score comparing s1 and s2 1.1789 +int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt) 1.1790 +{ 1.1791 + int nscore = 0; 1.1792 + int ns; 1.1793 + int l1; 1.1794 + int l2; 1.1795 + int test = 0; 1.1796 + 1.1797 + if (utf8) { 1.1798 + w_char su1[MAXSWL]; 1.1799 + w_char su2[MAXSWL]; 1.1800 + l1 = u8_u16(su1, MAXSWL, s1); 1.1801 + l2 = u8_u16(su2, MAXSWL, s2); 1.1802 + if ((l2 <= 0) || (l1 == -1)) return 0; 1.1803 + // lowering dictionary word 1.1804 + if (opt & NGRAM_LOWERING) mkallsmall_utf(su2, l2, langnum); 1.1805 + for (int j = 1; j <= n; j++) { 1.1806 + ns = 0; 1.1807 + for (int i = 0; i <= (l1-j); i++) { 1.1808 + int k = 0; 1.1809 + for (int l = 0; l <= (l2-j); l++) { 1.1810 + for (k = 0; k < j; k++) { 1.1811 + w_char * c1 = su1 + i + k; 1.1812 + w_char * c2 = su2 + l + k; 1.1813 + if ((c1->l != c2->l) || (c1->h != c2->h)) break; 1.1814 + } 1.1815 + if (k == j) { 1.1816 + ns++; 1.1817 + break; 1.1818 + } 1.1819 + } 1.1820 + if (k != j && opt & NGRAM_WEIGHTED) { 1.1821 + ns--; 1.1822 + test++; 1.1823 + if (i == 0 || i == l1-j) ns--; // side weight 1.1824 + } 1.1825 + } 1.1826 + nscore = nscore + ns; 1.1827 + if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break; 1.1828 + } 1.1829 + } else { 1.1830 + l2 = strlen(s2); 1.1831 + if (l2 == 0) return 0; 1.1832 + l1 = strlen(s1); 1.1833 + char *t = mystrdup(s2); 1.1834 + if (opt & NGRAM_LOWERING) mkallsmall(t, csconv); 1.1835 + for (int j = 1; j <= n; j++) { 1.1836 + ns = 0; 1.1837 + for (int i = 0; i <= (l1-j); i++) { 1.1838 + char c = *(s1 + i + j); 1.1839 + *(s1 + i + j) = '\0'; 1.1840 + if (strstr(t,(s1+i))) { 1.1841 + ns++; 1.1842 + } else if (opt & NGRAM_WEIGHTED) { 1.1843 + ns--; 1.1844 +test++; 1.1845 + if (i == 0 || i == l1-j) ns--; // side weight 1.1846 + } 1.1847 + *(s1 + i + j ) = c; 1.1848 + } 1.1849 + nscore = nscore + ns; 1.1850 + if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break; 1.1851 + } 1.1852 + free(t); 1.1853 + } 
1.1854 + 1.1855 + ns = 0; 1.1856 + if (opt & NGRAM_LONGER_WORSE) ns = (l2-l1)-2; 1.1857 + if (opt & NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2; 1.1858 + ns = (nscore - ((ns > 0) ? ns : 0)); 1.1859 + return ns; 1.1860 +} 1.1861 + 1.1862 +// length of the left common substring of s1 and (decapitalised) s2 1.1863 +int SuggestMgr::leftcommonsubstring(char * s1, const char * s2) { 1.1864 + if (utf8) { 1.1865 + w_char su1[MAXSWL]; 1.1866 + w_char su2[MAXSWL]; 1.1867 + su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0; 1.1868 + // decapitalize dictionary word 1.1869 + if (complexprefixes) { 1.1870 + int l1 = u8_u16(su1, MAXSWL, s1); 1.1871 + int l2 = u8_u16(su2, MAXSWL, s2); 1.1872 + if (*((short *)su1+l1-1) == *((short *)su2+l2-1)) return 1; 1.1873 + } else { 1.1874 + int i; 1.1875 + u8_u16(su1, 1, s1); 1.1876 + u8_u16(su2, 1, s2); 1.1877 + unsigned short idx = (su2->h << 8) + su2->l; 1.1878 + unsigned short otheridx = (su1->h << 8) + su1->l; 1.1879 + if (otheridx != idx && 1.1880 + (otheridx != unicodetolower(idx, langnum))) return 0; 1.1881 + int l1 = u8_u16(su1, MAXSWL, s1); 1.1882 + int l2 = u8_u16(su2, MAXSWL, s2); 1.1883 + for(i = 1; (i < l1) && (i < l2) && 1.1884 + (su1[i].l == su2[i].l) && (su1[i].h == su2[i].h); i++); 1.1885 + return i; 1.1886 + } 1.1887 + } else { 1.1888 + if (complexprefixes) { 1.1889 + int l1 = strlen(s1); 1.1890 + int l2 = strlen(s2); 1.1891 + if (*(s2+l1-1) == *(s2+l2-1)) return 1; 1.1892 + } else { 1.1893 + char * olds = s1; 1.1894 + // decapitalise dictionary word 1.1895 + if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) return 0; 1.1896 + do { 1.1897 + s1++; s2++; 1.1898 + } while ((*s1 == *s2) && (*s1 != '\0')); 1.1899 + return (int)(s1 - olds); 1.1900 + } 1.1901 + } 1.1902 + return 0; 1.1903 +} 1.1904 + 1.1905 +int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_swap) { 1.1906 + int num = 0; 1.1907 + int diff = 0; 1.1908 + int diffpos[2]; 1.1909 + *is_swap = 0; 1.1910 + if (utf8) { 1.1911 + w_char 
su1[MAXSWL]; 1.1912 + w_char su2[MAXSWL]; 1.1913 + int l1 = u8_u16(su1, MAXSWL, s1); 1.1914 + int l2 = u8_u16(su2, MAXSWL, s2); 1.1915 + // decapitalize dictionary word 1.1916 + if (complexprefixes) { 1.1917 + mkallsmall_utf(su2+l2-1, 1, langnum); 1.1918 + } else { 1.1919 + mkallsmall_utf(su2, 1, langnum); 1.1920 + } 1.1921 + for (int i = 0; (i < l1) && (i < l2); i++) { 1.1922 + if (((short *) su1)[i] == ((short *) su2)[i]) { 1.1923 + num++; 1.1924 + } else { 1.1925 + if (diff < 2) diffpos[diff] = i; 1.1926 + diff++; 1.1927 + } 1.1928 + } 1.1929 + if ((diff == 2) && (l1 == l2) && 1.1930 + (((short *) su1)[diffpos[0]] == ((short *) su2)[diffpos[1]]) && 1.1931 + (((short *) su1)[diffpos[1]] == ((short *) su2)[diffpos[0]])) *is_swap = 1; 1.1932 + } else { 1.1933 + int i; 1.1934 + char t[MAXSWUTF8L]; 1.1935 + strcpy(t, s2); 1.1936 + // decapitalize dictionary word 1.1937 + if (complexprefixes) { 1.1938 + int l2 = strlen(t); 1.1939 + *(t+l2-1) = csconv[((unsigned char)*(t+l2-1))].clower; 1.1940 + } else { 1.1941 + mkallsmall(t, csconv); 1.1942 + } 1.1943 + for (i = 0; (*(s1+i) != 0) && (*(t+i) != 0); i++) { 1.1944 + if (*(s1+i) == *(t+i)) { 1.1945 + num++; 1.1946 + } else { 1.1947 + if (diff < 2) diffpos[diff] = i; 1.1948 + diff++; 1.1949 + } 1.1950 + } 1.1951 + if ((diff == 2) && (*(s1+i) == 0) && (*(t+i) == 0) && 1.1952 + (*(s1+diffpos[0]) == *(t+diffpos[1])) && 1.1953 + (*(s1+diffpos[1]) == *(t+diffpos[0]))) *is_swap = 1; 1.1954 + } 1.1955 + return num; 1.1956 +} 1.1957 + 1.1958 +int SuggestMgr::mystrlen(const char * word) { 1.1959 + if (utf8) { 1.1960 + w_char w[MAXSWL]; 1.1961 + return u8_u16(w, MAXSWL, word); 1.1962 + } else return strlen(word); 1.1963 +} 1.1964 + 1.1965 +// sort in decreasing order of score 1.1966 +void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n ) 1.1967 +{ 1.1968 + int m = 1; 1.1969 + while (m < n) { 1.1970 + int j = m; 1.1971 + while (j > 0) { 1.1972 + if (rsc[j-1] < rsc[j]) { 1.1973 + int sctmp = rsc[j-1]; 1.1974 + 
char * wdtmp = rword[j-1]; 1.1975 + rsc[j-1] = rsc[j]; 1.1976 + rword[j-1] = rword[j]; 1.1977 + rsc[j] = sctmp; 1.1978 + rword[j] = wdtmp; 1.1979 + if (rword2) { 1.1980 + wdtmp = rword2[j-1]; 1.1981 + rword2[j-1] = rword2[j]; 1.1982 + rword2[j] = wdtmp; 1.1983 + } 1.1984 + j--; 1.1985 + } else break; 1.1986 + } 1.1987 + m++; 1.1988 + } 1.1989 + return; 1.1990 +} 1.1991 + 1.1992 +// longest common subsequence 1.1993 +void SuggestMgr::lcs(const char * s, const char * s2, int * l1, int * l2, char ** result) { 1.1994 + int n, m; 1.1995 + w_char su[MAXSWL]; 1.1996 + w_char su2[MAXSWL]; 1.1997 + char * b; 1.1998 + char * c; 1.1999 + int i; 1.2000 + int j; 1.2001 + if (utf8) { 1.2002 + m = u8_u16(su, MAXSWL, s); 1.2003 + n = u8_u16(su2, MAXSWL, s2); 1.2004 + } else { 1.2005 + m = strlen(s); 1.2006 + n = strlen(s2); 1.2007 + } 1.2008 + c = (char *) malloc((m + 1) * (n + 1)); 1.2009 + b = (char *) malloc((m + 1) * (n + 1)); 1.2010 + if (!c || !b) { 1.2011 + if (c) free(c); 1.2012 + if (b) free(b); 1.2013 + *result = NULL; 1.2014 + return; 1.2015 + } 1.2016 + for (i = 1; i <= m; i++) c[i*(n+1)] = 0; 1.2017 + for (j = 0; j <= n; j++) c[j] = 0; 1.2018 + for (i = 1; i <= m; i++) { 1.2019 + for (j = 1; j <= n; j++) { 1.2020 + if ( ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1))) 1.2021 + || ((!utf8) && ((*(s+i-1)) == (*(s2+j-1))))) { 1.2022 + c[i*(n+1) + j] = c[(i-1)*(n+1) + j-1]+1; 1.2023 + b[i*(n+1) + j] = LCS_UPLEFT; 1.2024 + } else if (c[(i-1)*(n+1) + j] >= c[i*(n+1) + j-1]) { 1.2025 + c[i*(n+1) + j] = c[(i-1)*(n+1) + j]; 1.2026 + b[i*(n+1) + j] = LCS_UP; 1.2027 + } else { 1.2028 + c[i*(n+1) + j] = c[i*(n+1) + j-1]; 1.2029 + b[i*(n+1) + j] = LCS_LEFT; 1.2030 + } 1.2031 + } 1.2032 + } 1.2033 + *result = b; 1.2034 + free(c); 1.2035 + *l1 = m; 1.2036 + *l2 = n; 1.2037 +} 1.2038 + 1.2039 +int SuggestMgr::lcslen(const char * s, const char* s2) { 1.2040 + int m; 1.2041 + int n; 1.2042 + int i; 1.2043 + int j; 1.2044 + char * result; 1.2045 + int len = 0; 1.2046 + lcs(s, 
s2, &m, &n, &result); 1.2047 + if (!result) return 0; 1.2048 + i = m; 1.2049 + j = n; 1.2050 + while ((i != 0) && (j != 0)) { 1.2051 + if (result[i*(n+1) + j] == LCS_UPLEFT) { 1.2052 + len++; 1.2053 + i--; 1.2054 + j--; 1.2055 + } else if (result[i*(n+1) + j] == LCS_UP) { 1.2056 + i--; 1.2057 + } else j--; 1.2058 + } 1.2059 + free(result); 1.2060 + return len; 1.2061 +}