1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/hyphenation/src/hyphen.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1160 @@ 1.4 +/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both 1.5 + * licenses follows. 1.6 + */ 1.7 + 1.8 +/* LibHnj - a library for high quality hyphenation and justification 1.9 + * Copyright (C) 1998 Raph Levien, 1.10 + * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 1.11 + * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) 1.12 + * (C) 2006, 2007, 2008, 2010 László Németh (nemeth at OOo) 1.13 + * 1.14 + * This library is free software; you can redistribute it and/or 1.15 + * modify it under the terms of the GNU Library General Public 1.16 + * License as published by the Free Software Foundation; either 1.17 + * version 2 of the License, or (at your option) any later version. 1.18 + * 1.19 + * This library is distributed in the hope that it will be useful, 1.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 1.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.22 + * Library General Public License for more details. 1.23 + * 1.24 + * You should have received a copy of the GNU Library General Public 1.25 + * License along with this library; if not, write to the 1.26 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 1.27 + * Boston, MA 02111-1307 USA. 1.28 +*/ 1.29 + 1.30 +/* 1.31 + * The contents of this file are subject to the Mozilla Public License 1.32 + * Version 1.0 (the "MPL"); you may not use this file except in 1.33 + * compliance with the MPL. You may obtain a copy of the MPL at 1.34 + * http://www.mozilla.org/MPL/ 1.35 + * 1.36 + * Software distributed under the MPL is distributed on an "AS IS" basis, 1.37 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL 1.38 + * for the specific language governing rights and limitations under the 1.39 + * MPL. 1.40 + * 1.41 + */ 1.42 +#include <stdlib.h> /* for NULL, malloc */ 1.43 +#include <stdio.h> /* for fprintf */ 1.44 +#include <string.h> /* for strdup */ 1.45 + 1.46 +#ifdef UNX 1.47 +#include <unistd.h> /* for exit */ 1.48 +#endif 1.49 + 1.50 +#define noVERBOSE 1.51 + 1.52 +/* calculate hyphenmin values with long ligature length (2 or 3 characters 1.53 + * instead of 1 or 2) for comparison with hyphenation without ligatures */ 1.54 +#define noLONG_LIGATURE 1.55 + 1.56 +#ifdef LONG_LIGATURE 1.57 +#define LIG_xx 1 1.58 +#define LIG_xxx 2 1.59 +#else 1.60 +#define LIG_xx 0 1.61 +#define LIG_xxx 1 1.62 +#endif 1.63 + 1.64 +#include "hnjalloc.h" 1.65 +#include "hyphen.h" 1.66 + 1.67 +static char * 1.68 +hnj_strdup (const char *s) 1.69 +{ 1.70 + char *new; 1.71 + int l; 1.72 + 1.73 + l = strlen (s); 1.74 + new = hnj_malloc (l + 1); 1.75 + memcpy (new, s, l); 1.76 + new[l] = 0; 1.77 + return new; 1.78 +} 1.79 + 1.80 +/* remove cross-platform text line end characters */ 1.81 +void hnj_strchomp(char * s) 1.82 +{ 1.83 + int k = strlen(s); 1.84 + if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; 1.85 + if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; 1.86 +} 1.87 + 1.88 +/* a little bit of a hash table implementation. This simply maps strings 1.89 + to state numbers */ 1.90 + 1.91 +typedef struct _HashTab HashTab; 1.92 +typedef struct _HashEntry HashEntry; 1.93 + 1.94 +/* A cheap, but effective, hack. */ 1.95 +#define HASH_SIZE 31627 1.96 + 1.97 +struct _HashTab { 1.98 + HashEntry *entries[HASH_SIZE]; 1.99 +}; 1.100 + 1.101 +struct _HashEntry { 1.102 + HashEntry *next; 1.103 + char *key; 1.104 + int val; 1.105 +}; 1.106 + 1.107 +/* a char* hash function from ASU - adapted from Gtk+ */ 1.108 +static unsigned int 1.109 +hnj_string_hash (const char *s) 1.110 +{ 1.111 + const char *p; 1.112 + unsigned int h=0, g; 1.113 + for(p = s; *p != '\0'; p += 1) { 1.114 + h = ( h << 4 ) + *p; 1.115 + if ( ( g = h & 0xf0000000 ) ) { 1.116 + h = h ^ (g >> 24); 1.117 + h = h ^ g; 1.118 + } 1.119 + } 1.120 + return h /* % M */; 1.121 +} 1.122 + 1.123 +static HashTab * 1.124 +hnj_hash_new (void) 1.125 +{ 1.126 + HashTab *hashtab; 1.127 + int i; 1.128 + 1.129 + hashtab = hnj_malloc (sizeof(HashTab)); 1.130 + for (i = 0; i < HASH_SIZE; i++) 1.131 + hashtab->entries[i] = NULL; 1.132 + 1.133 + return hashtab; 1.134 +} 1.135 + 1.136 +static void 1.137 +hnj_hash_free (HashTab *hashtab) 1.138 +{ 1.139 + int i; 1.140 + HashEntry *e, *next; 1.141 + 1.142 + for (i = 0; i < HASH_SIZE; i++) 1.143 + for (e = hashtab->entries[i]; e; e = next) 1.144 + { 1.145 + next = e->next; 1.146 + hnj_free (e->key); 1.147 + hnj_free (e); 1.148 + } 1.149 + 1.150 + hnj_free (hashtab); 1.151 +} 1.152 + 1.153 +/* assumes that key is not already present! */ 1.154 +static void 1.155 +hnj_hash_insert (HashTab *hashtab, const char *key, int val) 1.156 +{ 1.157 + int i; 1.158 + HashEntry *e; 1.159 + 1.160 + i = hnj_string_hash (key) % HASH_SIZE; 1.161 + e = hnj_malloc (sizeof(HashEntry)); 1.162 + e->next = hashtab->entries[i]; 1.163 + e->key = hnj_strdup (key); 1.164 + e->val = val; 1.165 + hashtab->entries[i] = e; 1.166 +} 1.167 + 1.168 +/* return val if found, otherwise -1 */ 1.169 +static int 1.170 +hnj_hash_lookup (HashTab *hashtab, const char *key) 1.171 +{ 1.172 + int i; 1.173 + HashEntry *e; 1.174 + i = hnj_string_hash (key) % HASH_SIZE; 1.175 + for (e = hashtab->entries[i]; e; e = e->next) 1.176 + if (!strcmp (key, e->key)) 1.177 + return e->val; 1.178 + return -1; 1.179 +} 1.180 + 1.181 +/* Get the state number, allocating a new state if necessary. */ 1.182 +static int 1.183 +hnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string) 1.184 +{ 1.185 + int state_num; 1.186 + 1.187 + state_num = hnj_hash_lookup (hashtab, string); 1.188 + 1.189 + if (state_num >= 0) 1.190 + return state_num; 1.191 + 1.192 + hnj_hash_insert (hashtab, string, dict->num_states); 1.193 + /* predicate is true if dict->num_states is a power of two */ 1.194 + if (!(dict->num_states & (dict->num_states - 1))) 1.195 + { 1.196 + dict->states = hnj_realloc (dict->states, 1.197 + (dict->num_states << 1) * 1.198 + sizeof(HyphenState)); 1.199 + } 1.200 + dict->states[dict->num_states].match = NULL; 1.201 + dict->states[dict->num_states].repl = NULL; 1.202 + dict->states[dict->num_states].fallback_state = -1; 1.203 + dict->states[dict->num_states].num_trans = 0; 1.204 + dict->states[dict->num_states].trans = NULL; 1.205 + return dict->num_states++; 1.206 +} 1.207 + 1.208 +/* add a transition from state1 to state2 through ch - assumes that the 1.209 + transition does not already exist */ 1.210 +static void 1.211 +hnj_add_trans (HyphenDict *dict, int state1, int state2, char ch) 1.212 +{ 1.213 + int num_trans; 1.214 + 1.215 + num_trans = dict->states[state1].num_trans; 1.216 + if (num_trans == 0) 1.217 + { 1.218 + dict->states[state1].trans = hnj_malloc (sizeof(HyphenTrans)); 1.219 + } 1.220 + else if (!(num_trans & (num_trans - 1))) 1.221 + { 1.222 + dict->states[state1].trans = hnj_realloc (dict->states[state1].trans, 1.223 + (num_trans << 1) * 1.224 + sizeof(HyphenTrans)); 1.225 + } 1.226 + dict->states[state1].trans[num_trans].ch = ch; 1.227 + dict->states[state1].trans[num_trans].new_state = state2; 1.228 + dict->states[state1].num_trans++; 1.229 +} 1.230 + 1.231 +#ifdef VERBOSE 1.232 +HashTab *global[1]; 1.233 + 1.234 +static char * 1.235 +get_state_str (int state, int level) 1.236 +{ 1.237 + int i; 1.238 + HashEntry *e; 1.239 + 1.240 + for (i = 0; i < HASH_SIZE; i++) 1.241 + for (e = global[level]->entries[i]; e; e = e->next) 1.242 + if (e->val == state) 1.243 + return e->key; 1.244 + return NULL; 1.245 +} 1.246 +#endif 1.247 + 1.248 +void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) { 1.249 + int i, j; 1.250 + char word[MAX_CHARS]; 1.251 + char pattern[MAX_CHARS]; 1.252 + char * repl; 1.253 + signed char replindex; 1.254 + signed char replcut; 1.255 + int state_num = 0; 1.256 + int last_state; 1.257 + char ch; 1.258 + int found; 1.259 + 1.260 + if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { 1.261 + dict->lhmin = atoi(buf + 13); 1.262 + return; 1.263 + } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { 1.264 + dict->rhmin = atoi(buf + 14); 1.265 + return; 1.266 + } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { 1.267 + dict->clhmin = atoi(buf + 21); 1.268 + return; 1.269 + } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { 1.270 + dict->crhmin = atoi(buf + 22); 1.271 + return; 1.272 + } else if (strncmp(buf, "NOHYPHEN", 8) == 0) { 1.273 + char * space = buf + 8; 1.274 + while (*space != '\0' && (*space == ' ' || *space == '\t')) space++; 1.275 + if (*buf != '\0') dict->nohyphen = hnj_strdup(space); 1.276 + if (dict->nohyphen) { 1.277 + char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1; 1.278 + *nhe = 0; 1.279 + for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) { 1.280 + if (*nhe == ',') { 1.281 + dict->nohyphenl++; 1.282 + *nhe = 0; 1.283 + } 1.284 + } 1.285 + } 1.286 + return; 1.287 + } 1.288 + j = 0; 1.289 + pattern[j] = '0'; 1.290 + repl = strchr(buf, '/'); 1.291 + replindex = 0; 1.292 + replcut = 0; 1.293 + if (repl) { 1.294 + char * index = strchr(repl + 1, ','); 1.295 + *repl = '\0'; 1.296 + if (index) { 1.297 + char * index2 = strchr(index + 1, ','); 1.298 + *index = '\0'; 1.299 + if (index2) { 1.300 + *index2 = '\0'; 1.301 + replindex = (signed char) atoi(index + 1) - 1; 1.302 + replcut = (signed char) atoi(index2 + 1); 1.303 + } 1.304 + } else { 1.305 + hnj_strchomp(repl + 1); 1.306 + replindex = 0; 1.307 + replcut = (signed char) strlen(buf); 1.308 + } 1.309 + repl = hnj_strdup(repl + 1); 1.310 + } 1.311 + for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++) 1.312 + { 1.313 + if (buf[i] >= '0' && buf[i] <= '9') 1.314 + pattern[j] = buf[i]; 1.315 + else 1.316 + { 1.317 + word[j] = buf[i]; 1.318 + pattern[++j] = '0'; 1.319 + } 1.320 + } 1.321 + word[j] = '\0'; 1.322 + pattern[j + 1] = '\0'; 1.323 + 1.324 + i = 0; 1.325 + if (!repl) { 1.326 + /* Optimize away leading zeroes */ 1.327 + for (; pattern[i] == '0'; i++); 1.328 + } else { 1.329 + if (*word == '.') i++; 1.330 + /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ 1.331 + if (dict->utf8) { 1.332 + int pu = -1; /* unicode character position */ 1.333 + int ps = -1; /* unicode start position (original replindex) */ 1.334 + int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ 1.335 + for (; pc < (strlen(word) + 1); pc++) { 1.336 + /* beginning of an UTF-8 character (not '10' start bits) */ 1.337 + if ((((unsigned char) word[pc]) >> 6) != 2) pu++; 1.338 + if ((ps < 0) && (replindex == pu)) { 1.339 + ps = replindex; 1.340 + replindex = (signed char) pc; 1.341 + } 1.342 + if ((ps >= 0) && ((pu - ps) == replcut)) { 1.343 + replcut = (signed char) (pc - replindex); 1.344 + break; 1.345 + } 1.346 + } 1.347 + if (*word == '.') replindex--; 1.348 + } 1.349 + } 1.350 + 1.351 +#ifdef VERBOSE 1.352 + printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl); 1.353 +#endif 1.354 + found = hnj_hash_lookup (hashtab, word); 1.355 + state_num = hnj_get_state (dict, hashtab, word); 1.356 + dict->states[state_num].match = hnj_strdup (pattern + i); 1.357 + dict->states[state_num].repl = repl; 1.358 + dict->states[state_num].replindex = replindex; 1.359 + if (!replcut) { 1.360 + dict->states[state_num].replcut = (signed char) strlen(word); 1.361 + } else { 1.362 + dict->states[state_num].replcut = replcut; 1.363 + } 1.364 + 1.365 + /* now, put in the prefix transitions */ 1.366 + for (; found < 0 ;j--) 1.367 + { 1.368 + last_state = state_num; 1.369 + ch = word[j - 1]; 1.370 + word[j - 1] = '\0'; 1.371 + found = hnj_hash_lookup (hashtab, word); 1.372 + state_num = hnj_get_state (dict, hashtab, word); 1.373 + hnj_add_trans (dict, state_num, last_state, ch); 1.374 + } 1.375 +} 1.376 + 1.377 +HyphenDict * 1.378 +hnj_hyphen_load (const char *fn) 1.379 +{ 1.380 + HyphenDict *dict[2]; 1.381 + HashTab *hashtab; 1.382 + FILE *f; 1.383 + char buf[MAX_CHARS]; 1.384 + int nextlevel = 0; 1.385 + int i, j, k; 1.386 + HashEntry *e; 1.387 + int state_num = 0; 1.388 + 1.389 + f = fopen (fn, "r"); 1.390 + if (f == NULL) 1.391 + return NULL; 1.392 + 1.393 +// loading one or two dictionaries (separated by NEXTLEVEL keyword) 1.394 +for (k = 0; k < 2; k++) { 1.395 + hashtab = hnj_hash_new (); 1.396 +#ifdef VERBOSE 1.397 + global[k] = hashtab; 1.398 +#endif 1.399 + hnj_hash_insert (hashtab, "", 0); 1.400 + dict[k] = hnj_malloc (sizeof(HyphenDict)); 1.401 + dict[k]->num_states = 1; 1.402 + dict[k]->states = hnj_malloc (sizeof(HyphenState)); 1.403 + dict[k]->states[0].match = NULL; 1.404 + dict[k]->states[0].repl = NULL; 1.405 + dict[k]->states[0].fallback_state = -1; 1.406 + dict[k]->states[0].num_trans = 0; 1.407 + dict[k]->states[0].trans = NULL; 1.408 + dict[k]->nextlevel = NULL; 1.409 + dict[k]->lhmin = 0; 1.410 + dict[k]->rhmin = 0; 1.411 + dict[k]->clhmin = 0; 1.412 + dict[k]->crhmin = 0; 1.413 + dict[k]->nohyphen = NULL; 1.414 + dict[k]->nohyphenl = 0; 1.415 + 1.416 + /* read in character set info */ 1.417 + if (k == 0) { 1.418 + for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; 1.419 + if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { 1.420 + for (i=0;i<MAX_NAME;i++) 1.421 + if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) 1.422 + dict[k]->cset[i] = 0; 1.423 + } else { 1.424 + dict[k]->cset[0] = 0; 1.425 + } 1.426 + dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); 1.427 + } else { 1.428 + strncpy(dict[k]->cset, dict[0]->cset, sizeof(dict[k]->cset)-1); 1.429 + dict[k]->cset[sizeof(dict[k]->cset)-1] = '\0'; 1.430 + dict[k]->utf8 = dict[0]->utf8; 1.431 + } 1.432 + 1.433 + if (k == 0 || nextlevel) { 1.434 + while (fgets (buf, sizeof(buf), f) != NULL) { 1.435 + if (strncmp(buf, "NEXTLEVEL", 9) == 0) { 1.436 + nextlevel = 1; 1.437 + break; 1.438 + } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab); 1.439 + } 1.440 + } else if (k == 1) { 1.441 + /* default first level: hyphen and ASCII apostrophe */ 1.442 + if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN ',-\n", dict[k], hashtab); 1.443 + else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99,-\n", dict[k], hashtab); 1.444 + strncpy(buf, "1-1\n", MAX_CHARS-1); // buf rewritten by hnj_hyphen_load here 1.445 + buf[MAX_CHARS-1] = '\0'; 1.446 + hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */ 1.447 + hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */ 1.448 + if (dict[0]->utf8) { 1.449 + hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */ 1.450 + hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */ 1.451 + } 1.452 + } 1.453 + 1.454 + /* Could do unioning of matches here (instead of the preprocessor script). 1.455 + If we did, the pseudocode would look something like this: 1.456 + 1.457 + foreach state in the hash table 1.458 + foreach i = [1..length(state) - 1] 1.459 + state to check is substr (state, i) 1.460 + look it up 1.461 + if found, and if there is a match, union the match in. 1.462 + 1.463 + It's also possible to avoid the quadratic blowup by doing the 1.464 + search in order of increasing state string sizes - then you 1.465 + can break the loop after finding the first match. 1.466 + 1.467 + This step should be optional in any case - if there is a 1.468 + preprocessed rule table, it's always faster to use that. 1.469 + 1.470 +*/ 1.471 + 1.472 + /* put in the fallback states */ 1.473 + for (i = 0; i < HASH_SIZE; i++) 1.474 + for (e = hashtab->entries[i]; e; e = e->next) 1.475 + { 1.476 + if (*(e->key)) for (j = 1; 1; j++) 1.477 + { 1.478 + state_num = hnj_hash_lookup (hashtab, e->key + j); 1.479 + if (state_num >= 0) 1.480 + break; 1.481 + } 1.482 + /* KBH: FIXME state 0 fallback_state should always be -1? */ 1.483 + if (e->val) 1.484 + dict[k]->states[e->val].fallback_state = state_num; 1.485 + } 1.486 +#ifdef VERBOSE 1.487 + for (i = 0; i < HASH_SIZE; i++) 1.488 + for (e = hashtab->entries[i]; e; e = e->next) 1.489 + { 1.490 + printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val, 1.491 + dict[k]->states[e->val].fallback_state); 1.492 + for (j = 0; j < dict[k]->states[e->val].num_trans; j++) 1.493 + printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, 1.494 + dict[k]->states[e->val].trans[j].new_state); 1.495 + } 1.496 +#endif 1.497 + 1.498 +#ifndef VERBOSE 1.499 + hnj_hash_free (hashtab); 1.500 +#endif 1.501 + state_num = 0; 1.502 +} 1.503 + fclose(f); 1.504 + if (nextlevel) dict[0]->nextlevel = dict[1]; 1.505 + else { 1.506 + dict[1] -> nextlevel = dict[0]; 1.507 + dict[1]->lhmin = dict[0]->lhmin; 1.508 + dict[1]->rhmin = dict[0]->rhmin; 1.509 + dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3); 1.510 + dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3); 1.511 +#ifdef VERBOSE 1.512 + HashTab *r = global[0]; 1.513 + global[0] = global[1]; 1.514 + global[1] = r; 1.515 +#endif 1.516 + return dict[1]; 1.517 + } 1.518 + return dict[0]; 1.519 +} 1.520 + 1.521 +void hnj_hyphen_free (HyphenDict *dict) 1.522 +{ 1.523 + int state_num; 1.524 + HyphenState *hstate; 1.525 + 1.526 + for (state_num = 0; state_num < dict->num_states; state_num++) 1.527 + { 1.528 + hstate = &dict->states[state_num]; 1.529 + if (hstate->match) 1.530 + hnj_free (hstate->match); 1.531 + if (hstate->repl) 1.532 + hnj_free (hstate->repl); 1.533 + if (hstate->trans) 1.534 + hnj_free (hstate->trans); 1.535 + } 1.536 + if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel); 1.537 + 1.538 + if (dict->nohyphen) hnj_free(dict->nohyphen); 1.539 + 1.540 + hnj_free (dict->states); 1.541 + 1.542 + hnj_free (dict); 1.543 +} 1.544 + 1.545 +#define MAX_WORD 256 1.546 + 1.547 +int hnj_hyphen_hyphenate (HyphenDict *dict, 1.548 + const char *word, int word_size, 1.549 + char *hyphens) 1.550 +{ 1.551 + char *prep_word; 1.552 + int i, j, k; 1.553 + int state; 1.554 + char ch; 1.555 + HyphenState *hstate; 1.556 + char *match; 1.557 + int offset; 1.558 + 1.559 + prep_word = hnj_malloc (word_size + 3); 1.560 + 1.561 + j = 0; 1.562 + prep_word[j++] = '.'; 1.563 + 1.564 + for (i = 0; i < word_size; i++) { 1.565 + if (word[i] <= '9' && word[i] >= '0') { 1.566 + prep_word[j++] = '.'; 1.567 + } else { 1.568 + prep_word[j++] = word[i]; 1.569 + } 1.570 + } 1.571 + 1.572 + prep_word[j++] = '.'; 1.573 + prep_word[j] = '\0'; 1.574 + 1.575 + for (i = 0; i < word_size + 5; i++) 1.576 + hyphens[i] = '0'; 1.577 + 1.578 +#ifdef VERBOSE 1.579 + printf ("prep_word = %s\n", prep_word); 1.580 +#endif 1.581 + 1.582 + /* now, run the finite state machine */ 1.583 + state = 0; 1.584 + for (i = 0; i < j; i++) 1.585 + { 1.586 + ch = prep_word[i]; 1.587 + for (;;) 1.588 + { 1.589 + 1.590 + if (state == -1) { 1.591 + /* return 1; */ 1.592 + /* KBH: FIXME shouldn't this be as follows? */ 1.593 + state = 0; 1.594 + goto try_next_letter; 1.595 + } 1.596 + 1.597 +#ifdef VERBOSE 1.598 + char *state_str; 1.599 + state_str = get_state_str (state, 0); 1.600 + 1.601 + for (k = 0; k < i - strlen (state_str); k++) 1.602 + putchar (' '); 1.603 + printf ("%s", state_str); 1.604 +#endif 1.605 + 1.606 + hstate = &dict->states[state]; 1.607 + for (k = 0; k < hstate->num_trans; k++) 1.608 + if (hstate->trans[k].ch == ch) 1.609 + { 1.610 + state = hstate->trans[k].new_state; 1.611 + goto found_state; 1.612 + } 1.613 + state = hstate->fallback_state; 1.614 +#ifdef VERBOSE 1.615 + printf (" falling back, fallback_state %d\n", state); 1.616 +#endif 1.617 + } 1.618 + found_state: 1.619 +#ifdef VERBOSE 1.620 + printf ("found state %d\n",state); 1.621 +#endif 1.622 + /* Additional optimization is possible here - especially, 1.623 + elimination of trailing zeroes from the match. Leading zeroes 1.624 + have already been optimized. */ 1.625 + match = dict->states[state].match; 1.626 + /* replacing rules not handled by hyphen_hyphenate() */ 1.627 + if (match && !dict->states[state].repl) 1.628 + { 1.629 + offset = i + 1 - strlen (match); 1.630 +#ifdef VERBOSE 1.631 + for (k = 0; k < offset; k++) 1.632 + putchar (' '); 1.633 + printf ("%s\n", match); 1.634 +#endif 1.635 + /* This is a linear search because I tried a binary search and 1.636 + found it to be just a teeny bit slower. */ 1.637 + for (k = 0; match[k]; k++) 1.638 + if (hyphens[offset + k] < match[k]) 1.639 + hyphens[offset + k] = match[k]; 1.640 + } 1.641 + 1.642 + /* KBH: we need this to make sure we keep looking in a word */ 1.643 + /* for patterns even if the current character is not known in state 0 */ 1.644 + /* since patterns for hyphenation may occur anywhere in the word */ 1.645 + try_next_letter: ; 1.646 + 1.647 + } 1.648 +#ifdef VERBOSE 1.649 + for (i = 0; i < j; i++) 1.650 + putchar (hyphens[i]); 1.651 + putchar ('\n'); 1.652 +#endif 1.653 + 1.654 + for (i = 0; i < j - 4; i++) 1.655 +#if 0 1.656 + if (hyphens[i + 1] & 1) 1.657 + hyphens[i] = '-'; 1.658 +#else 1.659 + hyphens[i] = hyphens[i + 1]; 1.660 +#endif 1.661 + hyphens[0] = '0'; 1.662 + for (; i < word_size; i++) 1.663 + hyphens[i] = '0'; 1.664 + hyphens[word_size] = '\0'; 1.665 + 1.666 + hnj_free (prep_word); 1.667 + 1.668 + return 0; 1.669 +} 1.670 + 1.671 +/* Unicode ligature length */ 1.672 +int hnj_ligature(unsigned char c) { 1.673 + switch (c) { 1.674 + case 0x80: /* ff */ 1.675 + case 0x81: /* fi */ 1.676 + case 0x82: return LIG_xx; /* fl */ 1.677 + case 0x83: /* ffi */ 1.678 + case 0x84: return LIG_xxx; /* ffl */ 1.679 + case 0x85: /* long st */ 1.680 + case 0x86: return LIG_xx; /* st */ 1.681 + } 1.682 + return 0; 1.683 +} 1.684 + 1.685 +/* character length of the first n byte of the input word */ 1.686 +int hnj_hyphen_strnlen(const char * word, int n, int utf8) 1.687 +{ 1.688 + int i = 0; 1.689 + int j = 0; 1.690 + while (j < n && word[j] != '\0') { 1.691 + i++; 1.692 + // Unicode ligature support 1.693 + if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j + 1] == 0xAC)) { 1.694 + i += hnj_ligature(word[j + 2]); 1.695 + } 1.696 + for (j++; utf8 && (word[j] & 0xc0) == 0x80; j++); 1.697 + } 1.698 + return i; 1.699 +} 1.700 + 1.701 +int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens, 1.702 + char *** rep, int ** pos, int ** cut, int lhmin) 1.703 +{ 1.704 + int i = 1, j; 1.705 + 1.706 + // Unicode ligature support 1.707 + if (utf8 && ((unsigned char) word[0] == 0xEF) && ((unsigned char) word[1] == 0xAC)) { 1.708 + i += hnj_ligature(word[2]); 1.709 + } 1.710 + 1.711 + // ignore numbers 1.712 + for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--; 1.713 + 1.714 + for (j = 0; i < lhmin && word[j] != '\0'; i++) do { 1.715 + // check length of the non-standard part 1.716 + if (*rep && *pos && *cut && (*rep)[j]) { 1.717 + char * rh = strchr((*rep)[j], '='); 1.718 + if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) + 1.719 + hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) { 1.720 + free((*rep)[j]); 1.721 + (*rep)[j] = NULL; 1.722 + hyphens[j] = '0'; 1.723 + } 1.724 + } else { 1.725 + hyphens[j] = '0'; 1.726 + } 1.727 + j++; 1.728 + 1.729 + // Unicode ligature support 1.730 + if (utf8 && ((unsigned char) word[j] == 0xEF) && ((unsigned char) word[j + 1] == 0xAC)) { 1.731 + i += hnj_ligature(word[j + 2]); 1.732 + } 1.733 + } while (utf8 && (word[j] & 0xc0) == 0x80); 1.734 + return 0; 1.735 +} 1.736 + 1.737 +int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens, 1.738 + char *** rep, int ** pos, int ** cut, int rhmin) 1.739 +{ 1.740 + int i = 0; 1.741 + int j; 1.742 + 1.743 + // ignore numbers 1.744 + for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--; 1.745 + 1.746 + for (j = word_size - 1; i < rhmin && j > 0; j--) { 1.747 + // check length of the non-standard part 1.748 + if (*rep && *pos && *cut && (*rep)[j]) { 1.749 + char * rh = strchr((*rep)[j], '='); 1.750 + if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) + 1.751 + hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) { 1.752 + free((*rep)[j]); 1.753 + (*rep)[j] = NULL; 1.754 + hyphens[j] = '0'; 1.755 + } 1.756 + } else { 1.757 + hyphens[j] = '0'; 1.758 + } 1.759 + if (!utf8 || (word[j] & 0xc0) == 0xc0 || (word[j] & 0x80) != 0x80) i++; 1.760 + } 1.761 + return 0; 1.762 +} 1.763 + 1.764 +// recursive function for compound level hyphenation 1.765 +int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, 1.766 + char * hyphens, char *** rep, int ** pos, int ** cut, 1.767 + int clhmin, int crhmin, int lend, int rend) 1.768 +{ 1.769 + char *prep_word; 1.770 + int i, j, k; 1.771 + int state; 1.772 + char ch; 1.773 + HyphenState *hstate; 1.774 + char *match; 1.775 + char *repl; 1.776 + signed char replindex; 1.777 + signed char replcut; 1.778 + int offset; 1.779 + int * matchlen; 1.780 + int * matchindex; 1.781 + char ** matchrepl; 1.782 + int isrepl = 0; 1.783 + int nHyphCount; 1.784 + 1.785 + size_t prep_word_size = word_size + 3; 1.786 + prep_word = hnj_malloc (prep_word_size); 1.787 + matchlen = hnj_malloc ((word_size + 3) * sizeof(int)); 1.788 + matchindex = hnj_malloc ((word_size + 3) * sizeof(int)); 1.789 + matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *)); 1.790 + 1.791 + j = 0; 1.792 + prep_word[j++] = '.'; 1.793 + 1.794 + for (i = 0; i < word_size; i++) { 1.795 + if (word[i] <= '9' && word[i] >= '0') { 1.796 + prep_word[j++] = '.'; 1.797 + } else { 1.798 + prep_word[j++] = word[i]; 1.799 + } 1.800 + } 1.801 + 1.802 + 1.803 + 1.804 + prep_word[j++] = '.'; 1.805 + prep_word[j] = '\0'; 1.806 + 1.807 + for (i = 0; i < j; i++) 1.808 + hyphens[i] = '0'; 1.809 + 1.810 +#ifdef VERBOSE 1.811 + printf ("prep_word = %s\n", prep_word); 1.812 +#endif 1.813 + 1.814 + /* now, run the finite state machine */ 1.815 + state = 0; 1.816 + for (i = 0; i < j; i++) 1.817 + { 1.818 + ch = prep_word[i]; 1.819 + for (;;) 1.820 + { 1.821 + 1.822 + if (state == -1) { 1.823 + /* return 1; */ 1.824 + /* KBH: FIXME shouldn't this be as follows? */ 1.825 + state = 0; 1.826 + goto try_next_letter; 1.827 + } 1.828 + 1.829 +#ifdef VERBOSE 1.830 + char *state_str; 1.831 + state_str = get_state_str (state, 1); 1.832 + 1.833 + for (k = 0; k < i - strlen (state_str); k++) 1.834 + putchar (' '); 1.835 + printf ("%s", state_str); 1.836 +#endif 1.837 + 1.838 + hstate = &dict->states[state]; 1.839 + for (k = 0; k < hstate->num_trans; k++) 1.840 + if (hstate->trans[k].ch == ch) 1.841 + { 1.842 + state = hstate->trans[k].new_state; 1.843 + goto found_state; 1.844 + } 1.845 + state = hstate->fallback_state; 1.846 +#ifdef VERBOSE 1.847 + printf (" falling back, fallback_state %d\n", state); 1.848 +#endif 1.849 + } 1.850 + found_state: 1.851 +#ifdef VERBOSE 1.852 + printf ("found state %d\n",state); 1.853 +#endif 1.854 + /* Additional optimization is possible here - especially, 1.855 + elimination of trailing zeroes from the match. Leading zeroes 1.856 + have already been optimized. */ 1.857 + match = dict->states[state].match; 1.858 + repl = dict->states[state].repl; 1.859 + replindex = dict->states[state].replindex; 1.860 + replcut = dict->states[state].replcut; 1.861 + /* replacing rules not handled by hyphen_hyphenate() */ 1.862 + if (match) 1.863 + { 1.864 + offset = i + 1 - strlen (match); 1.865 +#ifdef VERBOSE 1.866 + for (k = 0; k < offset; k++) 1.867 + putchar (' '); 1.868 + printf ("%s (%s)\n", match, repl); 1.869 +#endif 1.870 + if (repl) { 1.871 + if (!isrepl) for(; isrepl < word_size; isrepl++) { 1.872 + matchrepl[isrepl] = NULL; 1.873 + matchindex[isrepl] = -1; 1.874 + } 1.875 + matchlen[offset + replindex] = replcut; 1.876 + } 1.877 + /* This is a linear search because I tried a binary search and 1.878 + found it to be just a teeny bit slower. */ 1.879 + for (k = 0; match[k]; k++) { 1.880 + if ((hyphens[offset + k] < match[k])) { 1.881 + hyphens[offset + k] = match[k]; 1.882 + if (match[k]&1) { 1.883 + matchrepl[offset + k] = repl; 1.884 + if (repl && (k >= replindex) && (k <= replindex + replcut)) { 1.885 + matchindex[offset + replindex] = offset + k; 1.886 + } 1.887 + } 1.888 + } 1.889 + } 1.890 + 1.891 + } 1.892 + 1.893 + /* KBH: we need this to make sure we keep looking in a word */ 1.894 + /* for patterns even if the current character is not known in state 0 */ 1.895 + /* since patterns for hyphenation may occur anywhere in the word */ 1.896 + try_next_letter: ; 1.897 + 1.898 + } 1.899 +#ifdef VERBOSE 1.900 + for (i = 0; i < j; i++) 1.901 + putchar (hyphens[i]); 1.902 + putchar ('\n'); 1.903 +#endif 1.904 + 1.905 + for (i = 0; i < j - 3; i++) 1.906 +#if 0 1.907 + if (hyphens[i + 1] & 1) 1.908 + hyphens[i] = '-'; 1.909 +#else 1.910 + hyphens[i] = hyphens[i + 1]; 1.911 +#endif 1.912 + for (; i < word_size; i++) 1.913 + hyphens[i] = '0'; 1.914 + hyphens[word_size] = '\0'; 1.915 + 1.916 + /* now create a new char string showing hyphenation positions */ 1.917 + /* count the hyphens and allocate space for the new hyphenated string */ 1.918 + nHyphCount = 0; 1.919 + for (i = 0; i < word_size; i++) 1.920 + if (hyphens[i]&1) 1.921 + nHyphCount++; 1.922 + j = 0; 1.923 + for (i = 0; i < word_size; i++) { 1.924 + if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { 1.925 + if (rep && pos && cut) { 1.926 + if (!*rep) 1.927 + *rep = (char **) calloc(word_size, sizeof(char *)); 1.928 + if (!*pos) 1.929 + *pos = (int *) calloc(word_size, sizeof(int)); 1.930 + if (!*cut) { 1.931 + *cut = (int *) calloc(word_size, sizeof(int)); 1.932 + } 1.933 + (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]); 1.934 + (*pos)[matchindex[i] - 1] = matchindex[i] - i; 1.935 + (*cut)[matchindex[i] - 1] = matchlen[i]; 1.936 + } 1.937 + j += strlen(matchrepl[matchindex[i]]); 1.938 + i += matchlen[i] - 1; 1.939 + } 1.940 + } 1.941 + 1.942 + hnj_free (matchrepl); 1.943 + hnj_free (matchlen); 1.944 + hnj_free (matchindex); 1.945 + 1.946 + // recursive hyphenation of the first (compound) level segments 1.947 + if (dict->nextlevel) { 1.948 + char ** rep2; 1.949 + int * pos2; 1.950 + int * cut2; 1.951 + char * hyphens2; 1.952 + int begin = 0; 1.953 + 1.954 + rep2 = hnj_malloc (word_size * sizeof(char *)); 1.955 + pos2 = hnj_malloc (word_size * sizeof(int)); 1.956 + cut2 = hnj_malloc (word_size * sizeof(int)); 1.957 + hyphens2 = hnj_malloc (word_size + 3); 1.958 + for (i = 0; i < word_size; i++) rep2[i] = NULL; 1.959 + for (i = 0; i < word_size; i++) if 1.960 + (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { 1.961 + if (i - begin > 1) { 1.962 + int hyph = 0; 1.963 + prep_word[i + 2] = '\0'; 1.964 + /* non-standard hyphenation at compound boundary (Schiffahrt) */ 1.965 + if (rep && *rep && *pos && *cut && (*rep)[i]) { 1.966 + char * l = strchr((*rep)[i], '='); 1.967 + size_t offset = 2 + i - (*pos)[i]; 1.968 + strncpy(prep_word + offset, (*rep)[i], prep_word_size - offset - 1); 1.969 + prep_word[prep_word_size - 1] = '\0'; 1.970 + if (l) { 1.971 + hyph = (l - (*rep)[i]) - (*pos)[i]; 1.972 + prep_word[2 + i + hyph] = '\0'; 1.973 + } 1.974 + } 1.975 + hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph, 1.976 + hyphens2, &rep2, &pos2, &cut2, clhmin, 1.977 + crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend)); 1.978 + for (j = 0; j < i - begin - 1; j++) { 1.979 + hyphens[begin + j] = hyphens2[j]; 1.980 + if (rep2[j] && rep && pos && cut) { 1.981 + if (!*rep && !*pos && !*cut) { 1.982 + int k; 1.983 + *rep = (char **) malloc(sizeof(char *) * word_size); 1.984 + *pos = (int *) malloc(sizeof(int) * word_size); 1.985 + *cut = (int *) malloc(sizeof(int) * word_size); 1.986 + for (k = 0; k < word_size; k++) { 1.987 + (*rep)[k] = NULL; 1.988 + (*pos)[k] = 0; 1.989 + (*cut)[k] = 0; 1.990 + } 1.991 + } 1.992 + (*rep)[begin + j] = rep2[j]; 1.993 + (*pos)[begin + j] = pos2[j]; 1.994 + (*cut)[begin + j] = cut2[j]; 1.995 + } 1.996 + } 1.997 + prep_word[i + 2] = word[i + 1]; 1.998 + if (*rep && *pos && *cut && (*rep)[i]) { 1.999 + size_t offset = 1; 1.1000 + strncpy(prep_word + offset, word, prep_word_size - offset - 1); 1.1001 + prep_word[prep_word_size - 1] = '\0'; 1.1002 + } 1.1003 + } 1.1004 + begin = i + 1; 1.1005 + for (j = 0; j < word_size; j++) rep2[j] = NULL; 1.1006 + } 1.1007 + 1.1008 + // non-compound 1.1009 + if (begin == 0) { 1.1010 + hnj_hyphen_hyph_(dict->nextlevel, word, word_size, 1.1011 + hyphens, rep, pos, cut, clhmin, crhmin, lend, rend); 1.1012 + if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, 1.1013 + rep, pos, cut, clhmin); 1.1014 + if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, 1.1015 + rep, pos, cut, crhmin); 1.1016 + } 1.1017 + 1.1018 + free(rep2); 1.1019 + free(cut2); 1.1020 + free(pos2); 1.1021 + free(hyphens2); 1.1022 + } 1.1023 + 1.1024 + hnj_free (prep_word); 1.1025 + return 0; 1.1026 +} 1.1027 + 1.1028 +/* UTF-8 normalization of hyphen and non-standard positions */ 1.1029 +int hnj_hyphen_norm(const char *word, int word_size, char * hyphens, 1.1030 + char *** rep, int ** pos, int ** cut) 1.1031 +{ 1.1032 + int i, j, k; 1.1033 + if ((((unsigned char) word[0]) >> 6) == 2) { 1.1034 + fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word); 1.1035 + return 1; 1.1036 + } 1.1037 + 1.1038 + /* calculate UTF-8 character positions */ 1.1039 + for (i = 0, j = -1; i < word_size; i++) { 1.1040 + /* beginning of an UTF-8 character (not '10' start bits) */ 1.1041 + if ((((unsigned char) word[i]) >> 6) != 2) j++; 1.1042 + hyphens[j] = hyphens[i]; 1.1043 + if (rep && pos && cut && *rep && *pos && *cut) { 1.1044 + int l = (*pos)[i]; 1.1045 + (*pos)[j] = 0; 1.1046 + for (k = 0; k < l; k++) { 1.1047 + if ((((unsigned char) word[i - k]) >> 6) != 2) (*pos)[j]++; 1.1048 + } 1.1049 + k = i - l + 1; 1.1050 + l = k + (*cut)[i]; 1.1051 + (*cut)[j] = 0; 1.1052 + for (; k < l; k++) { 1.1053 + if ((((unsigned char) word[k]) >> 6) != 2) (*cut)[j]++; 1.1054 + } 1.1055 + (*rep)[j] = (*rep)[i]; 1.1056 + if (j < i) { 1.1057 + (*rep)[i] = NULL; 1.1058 + (*pos)[i] = 0; 1.1059 + (*cut)[i] = 0; 1.1060 + } 1.1061 + } 1.1062 + } 1.1063 + hyphens[j + 1] = '\0'; 1.1064 +#ifdef VERBOSE 1.1065 + printf ("nums: %s\n", hyphens); 1.1066 +#endif 1.1067 + return 0; 1.1068 +} 1.1069 + 1.1070 +/* get the word with all possible hyphenations (output: hyphword) */ 1.1071 +void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, 1.1072 + char * hyphword, char *** rep, int ** pos, int ** cut) 1.1073 +{ 1.1074 + int hyphenslen = l + 5; 1.1075 + 1.1076 + int i, j; 1.1077 + for (i = 0, j = 0; i < l; i++, j++) { 1.1078 + if (hyphens[i]&1) { 1.1079 + hyphword[j] = word[i]; 1.1080 + if (*rep && *pos && *cut && (*rep)[i]) { 1.1081 + size_t offset = j - (*pos)[i] + 1; 1.1082 + strncpy(hyphword + offset, (*rep)[i], hyphenslen - offset - 1); 1.1083 + hyphword[hyphenslen-1] = '\0'; 1.1084 + j += strlen((*rep)[i]) - (*pos)[i]; 1.1085 + i += (*cut)[i] - (*pos)[i]; 1.1086 + } else hyphword[++j] = '='; 1.1087 + } else hyphword[j] = word[i]; 1.1088 + } 1.1089 + hyphword[j] = '\0'; 1.1090 +} 1.1091 + 1.1092 + 1.1093 +/* main api function with default hyphenmin parameters */ 1.1094 +int hnj_hyphen_hyphenate2 (HyphenDict *dict, 1.1095 + const char *word, int word_size, char * hyphens, 1.1096 + char *hyphword, char *** rep, int ** pos, int ** cut) 1.1097 +{ 1.1098 + hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, 1.1099 + dict->clhmin, dict->crhmin, 1, 1); 1.1100 + hnj_hyphen_lhmin(dict->utf8, word, word_size, 1.1101 + hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2)); 1.1102 + hnj_hyphen_rhmin(dict->utf8, word, word_size, 1.1103 + hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2)); 1.1104 + 1.1105 + /* nohyphen */ 1.1106 + if (dict->nohyphen) { 1.1107 + char * nh = dict->nohyphen; 1.1108 + int nhi; 1.1109 + for (nhi = 0; nhi <= dict->nohyphenl; nhi++) { 1.1110 + char * nhy = (char *) strstr(word, nh); 1.1111 + while (nhy) { 1.1112 + hyphens[nhy - word + strlen(nh) - 1] = '0'; 1.1113 + if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0'; 1.1114 + nhy = (char *) strstr(nhy + 1, nh); 1.1115 + } 1.1116 + nh = nh + strlen(nh) + 1; 1.1117 + } 1.1118 + } 1.1119 + 1.1120 + if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); 1.1121 + if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); 1.1122 +#ifdef VERBOSE 1.1123 + printf ("nums: %s\n", hyphens); 1.1124 +#endif 1.1125 + return 0; 1.1126 +} 1.1127 + 1.1128 +/* previous main api function with hyphenmin parameters */ 1.1129 +int hnj_hyphen_hyphenate3 (HyphenDict *dict, 1.1130 + const char *word, int word_size, char * hyphens, 1.1131 + char *hyphword, char *** rep, int ** pos, int ** cut, 1.1132 + int lhmin, int rhmin, int clhmin, int crhmin) 1.1133 +{ 1.1134 + lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin; 1.1135 + rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin; 1.1136 + clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin; 1.1137 + crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin; 1.1138 + hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, 1.1139 + clhmin, crhmin, 1, 1); 1.1140 + hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, 1.1141 + rep, pos, cut, (lhmin > 0 ? lhmin : 2)); 1.1142 + hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, 1.1143 + rep, pos, cut, (rhmin > 0 ? rhmin : 2)); 1.1144 + if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); 1.1145 + 1.1146 + /* nohyphen */ 1.1147 + if (dict->nohyphen) { 1.1148 + char * nh = dict->nohyphen; 1.1149 + int nhi; 1.1150 + for (nhi = 0; nhi <= dict->nohyphenl; nhi++) { 1.1151 + char * nhy = (char *) strstr(word, nh); 1.1152 + while (nhy) { 1.1153 + hyphens[nhy - word + strlen(nh) - 1] = 0; 1.1154 + if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0; 1.1155 + nhy = (char *) strstr(nhy + 1, nh); 1.1156 + } 1.1157 + nh = nh + strlen(nh) + 1; 1.1158 + } 1.1159 + } 1.1160 + 1.1161 + if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); 1.1162 + return 0; 1.1163 +}