/* Hyphen - hyphenation library using converted TeX hyphenation patterns
 *
 * (C) 1998 Raph Levien
 * (C) 2001 ALTLinux, Moscow
 * (C) 2006, 2007, 2008 László Németh
 *
 * This was part of libHnj library by Raph Levien.
 *
 * Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj
 * to use it in OpenOffice.org.
 *
 * Non-standard and compound word hyphenation support by László Németh.
 *
 * License is the original LibHnj license:
 *
 * LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
 * licenses follows.
 */

/* LibHnj - a library for high quality hyphenation and justification
 * Copyright (C) 1998 Raph Levien
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307 USA.
 */

/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "MPL"); you may not use this file except in
 * compliance with the MPL. You may obtain a copy of the MPL at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the MPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
 * for the specific language governing rights and limitations under the
 * MPL.
 *
 */
#ifndef __HYPHEN_H__
#define __HYPHEN_H__

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

typedef struct _HyphenDict HyphenDict;
typedef struct _HyphenState HyphenState;
typedef struct _HyphenTrans HyphenTrans;

/* MAX_CHARS is not referenced in this header.
   NOTE(review): presumably an internal buffer limit of the
   implementation - confirm against the .c file. */
#define MAX_CHARS 100
/* Size in bytes of the HyphenDict.cset buffer. */
#define MAX_NAME 20

/* A hyphenation dictionary: user-tunable limits plus the state table. */
struct _HyphenDict {
  /* user options */
  char lhmin;    /* lefthyphenmin: min. hyph. distance from the left side */
  char rhmin;    /* righthyphenmin: min. hyph. distance from the right side */
  char clhmin;   /* min. hyph. distance from the left compound boundary */
  char crhmin;   /* min. hyph. distance from the right compound boundary */
  char *nohyphen; /* comma separated list of characters or character
                     sequences with forbidden hyphenation */
  int nohyphenl; /* count of elements in nohyphen */
  /* system variables */
  int num_states;        /* NOTE(review): presumably the element count of
                            `states' - confirm */
  char cset[MAX_NAME];   /* NOTE(review): presumably the character set name
                            of the dictionary - confirm */
  int utf8;              /* NOTE(review): presumably non-zero for UTF-8
                            dictionaries - confirm */
  HyphenState *states;
  HyphenDict *nextlevel; /* NOTE(review): presumably a chained dictionary
                            for a further hyphenation level - confirm */
};

/* One state of the dictionary's state table, with its outgoing
   transitions. Field semantics are defined by the implementation;
   NOTE(review): repl/replindex/replcut look like the per-pattern
   counterparts of the rep/pos/cut outputs documented below - confirm. */
struct _HyphenState {
  char *match;
  char *repl;
  signed char replindex;
  signed char replcut;
  int fallback_state;
  int num_trans;       /* NOTE(review): presumably the element count of
                          `trans' - confirm */
  HyphenTrans *trans;
};

/* A single transition: on character `ch' go to state `new_state'.
   NOTE(review): reading inferred from the field names - confirm. */
struct _HyphenTrans {
  char ch;
  int new_state;
};

/* NOTE(review): `fn' is presumably the dictionary file name; ownership
   and failure behaviour are not stated in this header - confirm. */
HyphenDict *hnj_hyphen_load (const char *fn);
void hnj_hyphen_free (HyphenDict *dict);

/* obsolete, use the hnj_hyphen_hyphenate2() or hnj_hyphen_hyphenate3()
   functions */
int hnj_hyphen_hyphenate (HyphenDict *dict,
                          const char *word, int word_size,
                          char *hyphens);

/*

 int hnj_hyphen_hyphenate2(): non-standard hyphenation.

 (It supports Catalan, Dutch, German, Hungarian, Norwegian, Swedish
  etc. orthography, see documentation.)

 input data:
 word:      input word
 word_size: byte length of the input word

 hyphens:   allocated character buffer (size = word_size + 5)
 hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL
 rep, pos, cut: pointers (point to the allocated and _zeroed_ buffers
                (size=word_size) or with NULL value) or NULL

 output data:
 hyphens:   hyphenation vector (hyphenation points marked with odd numbers)
 hyphenated_word: hyphenated input word (hyphens marked with `='),
                  optional (NULL input)
 rep:       NULL (only standard hyph.), or replacements (hyphenation points
            marked with `=' in replacements);
 pos:       NULL, or difference of the actual position and the beginning
            positions of the change in input words;
 cut:       NULL, or counts of the removed characters of the original words
            at hyphenation,

 Note: rep, pos, cut are complementary arrays to the hyphens, indexed with the
       character positions of the input word.

 For example:
 Schiffahrt -> Schiff=fahrt,
 pattern: f1f/ff=f,1,2
 output: rep[5]="ff=f", pos[5] = 1, cut[5] = 2

 Note: hnj_hyphen_hyphenate2() can allocate rep, pos, cut (word_size
       length arrays):

 char ** rep = NULL;
 int * pos = NULL;
 int * cut = NULL;
 char hyphens[MAXWORDLEN];
 hnj_hyphen_hyphenate2(dict, "example", 7, hyphens, NULL, &rep, &pos, &cut);

 See example in the source distribution.

 The meaning of the int return value is not specified in this header.

*/

int hnj_hyphen_hyphenate2 (HyphenDict *dict,
        const char *word, int word_size, char * hyphens,
        char *hyphenated_word, char *** rep, int ** pos, int ** cut);

/* like hnj_hyphen_hyphenate2, but with hyphenmin parameters */
/* lhmin: lefthyphenmin
 * rhmin: righthyphenmin
 * clhmin: compoundlefthyphenmin
 * crhmin: compoundrighthyphenmin
 * (see documentation) */

int hnj_hyphen_hyphenate3 (HyphenDict *dict,
        const char *word, int word_size, char * hyphens,
        char *hyphword, char *** rep, int ** pos, int ** cut,
        int lhmin, int rhmin, int clhmin, int crhmin);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* __HYPHEN_H__ */