/* Hyphen - hyphenation library using converted TeX hyphenation patterns
 *
 * (C) 1998 Raph Levien
 * (C) 2001 ALTLinux, Moscow
 * (C) 2006, 2007, 2008 László Németh
 *
 * This was part of libHnj library by Raph Levien.
 *
 * Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj
 * to use it in OpenOffice.org.
 *
 * Non-standard and compound word hyphenation support by László Németh.
 *
 * License is the original LibHnj license:
 *
 * LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
 * licenses follows.
 */

/* LibHnj - a library for high quality hyphenation and justification
 * Copyright (C) 1998 Raph Levien
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA  02111-1307  USA.
 */

/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "MPL"); you may not use this file except in
 * compliance with the MPL.  You may obtain a copy of the MPL at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the MPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
 * for the specific language governing rights and limitations under the
 * MPL.
 *
 */
/*
 * Public interface of the hyphenation library: dictionary data structures
 * and the load / free / hyphenate entry points.
 *
 * NOTE(review): the guard macro `__HYPHEN_H__` and the struct tags
 * `_HyphenDict`, `_HyphenState`, `_HyphenTrans` use spellings that the C
 * standard reserves for the implementation.  They are kept unchanged here
 * because existing client code may refer to them.
 */
#ifndef __HYPHEN_H__
#define __HYPHEN_H__

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Short type names for the three record types defined below. */
typedef struct _HyphenDict HyphenDict;
typedef struct _HyphenState HyphenState;
typedef struct _HyphenTrans HyphenTrans;

/* NOTE(review): MAX_CHARS is not referenced in this header; presumably a
   limit used by the implementation -- confirm there. */
#define MAX_CHARS 100
/* Size in bytes of the HyphenDict.cset array. */
#define MAX_NAME 20

/*
 * A loaded hyphenation dictionary.
 *
 * The first group of members holds user options, the second group is
 * internal state.  Member order and types are part of the binary layout
 * and must not be changed.
 */
struct _HyphenDict {
  /* user options */
  char lhmin;     /* lefthyphenmin: min. hyph. distance from the left side */
  char rhmin;     /* righthyphenmin: min. hyph. distance from the right side */
  char clhmin;    /* min. hyph. distance from the left compound boundary */
  char crhmin;    /* min. hyph. distance from the right compound boundary */
  char *nohyphen; /* comma separated list of characters or character
                     sequences with forbidden hyphenation */
  int nohyphenl;  /* count of elements in nohyphen */
  /* system variables */
  int num_states;        /* presumably the number of entries in `states`
                            -- TODO confirm in the implementation */
  char cset[MAX_NAME];   /* presumably the character-set name of the
                            dictionary -- TODO confirm */
  int utf8;              /* presumably non-zero when the dictionary is
                            UTF-8 encoded -- TODO confirm */
  HyphenState *states;   /* state records (see struct _HyphenState) */
  HyphenDict *nextlevel; /* link to a further dictionary; presumably the
                            next hyphenation level -- TODO confirm */
};

/*
 * One state record referenced from HyphenDict.states.
 *
 * NOTE(review): the member meanings below are inferred from their names and
 * from the rep/pos/cut description of hnj_hyphen_hyphenate2(); verify
 * against the implementation before relying on them.
 */
struct _HyphenState {
  char *match;           /* presumably the pattern data attached to this
                            state */
  char *repl;            /* presumably the replacement text for
                            non-standard hyphenation */
  signed char replindex; /* presumably the position belonging to `repl` */
  signed char replcut;   /* presumably the cut length belonging to `repl` */
  int fallback_state;    /* presumably the state used when no transition
                            matches */
  int num_trans;         /* presumably the number of entries in `trans` */
  HyphenTrans *trans;    /* transition records (see struct _HyphenTrans) */
};

/* One transition record: a character paired with a state number. */
struct _HyphenTrans {
  char ch;       /* character of the transition */
  int new_state; /* state number associated with `ch` */
};

/*
 * Create a dictionary from `fn`.
 *
 * NOTE(review): `fn` is presumably the path of a dictionary file and the
 * result is presumably NULL on failure; neither is stated in this header.
 */
HyphenDict *hnj_hyphen_load (const char *fn);

/*
 * Release a dictionary.
 *
 * NOTE(review): presumably the counterpart of hnj_hyphen_load(); confirm
 * what is released in the implementation.
 */
void hnj_hyphen_free (HyphenDict *dict);

/*
 * Obsolete: use hnj_hyphen_hyphenate2() or hnj_hyphen_hyphenate3() instead.
 *
 * NOTE(review): the meaning of the int return value is not documented in
 * this header.
 */
int hnj_hyphen_hyphenate (HyphenDict *dict,
                          const char *word, int word_size,
                          char *hyphens);

/*

 int hnj_hyphen_hyphenate2(): non-standard hyphenation.

 (It supports Catalan, Dutch, German, Hungarian, Norwegian, Swedish
  etc. orthography, see documentation.)

 input data:
 word:      input word
 word_size: byte length of the input word

 hyphens:   allocated character buffer (size = word_size + 5)
 hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL
 rep, pos, cut: each argument is either NULL, or the address of a pointer
            variable.  That variable either points to an allocated and
            _zeroed_ buffer (size = word_size) or holds NULL.

 output data:
 hyphens:   hyphenation vector (hyphenation points are marked with odd
            numbers)
 hyphenated_word: hyphenated input word (hyphens are marked with '='),
            optional (NULL input)
 rep:       NULL (only standard hyph.), or replacements (hyphenation points
            are marked with '=' in the replacements);
 pos:       NULL, or difference of the actual position and the beginning
            positions of the change in input words;
 cut:       NULL, or counts of the removed characters of the original words
            at hyphenation.

 Note: rep, pos, cut are complementary arrays to the hyphens, indexed with
       the character positions of the input word.

 For example:
 Schiffahrt -> Schiff=fahrt,
 pattern: f1f/ff=f,1,2
 output: rep[5]="ff=f", pos[5] = 1, cut[5] = 2

 Note: hnj_hyphen_hyphenate2() can allocate rep, pos, cut (word_size
       length arrays):

 char ** rep = NULL;
 int * pos = NULL;
 int * cut = NULL;
 char hyphens[MAXWORDLEN];
 hnj_hyphen_hyphenate2(dict, "example", 7, hyphens, NULL, &rep, &pos, &cut);

 NOTE(review): this header does not say who releases arrays allocated by the
 function, nor what the int return value means; check the implementation.

 See example in the source distribution.

*/

int hnj_hyphen_hyphenate2 (HyphenDict *dict,
                           const char *word, int word_size, char * hyphens,
                           char *hyphenated_word, char *** rep, int ** pos,
                           int ** cut);

/* like hnj_hyphen_hyphenate2, but with hyphenmin parameters */
/* lhmin: lefthyphenmin
 * rhmin: righthyphenmin
 * clhmin: compoundlefthyphenmin
 * crhmin: compoundrighthyphenmin
 * (see documentation)
 *
 * `hyphword` occupies the same position as the `hyphenated_word` argument
 * of hnj_hyphen_hyphenate2(). */

int hnj_hyphen_hyphenate3 (HyphenDict *dict,
                           const char *word, int word_size, char * hyphens,
                           char *hyphword, char *** rep, int ** pos,
                           int ** cut,
                           int lhmin, int rhmin, int clhmin, int crhmin);

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* __HYPHEN_H__ */