|
1 .\" Hey, Emacs! This is -*-nroff-*- you know... |
|
2 .\" |
|
3 .\" gendict.1: manual page for the gendict utility |
|
4 .\" |
|
5 .\" Copyright (C) 2012 International Business Machines Corporation and others |
|
6 .\" |
|
7 .TH GENDICT 1 "1 June 2012" "ICU MANPAGE" "ICU @VERSION@ Manual" |
|
8 .SH NAME |
|
9 .B gendict |
|
10 \- Compiles a word list into an ICU string trie dictionary |
|
11 .SH SYNOPSIS |
|
12 .B gendict |
|
13 [ |
|
14 .BR "\fB\-\-uchars" |
|
15 | |
|
16 .BR "\fB\-\-bytes" |
|
17 .BI "\fB\-\-transform" " transform" |
|
18 ] |
|
19 [ |
|
20 .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" |
|
21 ] |
|
22 [ |
|
23 .BR "\-V\fP, \fB\-\-version" |
|
24 ] |
|
25 [ |
|
26 .BR "\-c\fP, \fB\-\-copyright" |
|
27 ] |
|
28 [ |
|
29 .BR "\-v\fP, \fB\-\-verbose" |
|
30 ] |
|
31 [ |
|
32 .BI "\-i\fP, \fB\-\-icudatadir" " directory" |
|
33 ] |
|
34 .IR " input-file" |
|
35 .IR " output\-file" |
|
36 .SH DESCRIPTION |
|
37 .B gendict |
|
38 reads the word list from |
|
39 .I input\-file |
|
40 and creates a string trie dictionary file. Normally this data file has the |
|
41 .B .dict |
|
42 extension. |
|
43 .PP |
|
44 Words begin at the beginning of a line and are terminated by the first whitespace. |
|
45 Lines that begin with whitespace are ignored. |
|
46 .SH OPTIONS |
|
47 .TP |
|
48 .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" |
|
49 Print help about usage and exit. |
|
50 .TP |
|
51 .BR "\-V\fP, \fB\-\-version" |
|
52 Print the version of |
|
53 .B gendict |
|
54 and exit. |
|
55 .TP |
|
56 .BR "\-c\fP, \fB\-\-copyright" |
|
57 Embeds the standard ICU copyright into the |
|
58 .IR output-file . |
|
59 .TP |
|
60 .BR "\-v\fP, \fB\-\-verbose" |
|
61 Display extra informative messages during execution. |
|
62 .TP |
|
63 .BI "\-i\fP, \fB\-\-icudatadir" " directory" |
|
64 Look for any necessary ICU data files in |
|
65 .IR directory . |
|
66 For example, the file |
|
67 .B pnames.icu |
|
68 must be located when ICU's data is not built as a shared library. |
|
69 The default ICU data directory is specified by the environment variable |
|
70 .BR ICU_DATA . |
|
71 Most configurations of ICU do not require this argument. |
|
72 .TP |
|
73 .BR "\fB\-\-uchars" |
|
74 Set the output trie type to UChar. Mutually exclusive with |
|
75 .BR \-\-bytes . |
|
76 .TP |
|
77 .BR "\fB\-\-bytes" |
|
78 Set the output trie type to Bytes. Mutually exclusive with |
|
79 .BR \-\-uchars . |
|
80 .TP |
|
81 .BI "\fB\-\-transform" " transform" |
|
82 Set the transform type. Should only be specified with |
|
83 .BR \-\-bytes . |
|
84 Currently supported transforms are: |
|
85 .BR offset\-<hex\-number> , |
|
86 which specifies an offset to subtract from all input characters. |
|
87 It should be noted that the offset transform also maps U+200D |
|
88 to 0xFF and U+200C to 0xFE, in order to offer compatibility with |
|
89 languages that require these characters. |
|
90 A transform must be specified for a bytes trie; when applied |
|
91 to the non-value characters in the |
|
92 .IR input\-file , |
|
93 it must produce output between 0x00 and 0xFF. |
|
94 .TP |
|
95 .BI " input\-file" |
|
96 The source file to read. |
|
97 .TP |
|
98 .BI " output\-file" |
|
99 The file to write the output dictionary to. |
|
100 .SH CAVEATS |
|
101 The |
|
102 .IR input-file |
|
103 is assumed to be encoded in UTF-8. |
|
104 The integers in the |
|
105 .IR input-file |
|
106 that are used as values must be made up of ASCII digits. They |
|
107 may be specified either in hex, by using a 0x prefix, or in |
|
108 decimal. |
|
109 Either |
|
110 .B \-\-bytes |
|
111 or |
|
112 .B \-\-uchars |
|
113 must be specified. |
|
114 .SH ENVIRONMENT |
|
115 .TP 10 |
|
116 .B ICU_DATA |
|
117 Specifies the directory containing ICU data. Defaults to |
|
118 .BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ . |
|
119 Some tools in ICU depend on the presence of the trailing slash. It is thus |
|
120 important to make sure that it is present if |
|
121 .B ICU_DATA |
|
122 is set. |
|
123 .SH AUTHORS |
|
124 Maxime Serrano |
|
125 .SH VERSION |
|
126 1.0 |
|
127 .SH COPYRIGHT |
|
128 Copyright (C) 2012 International Business Machines Corporation and others |
|
129 .SH SEE ALSO |
|
130 .BR http://www.icu-project.org/userguide/boundaryAnalysis.html |
|
131 |