Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | .\" Hey, Emacs! This is -*-nroff-*- you know... |
michael@0 | 2 | .\" |
michael@0 | 3 | .\" gendict.1: manual page for the gendict utility |
michael@0 | 4 | .\" |
michael@0 | 5 | .\" Copyright (C) 2012 International Business Machines Corporation and others |
michael@0 | 6 | .\" |
michael@0 | 7 | .TH GENDICT 1 "1 June 2012" "ICU MANPAGE" "ICU @VERSION@ Manual" |
michael@0 | 8 | .SH NAME |
michael@0 | 9 | .B gendict |
michael@0 | 10 | \- Compiles word list into ICU string trie dictionary |
michael@0 | 11 | .SH SYNOPSIS |
michael@0 | 12 | .B gendict |
michael@0 | 13 | [ |
michael@0 | 14 | .BR "\fB\-\-uchars" |
michael@0 | 15 | | |
michael@0 | 16 | .BR "\fB\-\-bytes" |
michael@0 | 17 | .BI "\fB\-\-transform" " transform" |
michael@0 | 18 | ] |
michael@0 | 19 | [ |
michael@0 | 20 | .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" |
michael@0 | 21 | ] |
michael@0 | 22 | [ |
michael@0 | 23 | .BR "\-V\fP, \fB\-\-version" |
michael@0 | 24 | ] |
michael@0 | 25 | [ |
michael@0 | 26 | .BR "\-c\fP, \fB\-\-copyright" |
michael@0 | 27 | ] |
michael@0 | 28 | [ |
michael@0 | 29 | .BR "\-v\fP, \fB\-\-verbose" |
michael@0 | 30 | ] |
michael@0 | 31 | [ |
michael@0 | 32 | .BI "\-i\fP, \fB\-\-icudatadir" " directory" |
michael@0 | 33 | ] |
michael@0 | 34 | .IR " input-file" |
michael@0 | 35 | .IR " output\-file" |
michael@0 | 36 | .SH DESCRIPTION |
michael@0 | 37 | .B gendict |
michael@0 | 38 | reads the word list from |
michael@0 | 39 | .I input-file |
michael@0 | 40 | and creates a string trie dictionary file. Normally this data file has the |
michael@0 | 41 | .B .dict |
michael@0 | 42 | extension. |
michael@0 | 43 | .PP |
michael@0 | 44 | Words begin at the beginning of a line and are terminated by the first whitespace. |
michael@0 | 45 | Lines that begin with whitespace are ignored. |
michael@0 | 46 | .SH OPTIONS |
michael@0 | 47 | .TP |
michael@0 | 48 | .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" |
michael@0 | 49 | Print help about usage and exit. |
michael@0 | 50 | .TP |
michael@0 | 51 | .BR "\-V\fP, \fB\-\-version" |
michael@0 | 52 | Print the version of |
michael@0 | 53 | .B gendict |
michael@0 | 54 | and exit. |
michael@0 | 55 | .TP |
michael@0 | 56 | .BR "\-c\fP, \fB\-\-copyright" |
michael@0 | 57 | Embeds the standard ICU copyright into the |
michael@0 | 58 | .IR output-file . |
michael@0 | 59 | .TP |
michael@0 | 60 | .BR "\-v\fP, \fB\-\-verbose" |
michael@0 | 61 | Display extra informative messages during execution. |
michael@0 | 62 | .TP |
michael@0 | 63 | .BI "\-i\fP, \fB\-\-icudatadir" " directory" |
michael@0 | 64 | Look for any necessary ICU data files in |
michael@0 | 65 | .IR directory . |
michael@0 | 66 | For example, the file |
michael@0 | 67 | .B pnames.icu |
michael@0 | 68 | must be located when ICU's data is not built as a shared library. |
michael@0 | 69 | The default ICU data directory is specified by the environment variable |
michael@0 | 70 | .BR ICU_DATA . |
michael@0 | 71 | Most configurations of ICU do not require this argument. |
michael@0 | 72 | .TP |
michael@0 | 73 | .BR "\fB\-\-uchars" |
michael@0 | 74 | Set the output trie type to UChar. Mutually exclusive with |
michael@0 | 75 | .BR \-\-bytes . |
michael@0 | 76 | .TP |
michael@0 | 77 | .BR "\fB\-\-bytes" |
michael@0 | 78 | Set the output trie type to Bytes. Mutually exclusive with |
michael@0 | 79 | .BR \-\-uchars . |
michael@0 | 80 | .TP |
michael@0 | 81 | .BI "\fB\-\-transform" " transform" |
michael@0 | 82 | Set the transform type. Should only be specified with |
michael@0 | 83 | .BR \-\-bytes . |
michael@0 | 84 | Currently supported transforms are: |
michael@0 | 85 | .BR offset\-<hex\-number> , |
michael@0 | 86 | which specifies an offset to subtract from all input characters. |
michael@0 | 87 | It should be noted that the offset transform also maps U+200D |
michael@0 | 88 | to 0xFF and U+200C to 0xFE, in order to offer compatibility with |
michael@0 | 89 | languages that require these characters. |
michael@0 | 90 | A transform must be specified for a bytes trie, and when applied |
michael@0 | 91 | to the non-value characters in the |
michael@0 | 92 | .IR input-file |
michael@0 | 93 | must produce output between 0x00 and 0xFF. |
michael@0 | 94 | .TP |
michael@0 | 95 | .BI " input\-file" |
michael@0 | 96 | The source file to read. |
michael@0 | 97 | .TP |
michael@0 | 98 | .BI " output\-file" |
michael@0 | 99 | The file to write the output dictionary to. |
michael@0 | 100 | .SH CAVEATS |
michael@0 | 101 | The |
michael@0 | 102 | .IR input-file |
michael@0 | 103 | is assumed to be encoded in UTF-8. |
michael@0 | 104 | The integers in the |
michael@0 | 105 | .IR input-file |
michael@0 | 106 | that are used as values must be made up of ASCII digits. They |
michael@0 | 107 | may be specified either in hex, by using a 0x prefix, or in |
michael@0 | 108 | decimal. |
michael@0 | 109 | Either |
michael@0 | 110 | .B \-\-bytes |
michael@0 | 111 | or |
michael@0 | 112 | .B \-\-uchars |
michael@0 | 113 | must be specified. |
michael@0 | 114 | .SH ENVIRONMENT |
michael@0 | 115 | .TP 10 |
michael@0 | 116 | .B ICU_DATA |
michael@0 | 117 | Specifies the directory containing ICU data. Defaults to |
michael@0 | 118 | .BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ . |
michael@0 | 119 | Some tools in ICU depend on the presence of the trailing slash. It is thus |
michael@0 | 120 | important to make sure that it is present if |
michael@0 | 121 | .B ICU_DATA |
michael@0 | 122 | is set. |
michael@0 | 123 | .SH AUTHORS |
michael@0 | 124 | Maxime Serrano |
michael@0 | 125 | .SH VERSION |
michael@0 | 126 | 1.0 |
michael@0 | 127 | .SH COPYRIGHT |
michael@0 | 128 | Copyright (C) 2012 International Business Machines Corporation and others |
michael@0 | 129 | .SH SEE ALSO |
michael@0 | 130 | .BR http://www.icu-project.org/userguide/boundaryAnalysis.html |
michael@0 | 131 |