intl/icu/source/tools/gendict/gendict.1.in

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/gendict/gendict.1.in	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,131 @@
     1.4 +.\" Hey, Emacs! This is -*-nroff-*- you know...
     1.5 +.\"
     1.6 +.\" gendict.1: manual page for the gendict utility
     1.7 +.\"
     1.8 +.\" Copyright (C) 2012 International Business Machines Corporation and others
     1.9 +.\"
    1.10 +.TH GENDICT 1 "1 June 2012" "ICU MANPAGE" "ICU @VERSION@ Manual"
    1.11 +.SH NAME
    1.12 +.B gendict
    1.13 +\- Compiles a word list into an ICU string trie dictionary
    1.14 +.SH SYNOPSIS
    1.15 +.B gendict
    1.16 +[
    1.17 +.BR "\fB\-\-uchars"
    1.18 +|
    1.19 +.BR "\fB\-\-bytes"
    1.20 +.BI "\fB\-\-transform" " transform"
    1.21 +]
    1.22 +[
    1.23 +.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
    1.24 +]
    1.25 +[
    1.26 +.BR "\-V\fP, \fB\-\-version"
    1.27 +]
    1.28 +[
    1.29 +.BR "\-c\fP, \fB\-\-copyright"
    1.30 +]
    1.31 +[
    1.32 +.BR "\-v\fP, \fB\-\-verbose"
    1.33 +]
    1.34 +[
    1.35 +.BI "\-i\fP, \fB\-\-icudatadir" " directory"
    1.36 +]
    1.37 +.IR " input-file"
    1.38 +.IR " output\-file"
    1.39 +.SH DESCRIPTION
    1.40 +.B gendict
    1.41 +reads the word list from
    1.42 +.I input-file
    1.43 +and creates a string trie dictionary file. Normally this data file has the 
    1.44 +.B .dict
    1.45 +extension.
    1.46 +.PP
    1.47 +Words begin at the beginning of a line and are terminated by the first whitespace.
    1.48 +Lines that begin with whitespace are ignored.
    1.49 +.SH OPTIONS
    1.50 +.TP
    1.51 +.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
    1.52 +Print help about usage and exit.
    1.53 +.TP
    1.54 +.BR "\-V\fP, \fB\-\-version"
    1.55 +Print the version of
    1.56 +.B gendict
    1.57 +and exit.
    1.58 +.TP
    1.59 +.BR "\-c\fP, \fB\-\-copyright"
    1.60 +Embeds the standard ICU copyright into the
    1.61 +.IR output-file .
    1.62 +.TP
    1.63 +.BR "\-v\fP, \fB\-\-verbose"
    1.64 +Display extra informative messages during execution.
    1.65 +.TP
    1.66 +.BI "\-i\fP, \fB\-\-icudatadir" " directory"
    1.67 +Look for any necessary ICU data files in
    1.68 +.IR directory .
    1.69 +For example, the file
    1.70 +.B pnames.icu
    1.71 +must be located when ICU's data is not built as a shared library.
    1.72 +The default ICU data directory is specified by the environment variable
    1.73 +.BR ICU_DATA .
    1.74 +Most configurations of ICU do not require this argument.
    1.75 +.TP
    1.76 +.BR "\fB\-\-uchars"
    1.77 +Set the output trie type to UChar. Mutually exclusive with
    1.78 +.BR \-\-bytes .
    1.79 +.TP
    1.80 +.BR "\fB\-\-bytes"
    1.81 +Set the output trie type to Bytes. Mutually exclusive with 
    1.82 +.BR \-\-uchars .
    1.83 +.TP
    1.84 +.BI "\fB\-\-transform" " transform"
    1.85 +Set the transform type. Should only be specified with
    1.86 +.BR \-\-bytes .
    1.87 +Currently supported transforms are:
    1.88 +.BR offset\-<hex\-number> ,
    1.89 +which specifies an offset to subtract from all input characters.
    1.90 +Note that the offset transform also maps U+200D
    1.91 +to 0xFF and U+200C to 0xFE, for compatibility with
    1.92 +languages that require these characters.
    1.93 +A transform must be specified for a bytes trie, and when applied 
    1.94 +to the non-value characters in the 
    1.95 +.IR input-file
    1.96 +must produce output between 0x00 and 0xFF.
    1.97 +.TP
    1.98 +.BI " input\-file"
    1.99 +The source file to read.
   1.100 +.TP
   1.101 +.BI " output\-file"
   1.102 +The file to write the output dictionary to.
   1.103 +.SH CAVEATS
   1.104 +The 
   1.105 +.IR input-file
   1.106 +is assumed to be encoded in UTF-8.
   1.107 +The integers in the 
   1.108 +.IR input-file 
   1.109 +that are used as values must be made up of ASCII digits. They 
   1.110 +may be specified either in hex, by using a 0x prefix, or in 
   1.111 +decimal.
   1.112 +Either
   1.113 +.B \-\-bytes
   1.114 +or
   1.115 +.B \-\-uchars
   1.116 +must be specified.
   1.117 +.SH ENVIRONMENT
   1.118 +.TP 10
   1.119 +.B ICU_DATA
   1.120 +Specifies the directory containing ICU data. Defaults to
   1.121 +.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
   1.122 +Some tools in ICU depend on the presence of the trailing slash. It is thus
   1.123 +important to make sure that it is present if
   1.124 +.B ICU_DATA
   1.125 +is set.
   1.126 +.SH AUTHORS
   1.127 +Maxime Serrano
   1.128 +.SH VERSION
   1.129 +1.0
   1.130 +.SH COPYRIGHT
   1.131 +Copyright (C) 2012 International Business Machines Corporation and others
   1.132 +.SH SEE ALSO
   1.133 +.BR http://www.icu-project.org/userguide/boundaryAnalysis.html
   1.134 +

mercurial