Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.
michael@0 | 1 | .\" Hey, Emacs! This is -*-nroff-*- you know... |
michael@0 | 2 | .\" |
michael@0 | 3 | .\" uconv.1: manual page for the uconv utility. |
michael@0 | 4 | .\" |
michael@0 | 5 | .\" Copyright (C) 2000-2013 IBM, Inc. and others. |
michael@0 | 6 | .\" |
michael@0 | 7 | .\" Manual page by Yves Arrouye <yves@realnames.com>. |
michael@0 | 8 | .\" |
michael@0 | 9 | .TH UCONV 1 "2005-jul-1" "ICU MANPAGE" "ICU @VERSION@ Manual" |
michael@0 | 10 | .SH NAME |
michael@0 | 11 | .B uconv |
michael@0 | 12 | \- convert data from one encoding to another |
michael@0 | 13 | .SH SYNOPSIS |
michael@0 | 14 | .B uconv |
michael@0 | 15 | [ |
michael@0 | 16 | .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" |
michael@0 | 17 | ] |
michael@0 | 18 | [ |
michael@0 | 19 | .BI "\-V\fP, \fB\-\-version" |
michael@0 | 20 | ] |
michael@0 | 21 | [ |
michael@0 | 22 | .BI "\-s\fP, \fB\-\-silent" |
michael@0 | 23 | ] |
michael@0 | 24 | [ |
michael@0 | 25 | .BI "\-v\fP, \fB\-\-verbose" |
michael@0 | 26 | ] |
michael@0 | 27 | [ |
michael@0 | 28 | .BI "\-l\fP, \fB\-\-list" |
michael@0 | 29 | | |
michael@0 | 30 | .BI "\-l\fP, \fB\-\-list\-code" " code" |
michael@0 | 31 | | |
michael@0 | 32 | .BI "\-\-default-code" |
michael@0 | 33 | | |
michael@0 | 34 | .BI "\-L\fP, \fB\-\-list\-transliterators" |
michael@0 | 35 | ] |
michael@0 | 36 | [ |
michael@0 | 37 | .BI "\-\-canon" |
michael@0 | 38 | ] |
michael@0 | 39 | [ |
michael@0 | 40 | .BI "\-x" " transliteration" |
michael@0 | 41 | ] |
michael@0 | 42 | [ |
michael@0 | 43 | .BI "\-\-to\-callback" " callback" |
michael@0 | 44 | | |
michael@0 | 45 | .B "\-c" |
michael@0 | 46 | ] |
michael@0 | 47 | [ |
michael@0 | 48 | .BI "\-\-from\-callback" " callback" |
michael@0 | 49 | | |
michael@0 | 50 | .B "\-i" |
michael@0 | 51 | ] |
michael@0 | 52 | [ |
michael@0 | 53 | .BI "\-\-callback" " callback" |
michael@0 | 54 | ] |
michael@0 | 55 | [ |
michael@0 | 56 | .BI "\-\-fallback" |
michael@0 | 57 | | |
michael@0 | 58 | .BI "\-\-no\-fallback" |
michael@0 | 59 | ] |
michael@0 | 60 | [ |
michael@0 | 61 | .BI "\-b\fP, \fB\-\-block\-size" " size" |
michael@0 | 62 | ] |
michael@0 | 63 | [ |
michael@0 | 64 | .BI "\-f\fP, \fB\-\-from\-code" " encoding" |
michael@0 | 65 | ] |
michael@0 | 66 | [ |
michael@0 | 67 | .BI "\-t\fP, \fB\-\-to\-code" " encoding" |
michael@0 | 68 | ] |
michael@0 | 69 | [ |
michael@0 | 70 | .BI "\-\-add\-signature" |
michael@0 | 71 | ] |
michael@0 | 72 | [ |
michael@0 | 73 | .BI "\-\-remove\-signature" |
michael@0 | 74 | ] |
michael@0 | 75 | [ |
michael@0 | 76 | .BI "\-o\fP, \fB\-\-output" " file" |
michael@0 | 77 | ] |
michael@0 | 78 | [ |
michael@0 | 79 | .IR file .\|.\|. |
michael@0 | 80 | ] |
michael@0 | 81 | .SH DESCRIPTION |
michael@0 | 82 | .B uconv |
michael@0 | 83 | converts, or transcodes, each given |
michael@0 | 84 | .I file |
michael@0 | 85 | (or its standard input if no |
michael@0 | 86 | .I file |
michael@0 | 87 | is specified) from one |
michael@0 | 88 | .I encoding |
michael@0 | 89 | to another. |
michael@0 | 90 | The transcoding is done using Unicode as a pivot encoding |
michael@0 | 91 | (i.e. the data are first transcoded from their original encoding to |
michael@0 | 92 | Unicode, and then from Unicode to the destination encoding). |
michael@0 | 93 | .PP |
michael@0 | 94 | If an |
michael@0 | 95 | .I encoding |
michael@0 | 96 | is not specified or is |
michael@0 | 97 | .BR - , |
michael@0 | 98 | the default encoding is used. Thus, calling |
michael@0 | 99 | .B uconv |
michael@0 | 100 | with no |
michael@0 | 101 | .I encoding |
michael@0 | 102 | provides an easy way to validate and sanitize data files for |
michael@0 | 103 | further consumption by tools requiring data in the default encoding. |
michael@0 | 104 | .PP |
michael@0 | 105 | When calling |
michael@0 | 106 | .BR uconv , |
michael@0 | 107 | it is possible to specify callbacks that are used to handle invalid |
michael@0 | 108 | characters in the input, or characters that cannot be transcoded to |
michael@0 | 109 | the destination encoding. Some encodings, for example, offer a default |
michael@0 | 110 | substitution character that can be used to represent the occurrence of |
michael@0 | 111 | such characters in the input. Other callbacks offer a useful visual |
michael@0 | 112 | representation of the invalid data. |
michael@0 | 113 | .PP |
michael@0 | 114 | .B uconv |
michael@0 | 115 | can also run the specified |
michael@0 | 116 | .IR transliteration |
michael@0 | 117 | on the transcoded data, |
michael@0 | 118 | in which case transliteration will happen as an intermediate step, |
michael@0 | 119 | after the data have been transcoded to Unicode. |
michael@0 | 120 | The |
michael@0 | 121 | .I transliteration |
michael@0 | 122 | can be either a list of semicolon-separated transliterator names, |
michael@0 | 123 | or an arbitrarily complex set of rules in the ICU transliteration |
michael@0 | 124 | rules format. |
michael@0 | 125 | .PP |
michael@0 | 126 | For transcoding purposes, |
michael@0 | 127 | .B uconv |
michael@0 | 128 | options are compatible with those of |
michael@0 | 129 | .BR iconv (1), |
michael@0 | 130 | making it easy to replace it in scripts. It is not necessarily the case, |
michael@0 | 131 | however, that the encoding names used by |
michael@0 | 132 | .B uconv |
michael@0 | 133 | and ICU are the same as the ones used by |
michael@0 | 134 | .BR iconv (1). |
michael@0 | 135 | Also, options that provide informational data, such as the |
michael@0 | 136 | .B \-l\fP, \fB\-\-list |
michael@0 | 137 | one offered by some |
michael@0 | 138 | .BR iconv (1) |
michael@0 | 139 | variants such as GNU's, produce data in a slightly different and |
michael@0 | 140 | easier to parse format. |
michael@0 | 141 | .SH OPTIONS |
michael@0 | 142 | .TP |
michael@0 | 143 | .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" |
michael@0 | 144 | Print help about usage and exit. |
michael@0 | 145 | .TP |
michael@0 | 146 | .BR "\-V\fP, \fB\-\-version" |
michael@0 | 147 | Print the version of |
michael@0 | 148 | .B uconv |
michael@0 | 149 | and exit. |
michael@0 | 150 | .TP |
michael@0 | 151 | .BI "\-s\fP, \fB\-\-silent" |
michael@0 | 152 | Suppress messages during execution. |
michael@0 | 153 | .TP |
michael@0 | 154 | .BI "\-v\fP, \fB\-\-verbose" |
michael@0 | 155 | Display extra informative messages during execution. |
michael@0 | 156 | .TP |
michael@0 | 157 | .BI "\-l\fP, \fB\-\-list" |
michael@0 | 158 | List all the available encodings and exit. |
michael@0 | 159 | .TP |
michael@0 | 160 | .BI "\-l\fP, \fB\-\-list\-code" " code" |
michael@0 | 161 | List only the |
michael@0 | 162 | .I code |
michael@0 | 163 | encoding and exit. If |
michael@0 | 164 | .I code |
michael@0 | 165 | is not a proper encoding, exit with an error. |
michael@0 | 166 | .TP |
michael@0 | 167 | .BI "\-\-default-code" |
michael@0 | 168 | List only the name of the default encoding and exit. |
michael@0 | 169 | .TP |
michael@0 | 170 | .BI "\-L\fP, \fB\-\-list\-transliterators" |
michael@0 | 171 | List all the available transliterators and exit. |
michael@0 | 172 | .TP |
michael@0 | 173 | .BI "\-\-canon" |
michael@0 | 174 | If used with |
michael@0 | 175 | .BI "\-l\fP, \fB\-\-list" |
michael@0 | 176 | or |
michael@0 | 177 | .BR "\-\-default-code" , |
michael@0 | 178 | the list of encodings is produced in a format compatible with |
michael@0 | 179 | .BR convrtrs.txt (5). |
michael@0 | 180 | If used with |
michael@0 | 181 | .BR "\-L\fP, \fB\-\-list\-transliterators" , |
michael@0 | 182 | print only one transliterator name per line. |
michael@0 | 183 | .TP |
michael@0 | 184 | .BI "\-x" " transliteration" |
michael@0 | 185 | Run the given |
michael@0 | 186 | .IR transliteration |
michael@0 | 187 | on the transcoded Unicode data, |
michael@0 | 188 | and use the transliterated data as input for the transcoding to |
michael@0 | 189 | the destination encoding. |
michael@0 | 190 | .TP |
michael@0 | 191 | .BI "\-\-to\-callback" " callback" |
michael@0 | 192 | Use |
michael@0 | 193 | .I callback |
michael@0 | 194 | to handle characters that cannot be transcoded to the destination |
michael@0 | 195 | encoding. See section |
michael@0 | 196 | .B CALLBACKS |
michael@0 | 197 | for details on valid callbacks. |
michael@0 | 198 | .TP |
michael@0 | 199 | .B "\-c" |
michael@0 | 200 | Omit invalid characters from the output. |
michael@0 | 201 | Same as |
michael@0 | 202 | .BR "\-\-to\-callback skip" . |
michael@0 | 203 | .TP |
michael@0 | 204 | .BI "\-\-from\-callback" " callback" |
michael@0 | 205 | Use |
michael@0 | 206 | .I callback |
michael@0 | 207 | to handle characters that cannot be transcoded from the original |
michael@0 | 208 | encoding. See section |
michael@0 | 209 | .B CALLBACKS |
michael@0 | 210 | for details on valid callbacks. |
michael@0 | 211 | .TP |
michael@0 | 212 | .B "\-i" |
michael@0 | 213 | Ignore invalid sequences in the input. |
michael@0 | 214 | Same as |
michael@0 | 215 | .BR "\-\-from\-callback skip" . |
michael@0 | 216 | .TP |
michael@0 | 217 | .BI "\-\-callback" " callback" |
michael@0 | 218 | Use |
michael@0 | 219 | .I callback |
michael@0 | 220 | to handle both characters that cannot be transcoded from the original |
michael@0 | 221 | encoding and characters that cannot be transcoded to the destination |
michael@0 | 222 | encoding. See section |
michael@0 | 223 | .B CALLBACKS |
michael@0 | 224 | for details on valid callbacks. |
michael@0 | 225 | .TP |
michael@0 | 226 | .BI "\-\-fallback" |
michael@0 | 227 | Use the fallback mapping when transcoding from |
michael@0 | 228 | Unicode to the destination encoding. |
michael@0 | 229 | .TP |
michael@0 | 230 | .BI "\-\-no\-fallback" |
michael@0 | 231 | Do not use the fallback mapping when transcoding from Unicode to the |
michael@0 | 232 | destination encoding. |
michael@0 | 233 | This is the default. |
michael@0 | 234 | .TP |
michael@0 | 235 | .BI "\-b\fP, \fB\-\-block\-size" " size" |
michael@0 | 236 | Read input in blocks of |
michael@0 | 237 | .I size |
michael@0 | 238 | bytes at a time. The default block size is |
michael@0 | 239 | 4096. |
michael@0 | 240 | .TP |
michael@0 | 241 | .BI "\-f\fP, \fB\-\-from\-code" " encoding" |
michael@0 | 242 | Set the original encoding of the data to |
michael@0 | 243 | .IR encoding . |
michael@0 | 244 | .TP |
michael@0 | 245 | .BI "\-t\fP, \fB\-\-to\-code" " encoding" |
michael@0 | 246 | Transcode the data to |
michael@0 | 247 | .IR encoding . |
michael@0 | 248 | .TP |
michael@0 | 249 | .BI "\-\-add\-signature" |
michael@0 | 250 | Add a U+FEFF Unicode signature character (BOM) if the output charset |
michael@0 | 251 | supports it and does not add one anyway. |
michael@0 | 252 | .TP |
michael@0 | 253 | .BI "\-\-remove\-signature" |
michael@0 | 254 | Remove a U+FEFF Unicode signature character (BOM). |
michael@0 | 255 | .TP |
michael@0 | 256 | .BI "\-o\fP, \fB\-\-output" " file" |
michael@0 | 257 | Write the transcoded data to |
michael@0 | 258 | .IR file . |
michael@0 | 259 | .SH CALLBACKS |
michael@0 | 260 | .B uconv |
michael@0 | 261 | supports specifying callbacks to handle invalid data. Callbacks can be |
michael@0 | 262 | set for both directions of transcoding: from the original encoding to |
michael@0 | 263 | Unicode, with the |
michael@0 | 264 | .BR "\-\-from\-callback" |
michael@0 | 265 | option, and from Unicode to the destination encoding, with the |
michael@0 | 266 | .BR "\-\-to\-callback" |
michael@0 | 267 | option. |
michael@0 | 268 | .PP |
michael@0 | 269 | The following is a list of valid |
michael@0 | 270 | .I callback |
michael@0 | 271 | names, along with a description of their behavior. The list of |
michael@0 | 272 | callbacks actually supported by |
michael@0 | 273 | .B uconv |
michael@0 | 274 | is displayed when it is called with |
michael@0 | 275 | .BR "\-h\fP, \fB\-\-help" . |
michael@0 | 276 | .PP |
michael@0 | 277 | .TP \w'\fBescape-unicode'u+3n |
michael@0 | 278 | .B substitute |
michael@0 | 279 | Write the encoding's substitute sequence, or the Unicode |
michael@0 | 280 | replacement character |
michael@0 | 281 | .B U+FFFD |
michael@0 | 282 | when transcoding to Unicode. |
michael@0 | 283 | .TP |
michael@0 | 284 | .B skip |
michael@0 | 285 | Ignore the invalid data. |
michael@0 | 286 | .TP |
michael@0 | 287 | .B stop |
michael@0 | 288 | Stop with an error when encountering invalid data. |
michael@0 | 289 | This is the default callback. |
michael@0 | 290 | .TP |
michael@0 | 291 | .B escape |
michael@0 | 292 | Same as |
michael@0 | 293 | .BR escape-icu . |
michael@0 | 294 | .TP |
michael@0 | 295 | .B escape-icu |
michael@0 | 296 | Replace the missing characters with a string of the format |
michael@0 | 297 | .BR %U\fIhhhh\fP |
michael@0 | 298 | for plane 0 characters, and |
michael@0 | 299 | .BR %U\fIhhhh\fP%U\fIhhhh\fP |
michael@0 | 300 | for planes 1 and above characters, |
michael@0 | 301 | where |
michael@0 | 302 | .I hhhh |
michael@0 | 303 | is the hexadecimal value of one of the UTF-16 code units representing the |
michael@0 | 304 | character. Characters from planes 1 and above are written as a pair of |
michael@0 | 305 | UTF-16 surrogate code units. |
michael@0 | 306 | .TP |
michael@0 | 307 | .B escape-java |
michael@0 | 308 | Replace the missing characters with a string of the format |
michael@0 | 309 | .BR \eu\fIhhhh\fP |
michael@0 | 310 | for plane 0 characters, and |
michael@0 | 311 | .BR \eu\fIhhhh\fP\eu\fIhhhh\fP |
michael@0 | 312 | for planes 1 and above characters, |
michael@0 | 313 | where |
michael@0 | 314 | .I hhhh |
michael@0 | 315 | is the hexadecimal value of one of the UTF-16 code units representing the |
michael@0 | 316 | character. Characters from planes 1 and above are written as a pair of |
michael@0 | 317 | UTF-16 surrogate code units. |
michael@0 | 318 | .TP |
michael@0 | 319 | .B escape-c |
michael@0 | 320 | Replace the missing characters with a string of the format |
michael@0 | 321 | .BR \eu\fIhhhh\fP |
michael@0 | 322 | for plane 0 characters, and |
michael@0 | 323 | .BR \eU\fIhhhhhhhh\fP |
michael@0 | 324 | for planes 1 and above characters, |
michael@0 | 325 | where |
michael@0 | 326 | .I hhhh |
michael@0 | 327 | and |
michael@0 | 328 | .I hhhhhhhh |
michael@0 | 329 | are the hexadecimal values of the Unicode codepoint. |
michael@0 | 330 | .TP |
michael@0 | 331 | .B escape-xml |
michael@0 | 332 | Same as |
michael@0 | 333 | .BR escape-xml-hex . |
michael@0 | 334 | .TP |
michael@0 | 335 | .B escape-xml-hex |
michael@0 | 336 | Replace the missing characters with a string of the format |
michael@0 | 337 | .BR &#x\fIhhhh\fP; , |
michael@0 | 338 | where |
michael@0 | 339 | .I hhhh |
michael@0 | 340 | is the hexadecimal value of the Unicode codepoint. |
michael@0 | 341 | .TP |
michael@0 | 342 | .B escape-xml-dec |
michael@0 | 343 | Replace the missing characters with a string of the format |
michael@0 | 344 | .BR &#\fInnnn\fP; , |
michael@0 | 345 | where |
michael@0 | 346 | .I nnnn |
michael@0 | 347 | is the decimal value of the Unicode codepoint. |
michael@0 | 348 | .TP |
michael@0 | 349 | .B escape-unicode |
michael@0 | 350 | Replace the missing characters with a string of the format |
michael@0 | 351 | .BR {U+\fIhhhh\fP} , |
michael@0 | 352 | where |
michael@0 | 353 | .I hhhh |
michael@0 | 354 | is the hexadecimal value of the Unicode codepoint. |
michael@0 | 355 | That hexadecimal string is of variable length and can use from 4 to |
michael@0 | 356 | 6 digits. |
michael@0 | 357 | This is the format universally used to denote a Unicode codepoint in |
michael@0 | 358 | the literature, delimited by curly braces for easy recognition of those |
michael@0 | 359 | substitutions in the output. |
michael@0 | 360 | .SH EXAMPLES |
michael@0 | 361 | Convert data from a given |
michael@0 | 362 | .I encoding |
michael@0 | 363 | to the platform encoding: |
michael@0 | 364 | |
michael@0 | 365 | .RS 4 |
michael@0 | 366 | .B \fR$ \fPuconv \-f \fIencoding\fP |
michael@0 | 367 | .RE |
michael@0 | 368 | .PP |
michael@0 | 369 | Check if a |
michael@0 | 370 | .I file |
michael@0 | 371 | contains valid data for a given |
michael@0 | 372 | .IR encoding : |
michael@0 | 373 | |
michael@0 | 374 | .RS 4 |
michael@0 | 375 | .B \fR$ \fPuconv \-f \fIencoding\fP \-c \fIfile\fP >/dev/null |
michael@0 | 376 | .RE |
michael@0 | 377 | .PP |
michael@0 | 378 | Convert a UTF-8 |
michael@0 | 379 | .I file |
michael@0 | 380 | to a given |
michael@0 | 381 | .I encoding |
michael@0 | 382 | and ensure that the resulting text is good for any version of HTML: |
michael@0 | 383 | |
michael@0 | 384 | .RS 4 |
michael@0 | 385 | .B \fR$ \fPuconv \-f utf-8 \-t \fIencoding\fP \e |
michael@0 | 386 | .br |
michael@0 | 387 | .B " \-\-callback escape-xml-dec \fIfile\fP" |
michael@0 | 388 | .RE |
michael@0 | 389 | .PP |
michael@0 | 390 | Display the names of the Unicode code points in a UTF-8 file: |
michael@0 | 391 | |
michael@0 | 392 | .RS 4 |
michael@0 | 393 | .B \fR$ \fPuconv \-f utf-8 \-x any-name \fIfile\fP |
michael@0 | 394 | .RE |
michael@0 | 395 | .PP |
michael@0 | 396 | Print the name of a Unicode code point whose value is known (\fBU+30AB\fP |
michael@0 | 397 | in this example): |
michael@0 | 398 | |
michael@0 | 399 | .RS 4 |
michael@0 | 400 | .B \fR$ \fPecho '\eu30ab' | uconv \-x 'hex-any; any-name'; echo |
michael@0 | 401 | .br |
michael@0 | 402 | {KATAKANA LETTER KA}{LINE FEED} |
michael@0 | 403 | .br |
michael@0 | 404 | $ |
michael@0 | 405 | .RE |
michael@0 | 406 | |
michael@0 | 407 | (The names are delimited by curly braces. |
michael@0 | 408 | The name of the line terminator is also displayed.) |
michael@0 | 409 | .PP |
michael@0 | 410 | Normalize UTF-8 data using Unicode NFKC, remove all control characters, |
michael@0 | 411 | and map Katakana to Hiragana: |
michael@0 | 412 | |
michael@0 | 413 | .RS 4 |
michael@0 | 414 | .B \fR$ \fPuconv \-f utf-8 \-t utf-8 \e |
michael@0 | 415 | .br |
michael@0 | 416 | .B " \-x '::nfkc; [:Cc:] >; ::katakana-hiragana;'" |
michael@0 | 417 | .SH CAVEATS AND BUGS |
michael@0 | 418 | .B uconv |
michael@0 | 419 | reports errors as occurring at the first invalid byte |
michael@0 | 420 | encountered. This may be confusing to users of GNU |
michael@0 | 421 | .BR iconv (1), |
michael@0 | 422 | which reports errors as occurring at the first byte of an invalid |
michael@0 | 423 | sequence. For multi-byte character sets or encodings, this means that |
michael@0 | 424 | .BR uconv |
michael@0 | 425 | error positions may be at a later offset in the input stream than |
michael@0 | 426 | would be the case with GNU |
michael@0 | 427 | .BR iconv (1). |
michael@0 | 428 | .PP |
michael@0 | 429 | The reporting of error positions when a transliterator is used may be |
michael@0 | 430 | inaccurate or unavailable, in which case |
michael@0 | 431 | .BR uconv |
michael@0 | 432 | will report the offset in the output stream at which the error |
michael@0 | 433 | occurred. |
michael@0 | 434 | .SH AUTHORS |
michael@0 | 435 | Jonas Utterstroem |
michael@0 | 436 | .br |
michael@0 | 437 | Yves Arrouye |
michael@0 | 438 | .SH VERSION |
michael@0 | 439 | @VERSION@ |
michael@0 | 440 | .SH COPYRIGHT |
michael@0 | 441 | Copyright (C) 2000-2005 IBM, Inc. and others. |
michael@0 | 442 | .SH SEE ALSO |
michael@0 | 443 | .BR iconv (1) |