|
1 /* |
|
2 *************************************************************************** |
|
3 * Copyright (C) 2008-2013, International Business Machines Corporation |
|
4 * and others. All Rights Reserved. |
|
5 *************************************************************************** |
|
6 * file name: uspoof.h |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2008Feb13 |
|
12 * created by: Andy Heninger |
|
13 * |
|
14 * Unicode Spoof Detection |
|
15 */ |
|
16 |
|
17 #ifndef USPOOF_H |
|
18 #define USPOOF_H |
|
19 |
|
20 #include "unicode/utypes.h" |
|
21 #include "unicode/uset.h" |
|
22 #include "unicode/parseerr.h" |
|
23 #include "unicode/localpointer.h" |
|
24 |
|
25 #if !UCONFIG_NO_NORMALIZATION |
|
26 |
|
27 |
|
28 #if U_SHOW_CPLUSPLUS_API |
|
29 #include "unicode/unistr.h" |
|
30 #include "unicode/uniset.h" |
|
31 #endif |
|
32 |
|
33 |
|
34 /** |
|
35 * \file |
|
36 * \brief Unicode Security and Spoofing Detection, C API. |
|
37 * |
|
38 * These functions are intended to check strings, typically |
|
39 * identifiers of some type, such as URLs, for the presence of |
|
40 * characters that are likely to be visually confusing - |
|
41 * for cases where the displayed form of an identifier may |
|
42 * not be what it appears to be. |
|
43 * |
|
44 * Unicode Technical Report #36, http://unicode.org/reports/tr36, and |
|
45 * Unicode Technical Standard #39, http://unicode.org/reports/tr39 |
|
46 * "Unicode security considerations", give more background on |
|
47 * security and spoofing issues with Unicode identifiers. |
|
48 * The tests and checks provided by this module implement the recommendations |
|
49 * from those Unicode documents. |
|
50 * |
|
51 * The tests available on identifiers fall into two general categories: |
|
52 * -# Single identifier tests. Check whether an identifier is |
|
53 * potentially confusable with any other string, or is suspicious |
|
54 * for other reasons. |
|
55 * -# Two identifier tests. Check whether two specific identifiers are confusable. |
|
56 * This does not consider whether either of the strings is potentially |
|
57 * confusable with any string other than the exact one specified. |
|
58 * |
|
59 * The steps to perform confusability testing are |
|
60 * -# Open a USpoofChecker. |
|
61 * -# Configure the USpoofChecker for the desired set of tests. The tests that will |
|
62 * be performed are specified by a set of USpoofChecks flags. |
|
63 * -# Perform the checks using the pre-configured USpoofChecker. The results indicate |
|
64 * which (if any) of the selected tests have identified possible problems with the identifier. |
|
65 * Results are reported as a set of USpoofChecks flags; this mirrors the form in which |
|
66 * the set of tests to perform was originally specified to the USpoofChecker. |
|
67 * |
|
68 * A USpoofChecker may be used repeatedly to perform checks on any number of identifiers. |
|
69 * |
|
70 * Thread Safety: The test functions for checking a single identifier, or for testing |
|
71 * whether two identifiers are possibly confusable, are thread safe. |
|
72 * They may be called concurrently, from multiple threads, using the same USpoofChecker instance. |
|
73 * |
|
74 * More generally, the standard ICU thread safety rules apply: functions that take a |
|
75 * const USpoofChecker parameter are thread safe. Those that take a non-const |
|
76 * USpoofChecker are not thread safe. |
|
77 * |
|
78 * |
|
79 * Descriptions of the available checks. |
|
80 * |
|
81 * When testing whether pairs of identifiers are confusable, with the uspoof_areConfusable() |
|
82 * family of functions, the relevant tests are |
|
83 * |
|
84 * -# USPOOF_SINGLE_SCRIPT_CONFUSABLE: All of the characters from the two identifiers are |
|
85 * from a single script, and the two identifiers are visually confusable. |
|
86 * -# USPOOF_MIXED_SCRIPT_CONFUSABLE: At least one of the identifiers contains characters |
|
87 * from more than one script, and the two identifiers are visually confusable. |
|
88 * -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: Each of the two identifiers is of a single script, but |
|
89 * the two identifiers are from different scripts, and they are visually confusable. |
|
90 * |
|
91 * The safest approach is to enable all three of these checks as a group. |
|
92 * |
|
93 * USPOOF_ANY_CASE is a modifier for the above tests. If the identifiers being checked can |
|
94 * be of mixed case and are used in a case-sensitive manner, this option should be specified. |
|
95 * |
|
96 * If the identifiers being checked are used in a case-insensitive manner, and if they are |
|
97 * displayed to users in lower-case form only, the USPOOF_ANY_CASE option should not be |
|
98 * specified. Confusability issues involving upper case letters will not be reported. |
|
99 * |
|
100 * When performing tests on a single identifier, with the uspoof_check() family of functions, |
|
101 * the relevant tests are: |
|
102 * |
|
103 * -# USPOOF_MIXED_SCRIPT_CONFUSABLE: the identifier contains characters from multiple |
|
104 * scripts, and there exists an identifier of a single script that is visually confusable. |
|
105 * -# USPOOF_WHOLE_SCRIPT_CONFUSABLE: the identifier consists of characters from a single |
|
106 * script, and there exists a visually confusable identifier. |
|
107 * The visually confusable identifier also consists of characters from a single script, |
|
108 * but not the same script as the identifier being checked. |
|
109 * -# USPOOF_ANY_CASE: modifies the mixed script and whole script confusables tests. If |
|
110 * specified, the checks will consider confusable characters of any case. If this flag is not |
|
111 * set, the test is performed assuming case folded identifiers. |
|
112 * -# USPOOF_SINGLE_SCRIPT: check that the identifier contains only characters from a |
|
113 * single script. (Characters from the 'common' and 'inherited' scripts are ignored.) |
|
114 * This is not a test for confusable identifiers. |
|
115 * -# USPOOF_INVISIBLE: check an identifier for the presence of invisible characters, |
|
116 * such as zero-width spaces, or character sequences that are |
|
117 * likely not to display, such as multiple occurrences of the same |
|
118 * non-spacing mark. This check does not test the input string as a whole |
|
119 * for conformance to any particular syntax for identifiers. |
|
120 * -# USPOOF_CHAR_LIMIT: check that an identifier contains only characters from a specified set |
|
121 * of acceptable characters. See uspoof_setAllowedChars() and |
|
122 * uspoof_setAllowedLocales(). |
|
123 * |
|
124 * Note on Scripts: |
|
125 * Characters from the Unicode Scripts "Common" and "Inherited" are ignored when considering |
|
126 * the script of an identifier. Common characters include digits and symbols that |
|
127 * are normally used with text from more than one script. |
|
128 * |
|
129 * Identifier Skeletons: A skeleton is a transformation of an identifier, such that |
|
130 * all identifiers that are confusable with each other have the same skeleton. |
|
131 * Using skeletons, it is possible to build a dictionary data structure for |
|
132 * a set of identifiers, and then quickly test whether a new identifier is |
|
133 * confusable with an identifier already in the set. The uspoof_getSkeleton() |
|
134 * family of functions will produce the skeleton from an identifier. |
|
135 * |
|
136 * Note that skeletons are not guaranteed to be stable between versions |
|
137 * of Unicode or ICU, so an application should not rely on creating a permanent, |
|
138 * or difficult to update, database of skeletons. Instabilities result from |
|
139 * identifying new pairs or sequences of characters that are visually |
|
140 * confusable, and thus must be mapped to the same skeleton character(s). |
|
141 * |
|
142 */ |
|
143 |
|
144 struct USpoofChecker; |
|
145 typedef struct USpoofChecker USpoofChecker; /**< C typedef for the forward-declared struct USpoofChecker. */ |
|
146 |
|
147 /** |
|
148 * Enum for the kinds of checks that USpoofChecker can perform. |
|
149 * These enum values are used both to select the set of checks that |
|
150 * will be performed, and to report results from the check function. |
|
151 * |
|
152 * @stable ICU 4.2 |
|
153 */ |
|
154 typedef enum USpoofChecks { |
|
155 /** Single script confusable test. |
|
156 * When testing whether two identifiers are confusable, report that they are if |
|
157 * both are from the same script and they are visually confusable. |
|
158 * Note: this test is not applicable to a check of a single identifier. |
|
159 */ |
|
160 USPOOF_SINGLE_SCRIPT_CONFUSABLE = 1, |
|
161 |
|
162 /** Mixed script confusable test. |
|
163 * When checking a single identifier, report a problem if |
|
164 * the identifier contains multiple scripts, and |
|
165 * is confusable with some other identifier in a single script. |
|
166 * When testing whether two identifiers are confusable, report that they are if |
|
167 * the two IDs are visually confusable, |
|
168 * and at least one contains characters from more than one script. |
|
169 */ |
|
170 USPOOF_MIXED_SCRIPT_CONFUSABLE = 2, |
|
171 |
|
172 /** Whole script confusable test. |
|
173 * When checking a single identifier, report a problem if |
|
174 * the identifier is of a single script, and |
|
175 * there exists a confusable identifier in another script. |
|
176 * When testing whether two identifiers are confusable, report that they are if |
|
177 * each is of a single script, |
|
178 * the scripts of the two identifiers are different, and |
|
179 * the identifiers are visually confusable. |
|
180 */ |
|
181 USPOOF_WHOLE_SCRIPT_CONFUSABLE = 4, |
|
182 |
|
183 /** Any Case Modifier for confusable identifier tests. |
|
184 If specified, consider all characters, of any case, when looking for confusables. |
|
185 If USPOOF_ANY_CASE is not specified, identifiers being checked are assumed to have been |
|
186 case folded. Upper case confusable characters will not be checked. |
|
187 Selects between Lower Case Confusable and |
|
188 Any Case Confusable. */ |
|
189 USPOOF_ANY_CASE = 8, |
|
190 |
|
191 /** |
|
192 * Check that an identifier is no looser than the specified RestrictionLevel. |
|
193 * The default if uspoof_setRestrictionLevel() is not called is USPOOF_HIGHLY_RESTRICTIVE. |
|
194 * |
|
195 * If USPOOF_AUX_INFO is enabled the actual restriction level of the |
|
196 * identifier being tested will also be returned by uspoof_check(). |
|
197 * |
|
198 * @see URestrictionLevel |
|
199 * @see uspoof_setRestrictionLevel |
|
200 * @see USPOOF_AUX_INFO |
|
201 * |
|
202 * @stable ICU 51 |
|
203 */ |
|
204 USPOOF_RESTRICTION_LEVEL = 16, |
|
205 |
|
206 #ifndef U_HIDE_DEPRECATED_API |
|
207 /** Check that an identifier contains only characters from a |
|
208 * single script (plus chars from the common and inherited scripts.) |
|
209 * Applies to checks of a single identifier only. |
|
210 * @deprecated ICU 51 Use USPOOF_RESTRICTION_LEVEL instead; this constant is an alias with the same value. |
|
211 */ |
|
212 USPOOF_SINGLE_SCRIPT = USPOOF_RESTRICTION_LEVEL, |
|
213 #endif /* U_HIDE_DEPRECATED_API */ |
|
214 |
|
215 /** Check an identifier for the presence of invisible characters, |
|
216 * such as zero-width spaces, or character sequences that are |
|
217 * likely not to display, such as multiple occurrences of the same |
|
218 * non-spacing mark. This check does not test the input string as a whole |
|
219 * for conformance to any particular syntax for identifiers. |
|
220 */ |
|
221 USPOOF_INVISIBLE = 32, |
|
222 |
|
223 /** Check that an identifier contains only characters from a specified set |
|
224 * of acceptable characters. See uspoof_setAllowedChars() and |
|
225 * uspoof_setAllowedLocales(). |
|
226 */ |
|
227 USPOOF_CHAR_LIMIT = 64, |
|
228 |
|
229 #ifndef U_HIDE_DRAFT_API |
|
230 /** |
|
231 * Check that an identifier does not include decimal digits from |
|
232 * more than one numbering system. |
|
233 * |
|
234 * @draft ICU 51 |
|
235 */ |
|
236 USPOOF_MIXED_NUMBERS = 128, |
|
237 #endif /* U_HIDE_DRAFT_API */ |
|
238 |
|
239 /** |
|
240 * Enable all spoof checks. |
|
241 * |
|
242 * @stable ICU 4.6 |
|
243 */ |
|
244 USPOOF_ALL_CHECKS = 0xFFFF, |
|
245 |
|
246 #ifndef U_HIDE_DRAFT_API |
|
247 /** |
|
248 * Enable the return of auxiliary (non-error) information in the |
|
249 * upper bits of the check results value. |
|
250 * |
|
251 * If this "check" is not enabled, the results of uspoof_check() will be zero when an |
|
252 * identifier passes all of the enabled checks. |
|
253 * |
|
254 * If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will be zero |
|
255 * when an identifier passes all checks. |
|
256 * |
|
257 * @draft ICU 51 |
|
258 */ |
|
259 USPOOF_AUX_INFO = 0x40000000 |
|
260 #endif /* U_HIDE_DRAFT_API */ |
|
261 |
|
262 } USpoofChecks; |
|
263 |
|
264 |
|
265 #ifndef U_HIDE_DRAFT_API |
|
266 /** |
|
267 * Constants from UAX #39 for use in uspoof_setRestrictionLevel(), and |
|
268 * for returned identifier restriction levels in check results. |
|
269 * @draft ICU 51 |
|
270 */ |
|
271 typedef enum URestrictionLevel { |
|
272 /** |
|
273 * Only ASCII characters: U+0000..U+007F |
|
274 * |
|
275 * @draft ICU 51 |
|
276 */ |
|
277 USPOOF_ASCII = 0x10000000, |
|
278 /** |
|
279 * All characters in each identifier must be from a single script, or from the combinations: Latin + Han + |
|
280 * Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the |
|
281 * vast majority of Latin-script users; also that TR36 has ASCII instead of Latin. |
|
282 * |
|
283 * @draft ICU 51 |
|
284 */ |
|
285 USPOOF_HIGHLY_RESTRICTIVE = 0x20000000, |
|
286 /** |
|
287 * Allow Latin with other scripts except Cyrillic, Greek, Cherokee. Otherwise, the same as Highly Restrictive. |
|
288 * |
|
289 * @draft ICU 51 |
|
290 */ |
|
291 USPOOF_MODERATELY_RESTRICTIVE = 0x30000000, |
|
292 /** |
|
293 * Allow arbitrary mixtures of scripts. Otherwise, the same as Moderately Restrictive. |
|
294 * |
|
295 * @draft ICU 51 |
|
296 */ |
|
297 USPOOF_MINIMALLY_RESTRICTIVE = 0x40000000, |
|
298 /** |
|
299 * Any valid identifiers, including characters outside of the Identifier Profile. |
|
300 * |
|
301 * @draft ICU 51 |
|
302 */ |
|
303 USPOOF_UNRESTRICTIVE = 0x50000000 |
|
304 } URestrictionLevel; |
|
305 #endif /* U_HIDE_DRAFT_API */ |
|
306 |
|
307 /** |
|
308 * Create a Unicode Spoof Checker, configured to perform all |
|
309 * checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT. |
|
310 * Note that additional checks may be added in the future, |
|
311 * resulting in changes to the default checking behavior. |
|
312 * |
|
313 * @param status The error code, set if this function encounters a problem. |
|
314 * @return the newly created Spoof Checker |
|
315 * @stable ICU 4.2 |
|
316 */ |
|
317 U_STABLE USpoofChecker * U_EXPORT2 |
|
318 uspoof_open(UErrorCode *status); |
|
319 |
|
320 |
|
321 /** |
|
322 * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory. |
|
323 * Inverse of uspoof_serialize(). |
|
324 * The memory containing the serialized data must remain valid and unchanged |
|
325 * as long as the spoof checker, or any cloned copies of the spoof checker, |
|
326 * are in use. Ownership of the memory remains with the caller. |
|
327 * The spoof checker (and any clones) must be closed prior to deleting the |
|
328 * serialized data. |
|
329 * |
|
330 * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data |
|
331 * @param length the number of bytes available at data; |
|
332 * can be more than necessary |
|
333 * @param pActualLength receives the actual number of bytes at data taken up by the data; |
|
334 * can be NULL |
|
335 * @param pErrorCode ICU error code |
|
336 * @return the spoof checker. |
|
337 * |
|
338 * @see uspoof_open |
|
339 * @see uspoof_serialize |
|
340 * @stable ICU 4.2 |
|
341 */ |
|
342 U_STABLE USpoofChecker * U_EXPORT2 |
|
343 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, |
|
344 UErrorCode *pErrorCode); |
|
345 |
|
346 /** |
|
347 * Open a Spoof Checker from the source form of the spoof data. |
|
348 * The two text inputs correspond to the Unicode data files confusables.txt |
|
349 * and confusablesWholeScript.txt, as described in |
|
350 * Unicode UAX #39. The syntax of the source data is as described in UAX #39 for |
|
351 * these files, and the content of these files is acceptable input. |
|
352 * |
|
353 * The character encoding of the (char *) input text is UTF-8. |
|
354 * |
|
355 * @param confusables a pointer to the confusable characters definitions, |
|
356 * as found in file confusables.txt from unicode.org. |
|
357 * @param confusablesLen The length of the confusables text, or -1 if the |
|
358 * input string is zero terminated. |
|
359 * @param confusablesWholeScript |
|
360 * a pointer to the whole script confusables definitions, |
|
361 * as found in the file confusablesWholeScript.txt from unicode.org. |
|
362 * @param confusablesWholeScriptLen The length of the whole script confusables text, or |
|
363 * -1 if the input string is zero terminated. |
|
364 * @param errType In the event of an error in the input, indicates |
|
365 * which of the input files contains the error. |
|
366 * The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or |
|
367 * USPOOF_WHOLE_SCRIPT_CONFUSABLE, or |
|
368 * zero if no errors are found. |
|
369 * @param pe In the event of an error in the input, receives the position |
|
370 * in the input text (line, offset) of the error. |
|
371 * @param status an in/out ICU UErrorCode. Among the possible errors is |
|
372 * U_PARSE_ERROR, which is used to report syntax errors |
|
373 * in the input. |
|
374 * @return A spoof checker that uses the rules from the input files. |
|
375 * @stable ICU 4.2 |
|
376 */ |
|
377 U_STABLE USpoofChecker * U_EXPORT2 |
|
378 uspoof_openFromSource(const char *confusables, int32_t confusablesLen, |
|
379 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, |
|
380 int32_t *errType, UParseError *pe, UErrorCode *status); |
|
381 |
|
382 |
|
383 /** |
|
384 * Close a Spoof Checker, freeing any memory held by its implementation. |
|
385 * @param sc The USpoofChecker to close. |
|
386 * @stable ICU 4.2 |
|
387 */ |
|
388 U_STABLE void U_EXPORT2 |
|
389 uspoof_close(USpoofChecker *sc); |
|
390 |
|
391 #if U_SHOW_CPLUSPLUS_API |
|
392 |
|
393 U_NAMESPACE_BEGIN |
|
394 |
|
395 /** |
|
396 * \class LocalUSpoofCheckerPointer |
|
397 * "Smart pointer" class that closes a USpoofChecker via uspoof_close(). |
|
398 * For most methods see the LocalPointerBase base class. |
|
399 * |
|
400 * @see LocalPointerBase |
|
401 * @see LocalPointer |
|
402 * @stable ICU 4.4 |
|
403 */ |
|
404 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close); |
|
405 |
|
406 U_NAMESPACE_END |
|
407 |
|
408 #endif |
|
409 |
|
410 /** |
|
411 * Clone a Spoof Checker. The clone will be set to perform the same checks |
|
412 * as the original source. |
|
413 * |
|
414 * @param sc The source USpoofChecker |
|
415 * @param status The error code, set if this function encounters a problem. |
|
416 * @return The cloned USpoofChecker. |
|
417 * @stable ICU 4.2 |
|
418 */ |
|
419 U_STABLE USpoofChecker * U_EXPORT2 |
|
420 uspoof_clone(const USpoofChecker *sc, UErrorCode *status); |
|
421 |
|
422 |
|
423 /** |
|
424 * Specify the set of checks that will be performed by the check |
|
425 * functions of this Spoof Checker. |
|
426 * |
|
427 * @param sc The USpoofChecker to configure. |
|
428 * @param checks The set of checks that this spoof checker will perform. |
|
429 * The value is a bit set, obtained by OR-ing together |
|
430 * values from enum USpoofChecks. |
|
431 * @param status The error code, set if this function encounters a problem. |
|
432 * @stable ICU 4.2 |
|
433 * |
|
434 */ |
|
435 U_STABLE void U_EXPORT2 |
|
436 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status); |
|
437 |
|
438 /** |
|
439 * Get the set of checks that this Spoof Checker has been configured to perform. |
|
440 * |
|
441 * @param sc The USpoofChecker to query. |
|
442 * @param status The error code, set if this function encounters a problem. |
|
443 * @return The set of checks that this spoof checker has been configured to perform. |
|
444 * The value is a bit set, obtained by OR-ing together |
|
445 * values from enum USpoofChecks. |
|
446 * @stable ICU 4.2 |
|
447 * |
|
448 */ |
|
449 U_STABLE int32_t U_EXPORT2 |
|
450 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status); |
|
451 |
|
452 #ifndef U_HIDE_DRAFT_API |
|
453 /** |
|
454 * Set the loosest restriction level allowed. The default if this function |
|
455 * is not called is USPOOF_HIGHLY_RESTRICTIVE. Calling this function also enables the USPOOF_RESTRICTION_LEVEL check. |
|
456 * @param sc The USpoofChecker |
|
457 * @param restrictionLevel The loosest restriction level allowed. |
|
458 * @see URestrictionLevel |
|
459 * @draft ICU 51 |
|
460 */ |
|
461 U_DRAFT void U_EXPORT2 |
|
462 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel); |
|
463 |
|
464 |
|
465 /** |
|
466 * Get the Restriction Level that will be tested if the checks include USPOOF_RESTRICTION_LEVEL. |
|
467 * @param sc The USpoofChecker |
|
468 * @return The restriction level |
|
469 * @see URestrictionLevel |
|
470 * @draft ICU 51 |
|
471 */ |
|
472 U_DRAFT URestrictionLevel U_EXPORT2 |
|
473 uspoof_getRestrictionLevel(const USpoofChecker *sc); |
|
474 #endif /* U_HIDE_DRAFT_API */ |
|
475 |
|
476 /** |
|
477 * Limit characters that are acceptable in identifiers being checked to those |
|
478 * normally used with the languages associated with the specified locales. |
|
479 * Any previously specified list of locales is replaced by the new settings. |
|
480 * |
|
481 * A set of languages is determined from the locale(s), and |
|
482 * from those a set of acceptable Unicode scripts is determined. |
|
483 * Characters from this set of scripts, along with characters from |
|
484 * the "common" and "inherited" Unicode Script categories |
|
485 * will be permitted. |
|
486 * |
|
487 * Supplying an empty string removes all restrictions; |
|
488 * characters from any script will be allowed. |
|
489 * |
|
490 * The USPOOF_CHAR_LIMIT test is automatically enabled for this |
|
491 * USpoofChecker when calling this function with a non-empty list |
|
492 * of locales. |
|
493 * |
|
494 * The Unicode Set of characters that will be allowed is accessible |
|
495 * via the uspoof_getAllowedChars() function. uspoof_setAllowedLocales() |
|
496 * will <i>replace</i> any previously applied set of allowed characters. |
|
497 * |
|
498 * Adjustments, such as additions or deletions of certain classes of characters, |
|
499 * can be made to the result of uspoof_setAllowedLocales() by |
|
500 * fetching the resulting set with uspoof_getAllowedChars(), |
|
501 * manipulating it with the Unicode Set API, then resetting the |
|
502 * spoof detector's limits with uspoof_setAllowedChars(). |
|
503 * |
|
504 * @param sc The USpoofChecker |
|
505 * @param localesList A list of locales, from which the language |
|
506 * and associated script are extracted. The locales |
|
507 * are comma-separated if there is more than one. |
|
508 * White space may not appear within an individual locale, |
|
509 * but is ignored otherwise. |
|
510 * The locales are syntactically like those from the |
|
511 * HTTP Accept-Language header. |
|
512 * If the localesList is empty, no restrictions will be placed on |
|
513 * the allowed characters. |
|
514 * |
|
515 * @param status The error code, set if this function encounters a problem. |
|
516 * @stable ICU 4.2 |
|
517 */ |
|
518 U_STABLE void U_EXPORT2 |
|
519 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status); |
|
520 |
|
521 /** |
|
522 * Get a list of locales for the scripts that are acceptable in strings |
|
523 * to be checked. If no limitations on scripts have been specified, |
|
524 * an empty string will be returned. |
|
525 * |
|
526 * uspoof_setAllowedChars() will reset the list of allowed locales to be empty. |
|
527 * |
|
528 * The format of the returned list is the same as that supplied to |
|
529 * uspoof_setAllowedLocales(), but the returned list may not be identical |
|
530 * to the originally specified string; the string may be reformatted, |
|
531 * and information other than languages from |
|
532 * the originally specified locales may be omitted. |
|
533 * |
|
534 * @param sc The USpoofChecker |
|
535 * @param status The error code, set if this function encounters a problem. |
|
536 * @return A string containing a list of locales corresponding |
|
537 * to the acceptable scripts, formatted like an |
|
538 * HTTP Accept-Language value. |
|
539 * |
|
540 * @stable ICU 4.2 |
|
541 */ |
|
542 U_STABLE const char * U_EXPORT2 |
|
543 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status); |
|
544 |
|
545 |
|
546 /** |
|
547 * Limit the acceptable characters to those specified by a Unicode Set. |
|
548 * Any previously specified character limit |
|
549 * is replaced by the new settings. This includes limits on |
|
550 * characters that were set with the uspoof_setAllowedLocales() function. |
|
551 * |
|
552 * The USPOOF_CHAR_LIMIT test is automatically enabled for this |
|
553 * USpoofChecker by this function. |
|
554 * |
|
555 * @param sc The USpoofChecker |
|
556 * @param chars A Unicode Set containing the list of |
|
557 * characters that are permitted. Ownership of the set |
|
558 * remains with the caller. The incoming set is cloned by |
|
559 * this function, so there are no restrictions on modifying |
|
560 * or deleting the USet after calling this function. |
|
561 * @param status The error code, set if this function encounters a problem. |
|
562 * @stable ICU 4.2 |
|
563 */ |
|
564 U_STABLE void U_EXPORT2 |
|
565 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status); |
|
566 |
|
567 |
|
568 /** |
|
569 * Get a USet for the characters permitted in an identifier. |
|
570 * This corresponds to the limits imposed by uspoof_setAllowedChars() and |
|
571 * uspoof_setAllowedLocales(). Limitations imposed by other checks will not be |
|
572 * reflected in the set returned by this function. |
|
573 * |
|
574 * The returned set will be frozen, meaning that it cannot be modified |
|
575 * by the caller. |
|
576 * |
|
577 * Ownership of the returned set remains with the Spoof Detector. The |
|
578 * returned set will become invalid if the spoof detector is closed, |
|
579 * or if a new set of allowed characters is specified. |
|
580 * |
|
581 * |
|
582 * @param sc The USpoofChecker |
|
583 * @param status The error code, set if this function encounters a problem. |
|
584 * @return A USet containing the characters that are permitted by |
|
585 * the USPOOF_CHAR_LIMIT test. |
|
586 * @stable ICU 4.2 |
|
587 */ |
|
588 U_STABLE const USet * U_EXPORT2 |
|
589 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status); |
|
590 |
|
591 |
|
592 #if U_SHOW_CPLUSPLUS_API |
|
593 /** |
|
594 * Limit the acceptable characters to those specified by a Unicode Set. |
|
595 * Any previously specified character limit |
|
596 * is replaced by the new settings. This includes limits on |
|
597 * characters that were set with the uspoof_setAllowedLocales() function. |
|
598 * |
|
599 * The USPOOF_CHAR_LIMIT test is automatically enabled for this |
|
600 * USpoofChecker by this function. |
|
601 * |
|
602 * @param sc The USpoofChecker |
|
603 * @param chars A Unicode Set containing the list of |
|
604 * characters that are permitted. Ownership of the set |
|
605 * remains with the caller. The incoming set is cloned by |
|
606 * this function, so there are no restrictions on modifying |
|
607 * or deleting the UnicodeSet after calling this function. |
|
608 * @param status The error code, set if this function encounters a problem. |
|
609 * @stable ICU 4.2 |
|
610 */ |
|
611 U_STABLE void U_EXPORT2 |
|
612 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status); |
|
613 |
|
614 |
|
615 /** |
|
616 * Get a UnicodeSet for the characters permitted in an identifier. |
|
617 * This corresponds to the limits imposed by the uspoof_setAllowedChars() / |
|
618 * uspoof_setAllowedUnicodeSet() functions. Limitations imposed by other checks will not be |
|
619 * reflected in the set returned by this function. |
|
620 * |
|
621 * The returned set will be frozen, meaning that it cannot be modified |
|
622 * by the caller. |
|
623 * |
|
624 * Ownership of the returned set remains with the Spoof Detector. The |
|
625 * returned set will become invalid if the spoof detector is closed, |
|
626 * or if a new set of allowed characters is specified. |
|
627 * |
|
628 * |
|
629 * @param sc The USpoofChecker |
|
630 * @param status The error code, set if this function encounters a problem. |
|
631 * @return A UnicodeSet containing the characters that are permitted by |
|
632 * the USPOOF_CHAR_LIMIT test. |
|
633 * @stable ICU 4.2 |
|
634 */ |
|
635 U_STABLE const icu::UnicodeSet * U_EXPORT2 |
|
636 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status); |
|
637 #endif |
|
638 |
|
639 |
|
640 /** |
|
641 * Check the specified string for possible security issues. |
|
642 * The text to be checked will typically be an identifier of some sort. |
|
643 * The set of checks to be performed is specified with uspoof_setChecks(). |
|
644 * |
|
645 * @param sc The USpoofChecker |
|
646 * @param id The identifier to be checked for possible security issues, |
|
647 * in UTF-16 format. |
|
648 * @param length the length of the string to be checked, expressed in |
|
649 * 16-bit UTF-16 code units, or -1 if the string is |
|
650 * zero terminated. |
|
651 * @param position An out parameter. |
|
652 * Originally, the index of the first string position that failed a check. |
|
653 * Now, always returns zero. |
|
654 * This parameter may be null. |
|
655 * @param status The error code, set if an error occurred while attempting to |
|
656 * perform the check. |
|
657 * Spoofing or security issues detected with the input string are |
|
658 * not reported here, but through the function's return value. |
|
659 * @return An integer value with bits set for any potential security |
|
660 * or spoofing issues detected. The bits are defined by |
|
661 * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) |
|
662 * will be zero if the input string passes all of the |
|
663 * enabled checks. |
|
664 * @stable ICU 4.2 |
|
665 */ |
|
666 U_STABLE int32_t U_EXPORT2 |
|
667 uspoof_check(const USpoofChecker *sc, |
|
668 const UChar *id, int32_t length, |
|
669 int32_t *position, |
|
670 UErrorCode *status); |
|
671 |
|
672 |
|
673 /** |
|
674 * Check the specified string for possible security issues. |
|
675 * The text to be checked will typically be an identifier of some sort. |
|
676 * The set of checks to be performed is specified with uspoof_setChecks(). |
|
677 * |
|
678 * @param sc The USpoofChecker |
|
679 * @param id An identifier to be checked for possible security issues, in UTF-8 format. |
|
680 * @param length the length of the string to be checked, or -1 if the string is |
|
681 * zero terminated. |
|
682 * @param position An out parameter. |
|
683 * Originally, the index of the first string position that failed a check. |
|
684 * Now, always returns zero. |
|
685 * This parameter may be null. |
|
686 * Deprecated in ICU 51. |
|
687 * @param status The error code, set if an error occurred while attempting to |
|
688 * perform the check. |
|
689 * Spoofing or security issues detected with the input string are |
|
690 * not reported here, but through the function's return value. |
|
691 * If the input contains invalid UTF-8 sequences, |
|
692 * a status of U_INVALID_CHAR_FOUND will be returned. |
|
693 * @return An integer value with bits set for any potential security |
|
694 * or spoofing issues detected. The bits are defined by |
|
695 * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) |
|
696 * will be zero if the input string passes all of the |
|
697 * enabled checks. |
|
698 * @stable ICU 4.2 |
|
699 */ |
|
700 U_STABLE int32_t U_EXPORT2 |
|
701 uspoof_checkUTF8(const USpoofChecker *sc, |
|
702 const char *id, int32_t length, |
|
703 int32_t *position, |
|
704 UErrorCode *status); |
|
705 |
|
706 |
|
707 #if U_SHOW_CPLUSPLUS_API |
|
708 /** |
|
709 * Check the specified string for possible security issues. |
|
710 * The text to be checked will typically be an identifier of some sort. |
|
711 * The set of checks to be performed is specified with uspoof_setChecks(). |
|
712 * |
|
713 * @param sc The USpoofChecker |
|
714 * @param id An identifier to be checked for possible security issues. |
|
715 * @param position An out parameter. |
|
716 * Originally, the index of the first string position that failed a check. |
|
717 * Now, always returns zero. |
|
718 * This parameter may be null. |
|
719 * (This parameter is deprecated as of ICU 51; the function itself is marked stable below.) |
|
720 * @param status The error code, set if an error occurred while attempting to |
|
721 * perform the check. |
|
722 * Spoofing or security issues detected with the input string are |
|
723 * not reported here, but through the function's return value. |
|
724 * @return An integer value with bits set for any potential security |
|
725 * or spoofing issues detected. The bits are defined by |
|
726 * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) |
|
727 * will be zero if the input string passes all of the |
|
728 * enabled checks. |
|
729 * @stable ICU 4.2 |
|
730 */ |
|
731 U_STABLE int32_t U_EXPORT2 |
|
732 uspoof_checkUnicodeString(const USpoofChecker *sc, |
|
733 const icu::UnicodeString &id, |
|
734 int32_t *position, |
|
735 UErrorCode *status); |
|
736 |
|
737 #endif |
|
738 |
|
739 |
|
740 /** |
|
741 * Check whether two specified strings are visually confusable. |
|
742 * The types of confusability to be tested - single script, mixed script, |
|
743 * or whole script - are determined by the check options set for the |
|
744 * USpoofChecker. |
|
745 * |
|
746 * The tests to be performed are controlled by the flags |
|
747 * USPOOF_SINGLE_SCRIPT_CONFUSABLE |
|
748 * USPOOF_MIXED_SCRIPT_CONFUSABLE |
|
749 * USPOOF_WHOLE_SCRIPT_CONFUSABLE |
|
750 * At least one of these tests must be selected. |
|
751 * |
|
752 * USPOOF_ANY_CASE is a modifier for the tests. Select it if the identifiers |
|
753 * may be of mixed case. |
|
754 * If identifiers are case folded for comparison and |
|
755 * display to the user, do not select the USPOOF_ANY_CASE option. |
|
756 * |
|
757 * |
|
758 * @param sc The USpoofChecker |
|
759 * @param id1 The first of the two identifiers to be compared for |
|
760 * confusability. The identifiers are in UTF-16 format. |
|
761 * @param length1 The length of the first identifier, expressed in |
|
762 * 16 bit UTF-16 code units, or -1 if the string is |
|
763 * nul terminated. |
|
764 * @param id2 The second of the two identifiers to be compared for |
|
765 * confusability. The identifiers are in UTF-16 format. |
|
766 * @param length2 The length of the second identifier, expressed in |
|
767 * 16 bit UTF-16 code units, or -1 if the string is |
|
768 * nul terminated. |
|
769 * @param status The error code, set if an error occurred while attempting to |
|
770 * perform the check. |
|
771 * Confusability of the identifiers is not reported here, |
|
772 * but through this function's return value. |
|
773 * @return An integer value with bit(s) set corresponding to |
|
774 * the type of confusability found, as defined by |
|
775 * enum USpoofChecks. Zero is returned if the identifiers |
|
776 * are not confusable. |
|
777 * @stable ICU 4.2 |
|
778 */ |
|
779 U_STABLE int32_t U_EXPORT2 |
|
780 uspoof_areConfusable(const USpoofChecker *sc, |
|
781 const UChar *id1, int32_t length1, |
|
782 const UChar *id2, int32_t length2, |
|
783 UErrorCode *status); |
|
784 |
|
785 |
|
786 |
|
787 /** |
|
788 * Check whether two specified UTF-8 strings are visually confusable. |
|
789 * The types of confusability to be tested - single script, mixed script, |
|
790 * or whole script - are determined by the check options set for the |
|
791 * USpoofChecker. |
|
792 * |
|
793 * @param sc The USpoofChecker |
|
794 * @param id1 The first of the two identifiers to be compared for |
|
795 * confusability. The strings are in UTF-8 format. |
|
796 * @param length1 The length of the first identifier, in bytes, or -1 |
|
797 * if the string is nul terminated. |
|
798 * @param id2 The second of the two identifiers to be compared for |
|
799 * confusability. The strings are in UTF-8 format. |
|
800 * @param length2 The length of the second identifier, in bytes, or -1 |
|
801 * if the string is nul terminated. |
|
802 * @param status The error code, set if an error occurred while attempting to |
|
803 * perform the check. |
|
804 * Confusability of the strings is not reported here, |
|
805 * but through this function's return value. |
|
806 * @return An integer value with bit(s) set corresponding to |
|
807 * the type of confusability found, as defined by |
|
808 * enum USpoofChecks. Zero is returned if the strings |
|
809 * are not confusable. |
|
810 * @stable ICU 4.2 |
|
811 */ |
|
812 U_STABLE int32_t U_EXPORT2 |
|
813 uspoof_areConfusableUTF8(const USpoofChecker *sc, |
|
814 const char *id1, int32_t length1, |
|
815 const char *id2, int32_t length2, |
|
816 UErrorCode *status); |
|
817 |
|
818 |
|
819 |
|
820 |
|
821 #if U_SHOW_CPLUSPLUS_API |
|
822 /** |
|
823 * Check whether two specified strings are visually confusable. |
|
824 * The types of confusability to be tested - single script, mixed script, |
|
825 * or whole script - are determined by the check options set for the |
|
826 * USpoofChecker. |
|
827 * |
|
828 * @param sc The USpoofChecker |
|
829 * @param s1 The first of the two identifiers to be compared for |
|
830 * confusability, supplied as an icu::UnicodeString. |
|
831 * @param s2 The second of the two identifiers to be compared for |
|
832 * confusability, supplied as an icu::UnicodeString. |
|
833 * @param status The error code, set if an error occurred while attempting to |
|
834 * perform the check. |
|
835 * Confusability of the identifiers is not reported here, |
|
836 * but through this function's return value. |
|
837 * @return An integer value with bit(s) set corresponding to |
|
838 * the type of confusability found, as defined by |
|
839 * enum USpoofChecks. Zero is returned if the identifiers |
|
840 * are not confusable. |
|
841 * @stable ICU 4.2 |
|
842 */ |
|
843 U_STABLE int32_t U_EXPORT2 |
|
844 uspoof_areConfusableUnicodeString(const USpoofChecker *sc, |
|
845 const icu::UnicodeString &s1, |
|
846 const icu::UnicodeString &s2, |
|
847 UErrorCode *status); |
|
848 #endif |
|
849 |
|
850 |
|
851 /** |
|
852 * Get the "skeleton" for an identifier. |
|
853 * Skeletons are a transformation of the input identifier; |
|
854 * two identifiers are confusable if their skeletons are identical. |
|
855 * See Unicode UTS #39 for additional information. |
|
856 * |
|
857 * Using skeletons directly makes it possible to quickly check |
|
858 * whether an identifier is confusable with any of some large |
|
859 * set of existing identifiers, by creating an efficiently |
|
860 * searchable collection of the skeletons. |
|
861 * |
|
862 * @param sc The USpoofChecker |
|
863 * @param type The type of skeleton, corresponding to which |
|
864 * of the Unicode confusable data tables to use. |
|
865 * The default is Mixed-Script, Lowercase. |
|
866 * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and |
|
867 * USPOOF_ANY_CASE. The two flags may be ORed. |
|
868 * @param id The input identifier whose skeleton will be computed. |
|
869 * @param length The length of the input identifier, expressed in 16 bit |
|
870 * UTF-16 code units, or -1 if the string is zero terminated. |
|
871 * @param dest The output buffer, to receive the skeleton string. |
|
872 * @param destCapacity The length of the output buffer, in 16 bit units. |
|
873 * The destCapacity may be zero, in which case the function will |
|
874 * return the actual length of the skeleton. |
|
875 * @param status The error code, set if an error occurred while attempting to |
|
876 * perform the check. |
|
877 * @return The length of the skeleton string. The returned length |
|
878 * is always that of the complete skeleton, even when the |
|
879 * supplied buffer is too small (or of zero length). |
|
880 * |
|
881 * @stable ICU 4.2 |
|
882 */ |
|
883 U_STABLE int32_t U_EXPORT2 |
|
884 uspoof_getSkeleton(const USpoofChecker *sc, |
|
885 uint32_t type, |
|
886 const UChar *id, int32_t length, |
|
887 UChar *dest, int32_t destCapacity, |
|
888 UErrorCode *status); |
|
889 |
|
890 /** |
|
891 * Get the "skeleton" for a UTF-8 identifier. |
|
892 * Skeletons are a transformation of the input identifier; |
|
893 * two identifiers are confusable if their skeletons are identical. |
|
894 * See Unicode UTS #39 for additional information. |
|
895 * |
|
896 * Using skeletons directly makes it possible to quickly check |
|
897 * whether an identifier is confusable with any of some large |
|
898 * set of existing identifiers, by creating an efficiently |
|
899 * searchable collection of the skeletons. |
|
900 * |
|
901 * @param sc The USpoofChecker |
|
902 * @param type The type of skeleton, corresponding to which |
|
903 * of the Unicode confusable data tables to use. |
|
904 * The default is Mixed-Script, Lowercase. |
|
905 * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and |
|
906 * USPOOF_ANY_CASE. The two flags may be ORed. |
|
907 * @param id The UTF-8 format identifier whose skeleton will be computed. |
|
908 * @param length The length of the input string, in bytes, |
|
909 * or -1 if the string is zero terminated. |
|
910 * @param dest The output buffer, to receive the skeleton string. |
|
911 * @param destCapacity The length of the output buffer, in bytes. |
|
912 * The destCapacity may be zero, in which case the function will |
|
913 * return the actual length of the skeleton. |
|
914 * @param status The error code, set if an error occurred while attempting to |
|
915 * perform the check. Possible errors include U_INVALID_CHAR_FOUND |
|
916 * for invalid UTF-8 sequences, and |
|
917 * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small |
|
918 * to hold the complete skeleton. |
|
919 * @return The length of the skeleton string, in bytes. The returned length |
|
920 * is always that of the complete skeleton, even when the |
|
921 * supplied buffer is too small (or of zero length). |
|
922 * |
|
923 * @stable ICU 4.2 |
|
924 */ |
|
925 U_STABLE int32_t U_EXPORT2 |
|
926 uspoof_getSkeletonUTF8(const USpoofChecker *sc, |
|
927 uint32_t type, |
|
928 const char *id, int32_t length, |
|
929 char *dest, int32_t destCapacity, |
|
930 UErrorCode *status); |
|
931 |
|
932 #if U_SHOW_CPLUSPLUS_API |
|
933 /** |
|
934 * Get the "skeleton" for an identifier. |
|
935 * Skeletons are a transformation of the input identifier; |
|
936 * two identifiers are confusable if their skeletons are identical. |
|
937 * See Unicode UTS #39 for additional information. |
|
938 * |
|
939 * Using skeletons directly makes it possible to quickly check |
|
940 * whether an identifier is confusable with any of some large |
|
941 * set of existing identifiers, by creating an efficiently |
|
942 * searchable collection of the skeletons. |
|
943 * |
|
944 * @param sc The USpoofChecker. |
|
945 * @param type The type of skeleton, corresponding to which |
|
946 * of the Unicode confusable data tables to use. |
|
947 * The default is Mixed-Script, Lowercase. |
|
948 * Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and |
|
949 * USPOOF_ANY_CASE. The two flags may be ORed. |
|
950 * @param id The input identifier whose skeleton will be computed. |
|
951 * @param dest The output string, to receive the skeleton. |
|
952 * @param status The error code, set if an error occurred while attempting to |
|
953 * perform the check. |
|
954 * @return A reference to the destination (skeleton) string. |
|
955 * |
|
956 * @stable ICU 4.2 |
|
957 */ |
|
958 U_I18N_API icu::UnicodeString & U_EXPORT2 |
|
959 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, |
|
960 uint32_t type, |
|
961 const icu::UnicodeString &id, |
|
962 icu::UnicodeString &dest, |
|
963 UErrorCode *status); |
|
964 #endif /* U_SHOW_CPLUSPLUS_API */ |
|
965 |
|
966 |
|
967 #ifndef U_HIDE_DRAFT_API |
|
968 /** |
|
969 * Get the set of Candidate Characters for Inclusion in Identifiers, as defined |
|
970 * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers |
|
971 * |
|
972 * The returned set is frozen. Ownership of the set remains with the ICU library; it must not |
|
973 * be deleted by the caller. |
|
974 * |
|
975 * @param status The error code, set if a problem occurs while creating the set. |
|
976 * @return The frozen, ICU-owned set of candidate characters, as a USet. |
|
977 * @draft ICU 51 |
|
978 */ |
|
979 U_DRAFT const USet * U_EXPORT2 |
|
980 uspoof_getInclusionSet(UErrorCode *status); |
|
981 |
|
982 /** |
|
983 * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined |
|
984 * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts |
|
985 * |
|
986 * The returned set is frozen. Ownership of the set remains with the ICU library; it must not |
|
987 * be deleted by the caller. |
|
988 * |
|
989 * @param status The error code, set if a problem occurs while creating the set. |
|
990 * @return The frozen, ICU-owned set of characters from the recommended scripts, as a USet. |
|
991 * @draft ICU 51 |
|
992 */ |
|
993 U_DRAFT const USet * U_EXPORT2 |
|
994 uspoof_getRecommendedSet(UErrorCode *status); |
|
995 |
|
996 #if U_SHOW_CPLUSPLUS_API |
|
997 |
|
998 /** |
|
999 * Get the set of Candidate Characters for Inclusion in Identifiers, as defined |
|
1000 * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers |
|
1001 * |
|
1002 * The returned set is frozen. Ownership of the set remains with the ICU library; it must not |
|
1003 * be deleted by the caller. |
|
1004 * |
|
1005 * @param status The error code, set if a problem occurs while creating the set. |
|
1006 * @return The frozen, ICU-owned set of candidate characters, as an icu::UnicodeSet. |
|
1007 * @draft ICU 51 |
|
1008 */ |
|
1009 U_DRAFT const icu::UnicodeSet * U_EXPORT2 |
|
1010 uspoof_getInclusionUnicodeSet(UErrorCode *status); |
|
1011 |
|
1012 /** |
|
1013 * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined |
|
1014 * in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Recommended_Scripts |
|
1015 * |
|
1016 * The returned set is frozen. Ownership of the set remains with the ICU library; it must not |
|
1017 * be deleted by the caller. |
|
1018 * |
|
1019 * @param status The error code, set if a problem occurs while creating the set. |
|
1020 * @return The frozen, ICU-owned set of characters from the recommended scripts, as an icu::UnicodeSet. |
|
1021 * @draft ICU 51 |
|
1022 */ |
|
1023 U_DRAFT const icu::UnicodeSet * U_EXPORT2 |
|
1024 uspoof_getRecommendedUnicodeSet(UErrorCode *status); |
|
1025 |
|
1026 #endif /* U_SHOW_CPLUSPLUS_API */ |
|
1027 #endif /* U_HIDE_DRAFT_API */ |
|
1028 |
|
1029 /** |
|
1030 * Serialize the data for a spoof detector into a chunk of memory. |
|
1031 * The flattened spoof detection tables can later be used to efficiently |
|
1032 * instantiate a new Spoof Detector. |
|
1033 * |
|
1034 * The serialized spoof checker includes only the data compiled from the |
|
1035 * Unicode data tables by uspoof_openFromSource(); it does not |
|
1036 * include any other state or configuration that may have been set. |
|
1037 * |
|
1038 * @param sc the Spoof Detector whose data is to be serialized. |
|
1039 * @param data a pointer to 32-bit-aligned memory to be filled with the data, |
|
1040 * can be NULL if capacity==0 |
|
1041 * @param capacity the number of bytes available at data, |
|
1042 * or 0 for preflighting |
|
1043 * @param status an in/out ICU UErrorCode; possible errors include: |
|
1044 * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization |
|
1045 * - U_ILLEGAL_ARGUMENT_ERROR the data or capacity parameters are bad |
|
1046 * @return the number of bytes written or needed for the spoof data |
|
1047 * |
|
1048 * @see uspoof_openFromSerialized() |
|
1049 * @stable ICU 4.2 |
|
1050 */ |
|
1051 U_STABLE int32_t U_EXPORT2 |
|
1052 uspoof_serialize(USpoofChecker *sc, |
|
1053 void *data, int32_t capacity, |
|
1054 UErrorCode *status); |
|
1055 |
|
1056 |
|
1057 #endif |
|
1058 |
|
1059 #endif /* USPOOF_H */ |