|
1 /* |
|
2 *************************************************************************** |
|
3 * Copyright (C) 2008-2009, International Business Machines Corporation |
|
4 * and others. All Rights Reserved. |
|
5 *************************************************************************** |
|
6 * file name: uspoof_build.cpp |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2008 Dec 8 |
|
12 * created by: Andy Heninger |
|
13 * |
|
14 * Unicode Spoof Detection Data Builder |
|
15 * Builder-related functions are kept in separate files so that applications not needing |
|
16 * the builder can more easily exclude them, typically by means of static linking. |
|
17 * |
|
18 * There are three relatively independent sets of Spoof data, |
|
19 * Confusables, |
|
20 * Whole Script Confusables |
|
21 * ID character extensions. |
|
22 * |
|
23 * The data tables for each are built separately, each from its own definitions |
|
24 */ |
|
25 |
|
26 #include "unicode/utypes.h" |
|
27 #include "unicode/uspoof.h" |
|
28 #include "unicode/unorm.h" |
|
29 #include "unicode/uregex.h" |
|
30 #include "unicode/ustring.h" |
|
31 #include "cmemory.h" |
|
32 #include "uspoof_impl.h" |
|
33 #include "uhash.h" |
|
34 #include "uvector.h" |
|
35 #include "uassert.h" |
|
36 #include "uarrsort.h" |
|
37 #include "uspoof_conf.h" |
|
38 #include "uspoof_wsconf.h" |
|
39 |
|
40 #if !UCONFIG_NO_NORMALIZATION |
|
41 |
|
42 U_NAMESPACE_USE |
|
43 |
|
44 |
|
45 // The main data building function |
|
46 |
|
47 U_CAPI USpoofChecker * U_EXPORT2 |
|
48 uspoof_openFromSource(const char *confusables, int32_t confusablesLen, |
|
49 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, |
|
50 int32_t *errorType, UParseError *pe, UErrorCode *status) { |
|
51 |
|
52 if (U_FAILURE(*status)) { |
|
53 return NULL; |
|
54 } |
|
55 #if UCONFIG_NO_REGULAR_EXPRESSIONS |
|
56 *status = U_UNSUPPORTED_ERROR; |
|
57 return NULL; |
|
58 #else |
|
59 if (errorType!=NULL) { |
|
60 *errorType = 0; |
|
61 } |
|
62 if (pe != NULL) { |
|
63 pe->line = 0; |
|
64 pe->offset = 0; |
|
65 pe->preContext[0] = 0; |
|
66 pe->postContext[0] = 0; |
|
67 } |
|
68 |
|
69 // Set up a shell of a spoof detector, with empty data. |
|
70 SpoofData *newSpoofData = new SpoofData(*status); |
|
71 SpoofImpl *This = new SpoofImpl(newSpoofData, *status); |
|
72 |
|
73 // Compile the binary data from the source (text) format. |
|
74 ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); |
|
75 buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status); |
|
76 |
|
77 if (U_FAILURE(*status)) { |
|
78 delete This; |
|
79 This = NULL; |
|
80 } |
|
81 return (USpoofChecker *)This; |
|
82 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS |
|
83 } |
|
84 |
|
85 #endif |