Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | *************************************************************************** |
michael@0 | 3 | * Copyright (C) 2008-2009, International Business Machines Corporation |
michael@0 | 4 | * and others. All Rights Reserved. |
michael@0 | 5 | *************************************************************************** |
michael@0 | 6 | * file name: uspoof_build.cpp |
michael@0 | 7 | * encoding: US-ASCII |
michael@0 | 8 | * tab size: 8 (not used) |
michael@0 | 9 | * indentation:4 |
michael@0 | 10 | * |
michael@0 | 11 | * created on: 2008 Dec 8 |
michael@0 | 12 | * created by: Andy Heninger |
michael@0 | 13 | * |
michael@0 | 14 | * Unicode Spoof Detection Data Builder |
michael@0 | 15 | * Builder-related functions are kept in separate files so that applications not needing |
michael@0 | 16 | * the builder can more easily exclude them, typically by means of static linking. |
michael@0 | 17 | * |
michael@0 | 18 | * There are three relatively independent sets of Spoof data, |
michael@0 | 19 | * Confusables, |
michael@0 | 20 | * Whole Script Confusables |
michael@0 | 21 | * ID character extensions. |
michael@0 | 22 | * |
michael@0 | 23 | * The data tables for each are built separately, each from its own definitions |
michael@0 | 24 | */ |
michael@0 | 25 | |
michael@0 | 26 | #include "unicode/utypes.h" |
michael@0 | 27 | #include "unicode/uspoof.h" |
michael@0 | 28 | #include "unicode/unorm.h" |
michael@0 | 29 | #include "unicode/uregex.h" |
michael@0 | 30 | #include "unicode/ustring.h" |
michael@0 | 31 | #include "cmemory.h" |
michael@0 | 32 | #include "uspoof_impl.h" |
michael@0 | 33 | #include "uhash.h" |
michael@0 | 34 | #include "uvector.h" |
michael@0 | 35 | #include "uassert.h" |
michael@0 | 36 | #include "uarrsort.h" |
michael@0 | 37 | #include "uspoof_conf.h" |
michael@0 | 38 | #include "uspoof_wsconf.h" |
michael@0 | 39 | |
michael@0 | 40 | #if !UCONFIG_NO_NORMALIZATION |
michael@0 | 41 | |
michael@0 | 42 | U_NAMESPACE_USE |
michael@0 | 43 | |
michael@0 | 44 | |
michael@0 | 45 | // The main data building function |
michael@0 | 46 | |
michael@0 | 47 | U_CAPI USpoofChecker * U_EXPORT2 |
michael@0 | 48 | uspoof_openFromSource(const char *confusables, int32_t confusablesLen, |
michael@0 | 49 | const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, |
michael@0 | 50 | int32_t *errorType, UParseError *pe, UErrorCode *status) { |
michael@0 | 51 | |
michael@0 | 52 | if (U_FAILURE(*status)) { |
michael@0 | 53 | return NULL; |
michael@0 | 54 | } |
michael@0 | 55 | #if UCONFIG_NO_REGULAR_EXPRESSIONS |
michael@0 | 56 | *status = U_UNSUPPORTED_ERROR; |
michael@0 | 57 | return NULL; |
michael@0 | 58 | #else |
michael@0 | 59 | if (errorType!=NULL) { |
michael@0 | 60 | *errorType = 0; |
michael@0 | 61 | } |
michael@0 | 62 | if (pe != NULL) { |
michael@0 | 63 | pe->line = 0; |
michael@0 | 64 | pe->offset = 0; |
michael@0 | 65 | pe->preContext[0] = 0; |
michael@0 | 66 | pe->postContext[0] = 0; |
michael@0 | 67 | } |
michael@0 | 68 | |
michael@0 | 69 | // Set up a shell of a spoof detector, with empty data. |
michael@0 | 70 | SpoofData *newSpoofData = new SpoofData(*status); |
michael@0 | 71 | SpoofImpl *This = new SpoofImpl(newSpoofData, *status); |
michael@0 | 72 | |
michael@0 | 73 | // Compile the binary data from the source (text) format. |
michael@0 | 74 | ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); |
michael@0 | 75 | buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status); |
michael@0 | 76 | |
michael@0 | 77 | if (U_FAILURE(*status)) { |
michael@0 | 78 | delete This; |
michael@0 | 79 | This = NULL; |
michael@0 | 80 | } |
michael@0 | 81 | return (USpoofChecker *)This; |
michael@0 | 82 | #endif // UCONFIG_NO_REGULAR_EXPRESSIONS |
michael@0 | 83 | } |
michael@0 | 84 | |
michael@0 | 85 | #endif |