intl/icu/source/i18n/uspoof_build.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 ***************************************************************************
michael@0 3 * Copyright (C) 2008-2009, International Business Machines Corporation
michael@0 4 * and others. All Rights Reserved.
michael@0 5 ***************************************************************************
michael@0 6 * file name: uspoof_build.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2008 Dec 8
michael@0 12 * created by: Andy Heninger
michael@0 13 *
michael@0 14 * Unicode Spoof Detection Data Builder
michael@0 15 * Builder-related functions are kept in separate files so that applications not needing
michael@0 16 * the builder can more easily exclude them, typically by means of static linking.
michael@0 17 *
michael@0 18 * There are three relatively independent sets of Spoof data:
michael@0 19 * Confusables,
michael@0 20 * Whole Script Confusables,
michael@0 21 * ID character extensions.
michael@0 22 *
michael@0 23 * The data tables for each are built separately, each from its own definitions.
michael@0 24 */
michael@0 25
michael@0 26 #include "unicode/utypes.h"
michael@0 27 #include "unicode/uspoof.h"
michael@0 28 #include "unicode/unorm.h"
michael@0 29 #include "unicode/uregex.h"
michael@0 30 #include "unicode/ustring.h"
michael@0 31 #include "cmemory.h"
michael@0 32 #include "uspoof_impl.h"
michael@0 33 #include "uhash.h"
michael@0 34 #include "uvector.h"
michael@0 35 #include "uassert.h"
michael@0 36 #include "uarrsort.h"
michael@0 37 #include "uspoof_conf.h"
michael@0 38 #include "uspoof_wsconf.h"
michael@0 39
michael@0 40 #if !UCONFIG_NO_NORMALIZATION
michael@0 41
michael@0 42 U_NAMESPACE_USE
michael@0 43
michael@0 44
michael@0 45 // The main data building function
michael@0 46
michael@0 47 U_CAPI USpoofChecker * U_EXPORT2
michael@0 48 uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
michael@0 49 const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
michael@0 50 int32_t *errorType, UParseError *pe, UErrorCode *status) {
michael@0 51
michael@0 52 if (U_FAILURE(*status)) {
michael@0 53 return NULL;
michael@0 54 }
michael@0 55 #if UCONFIG_NO_REGULAR_EXPRESSIONS
michael@0 56 *status = U_UNSUPPORTED_ERROR;
michael@0 57 return NULL;
michael@0 58 #else
michael@0 59 if (errorType!=NULL) {
michael@0 60 *errorType = 0;
michael@0 61 }
michael@0 62 if (pe != NULL) {
michael@0 63 pe->line = 0;
michael@0 64 pe->offset = 0;
michael@0 65 pe->preContext[0] = 0;
michael@0 66 pe->postContext[0] = 0;
michael@0 67 }
michael@0 68
michael@0 69 // Set up a shell of a spoof detector, with empty data.
michael@0 70 SpoofData *newSpoofData = new SpoofData(*status);
michael@0 71 SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
michael@0 72
michael@0 73 // Compile the binary data from the source (text) format.
michael@0 74 ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
michael@0 75 buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status);
michael@0 76
michael@0 77 if (U_FAILURE(*status)) {
michael@0 78 delete This;
michael@0 79 This = NULL;
michael@0 80 }
michael@0 81 return (USpoofChecker *)This;
michael@0 82 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
michael@0 83 }
michael@0 84
michael@0 85 #endif

mercurial