michael@0: /* michael@0: *************************************************************************** michael@0: * Copyright (C) 2008-2009, International Business Machines Corporation michael@0: * and others. All Rights Reserved. michael@0: *************************************************************************** michael@0: * file name: uspoof_build.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2008 Dec 8 michael@0: * created by: Andy Heninger michael@0: * michael@0: * Unicode Spoof Detection Data Builder michael@0: * Builder-related functions are kept in separate files so that applications not needing michael@0: * the builder can more easily exclude them, typically by means of static linking. michael@0: * michael@0: * There are three relatively independent sets of Spoof data, michael@0: * Confusables, michael@0: * Whole Script Confusables michael@0: * ID character extensions. michael@0: * michael@0: * The data tables for each are built separately, each from its own definitions michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uspoof.h" michael@0: #include "unicode/unorm.h" michael@0: #include "unicode/uregex.h" michael@0: #include "unicode/ustring.h" michael@0: #include "cmemory.h" michael@0: #include "uspoof_impl.h" michael@0: #include "uhash.h" michael@0: #include "uvector.h" michael@0: #include "uassert.h" michael@0: #include "uarrsort.h" michael@0: #include "uspoof_conf.h" michael@0: #include "uspoof_wsconf.h" michael@0: michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: michael@0: U_NAMESPACE_USE michael@0: michael@0: michael@0: // The main data building function michael@0: michael@0: U_CAPI USpoofChecker * U_EXPORT2 michael@0: uspoof_openFromSource(const char *confusables, int32_t confusablesLen, michael@0: const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, michael@0: int32_t *errorType, UParseError *pe, UErrorCode *status) { michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: return NULL; michael@0: } michael@0: #if UCONFIG_NO_REGULAR_EXPRESSIONS michael@0: *status = U_UNSUPPORTED_ERROR; michael@0: return NULL; michael@0: #else michael@0: if (errorType!=NULL) { michael@0: *errorType = 0; michael@0: } michael@0: if (pe != NULL) { michael@0: pe->line = 0; michael@0: pe->offset = 0; michael@0: pe->preContext[0] = 0; michael@0: pe->postContext[0] = 0; michael@0: } michael@0: michael@0: // Set up a shell of a spoof detector, with empty data. michael@0: SpoofData *newSpoofData = new SpoofData(*status); michael@0: SpoofImpl *This = new SpoofImpl(newSpoofData, *status); michael@0: michael@0: // Compile the binary data from the source (text) format. michael@0: ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); michael@0: buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status); michael@0: michael@0: if (U_FAILURE(*status)) { michael@0: delete This; michael@0: This = NULL; michael@0: } michael@0: return (USpoofChecker *)This; michael@0: #endif // UCONFIG_NO_REGULAR_EXPRESSIONS michael@0: } michael@0: michael@0: #endif