intl/icu/source/tools/gensprep/gensprep.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2003-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  gensprep.c
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2003-02-06
    14 *   created by: Ram Viswanadha
    15 *
    16 *   This program reads the Profile.txt files,
    17 *   parses them, and extracts the data for StringPrep profile.
    18 *   It then preprocesses it and writes a binary file for efficient use
    19 *   in various StringPrep conversion processes.
    20 */
    22 #define USPREP_TYPE_NAMES_ARRAY 1
    24 #include <stdio.h>
    25 #include <stdlib.h>
    27 #include "cmemory.h"
    28 #include "cstring.h"
    29 #include "unewdata.h"
    30 #include "uoptions.h"
    31 #include "uparse.h"
    32 #include "sprpimpl.h"
    34 #include "unicode/uclean.h"
    35 #include "unicode/udata.h"
    36 #include "unicode/utypes.h"
    37 #include "unicode/putil.h"
    40 U_CDECL_BEGIN
    41 #include "gensprep.h"
    42 U_CDECL_END
    44 UBool beVerbose=FALSE, haveCopyright=TRUE;
    46 #define NORM_CORRECTIONS_FILE_NAME "NormalizationCorrections.txt"
    48 #define NORMALIZE_DIRECTIVE "normalize"
    49 #define NORMALIZE_DIRECTIVE_LEN 9
    50 #define CHECK_BIDI_DIRECTIVE "check-bidi"
    51 #define CHECK_BIDI_DIRECTIVE_LEN 10
    53 /* prototypes --------------------------------------------------------------- */
    55 static void
    56 parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode);
    58 static void
    59 parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode);
    62 /* -------------------------------------------------------------------------- */
    64 static UOption options[]={
    65     UOPTION_HELP_H,
    66     UOPTION_HELP_QUESTION_MARK,
    67     UOPTION_VERBOSE,
    68     UOPTION_COPYRIGHT,
    69     UOPTION_DESTDIR,
    70     UOPTION_SOURCEDIR,
    71     UOPTION_ICUDATADIR,
    72     UOPTION_BUNDLE_NAME,
    73     { "normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0 },
    74     { "norm-correction", NULL, NULL, NULL, 'm', UOPT_REQUIRES_ARG, 0 },
    75     { "check-bidi", NULL, NULL, NULL,  'k', UOPT_NO_ARG, 0},
    76     { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
    77 };
    79 enum{
    80     HELP,
    81     HELP_QUESTION_MARK,
    82     VERBOSE,
    83     COPYRIGHT,
    84     DESTDIR,
    85     SOURCEDIR,
    86     ICUDATADIR,
    87     BUNDLE_NAME,
    88     NORMALIZE,
    89     NORM_CORRECTION_DIR,
    90     CHECK_BIDI,
    91     UNICODE_VERSION
    92 };
    94 static int printHelp(int argc, char* argv[]){
    95     /*
    96      * Broken into chucks because the C89 standard says the minimum
    97      * required supported string length is 509 bytes.
    98      */
    99     fprintf(stderr,
   100         "Usage: %s [-options] [file_name]\n"
   101         "\n"
   102         "Read the files specified and\n"
   103         "create a binary file [package-name]_[bundle-name]." DATA_TYPE " with the StringPrep profile data\n"
   104         "\n",
   105         argv[0]);
   106     fprintf(stderr,
   107         "Options:\n"
   108         "\t-h or -? or --help       print this usage text\n"
   109         "\t-v or --verbose          verbose output\n"
   110         "\t-c or --copyright        include a copyright notice\n");
   111     fprintf(stderr,
   112         "\t-d or --destdir          destination directory, followed by the path\n"
   113         "\t-s or --sourcedir        source directory of ICU data, followed by the path\n"
   114         "\t-b or --bundle-name      generate the ouput data file with the name specified\n"
   115         "\t-i or --icudatadir       directory for locating any needed intermediate data files,\n"
   116         "\t                         followed by path, defaults to %s\n",
   117         u_getDataDirectory());
   118     fprintf(stderr,
   119         "\t-n or --normalize        turn on the option for normalization and include mappings\n"
   120         "\t                         from NormalizationCorrections.txt from the given path,\n"
   121         "\t                         e.g: /test/icu/source/data/unidata\n");
   122     fprintf(stderr,
   123         "\t-m or --norm-correction  use NormalizationCorrections.txt from the given path\n"
   124         "\t                         when the input file contains a normalization directive.\n"
   125         "\t                         unlike -n/--normalize, this option does not force the\n"
   126         "\t                         normalization.\n");
   127     fprintf(stderr,
   128         "\t-k or --check-bidi       turn on the option for checking for BiDi in the profile\n"
   129         "\t-u or --unicode          version of Unicode to be used with this profile followed by the version\n"
   130         );
   131     return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   132 }
   135 extern int
   136 main(int argc, char* argv[]) {
   137 #if !UCONFIG_NO_IDNA
   138     char* filename = NULL;
   139 #endif
   140     const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
   141     const char *bundleName=NULL, *inputFileName = NULL;
   142     char *basename=NULL;
   143     int32_t sprepOptions = 0;
   145     UErrorCode errorCode=U_ZERO_ERROR;
   147     U_MAIN_INIT_ARGS(argc, argv);
   149     /* preset then read command line options */
   150     options[DESTDIR].value=u_getDataDirectory();
   151     options[SOURCEDIR].value="";
   152     options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
   153     options[BUNDLE_NAME].value = DATA_NAME;
   154     options[NORMALIZE].value = "";
   156     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
   158     /* error handling, printing usage message */
   159     if(argc<0) {
   160         fprintf(stderr,
   161             "error in command line argument \"%s\"\n",
   162             argv[-argc]);
   163     }
   164     if(argc<0 || options[HELP].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
   165         return printHelp(argc, argv);
   167     }
   169     /* get the options values */
   170     beVerbose=options[VERBOSE].doesOccur;
   171     haveCopyright=options[COPYRIGHT].doesOccur;
   172     srcDir=options[SOURCEDIR].value;
   173     destDir=options[DESTDIR].value;
   174     bundleName = options[BUNDLE_NAME].value;
   175     if(options[NORMALIZE].doesOccur) {
   176         icuUniDataDir = options[NORMALIZE].value;
   177     } else {
   178         icuUniDataDir = options[NORM_CORRECTION_DIR].value;
   179     }
   181     if(argc<2) {
   182         /* print the help message */
   183         return printHelp(argc, argv);
   184     } else {
   185         inputFileName = argv[1];
   186     }
   187     if(!options[UNICODE_VERSION].doesOccur){
   188         return printHelp(argc, argv);
   189     }
   190     if(options[ICUDATADIR].doesOccur) {
   191         u_setDataDirectory(options[ICUDATADIR].value);
   192     }
   193 #if UCONFIG_NO_IDNA
   195     fprintf(stderr,
   196         "gensprep writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
   197         " because UCONFIG_NO_IDNA is set, \n"
   198         "see icu/source/common/unicode/uconfig.h\n");
   199     generateData(destDir, bundleName);
   201 #else
   203     setUnicodeVersion(options[UNICODE_VERSION].value);
   204     filename = (char* ) uprv_malloc(uprv_strlen(srcDir) + 300); /* hopefully this should be enough */
   206     /* prepare the filename beginning with the source dir */
   207     if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL && uprv_strchr(srcDir,U_FILE_ALT_SEP_CHAR) == NULL){
   208         filename[0] = '.';
   209         filename[1] = U_FILE_SEP_CHAR;
   210         uprv_strcpy(filename+2,srcDir);
   211     }else{
   212         uprv_strcpy(filename, srcDir);
   213     }
   215     basename=filename+uprv_strlen(filename);
   216     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
   217         *basename++=U_FILE_SEP_CHAR;
   218     }
   220     /* initialize */
   221     init();
   223     /* process the file */
   224     uprv_strcpy(basename,inputFileName);
   225     parseMappings(filename,FALSE, &errorCode);
   226     if(U_FAILURE(errorCode)) {
   227         fprintf(stderr, "Could not open file %s for reading. Error: %s \n", filename, u_errorName(errorCode));
   228         return errorCode;
   229     }
   231     if(options[NORMALIZE].doesOccur){ /* this option might be set by @normalize;; in the source file */
   232         /* set up directory for NormalizationCorrections.txt */
   233         uprv_strcpy(filename,icuUniDataDir);
   234         basename=filename+uprv_strlen(filename);
   235         if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
   236             *basename++=U_FILE_SEP_CHAR;
   237         }
   239         *basename++=U_FILE_SEP_CHAR;
   240         uprv_strcpy(basename,NORM_CORRECTIONS_FILE_NAME);
   242         parseNormalizationCorrections(filename,&errorCode);
   243         if(U_FAILURE(errorCode)){
   244             fprintf(stderr,"Could not open file %s for reading \n", filename);
   245             return errorCode;
   246         }
   247         sprepOptions |= _SPREP_NORMALIZATION_ON;
   248     }
   250     if(options[CHECK_BIDI].doesOccur){ /* this option might be set by @check-bidi;; in the source file */
   251         sprepOptions |= _SPREP_CHECK_BIDI_ON;
   252     }
   254     setOptions(sprepOptions);
   256     /* process parsed data */
   257     if(U_SUCCESS(errorCode)) {
   258         /* write the data file */
   259         generateData(destDir, bundleName);
   261         cleanUpData();
   262     }
   264     uprv_free(filename);
   266     u_cleanup();
   268 #endif
   270     return errorCode;
   271 }
   273 #if !UCONFIG_NO_IDNA
   275 static void U_CALLCONV
   276 normalizationCorrectionsLineFn(void *context,
   277                     char *fields[][2], int32_t fieldCount,
   278                     UErrorCode *pErrorCode) {
   279     uint32_t mapping[40];
   280     char *end, *s;
   281     uint32_t code;
   282     int32_t length;
   283     UVersionInfo version;
   284     UVersionInfo thisVersion;
   286     /* get the character code, field 0 */
   287     code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
   288     if(U_FAILURE(*pErrorCode)) {
   289         fprintf(stderr, "gensprep: error parsing NormalizationCorrections.txt mapping at %s\n", fields[0][0]);
   290         exit(*pErrorCode);
   291     }
   292     /* Original (erroneous) decomposition */
   293     s = fields[1][0];
   295     /* parse the mapping string */
   296     length=u_parseCodePoints(s, mapping, sizeof(mapping)/4, pErrorCode);
   298     /* ignore corrected decomposition */
   300     u_versionFromString(version,fields[3][0] );
   301     u_versionFromString(thisVersion, "3.2.0");
   305     if(U_FAILURE(*pErrorCode)) {
   306         fprintf(stderr, "gensprep error parsing NormalizationCorrections.txt of U+%04lx - %s\n",
   307                 (long)code, u_errorName(*pErrorCode));
   308         exit(*pErrorCode);
   309     }
   311     /* store the mapping */
   312     if( version[0] > thisVersion[0] || 
   313         ((version[0]==thisVersion[0]) && (version[1] > thisVersion[1]))
   314         ){
   315         storeMapping(code,mapping, length, USPREP_MAP, pErrorCode);
   316     }
   317     setUnicodeVersionNC(version);
   318 }
   320 static void
   321 parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
   322     char *fields[4][2];
   324     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   325         return;
   326     }
   328     u_parseDelimitedFile(filename, ';', fields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);
   330     /* fprintf(stdout,"Number of code points that have NormalizationCorrections mapping with length >1 : %i\n",len); */
   332     if(U_FAILURE(*pErrorCode) && ( *pErrorCode!=U_FILE_ACCESS_ERROR)) {
   333         fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
   334         exit(*pErrorCode);
   335     }
   336 }
   338 static void U_CALLCONV
   339 strprepProfileLineFn(void *context,
   340               char *fields[][2], int32_t fieldCount,
   341               UErrorCode *pErrorCode) {
   342     uint32_t mapping[40];
   343     char *end, *map;
   344     uint32_t code;
   345     int32_t length;
   346    /*UBool* mapWithNorm = (UBool*) context;*/
   347     const char* typeName;
   348     uint32_t rangeStart=0,rangeEnd =0;
   349     const char* filename = (const char*) context;
   350     const char *s;
   352     s = u_skipWhitespace(fields[0][0]);
   353     if (*s == '@') {
   354         /* special directive */
   355         s++;
   356         length = fields[0][1] - s;
   357         if (length >= NORMALIZE_DIRECTIVE_LEN
   358             && uprv_strncmp(s, NORMALIZE_DIRECTIVE, NORMALIZE_DIRECTIVE_LEN) == 0) {
   359             options[NORMALIZE].doesOccur = TRUE;
   360             return;
   361         }
   362         else if (length >= CHECK_BIDI_DIRECTIVE_LEN
   363             && uprv_strncmp(s, CHECK_BIDI_DIRECTIVE, CHECK_BIDI_DIRECTIVE_LEN) == 0) {
   364             options[CHECK_BIDI].doesOccur = TRUE;
   365             return;
   366         }
   367         else {
   368             fprintf(stderr, "gensprep error parsing a directive %s.", fields[0][0]);
   369         }
   370     }
   372     typeName = fields[2][0];
   373     map = fields[1][0];
   375     if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
   377         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
   378         if(U_FAILURE(*pErrorCode)){
   379             fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
   380             return;
   381         }
   383         /* store the range */
   384         storeRange(rangeStart,rangeEnd,USPREP_UNASSIGNED, pErrorCode);
   386     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
   388         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
   389         if(U_FAILURE(*pErrorCode)){
   390             fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
   391             return;
   392         }
   394         /* store the range */
   395         storeRange(rangeStart,rangeEnd,USPREP_PROHIBITED, pErrorCode);
   397     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
   399         /* get the character code, field 0 */
   400         code=(uint32_t)uprv_strtoul(s, &end, 16);
   401         if(end<=s || end!=fields[0][1]) {
   402             fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
   403             *pErrorCode=U_PARSE_ERROR;
   404             exit(U_PARSE_ERROR);
   405         }
   407         /* parse the mapping string */
   408         length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
   410         /* store the mapping */
   411         storeMapping(code,mapping, length,USPREP_MAP, pErrorCode);
   413     }else{
   414         *pErrorCode = U_INVALID_FORMAT_ERROR;
   415     }
   417     if(U_FAILURE(*pErrorCode)) {
   418         fprintf(stderr, "gensprep error parsing  %s line %s at %s. Error: %s\n",filename,
   419                fields[0][0],fields[2][0],u_errorName(*pErrorCode));
   420         exit(*pErrorCode);
   421     }
   423 }
   425 static void
   426 parseMappings(const char *filename, UBool reportError, UErrorCode *pErrorCode) {
   427     char *fields[3][2];
   429     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   430         return;
   431     }
   433     u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
   435     /*fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);*/
   437     if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
   438         fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
   439         exit(*pErrorCode);
   440     }
   441 }
   444 #endif /* #if !UCONFIG_NO_IDNA */
   446 /*
   447  * Hey, Emacs, please set the following:
   448  *
   449  * Local Variables:
   450  * indent-tabs-mode: nil
   451  * End:
   452  *
   453  */

mercurial