/*
*******************************************************************************
*
*   Copyright (C) 2005-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  icupkg.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005jul29
*   created by: Markus W. Scherer
*
*   This tool operates on ICU data (.dat package) files.
*   It takes one as input, or creates an empty one, and can remove, add, and
*   extract data pieces according to command-line options.
*   At the same time, it swaps each piece to a consistent set of platform
*   properties as desired.
*   Useful as an install-time tool for shipping only one flavor of ICU data
*   and preparing data files for the target platform.
*   Also for customizing ICU data (pruning, augmenting, replacing) and for
*   taking it apart.
*   Subsumes functionality and implementation code from
*   gencmn, decmn, and icuswap tools.
*   Will not work with data DLLs (shared libraries).
*/

#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "cstring.h"
#include "toolutil.h"
#include "uoptions.h"
#include "uparse.h"
#include "filestrm.h"
#include "package.h"
#include "pkg_icu.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

U_NAMESPACE_USE

// TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching?
michael@0: michael@0: // general definitions ----------------------------------------------------- *** michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: // main() ------------------------------------------------------------------ *** michael@0: michael@0: static void michael@0: printUsage(const char *pname, UBool isHelp) { michael@0: FILE *where=isHelp ? stdout : stderr; michael@0: michael@0: fprintf(where, michael@0: "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n" michael@0: "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n" michael@0: "\t[-s path] [-d path] [-w] [-m mode]\n" michael@0: "\t[--auto_toc_prefix] [--auto_toc_prefix_with_type] [--toc_prefix]\n" michael@0: "\tinfilename [outfilename]\n", michael@0: isHelp ? 'U' : 'u', pname); michael@0: if(isHelp) { michael@0: fprintf(where, michael@0: "\n" michael@0: "Read the input ICU .dat package file, modify it according to the options,\n" michael@0: "swap it to the desired platform properties (charset & endianness),\n" michael@0: "and optionally write the resulting ICU .dat package to the output file.\n" michael@0: "Items are removed, then added, then extracted and listed.\n" michael@0: "An ICU .dat package is written if items are removed or added,\n" michael@0: "or if the input and output filenames differ,\n" michael@0: "or if the --writepkg (-w) option is set.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "If the input filename is \"new\" then an empty package is created.\n" michael@0: "If the output filename is missing, then it is automatically generated\n" michael@0: "from the input filename: If the input filename ends with an l, b, or e\n" michael@0: "matching its platform properties, then the output filename will\n" michael@0: "contain the letter from the -t (--type) option.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "This tool can also be used to just swap a single ICU data file, replacing 
the\n" michael@0: "former icuswap tool. For this mode, provide the infilename (and optional\n" michael@0: "outfilename) for a non-package ICU data file.\n" michael@0: "Allowed options include -t, -w, -s and -d.\n" michael@0: "The filenames can be absolute, or relative to the source/dest dir paths.\n" michael@0: "Other options are not allowed in this mode.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "Options:\n" michael@0: "\t(Only the last occurrence of an option is used.)\n" michael@0: "\n" michael@0: "\t-h or -? or --help print this message and exit\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\t-tl or --type l output for little-endian/ASCII charset family\n" michael@0: "\t-tb or --type b output for big-endian/ASCII charset family\n" michael@0: "\t-te or --type e output for big-endian/EBCDIC charset family\n" michael@0: "\t The output type defaults to the input type.\n" michael@0: "\n" michael@0: "\t-c or --copyright include the ICU copyright notice\n" michael@0: "\t-C comment or --comment comment include a comment string\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\t-a list or --add list add items to the package\n" michael@0: "\t-r list or --remove list remove items from the package\n" michael@0: "\t-x list or --extract list extract items from the package\n" michael@0: "\tThe list can be a single item's filename,\n" michael@0: "\tor a .txt filename with a list of item filenames,\n" michael@0: "\tor an ICU .dat package filename.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\t-w or --writepkg write the output package even if no items are removed\n" michael@0: "\t or added (e.g., for only swapping the data)\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\t-m mode or --matchmode mode set the matching mode for item names with\n" michael@0: "\t wildcards\n" michael@0: "\t noslash: the '*' wildcard does not match the '/' tree separator\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\tIn 
the .dat package, the Table of Contents (ToC) contains an entry\n" michael@0: "\tfor each item of the form prefix/tree/itemname .\n" michael@0: "\tThe prefix normally matches the package basename, and icupkg checks that,\n" michael@0: "\tbut this is not necessary when ICU need not find and load the package by filename.\n" michael@0: "\tICU package names end with the platform type letter, and thus differ\n" michael@0: "\tbetween platform types. This is not required for user data packages.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\t--auto_toc_prefix automatic ToC entries prefix\n" michael@0: "\t Uses the prefix of the first entry of the\n" michael@0: "\t input package, rather than its basename.\n" michael@0: "\t Requires a non-empty input package.\n" michael@0: "\t--auto_toc_prefix_with_type auto_toc_prefix + adjust platform type\n" michael@0: "\t Same as auto_toc_prefix but also checks that\n" michael@0: "\t the prefix ends with the input platform\n" michael@0: "\t type letter, and modifies it to the output\n" michael@0: "\t platform type letter.\n" michael@0: "\t At most one of the auto_toc_prefix options\n" michael@0: "\t can be used at a time.\n" michael@0: "\t--toc_prefix prefix ToC prefix to be used in the output package\n" michael@0: "\t Overrides the package basename\n" michael@0: "\t and --auto_toc_prefix.\n" michael@0: "\t Cannot be combined with --auto_toc_prefix_with_type.\n"); michael@0: /* michael@0: * Usage text columns, starting after the initial TAB. michael@0: * 1 2 3 4 5 6 7 8 michael@0: * 901234567890123456789012345678901234567890123456789012345678901234567890 michael@0: */ michael@0: fprintf(where, michael@0: "\n" michael@0: "\tList file syntax: Items are listed on one or more lines and separated\n" michael@0: "\tby whitespace (space+tab).\n" michael@0: "\tComments begin with # and are ignored. 
Empty lines are ignored.\n" michael@0: "\tLines where the first non-whitespace character is one of %s\n" michael@0: "\tare also ignored, to reserve for future syntax.\n", michael@0: U_PKG_RESERVED_CHARS); michael@0: fprintf(where, michael@0: "\tItems for removal or extraction may contain a single '*' wildcard\n" michael@0: "\tcharacter. The '*' matches zero or more characters.\n" michael@0: "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n" michael@0: "\tdoes not match '/'.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\tItems must be listed relative to the package, and the --sourcedir or\n" michael@0: "\tthe --destdir path will be prepended.\n" michael@0: "\tThe paths are only prepended to item filenames while adding or\n" michael@0: "\textracting items, not to ICU .dat package or list filenames.\n" michael@0: "\t\n" michael@0: "\tPaths may contain '/' instead of the platform's\n" michael@0: "\tfile separator character, and are converted as appropriate.\n"); michael@0: fprintf(where, michael@0: "\n" michael@0: "\t-s path or --sourcedir path directory for the --add items\n" michael@0: "\t-d path or --destdir path directory for the --extract items\n" michael@0: "\n" michael@0: "\t-l or --list list the package items\n" michael@0: "\t (after modifying the package)\n" michael@0: "\t to stdout or to output list file\n" michael@0: "\t-o path or --outlist path path/filename for the --list output\n"); michael@0: } michael@0: } michael@0: michael@0: static UOption options[]={ michael@0: UOPTION_HELP_H, michael@0: UOPTION_HELP_QUESTION_MARK, michael@0: UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG), michael@0: michael@0: UOPTION_COPYRIGHT, michael@0: UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG), michael@0: michael@0: UOPTION_SOURCEDIR, michael@0: UOPTION_DESTDIR, michael@0: michael@0: UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG), michael@0: michael@0: UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG), michael@0: michael@0: UOPTION_DEF("add", 'a', 
UOPT_REQUIRES_ARG), michael@0: UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG), michael@0: UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG), michael@0: michael@0: UOPTION_DEF("list", 'l', UOPT_NO_ARG), michael@0: UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG), michael@0: michael@0: UOPTION_DEF("auto_toc_prefix", '\1', UOPT_NO_ARG), michael@0: UOPTION_DEF("auto_toc_prefix_with_type", '\1', UOPT_NO_ARG), michael@0: UOPTION_DEF("toc_prefix", '\1', UOPT_REQUIRES_ARG) michael@0: }; michael@0: michael@0: enum { michael@0: OPT_HELP_H, michael@0: OPT_HELP_QUESTION_MARK, michael@0: OPT_OUT_TYPE, michael@0: michael@0: OPT_COPYRIGHT, michael@0: OPT_COMMENT, michael@0: michael@0: OPT_SOURCEDIR, michael@0: OPT_DESTDIR, michael@0: michael@0: OPT_WRITEPKG, michael@0: michael@0: OPT_MATCHMODE, michael@0: michael@0: OPT_ADD_LIST, michael@0: OPT_REMOVE_LIST, michael@0: OPT_EXTRACT_LIST, michael@0: michael@0: OPT_LIST_ITEMS, michael@0: OPT_LIST_FILE, michael@0: michael@0: OPT_AUTO_TOC_PREFIX, michael@0: OPT_AUTO_TOC_PREFIX_WITH_TYPE, michael@0: OPT_TOC_PREFIX, michael@0: michael@0: OPT_COUNT michael@0: }; michael@0: michael@0: static UBool michael@0: isPackageName(const char *filename) { michael@0: int32_t len; michael@0: michael@0: len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */ michael@0: return (UBool)(len>0 && 0==strcmp(filename+len, ".dat")); michael@0: } michael@0: /* michael@0: This line is required by MinGW because it incorrectly globs the arguments. 
michael@0: So when \* is used, it turns into a list of files instead of a literal "*" michael@0: */ michael@0: int _CRT_glob = 0; michael@0: michael@0: extern int michael@0: main(int argc, char *argv[]) { michael@0: const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment; michael@0: char outType; michael@0: UBool isHelp, isModified, isPackage; michael@0: int result = 0; michael@0: michael@0: Package *pkg, *listPkg, *addListPkg; michael@0: michael@0: U_MAIN_INIT_ARGS(argc, argv); michael@0: michael@0: /* get the program basename */ michael@0: pname=findBasename(argv[0]); michael@0: michael@0: argc=u_parseArgs(argc, argv, LENGTHOF(options), options); michael@0: isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; michael@0: if(isHelp) { michael@0: printUsage(pname, TRUE); michael@0: return U_ZERO_ERROR; michael@0: } michael@0: michael@0: pkg=new Package; michael@0: if(pkg==NULL) { michael@0: fprintf(stderr, "icupkg: not enough memory\n"); michael@0: return U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: isModified=FALSE; michael@0: michael@0: int autoPrefix=0; michael@0: if(options[OPT_AUTO_TOC_PREFIX].doesOccur) { michael@0: pkg->setAutoPrefix(); michael@0: ++autoPrefix; michael@0: } michael@0: if(options[OPT_AUTO_TOC_PREFIX_WITH_TYPE].doesOccur) { michael@0: if(options[OPT_TOC_PREFIX].doesOccur) { michael@0: fprintf(stderr, "icupkg: --auto_toc_prefix_with_type and also --toc_prefix\n"); michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: pkg->setAutoPrefixWithType(); michael@0: ++autoPrefix; michael@0: } michael@0: if(argc<2 || 31) { michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: if(options[OPT_SOURCEDIR].doesOccur) { michael@0: sourcePath=options[OPT_SOURCEDIR].value; michael@0: } else { michael@0: // work relative to the current working directory michael@0: sourcePath=NULL; michael@0: 
} michael@0: if(options[OPT_DESTDIR].doesOccur) { michael@0: destPath=options[OPT_DESTDIR].value; michael@0: } else { michael@0: // work relative to the current working directory michael@0: destPath=NULL; michael@0: } michael@0: michael@0: if(0==strcmp(argv[1], "new")) { michael@0: if(autoPrefix) { michael@0: fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but no input package\n"); michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: inFilename=NULL; michael@0: isPackage=TRUE; michael@0: } else { michael@0: inFilename=argv[1]; michael@0: if(isPackageName(inFilename)) { michael@0: pkg->readPackage(inFilename); michael@0: isPackage=TRUE; michael@0: } else { michael@0: /* swap a single file (icuswap replacement) rather than work on a package */ michael@0: pkg->addFile(sourcePath, inFilename); michael@0: isPackage=FALSE; michael@0: } michael@0: } michael@0: michael@0: if(argc>=3) { michael@0: outFilename=argv[2]; michael@0: if(0!=strcmp(argv[1], argv[2])) { michael@0: isModified=TRUE; michael@0: } michael@0: } else if(isPackage) { michael@0: outFilename=NULL; michael@0: } else /* !isPackage */ { michael@0: outFilename=inFilename; michael@0: isModified=(UBool)(sourcePath!=destPath); michael@0: } michael@0: michael@0: /* parse the output type option */ michael@0: if(options[OPT_OUT_TYPE].doesOccur) { michael@0: const char *type=options[OPT_OUT_TYPE].value; michael@0: if(type[0]==0 || type[1]!=0) { michael@0: /* the type must be exactly one letter */ michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: outType=type[0]; michael@0: switch(outType) { michael@0: case 'l': michael@0: case 'b': michael@0: case 'e': michael@0: break; michael@0: default: michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: /* michael@0: * Set the isModified flag if the output type differs from the michael@0: * input package 
type. michael@0: * If we swap a single file, just assume that we are modifying it. michael@0: * The Package class does not give us access to the item and its type. michael@0: */ michael@0: isModified|=(UBool)(!isPackage || outType!=pkg->getInType()); michael@0: } else if(isPackage) { michael@0: outType=pkg->getInType(); // default to input type michael@0: } else /* !isPackage: swap single file */ { michael@0: outType=0; /* tells extractItem() to not swap */ michael@0: } michael@0: michael@0: if(options[OPT_WRITEPKG].doesOccur) { michael@0: isModified=TRUE; michael@0: } michael@0: michael@0: if(!isPackage) { michael@0: /* michael@0: * icuswap tool replacement: Only swap a single file. michael@0: * Check that irrelevant options are not set. michael@0: */ michael@0: if( options[OPT_COMMENT].doesOccur || michael@0: options[OPT_COPYRIGHT].doesOccur || michael@0: options[OPT_MATCHMODE].doesOccur || michael@0: options[OPT_REMOVE_LIST].doesOccur || michael@0: options[OPT_ADD_LIST].doesOccur || michael@0: options[OPT_EXTRACT_LIST].doesOccur || michael@0: options[OPT_LIST_ITEMS].doesOccur michael@0: ) { michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: if(isModified) { michael@0: pkg->extractItem(destPath, outFilename, 0, outType); michael@0: } michael@0: michael@0: delete pkg; michael@0: return result; michael@0: } michael@0: michael@0: /* Work with a package. 
*/ michael@0: michael@0: if(options[OPT_COMMENT].doesOccur) { michael@0: outComment=options[OPT_COMMENT].value; michael@0: } else if(options[OPT_COPYRIGHT].doesOccur) { michael@0: outComment=U_COPYRIGHT_STRING; michael@0: } else { michael@0: outComment=NULL; michael@0: } michael@0: michael@0: if(options[OPT_MATCHMODE].doesOccur) { michael@0: if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) { michael@0: pkg->setMatchMode(Package::MATCH_NOSLASH); michael@0: } else { michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: michael@0: /* remove items */ michael@0: if(options[OPT_REMOVE_LIST].doesOccur) { michael@0: listPkg=new Package(); michael@0: if(listPkg==NULL) { michael@0: fprintf(stderr, "icupkg: not enough memory\n"); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: if(readList(NULL, options[OPT_REMOVE_LIST].value, FALSE, listPkg)) { michael@0: pkg->removeItems(*listPkg); michael@0: delete listPkg; michael@0: isModified=TRUE; michael@0: } else { michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * add items michael@0: * use a separate Package so that its memory and items stay around michael@0: * as long as the main Package michael@0: */ michael@0: addListPkg=NULL; michael@0: if(options[OPT_ADD_LIST].doesOccur) { michael@0: addListPkg=new Package(); michael@0: if(addListPkg==NULL) { michael@0: fprintf(stderr, "icupkg: not enough memory\n"); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: if(readList(sourcePath, options[OPT_ADD_LIST].value, TRUE, addListPkg)) { michael@0: pkg->addItems(*addListPkg); michael@0: // delete addListPkg; deferred until after writePackage() michael@0: isModified=TRUE; michael@0: } else { michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: michael@0: /* extract items */ 
michael@0: if(options[OPT_EXTRACT_LIST].doesOccur) { michael@0: listPkg=new Package(); michael@0: if(listPkg==NULL) { michael@0: fprintf(stderr, "icupkg: not enough memory\n"); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: if(readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE, listPkg)) { michael@0: pkg->extractItems(destPath, *listPkg, outType); michael@0: delete listPkg; michael@0: } else { michael@0: printUsage(pname, FALSE); michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: michael@0: /* list items */ michael@0: if(options[OPT_LIST_ITEMS].doesOccur) { michael@0: int32_t i; michael@0: if (options[OPT_LIST_FILE].doesOccur) { michael@0: FileStream *out; michael@0: out = T_FileStream_open(options[OPT_LIST_FILE].value, "w"); michael@0: if (out != NULL) { michael@0: for(i=0; igetItemCount(); ++i) { michael@0: T_FileStream_writeLine(out, pkg->getItem(i)->name); michael@0: T_FileStream_writeLine(out, "\n"); michael@0: } michael@0: T_FileStream_close(out); michael@0: } else { michael@0: return U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } else { michael@0: for(i=0; igetItemCount(); ++i) { michael@0: fprintf(stdout, "%s\n", pkg->getItem(i)->name); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* check dependencies between items */ michael@0: if(!pkg->checkDependencies()) { michael@0: /* some dependencies are not fulfilled */ michael@0: return U_MISSING_RESOURCE_ERROR; michael@0: } michael@0: michael@0: /* write the output .dat package if there are any modifications */ michael@0: if(isModified) { michael@0: char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary michael@0: michael@0: if(outFilename==NULL || outFilename[0]==0) { michael@0: if(inFilename==NULL || inFilename[0]==0) { michael@0: fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n"); michael@0: exit(U_ILLEGAL_ARGUMENT_ERROR); michael@0: } michael@0: michael@0: 
/* michael@0: * auto-generate a filename: michael@0: * copy the inFilename, michael@0: * and if the last basename character matches the input file's type, michael@0: * then replace it with the output file's type michael@0: */ michael@0: char suffix[6]="?.dat"; michael@0: char *s; michael@0: michael@0: suffix[0]=pkg->getInType(); michael@0: strcpy(outFilenameBuffer, inFilename); michael@0: s=strchr(outFilenameBuffer, 0); michael@0: if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) { michael@0: *(s-5)=outType; michael@0: } michael@0: outFilename=outFilenameBuffer; michael@0: } michael@0: if(options[OPT_TOC_PREFIX].doesOccur) { michael@0: pkg->setPrefix(options[OPT_TOC_PREFIX].value); michael@0: } michael@0: result = writePackageDatFile(outFilename, outComment, NULL, NULL, pkg, outType); michael@0: } michael@0: michael@0: delete addListPkg; michael@0: delete pkg; michael@0: return result; michael@0: } michael@0: michael@0: /* michael@0: * Hey, Emacs, please set the following: michael@0: * michael@0: * Local Variables: michael@0: * indent-tabs-mode: nil michael@0: * End: michael@0: * michael@0: */