1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/gendict/gendict.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,511 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2002-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* File gendict.cpp 1.11 +*/ 1.12 + 1.13 +#include "unicode/utypes.h" 1.14 +#include "unicode/uchar.h" 1.15 +#include "unicode/ucnv.h" 1.16 +#include "unicode/uniset.h" 1.17 +#include "unicode/unistr.h" 1.18 +#include "unicode/uclean.h" 1.19 +#include "unicode/udata.h" 1.20 +#include "unicode/putil.h" 1.21 +#include "unicode/ucharstriebuilder.h" 1.22 +#include "unicode/bytestriebuilder.h" 1.23 +#include "unicode/ucharstrie.h" 1.24 +#include "unicode/bytestrie.h" 1.25 +#include "unicode/ucnv.h" 1.26 +#include "unicode/utf16.h" 1.27 + 1.28 +#include "charstr.h" 1.29 +#include "dictionarydata.h" 1.30 +#include "uoptions.h" 1.31 +#include "unewdata.h" 1.32 +#include "cmemory.h" 1.33 +#include "uassert.h" 1.34 +#include "ucbuf.h" 1.35 +#include "toolutil.h" 1.36 +#include "cstring.h" 1.37 + 1.38 +#include <stdio.h> 1.39 +#include <stdlib.h> 1.40 +#include <string.h> 1.41 + 1.42 +#include "putilimp.h" 1.43 +UDate startTime; 1.44 + 1.45 +static int elapsedTime() { 1.46 + return (int)uprv_floor((uprv_getRawUTCtime()-startTime)/1000.0); 1.47 +} 1.48 + 1.49 +#if U_PLATFORM_IMPLEMENTS_POSIX && !U_PLATFORM_HAS_WIN32_API 1.50 + 1.51 +#include <signal.h> 1.52 +#include <unistd.h> 1.53 + 1.54 +const char *wToolname="gendict"; 1.55 +const char *wOutname="(some file)"; 1.56 + 1.57 +const int firstSeconds = 5; /* seconds between notices*/ 1.58 +const int nextSeconds = 15; /* seconds between notices*/ 1.59 + 1.60 +static void alarm_fn(int /*n*/) { 1.61 + printf("%s: still writing\t%s (%ds)\t...\n", wToolname, wOutname, elapsedTime()); 1.62 + 1.63 + signal(SIGALRM, &alarm_fn); 1.64 + alarm(nextSeconds); // reset the alarm 1.65 +} 1.66 + 1.67 +static void install_watchdog(const char *toolName, const char *outFileName) { 1.68 + wToolname=toolName; 1.69 + wOutname=outFileName; 1.70 + 1.71 + signal(SIGALRM, &alarm_fn); 1.72 + 1.73 + alarm(firstSeconds); // set the alarm 1.74 +} 1.75 + 1.76 +#else 1.77 +static void install_watchdog(const char*, const char*) { 1.78 + // not implemented 1.79 +} 1.80 +#endif 1.81 + 1.82 + 1.83 + 1.84 + 1.85 +U_NAMESPACE_USE 1.86 + 1.87 +static char *progName; 1.88 +static UOption options[]={ 1.89 + UOPTION_HELP_H, /* 0 */ 1.90 + UOPTION_HELP_QUESTION_MARK, /* 1 */ 1.91 + UOPTION_VERBOSE, /* 2 */ 1.92 + UOPTION_ICUDATADIR, /* 4 */ 1.93 + UOPTION_COPYRIGHT, /* 5 */ 1.94 + { "uchars", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 6 */ 1.95 + { "bytes", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 7 */ 1.96 + { "transform", NULL, NULL, NULL, '\1', UOPT_REQUIRES_ARG, 0}, /* 8 */ 1.97 +}; 1.98 + 1.99 +enum arguments { 1.100 + ARG_HELP = 0, 1.101 + ARG_QMARK, 1.102 + ARG_VERBOSE, 1.103 + ARG_ICUDATADIR, 1.104 + ARG_COPYRIGHT, 1.105 + ARG_UCHARS, 1.106 + ARG_BYTES, 1.107 + ARG_TRANSFORM 1.108 +}; 1.109 + 1.110 +// prints out the standard usage method describing command line arguments, 1.111 +// then bails out with the desired exit code 1.112 +static void usageAndDie(UErrorCode retCode) { 1.113 + fprintf((U_SUCCESS(retCode) ? stdout : stderr), "Usage: %s -trietype [-options] input-dictionary-file output-file\n", progName); 1.114 + fprintf((U_SUCCESS(retCode) ? stdout : stderr), 1.115 + "\tRead in a word list and write out a string trie dictionary\n" 1.116 + "options:\n" 1.117 + "\t-h or -? or --help this usage text\n" 1.118 + "\t-V or --version show a version message\n" 1.119 + "\t-c or --copyright include a copyright notice\n" 1.120 + "\t-v or --verbose turn on verbose output\n" 1.121 + "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" // TODO: figure out if we need this option 1.122 + "\t followed by path, defaults to %s\n" 1.123 + "\t--uchars output a UCharsTrie (mutually exclusive with -b!)\n" 1.124 + "\t--bytes output a BytesTrie (mutually exclusive with -u!)\n" 1.125 + "\t--transform the kind of transform to use (eg --transform offset-40A3,\n" 1.126 + "\t which specifies an offset transform with constant 0x40A3)\n", 1.127 + u_getDataDirectory()); 1.128 + exit(retCode); 1.129 +} 1.130 + 1.131 + 1.132 +/* UDataInfo cf. udata.h */ 1.133 +static UDataInfo dataInfo = { 1.134 + sizeof(UDataInfo), 1.135 + 0, 1.136 + 1.137 + U_IS_BIG_ENDIAN, 1.138 + U_CHARSET_FAMILY, 1.139 + U_SIZEOF_UCHAR, 1.140 + 0, 1.141 + 1.142 + { 0x44, 0x69, 0x63, 0x74 }, /* "Dict" */ 1.143 + { 1, 0, 0, 0 }, /* format version */ 1.144 + { 0, 0, 0, 0 } /* data version */ 1.145 +}; 1.146 + 1.147 +#if !UCONFIG_NO_BREAK_ITERATION 1.148 + 1.149 +// A wrapper for both BytesTrieBuilder and UCharsTrieBuilder. 1.150 +// may want to put this somewhere in ICU, as it could be useful outside 1.151 +// of this tool? 1.152 +class DataDict { 1.153 +private: 1.154 + BytesTrieBuilder *bt; 1.155 + UCharsTrieBuilder *ut; 1.156 + UChar32 transformConstant; 1.157 + int32_t transformType; 1.158 +public: 1.159 + // constructs a new data dictionary. if there is an error, 1.160 + // it will be returned in status 1.161 + // isBytesTrie != 0 will produce a BytesTrieBuilder, 1.162 + // isBytesTrie == 0 will produce a UCharsTrieBuilder 1.163 + DataDict(UBool isBytesTrie, UErrorCode &status) : bt(NULL), ut(NULL), 1.164 + transformConstant(0), transformType(DictionaryData::TRANSFORM_NONE) { 1.165 + if (isBytesTrie) { 1.166 + bt = new BytesTrieBuilder(status); 1.167 + } else { 1.168 + ut = new UCharsTrieBuilder(status); 1.169 + } 1.170 + } 1.171 + 1.172 + ~DataDict() { 1.173 + delete bt; 1.174 + delete ut; 1.175 + } 1.176 + 1.177 +private: 1.178 + char transform(UChar32 c, UErrorCode &status) { 1.179 + if (transformType == DictionaryData::TRANSFORM_TYPE_OFFSET) { 1.180 + if (c == 0x200D) { return (char)0xFF; } 1.181 + else if (c == 0x200C) { return (char)0xFE; } 1.182 + int32_t delta = c - transformConstant; 1.183 + if (delta < 0 || 0xFD < delta) { 1.184 + fprintf(stderr, "Codepoint U+%04lx out of range for --transform offset-%04lx!\n", 1.185 + (long)c, (long)transformConstant); 1.186 + exit(U_ILLEGAL_ARGUMENT_ERROR); // TODO: should return and print the line number 1.187 + } 1.188 + return (char)delta; 1.189 + } else { // no such transform type 1.190 + status = U_INTERNAL_PROGRAM_ERROR; 1.191 + return (char)c; // it should be noted this transform type will not generally work 1.192 + } 1.193 + } 1.194 + 1.195 + void transform(const UnicodeString &word, CharString &buf, UErrorCode &errorCode) { 1.196 + UChar32 c = 0; 1.197 + int32_t len = word.length(); 1.198 + for (int32_t i = 0; i < len; i += U16_LENGTH(c)) { 1.199 + c = word.char32At(i); 1.200 + buf.append(transform(c, errorCode), errorCode); 1.201 + } 1.202 + } 1.203 + 1.204 +public: 1.205 + // sets the desired transformation data. 1.206 + // should be populated from a command line argument 1.207 + // so far the only acceptable format is offset-<hex constant> 1.208 + // eventually others (mask-<hex constant>?) may be enabled 1.209 + // more complex functions may be more difficult 1.210 + void setTransform(const char *t) { 1.211 + if (strncmp(t, "offset-", 7) == 0) { 1.212 + char *end; 1.213 + unsigned long base = uprv_strtoul(t + 7, &end, 16); 1.214 + if (end == (t + 7) || *end != 0 || base > 0x10FF80) { 1.215 + fprintf(stderr, "Syntax for offset value in --transform offset-%s invalid!\n", t + 7); 1.216 + usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); 1.217 + } 1.218 + transformType = DictionaryData::TRANSFORM_TYPE_OFFSET; 1.219 + transformConstant = (UChar32)base; 1.220 + } 1.221 + else { 1.222 + fprintf(stderr, "Invalid transform specified: %s\n", t); 1.223 + usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); 1.224 + } 1.225 + } 1.226 + 1.227 + // add a word to the trie 1.228 + void addWord(const UnicodeString &word, int32_t value, UErrorCode &status) { 1.229 + if (bt) { 1.230 + CharString buf; 1.231 + transform(word, buf, status); 1.232 + bt->add(buf.toStringPiece(), value, status); 1.233 + } 1.234 + if (ut) { ut->add(word, value, status); } 1.235 + } 1.236 + 1.237 + // if we are a bytestrie, give back the StringPiece representing the serialized version of us 1.238 + StringPiece serializeBytes(UErrorCode &status) { 1.239 + return bt->buildStringPiece(USTRINGTRIE_BUILD_SMALL, status); 1.240 + } 1.241 + 1.242 + // if we are a ucharstrie, produce the UnicodeString representing the serialized version of us 1.243 + void serializeUChars(UnicodeString &s, UErrorCode &status) { 1.244 + ut->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, s, status); 1.245 + } 1.246 + 1.247 + int32_t getTransform() { 1.248 + return (int32_t)(transformType | transformConstant); 1.249 + } 1.250 +}; 1.251 +#endif 1.252 + 1.253 +static const UChar LINEFEED_CHARACTER = 0x000A; 1.254 +static const UChar CARRIAGE_RETURN_CHARACTER = 0x000D; 1.255 + 1.256 +static UBool readLine(UCHARBUF *f, UnicodeString &fileLine, IcuToolErrorCode &errorCode) { 1.257 + int32_t lineLength; 1.258 + const UChar *line = ucbuf_readline(f, &lineLength, errorCode); 1.259 + if(line == NULL || errorCode.isFailure()) { return FALSE; } 1.260 + // Strip trailing CR/LF, comments, and spaces. 1.261 + const UChar *comment = u_memchr(line, 0x23, lineLength); // '#' 1.262 + if(comment != NULL) { 1.263 + lineLength = (int32_t)(comment - line); 1.264 + } else { 1.265 + while(lineLength > 0 && (line[lineLength - 1] == CARRIAGE_RETURN_CHARACTER || line[lineLength - 1] == LINEFEED_CHARACTER)) { --lineLength; } 1.266 + } 1.267 + while(lineLength > 0 && u_isspace(line[lineLength - 1])) { --lineLength; } 1.268 + fileLine.setTo(FALSE, line, lineLength); 1.269 + return TRUE; 1.270 +} 1.271 + 1.272 +//---------------------------------------------------------------------------- 1.273 +// 1.274 +// main for gendict 1.275 +// 1.276 +//---------------------------------------------------------------------------- 1.277 +int main(int argc, char **argv) { 1.278 + // 1.279 + // Pick up and check the command line arguments, 1.280 + // using the standard ICU tool utils option handling. 1.281 + // 1.282 + U_MAIN_INIT_ARGS(argc, argv); 1.283 + progName = argv[0]; 1.284 + argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); 1.285 + if(argc<0) { 1.286 + // Unrecognized option 1.287 + fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); 1.288 + usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); 1.289 + } 1.290 + 1.291 + if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { 1.292 + // -? or -h for help. 1.293 + usageAndDie(U_ZERO_ERROR); 1.294 + } 1.295 + 1.296 + UBool verbose = options[ARG_VERBOSE].doesOccur; 1.297 + 1.298 + if (argc < 3) { 1.299 + fprintf(stderr, "input and output file must both be specified.\n"); 1.300 + usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); 1.301 + } 1.302 + const char *outFileName = argv[2]; 1.303 + const char *wordFileName = argv[1]; 1.304 + 1.305 + startTime = uprv_getRawUTCtime(); // initialize start timer 1.306 + // set up the watchdog 1.307 + install_watchdog(progName, outFileName); 1.308 + 1.309 + if (options[ARG_ICUDATADIR].doesOccur) { 1.310 + u_setDataDirectory(options[ARG_ICUDATADIR].value); 1.311 + } 1.312 + 1.313 + const char *copyright = NULL; 1.314 + if (options[ARG_COPYRIGHT].doesOccur) { 1.315 + copyright = U_COPYRIGHT_STRING; 1.316 + } 1.317 + 1.318 + if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { 1.319 + fprintf(stderr, "you must specify exactly one type of trie to output!\n"); 1.320 + usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); 1.321 + } 1.322 + UBool isBytesTrie = options[ARG_BYTES].doesOccur; 1.323 + if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) { 1.324 + fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n"); 1.325 + usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); 1.326 + } 1.327 + 1.328 + IcuToolErrorCode status("gendict/main()"); 1.329 + 1.330 +#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO 1.331 + const char* outDir=NULL; 1.332 + 1.333 + UNewDataMemory *pData; 1.334 + char msg[1024]; 1.335 + UErrorCode tempstatus = U_ZERO_ERROR; 1.336 + 1.337 + /* write message with just the name */ // potential for a buffer overflow here... 1.338 + sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName); 1.339 + fprintf(stderr, "%s\n", msg); 1.340 + 1.341 + /* write the dummy data file */ 1.342 + pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus); 1.343 + udata_writeBlock(pData, msg, strlen(msg)); 1.344 + udata_finish(pData, &tempstatus); 1.345 + return (int)tempstatus; 1.346 + 1.347 +#else 1.348 + // Read in the dictionary source file 1.349 + if (verbose) { printf("Opening file %s...\n", wordFileName); } 1.350 + const char *codepage = "UTF-8"; 1.351 + UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status); 1.352 + if (status.isFailure()) { 1.353 + fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName()); 1.354 + exit(status.reset()); 1.355 + } 1.356 + if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); } 1.357 + DataDict dict(isBytesTrie, status); 1.358 + if (status.isFailure()) { 1.359 + fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName()); 1.360 + exit(status.reset()); 1.361 + } 1.362 + if (options[ARG_TRANSFORM].doesOccur) { 1.363 + dict.setTransform(options[ARG_TRANSFORM].value); 1.364 + } 1.365 + 1.366 + UnicodeString fileLine; 1.367 + if (verbose) { puts("Adding words to dictionary..."); } 1.368 + UBool hasValues = FALSE; 1.369 + UBool hasValuelessContents = FALSE; 1.370 + int lineCount = 0; 1.371 + int wordCount = 0; 1.372 + int minlen = 255; 1.373 + int maxlen = 0; 1.374 + UBool isOk = TRUE; 1.375 + while (readLine(f, fileLine, status)) { 1.376 + lineCount++; 1.377 + if (fileLine.isEmpty()) continue; 1.378 + 1.379 + // Parse word [spaces value]. 1.380 + int32_t keyLen; 1.381 + for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {} 1.382 + if (keyLen == 0) { 1.383 + fprintf(stderr, "Error: no word on line %i!\n", lineCount); 1.384 + isOk = FALSE; 1.385 + continue; 1.386 + } 1.387 + int32_t valueStart; 1.388 + for (valueStart = keyLen; 1.389 + valueStart < fileLine.length() && u_isspace(fileLine[valueStart]); 1.390 + ++valueStart) {} 1.391 + 1.392 + if (keyLen < valueStart) { 1.393 + int32_t valueLength = fileLine.length() - valueStart; 1.394 + if (valueLength > 15) { 1.395 + fprintf(stderr, "Error: value too long on line %i!\n", lineCount); 1.396 + isOk = FALSE; 1.397 + continue; 1.398 + } 1.399 + char s[16]; 1.400 + fileLine.extract(valueStart, valueLength, s, 16, US_INV); 1.401 + char *end; 1.402 + unsigned long value = uprv_strtoul(s, &end, 0); 1.403 + if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) { 1.404 + fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount); 1.405 + isOk = FALSE; 1.406 + continue; 1.407 + } 1.408 + dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status); 1.409 + hasValues = TRUE; 1.410 + wordCount++; 1.411 + if (keyLen < minlen) minlen = keyLen; 1.412 + if (keyLen > maxlen) maxlen = keyLen; 1.413 + } else { 1.414 + dict.addWord(fileLine.tempSubString(0, keyLen), 0, status); 1.415 + hasValuelessContents = TRUE; 1.416 + wordCount++; 1.417 + if (keyLen < minlen) minlen = keyLen; 1.418 + if (keyLen > maxlen) maxlen = keyLen; 1.419 + } 1.420 + 1.421 + if (status.isFailure()) { 1.422 + fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n", 1.423 + status.errorName(), lineCount); 1.424 + exit(status.reset()); 1.425 + } 1.426 + } 1.427 + if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); } 1.428 + 1.429 + if (!isOk && status.isSuccess()) { 1.430 + status.set(U_ILLEGAL_ARGUMENT_ERROR); 1.431 + } 1.432 + if (hasValues && hasValuelessContents) { 1.433 + fprintf(stderr, "warning: file contained both valued and unvalued strings!\n"); 1.434 + } 1.435 + 1.436 + if (verbose) { printf("Serializing data...isBytesTrie? %d\n", isBytesTrie); } 1.437 + int32_t outDataSize; 1.438 + const void *outData; 1.439 + UnicodeString usp; 1.440 + if (isBytesTrie) { 1.441 + StringPiece sp = dict.serializeBytes(status); 1.442 + outDataSize = sp.size(); 1.443 + outData = sp.data(); 1.444 + } else { 1.445 + dict.serializeUChars(usp, status); 1.446 + outDataSize = usp.length() * U_SIZEOF_UCHAR; 1.447 + outData = usp.getBuffer(); 1.448 + } 1.449 + if (status.isFailure()) { 1.450 + fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName()); 1.451 + exit(status.reset()); 1.452 + } 1.453 + if (verbose) { puts("Opening output file..."); } 1.454 + UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status); 1.455 + if (status.isFailure()) { 1.456 + fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName()); 1.457 + exit(status.reset()); 1.458 + } 1.459 + 1.460 + if (verbose) { puts("Writing to output file..."); } 1.461 + int32_t indexes[DictionaryData::IX_COUNT] = { 1.462 + DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0 1.463 + }; 1.464 + int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; 1.465 + indexes[DictionaryData::IX_RESERVED1_OFFSET] = size; 1.466 + indexes[DictionaryData::IX_RESERVED2_OFFSET] = size; 1.467 + indexes[DictionaryData::IX_TOTAL_SIZE] = size; 1.468 + 1.469 + indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS; 1.470 + if (hasValues) { 1.471 + indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES; 1.472 + } 1.473 + 1.474 + indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform(); 1.475 + udata_writeBlock(pData, indexes, sizeof(indexes)); 1.476 + udata_writeBlock(pData, outData, outDataSize); 1.477 + size_t bytesWritten = udata_finish(pData, status); 1.478 + if (status.isFailure()) { 1.479 + fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName()); 1.480 + exit(status.reset()); 1.481 + } 1.482 + 1.483 + if (bytesWritten != (size_t)size) { 1.484 + fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); 1.485 + exit(U_INTERNAL_PROGRAM_ERROR); 1.486 + } 1.487 + 1.488 + printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime()); 1.489 + 1.490 +#ifdef TEST_GENDICT 1.491 + if (isBytesTrie) { 1.492 + BytesTrie::Iterator it(outData, outDataSize, status); 1.493 + while (it.hasNext()) { 1.494 + it.next(status); 1.495 + const StringPiece s = it.getString(); 1.496 + int32_t val = it.getValue(); 1.497 + printf("%s -> %i\n", s.data(), val); 1.498 + } 1.499 + } else { 1.500 + UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); 1.501 + while (it.hasNext()) { 1.502 + it.next(status); 1.503 + const UnicodeString s = it.getString(); 1.504 + int32_t val = it.getValue(); 1.505 + char tmp[1024]; 1.506 + s.extract(0, s.length(), tmp, 1024); 1.507 + printf("%s -> %i\n", tmp, val); 1.508 + } 1.509 + } 1.510 +#endif 1.511 + 1.512 + return 0; 1.513 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1.514 +}