intl/icu/source/common/dictionarydata.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 *******************************************************************************
     3 * Copyright (C) 2013, International Business Machines
     4 * Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 * dictionarydata.cpp
     7 *
     8 * created on: 2012may31
     9 * created by: Markus W. Scherer & Maxime Serrano
    10 */
    12 #include "dictionarydata.h"
    13 #include "unicode/ucharstrie.h"
    14 #include "unicode/bytestrie.h"
    15 #include "unicode/udata.h"
    16 #include "cmemory.h"
    18 #if !UCONFIG_NO_BREAK_ITERATION
    20 U_NAMESPACE_BEGIN
    22 const int32_t  DictionaryData::TRIE_TYPE_BYTES = 0;
    23 const int32_t  DictionaryData::TRIE_TYPE_UCHARS = 1;
    24 const int32_t  DictionaryData::TRIE_TYPE_MASK = 7;
    25 const int32_t  DictionaryData::TRIE_HAS_VALUES = 8;
    27 const int32_t  DictionaryData::TRANSFORM_NONE = 0;
    28 const int32_t  DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;
    29 const int32_t  DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;
    30 const int32_t  DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;
    32 DictionaryMatcher::~DictionaryMatcher() {
    33 }
    35 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
    36     udata_close(file);
    37 }
    39 int32_t UCharsDictionaryMatcher::getType() const {
    40     return DictionaryData::TRIE_TYPE_UCHARS;
    41 }
    43 int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
    44     UCharsTrie uct(characters);
    45     UChar32 c = utext_next32(text);
    46     if (c < 0) {
    47         return 0;
    48     }
    49     UStringTrieResult result = uct.first(c);
    50     int32_t numChars = 1;
    51     count = 0;
    52     for (;;) {
    53         if (USTRINGTRIE_HAS_VALUE(result)) {
    54             if (count < limit) {
    55                 if (values != NULL) {
    56                     values[count] = uct.getValue();
    57                 }
    58                 lengths[count++] = numChars;
    59             }
    60             if (result == USTRINGTRIE_FINAL_VALUE) {
    61                 break;
    62             }
    63         }
    64         else if (result == USTRINGTRIE_NO_MATCH) {
    65             break;
    66         }
    68         // TODO: why do we have a text limit if the UText knows its length?
    69         if (numChars >= maxLength) {
    70             break;
    71         }
    73         c = utext_next32(text);
    74         if (c < 0) {
    75             break;
    76         }
    77         ++numChars;
    78         result = uct.next(c);
    79     }
    80     return numChars;
    81 }
    83 BytesDictionaryMatcher::~BytesDictionaryMatcher() {
    84     udata_close(file);
    85 }
    87 UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
    88     if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
    89         if (c == 0x200D) {
    90             return 0xFF;
    91         } else if (c == 0x200C) {
    92             return 0xFE;
    93         }
    94         int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
    95         if (delta < 0 || 0xFD < delta) {
    96             return U_SENTINEL;
    97         }
    98         return (UChar32)delta;
    99     }
   100     return c;
   101 }
   103 int32_t BytesDictionaryMatcher::getType() const {
   104     return DictionaryData::TRIE_TYPE_BYTES;
   105 }
   107 int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
   108     BytesTrie bt(characters);
   109     UChar32 c = utext_next32(text);
   110     if (c < 0) {
   111         return 0;
   112     }
   113     UStringTrieResult result = bt.first(transform(c));
   114     int32_t numChars = 1;
   115     count = 0;
   116     for (;;) {
   117         if (USTRINGTRIE_HAS_VALUE(result)) {
   118             if (count < limit) {
   119                 if (values != NULL) {
   120                     values[count] = bt.getValue();
   121             }
   122                 lengths[count++] = numChars;
   123             }
   124             if (result == USTRINGTRIE_FINAL_VALUE) {
   125                 break;
   126             }
   127         }
   128         else if (result == USTRINGTRIE_NO_MATCH) {
   129             break;
   130         }
   132         // TODO: why do we have a text limit if the UText knows its length?
   133         if (numChars >= maxLength) {
   134             break;
   135         }
   137         c = utext_next32(text);
   138         if (c < 0) {
   139             break;
   140         }
   141         ++numChars;
   142         result = bt.next(transform(c));
   143     }
   144     return numChars;
   145 }
   148 U_NAMESPACE_END
   150 U_NAMESPACE_USE
   152 U_CAPI int32_t U_EXPORT2
   153 udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
   154            void *outData, UErrorCode *pErrorCode) {
   155     const UDataInfo *pInfo;
   156     int32_t headerSize;
   157     const uint8_t *inBytes;
   158     uint8_t *outBytes;
   159     const int32_t *inIndexes;
   160     int32_t indexes[DictionaryData::IX_COUNT];
   161     int32_t i, offset, size;
   163     headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   164     if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
   165     pInfo = (const UDataInfo *)((const char *)inData + 4);
   166     if (!(pInfo->dataFormat[0] == 0x44 && 
   167           pInfo->dataFormat[1] == 0x69 && 
   168           pInfo->dataFormat[2] == 0x63 && 
   169           pInfo->dataFormat[3] == 0x74 && 
   170           pInfo->formatVersion[0] == 1)) {
   171         udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
   172                          pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
   173         *pErrorCode = U_UNSUPPORTED_ERROR;
   174         return 0;
   175     }
   177     inBytes = (const uint8_t *)inData + headerSize;
   178     outBytes = (uint8_t *)outData + headerSize;
   180     inIndexes = (const int32_t *)inBytes;
   181     if (length >= 0) {
   182         length -= headerSize;
   183         if (length < (int32_t)(sizeof(indexes))) {
   184             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
   185             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
   186             return 0;
   187         }
   188     }
   190     for (i = 0; i < DictionaryData::IX_COUNT; i++) {
   191         indexes[i] = udata_readInt32(ds, inIndexes[i]);
   192     }
   194     size = indexes[DictionaryData::IX_TOTAL_SIZE];
   196     if (length >= 0) {
   197         if (length < size) {
   198             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
   199             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
   200             return 0;
   201         }
   203         if (inBytes != outBytes) {
   204             uprv_memcpy(outBytes, inBytes, size);
   205         }
   207         offset = 0;
   208         ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
   209         offset = (int32_t)sizeof(indexes);
   210         int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
   211         int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
   213         if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
   214             ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
   215         } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
   216             // nothing to do
   217         } else {
   218             udata_printError(ds, "udict_swap(): unknown trie type!\n");
   219             *pErrorCode = U_UNSUPPORTED_ERROR;
   220             return 0;
   221         }
   223         // these next two sections are empty in the current format,
   224         // but may be used later.
   225         offset = nextOffset;
   226         nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
   227         offset = nextOffset;
   228         nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
   229         offset = nextOffset;
   230     }
   231     return headerSize + size;
   232 }
   233 #endif

mercurial