The Tor Browser: intl/icu/source/common/dictionarydata.cpp@fc2d59ddac77

     1 /*

     2 *******************************************************************************

     3 * Copyright (C) 2013, International Business Machines

     4 * Corporation and others.  All Rights Reserved.

     5 *******************************************************************************

     6 * dictionarydata.cpp

7 *

     8 * created on: 2012may31

     9 * created by: Markus W. Scherer & Maxime Serrano

    10 */

    12 #include "dictionarydata.h"

    13 #include "unicode/ucharstrie.h"

    14 #include "unicode/bytestrie.h"

    15 #include "unicode/udata.h"

    16 #include "cmemory.h"

    18 #if !UCONFIG_NO_BREAK_ITERATION

    20 U_NAMESPACE_BEGIN

    22 const int32_t  DictionaryData::TRIE_TYPE_BYTES = 0;

    23 const int32_t  DictionaryData::TRIE_TYPE_UCHARS = 1;

    24 const int32_t  DictionaryData::TRIE_TYPE_MASK = 7;

    25 const int32_t  DictionaryData::TRIE_HAS_VALUES = 8;

    27 const int32_t  DictionaryData::TRANSFORM_NONE = 0;

    28 const int32_t  DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000;

    29 const int32_t  DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000;

    30 const int32_t  DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff;

    32 DictionaryMatcher::~DictionaryMatcher() {

    33 }

    35 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {

    36     udata_close(file);

    37 }

    39 int32_t UCharsDictionaryMatcher::getType() const {

    40     return DictionaryData::TRIE_TYPE_UCHARS;

    41 }

    43 int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {

    44     UCharsTrie uct(characters);

    45     UChar32 c = utext_next32(text);

    46     if (c < 0) {

    47         return 0;

    48     }

    49     UStringTrieResult result = uct.first(c);

    50     int32_t numChars = 1;

    51     count = 0;

    52     for (;;) {

    53         if (USTRINGTRIE_HAS_VALUE(result)) {

    54             if (count < limit) {

    55                 if (values != NULL) {

    56                     values[count] = uct.getValue();

    57                 }

    58                 lengths[count++] = numChars;

    59             }

    60             if (result == USTRINGTRIE_FINAL_VALUE) {

    61                 break;

    62             }

    63         }

    64         else if (result == USTRINGTRIE_NO_MATCH) {

    65             break;

    66         }

    68         // TODO: why do we have a text limit if the UText knows its length?

    69         if (numChars >= maxLength) {

    70             break;

    71         }

    73         c = utext_next32(text);

    74         if (c < 0) {

    75             break;

    76         }

    77         ++numChars;

    78         result = uct.next(c);

    79     }

    80     return numChars;

    81 }

    83 BytesDictionaryMatcher::~BytesDictionaryMatcher() {

    84     udata_close(file);

    85 }

    87 UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {

    88     if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {

    89         if (c == 0x200D) {

    90             return 0xFF;

    91         } else if (c == 0x200C) {

    92             return 0xFE;

    93         }

    94         int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);

    95         if (delta < 0 || 0xFD < delta) {

    96             return U_SENTINEL;

    97         }

    98         return (UChar32)delta;

    99     }

   100     return c;

   101 }

   103 int32_t BytesDictionaryMatcher::getType() const {

   104     return DictionaryData::TRIE_TYPE_BYTES;

   105 }

   107 int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {

   108     BytesTrie bt(characters);

   109     UChar32 c = utext_next32(text);

   110     if (c < 0) {

   111         return 0;

   112     }

   113     UStringTrieResult result = bt.first(transform(c));

   114     int32_t numChars = 1;

   115     count = 0;

   116     for (;;) {

   117         if (USTRINGTRIE_HAS_VALUE(result)) {

   118             if (count < limit) {

   119                 if (values != NULL) {

   120                     values[count] = bt.getValue();

   121             }

   122                 lengths[count++] = numChars;

   123             }

   124             if (result == USTRINGTRIE_FINAL_VALUE) {

   125                 break;

   126             }

   127         }

   128         else if (result == USTRINGTRIE_NO_MATCH) {

   129             break;

   130         }

   132         // TODO: why do we have a text limit if the UText knows its length?

   133         if (numChars >= maxLength) {

   134             break;

   135         }

   137         c = utext_next32(text);

   138         if (c < 0) {

   139             break;

   140         }

   141         ++numChars;

   142         result = bt.next(transform(c));

   143     }

   144     return numChars;

   145 }

   148 U_NAMESPACE_END

   150 U_NAMESPACE_USE

   152 U_CAPI int32_t U_EXPORT2

   153 udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,

   154            void *outData, UErrorCode *pErrorCode) {

   155     const UDataInfo *pInfo;

   156     int32_t headerSize;

   157     const uint8_t *inBytes;

   158     uint8_t *outBytes;

   159     const int32_t *inIndexes;

   160     int32_t indexes[DictionaryData::IX_COUNT];

   161     int32_t i, offset, size;

   163     headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

   164     if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;

   165     pInfo = (const UDataInfo *)((const char *)inData + 4);

   166     if (!(pInfo->dataFormat[0] == 0x44 &&

   167           pInfo->dataFormat[1] == 0x69 &&

   168           pInfo->dataFormat[2] == 0x63 &&

   169           pInfo->dataFormat[3] == 0x74 &&

   170           pInfo->formatVersion[0] == 1)) {

   171         udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",

   172                          pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);

   173         *pErrorCode = U_UNSUPPORTED_ERROR;

   174         return 0;

   175     }

   177     inBytes = (const uint8_t *)inData + headerSize;

   178     outBytes = (uint8_t *)outData + headerSize;

   180     inIndexes = (const int32_t *)inBytes;

   181     if (length >= 0) {

   182         length -= headerSize;

   183         if (length < (int32_t)(sizeof(indexes))) {

   184             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);

   185             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;

   186             return 0;

   187         }

   188     }

   190     for (i = 0; i < DictionaryData::IX_COUNT; i++) {

   191         indexes[i] = udata_readInt32(ds, inIndexes[i]);

   192     }

   194     size = indexes[DictionaryData::IX_TOTAL_SIZE];

   196     if (length >= 0) {

   197         if (length < size) {

   198             udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);

   199             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;

   200             return 0;

   201         }

   203         if (inBytes != outBytes) {

   204             uprv_memcpy(outBytes, inBytes, size);

   205         }

   207         offset = 0;

   208         ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);

   209         offset = (int32_t)sizeof(indexes);

   210         int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;

   211         int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];

   213         if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {

   214             ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);

   215         } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {

   216             // nothing to do

   217         } else {

   218             udata_printError(ds, "udict_swap(): unknown trie type!\n");

   219             *pErrorCode = U_UNSUPPORTED_ERROR;

   220             return 0;

   221         }

   223         // these next two sections are empty in the current format,

   224         // but may be used later.

   225         offset = nextOffset;

   226         nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];

   227         offset = nextOffset;

   228         nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];

   229         offset = nextOffset;

   230     }

   231     return headerSize + size;

   232 }

   233 #endif

The Tor Browser / file revision

intl/icu/source/common/dictionarydata.cpp@fc2d59ddac77

intl/icu/source/common/dictionarydata.cpp