gfx/graphite2/src/inc/SegCache.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/graphite2/src/inc/SegCache.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,316 @@
     1.4 +/*  GRAPHITE2 LICENSING
     1.5 +
     1.6 +    Copyright 2010, SIL International
     1.7 +    All rights reserved.
     1.8 +
     1.9 +    This library is free software; you can redistribute it and/or modify
    1.10 +    it under the terms of the GNU Lesser General Public License as published
    1.11 +    by the Free Software Foundation; either version 2.1 of License, or
    1.12 +    (at your option) any later version.
    1.13 +
    1.14 +    This program is distributed in the hope that it will be useful,
    1.15 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.16 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.17 +    Lesser General Public License for more details.
    1.18 +
    1.19 +    You should also have received a copy of the GNU Lesser General Public
    1.20 +    License along with this library in the file named "LICENSE".
    1.21 +    If not, write to the Free Software Foundation, 51 Franklin Street,
    1.22 +    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
    1.23 +    internet at http://www.fsf.org/licenses/lgpl.html.
    1.24 +
    1.25 +Alternatively, the contents of this file may be used under the terms of the
    1.26 +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
    1.27 +License, as published by the Free Software Foundation, either version 2
    1.28 +of the License or (at your option) any later version.
    1.29 +*/
    1.30 +#pragma once
    1.31 +
    1.32 +#ifndef GRAPHITE2_NSEGCACHE
    1.33 +
    1.34 +#include <graphite2/Segment.h>
    1.35 +#include "inc/Main.h"
    1.36 +#include "inc/Slot.h"
    1.37 +#include "inc/FeatureVal.h"
    1.38 +#include "inc/SegCacheEntry.h"
    1.39 +#include "inc/Segment.h"
    1.40 +
    1.41 +namespace graphite2 {
    1.42 +
    1.43 +class SegCache;
    1.44 +class SegCacheEntry;
    1.45 +class SegCacheStore;
    1.46 +
    1.47 +/**
    1.48 + * SegPrefixEntry stores lists of word/syllable segments
    1.49 + * with one list for each word length. The prefix size should be chosen so that
    1.50 + * these list sizes stay small since they will be searched iteratively.
    1.51 + */
    1.52 +class SegCachePrefixEntry
    1.53 +{
    1.54 +    SegCachePrefixEntry(const SegCachePrefixEntry &);
    1.55 +    SegCachePrefixEntry & operator = (const SegCachePrefixEntry &);
    1.56 +
    1.57 +public:
    1.58 +    SegCachePrefixEntry() : m_lastPurge(0)
    1.59 +    {
    1.60 +        memset(m_entryCounts, 0, sizeof m_entryCounts);
    1.61 +        memset(m_entryBSIndex, 0, sizeof m_entryBSIndex);
    1.62 +        memset(m_entries, 0, sizeof m_entries);
    1.63 +    }
    1.64 +
    1.65 +    ~SegCachePrefixEntry()
    1.66 +    {
    1.67 +        for (size_t j = 0; j < eMaxSpliceSize; j++)
    1.68 +        {
    1.69 +            if (m_entryCounts[j])
    1.70 +            {
    1.71 +                assert(m_entries[j]);
    1.72 +                for (size_t k = 0; k < m_entryCounts[j]; k++)
    1.73 +                    m_entries[j][k].clear();
    1.74 +
    1.75 +                free(m_entries[j]);
    1.76 +            }
    1.77 +        }
    1.78 +    }
    1.79 +    const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const
    1.80 +    {
    1.81 +        if (length <= ePrefixLength)
    1.82 +        {
    1.83 +            assert(m_entryCounts[length-1] <= 1);
    1.84 +            if (m_entries[length-1])
    1.85 +                return m_entries[length-1];
    1.86 +            return NULL;
    1.87 +        }
    1.88 +        SegCacheEntry * entry = NULL;
    1.89 +        findPosition(cmapGlyphs, length, &entry);
    1.90 +        return entry;
    1.91 +    }
    1.92 +    SegCacheEntry * cache(const uint16* cmapGlyphs, size_t length, Segment * seg, size_t charOffset, unsigned long long totalAccessCount)
    1.93 +    {
    1.94 +        size_t listSize = m_entryBSIndex[length-1]? (m_entryBSIndex[length-1] << 1) - 1 : 0;
    1.95 +        SegCacheEntry * newEntries = NULL;
    1.96 +        if (m_entryCounts[length-1] + 1u > listSize)
    1.97 +        {
    1.98 +            if (m_entryCounts[length-1] == 0)
    1.99 +                listSize = 1;
   1.100 +            else
   1.101 +            {
   1.102 +                // the problem comes when you get incremental numeric ids in a large doc
   1.103 +                if (listSize >= eMaxSuffixCount)
   1.104 +                    return NULL;
   1.105 +                listSize = (m_entryBSIndex[length-1] << 2) - 1;
   1.106 +            }
   1.107 +            newEntries = gralloc<SegCacheEntry>(listSize);
   1.108 +            if (!newEntries)
   1.109 +                return NULL;
   1.110 +        }        
   1.111 +
   1.112 +        uint16 insertPos = 0;
   1.113 +        if (m_entryCounts[length-1] > 0)
   1.114 +        {
   1.115 +            insertPos = findPosition(cmapGlyphs, length, NULL);
   1.116 +            if (!newEntries)
   1.117 +            {
   1.118 +                // same buffer, shift entries up
   1.119 +                memmove(m_entries[length-1] + insertPos + 1, m_entries[length-1] + insertPos,
   1.120 +                    sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos));
   1.121 +            }
   1.122 +            else
   1.123 +            {
   1.124 +                memcpy(newEntries, m_entries[length-1], sizeof(SegCacheEntry) * (insertPos));
   1.125 +                memcpy(newEntries + insertPos + 1, m_entries[length-1] + insertPos,
   1.126 +                   sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos));
   1.127 +                
   1.128 +                free(m_entries[length-1]);
   1.129 +                m_entries[length-1] = newEntries;
   1.130 +                assert (m_entryBSIndex[length-1]);
   1.131 +                m_entryBSIndex[length-1] <<= 1;
   1.132 +            }
   1.133 +        } 
   1.134 +        else
   1.135 +        {
   1.136 +            m_entryBSIndex[length-1] = 1;
   1.137 +            m_entries[length-1] = newEntries;
   1.138 +        }
   1.139 +        m_entryCounts[length-1] += 1;
   1.140 +        new (m_entries[length-1] + insertPos)
   1.141 +            SegCacheEntry(cmapGlyphs, length, seg, charOffset, totalAccessCount);
   1.142 +        return m_entries[length-1]  + insertPos;
   1.143 +    }
   1.144 +    uint32 purge(unsigned long long minAccessCount, unsigned long long oldAccessTime,
   1.145 +        unsigned long long currentTime);
   1.146 +    CLASS_NEW_DELETE
   1.147 +private:
   1.148 +    uint16 findPosition(const uint16 * cmapGlyphs, uint16 length, SegCacheEntry ** entry) const
   1.149 +    {
   1.150 +        int dir = 0;
   1.151 +        if (m_entryCounts[length-1] == 0)
   1.152 +        {
   1.153 +            if (entry) *entry = NULL;
   1.154 +            return 0;
   1.155 +        }
   1.156 +        else if (m_entryCounts[length-1] == 1)
   1.157 +        {
   1.158 +            // optimize single entry case
   1.159 +            for (int i = ePrefixLength; i < length; i++)
   1.160 +            {
   1.161 +                if (cmapGlyphs[i] > m_entries[length-1][0].m_unicode[i])
   1.162 +                {
   1.163 +                    return 1;
   1.164 +                }
   1.165 +                else if (cmapGlyphs[i] < m_entries[length-1][0].m_unicode[i])
   1.166 +                {
   1.167 +                    return 0;
   1.168 +                }
   1.169 +            }
   1.170 +            if (entry)
   1.171 +                *entry = m_entries[length-1];
   1.172 +            return 0;
   1.173 +        }
   1.174 +        uint16 searchIndex = m_entryBSIndex[length-1] - 1;
   1.175 +        uint16 stepSize = m_entryBSIndex[length-1] >> 1;
   1.176 +        size_t prevIndex = searchIndex;
   1.177 +        do
   1.178 +        {
   1.179 +            dir = 0;
   1.180 +            if (searchIndex >= m_entryCounts[length-1])
   1.181 +            {
   1.182 +                dir = -1;
   1.183 +                searchIndex -= stepSize;
   1.184 +                stepSize >>= 1;
   1.185 +            }
   1.186 +            else
   1.187 +            {
   1.188 +                for (int i = ePrefixLength; i < length; i++)
   1.189 +                {
   1.190 +                    if (cmapGlyphs[i] > m_entries[length-1][searchIndex].m_unicode[i])
   1.191 +                    {
   1.192 +                        dir = 1;
   1.193 +                        searchIndex += stepSize;
   1.194 +                        stepSize >>= 1;
   1.195 +                        break;
   1.196 +                    }
   1.197 +                    else if (cmapGlyphs[i] < m_entries[length-1][searchIndex].m_unicode[i])
   1.198 +                    {
   1.199 +                        dir = -1;
   1.200 +                        searchIndex -= stepSize;
   1.201 +                        stepSize >>= 1;
   1.202 +                        break;
   1.203 +                    }
   1.204 +                }
   1.205 +            }
   1.206 +            if (prevIndex == searchIndex)
   1.207 +                break;
   1.208 +            prevIndex = searchIndex;
   1.209 +        } while (dir != 0);
   1.210 +        if (entry)
   1.211 +        {
   1.212 +            if (dir == 0)
   1.213 +                *entry = m_entries[length-1] + searchIndex;
   1.214 +            else
   1.215 +                *entry = NULL;
   1.216 +        }
   1.217 +        else
   1.218 +        {
   1.219 +            // if entry is null, then this is for inserting a new value, which
   1.220 +            // shouldn't already be in the cache
   1.221 +            assert(dir != 0);
   1.222 +            if (dir > 0)
   1.223 +                ++searchIndex;
   1.224 +        }
   1.225 +        return searchIndex;
   1.226 +    }
   1.227 +    /** m_entries is a null terminated list of entries */
   1.228 +    uint16 m_entryCounts[eMaxSpliceSize];
   1.229 +    uint16 m_entryBSIndex[eMaxSpliceSize];
   1.230 +    SegCacheEntry * m_entries[eMaxSpliceSize];
   1.231 +    unsigned long long m_lastPurge;
   1.232 +};
   1.233 +
   1.234 +
   1.235 +#define SEG_CACHE_MIN_INDEX (store->maxCmapGid())
   1.236 +#define SEG_CACHE_MAX_INDEX (store->maxCmapGid()+1u)
   1.237 +#define SEG_CACHE_UNSET_INDEX (store->maxCmapGid()+2u)
   1.238 +
   1.239 +union SegCachePrefixArray
   1.240 +{
   1.241 +    void ** raw;
   1.242 +    SegCachePrefixArray * array;
   1.243 +    SegCachePrefixEntry ** prefixEntries;
   1.244 +    uintptr * range;
   1.245 +};
   1.246 +
   1.247 +class SegCache
   1.248 +{
   1.249 +public:
   1.250 +    SegCache(const SegCacheStore * store, const Features& features);
   1.251 +    ~SegCache();
   1.252 +
   1.253 +    const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const;
   1.254 +    SegCacheEntry * cache(SegCacheStore * store, const uint16 * cmapGlyphs, size_t length, Segment * seg, size_t charOffset);
   1.255 +    void purge(SegCacheStore * store);
   1.256 +
   1.257 +    long long totalAccessCount() const { return m_totalAccessCount; }
   1.258 +    size_t segmentCount() const { return m_segmentCount; }
   1.259 +    const Features & features() const { return m_features; }
   1.260 +    void clear(SegCacheStore * store);
   1.261 +
   1.262 +    CLASS_NEW_DELETE
   1.263 +private:
   1.264 +    void freeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level);
   1.265 +    void purgeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level,
   1.266 +                    unsigned long long minAccessCount, unsigned long long oldAccessTime);
   1.267 +
   1.268 +    uint16 m_prefixLength;
   1.269 +    uint16 m_maxCachedSegLength;
   1.270 +    size_t m_segmentCount;
   1.271 +    SegCachePrefixArray m_prefixes;
   1.272 +    Features m_features;
   1.273 +    mutable unsigned long long m_totalAccessCount;
   1.274 +    mutable unsigned long long m_totalMisses;
   1.275 +    float m_purgeFactor;
   1.276 +};
   1.277 +
   1.278 +inline const SegCacheEntry * SegCache::find(const uint16 * cmapGlyphs, size_t length) const
   1.279 +{
   1.280 +    uint16 pos = 0;
   1.281 +    if (!length || length > eMaxSpliceSize) return NULL;
   1.282 +    SegCachePrefixArray pEntry = m_prefixes.array[cmapGlyphs[0]];
   1.283 +    while (++pos < m_prefixLength - 1)
   1.284 +    {
   1.285 +        if (!pEntry.raw)
   1.286 +        {
   1.287 +            ++m_totalMisses;
   1.288 +            return NULL;
   1.289 +        }
   1.290 +        pEntry = pEntry.array[(pos < length)? cmapGlyphs[pos] : 0];
   1.291 +    }
   1.292 +    if (!pEntry.raw)
   1.293 +    {
   1.294 +        ++m_totalMisses;
   1.295 +        return NULL;
   1.296 +    }
   1.297 +    SegCachePrefixEntry * prefixEntry = pEntry.prefixEntries[(pos < length)? cmapGlyphs[pos] : 0];
   1.298 +    if (!prefixEntry)
   1.299 +    {
   1.300 +        ++m_totalMisses;
   1.301 +        return NULL;
   1.302 +    }
   1.303 +    const SegCacheEntry * entry = prefixEntry->find(cmapGlyphs, length);
   1.304 +    if (entry)
   1.305 +    {
   1.306 +        ++m_totalAccessCount;
   1.307 +        entry->accessed(m_totalAccessCount);
   1.308 +    }
   1.309 +    else
   1.310 +    {
   1.311 +        ++m_totalMisses;
   1.312 +    }   
   1.313 +    return entry;
   1.314 +}
   1.315 +    
   1.316 +} // namespace graphite2
   1.317 +
   1.318 +#endif
   1.319 +

mercurial