1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/graphite2/src/inc/SegCache.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,316 @@ 1.4 +/* GRAPHITE2 LICENSING 1.5 + 1.6 + Copyright 2010, SIL International 1.7 + All rights reserved. 1.8 + 1.9 + This library is free software; you can redistribute it and/or modify 1.10 + it under the terms of the GNU Lesser General Public License as published 1.11 + by the Free Software Foundation; either version 2.1 of License, or 1.12 + (at your option) any later version. 1.13 + 1.14 + This program is distributed in the hope that it will be useful, 1.15 + but WITHOUT ANY WARRANTY; without even the implied warranty of 1.16 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.17 + Lesser General Public License for more details. 1.18 + 1.19 + You should also have received a copy of the GNU Lesser General Public 1.20 + License along with this library in the file named "LICENSE". 1.21 + If not, write to the Free Software Foundation, 51 Franklin Street, 1.22 + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the 1.23 + internet at http://www.fsf.org/licenses/lgpl.html. 1.24 + 1.25 +Alternatively, the contents of this file may be used under the terms of the 1.26 +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public 1.27 +License, as published by the Free Software Foundation, either version 2 1.28 +of the License or (at your option) any later version. 1.29 +*/ 1.30 +#pragma once 1.31 + 1.32 +#ifndef GRAPHITE2_NSEGCACHE 1.33 + 1.34 +#include <graphite2/Segment.h> 1.35 +#include "inc/Main.h" 1.36 +#include "inc/Slot.h" 1.37 +#include "inc/FeatureVal.h" 1.38 +#include "inc/SegCacheEntry.h" 1.39 +#include "inc/Segment.h" 1.40 + 1.41 +namespace graphite2 { 1.42 + 1.43 +class SegCache; 1.44 +class SegCacheEntry; 1.45 +class SegCacheStore; 1.46 + 1.47 +/** 1.48 + * SegPrefixEntry stores lists of word/syllable segments 1.49 + * with one list for each word length. The prefix size should be chosen so that 1.50 + * these list sizes stay small since they will be searched iteratively. 1.51 + */ 1.52 +class SegCachePrefixEntry 1.53 +{ 1.54 + SegCachePrefixEntry(const SegCachePrefixEntry &); 1.55 + SegCachePrefixEntry & operator = (const SegCachePrefixEntry &); 1.56 + 1.57 +public: 1.58 + SegCachePrefixEntry() : m_lastPurge(0) 1.59 + { 1.60 + memset(m_entryCounts, 0, sizeof m_entryCounts); 1.61 + memset(m_entryBSIndex, 0, sizeof m_entryBSIndex); 1.62 + memset(m_entries, 0, sizeof m_entries); 1.63 + } 1.64 + 1.65 + ~SegCachePrefixEntry() 1.66 + { 1.67 + for (size_t j = 0; j < eMaxSpliceSize; j++) 1.68 + { 1.69 + if (m_entryCounts[j]) 1.70 + { 1.71 + assert(m_entries[j]); 1.72 + for (size_t k = 0; k < m_entryCounts[j]; k++) 1.73 + m_entries[j][k].clear(); 1.74 + 1.75 + free(m_entries[j]); 1.76 + } 1.77 + } 1.78 + } 1.79 + const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const 1.80 + { 1.81 + if (length <= ePrefixLength) 1.82 + { 1.83 + assert(m_entryCounts[length-1] <= 1); 1.84 + if (m_entries[length-1]) 1.85 + return m_entries[length-1]; 1.86 + return NULL; 1.87 + } 1.88 + SegCacheEntry * entry = NULL; 1.89 + findPosition(cmapGlyphs, length, &entry); 1.90 + return entry; 1.91 + } 1.92 + SegCacheEntry * cache(const uint16* cmapGlyphs, size_t length, Segment * seg, size_t charOffset, unsigned long long totalAccessCount) 1.93 + { 1.94 + size_t listSize = m_entryBSIndex[length-1]? (m_entryBSIndex[length-1] << 1) - 1 : 0; 1.95 + SegCacheEntry * newEntries = NULL; 1.96 + if (m_entryCounts[length-1] + 1u > listSize) 1.97 + { 1.98 + if (m_entryCounts[length-1] == 0) 1.99 + listSize = 1; 1.100 + else 1.101 + { 1.102 + // the problem comes when you get incremental numeric ids in a large doc 1.103 + if (listSize >= eMaxSuffixCount) 1.104 + return NULL; 1.105 + listSize = (m_entryBSIndex[length-1] << 2) - 1; 1.106 + } 1.107 + newEntries = gralloc<SegCacheEntry>(listSize); 1.108 + if (!newEntries) 1.109 + return NULL; 1.110 + } 1.111 + 1.112 + uint16 insertPos = 0; 1.113 + if (m_entryCounts[length-1] > 0) 1.114 + { 1.115 + insertPos = findPosition(cmapGlyphs, length, NULL); 1.116 + if (!newEntries) 1.117 + { 1.118 + // same buffer, shift entries up 1.119 + memmove(m_entries[length-1] + insertPos + 1, m_entries[length-1] + insertPos, 1.120 + sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos)); 1.121 + } 1.122 + else 1.123 + { 1.124 + memcpy(newEntries, m_entries[length-1], sizeof(SegCacheEntry) * (insertPos)); 1.125 + memcpy(newEntries + insertPos + 1, m_entries[length-1] + insertPos, 1.126 + sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos)); 1.127 + 1.128 + free(m_entries[length-1]); 1.129 + m_entries[length-1] = newEntries; 1.130 + assert (m_entryBSIndex[length-1]); 1.131 + m_entryBSIndex[length-1] <<= 1; 1.132 + } 1.133 + } 1.134 + else 1.135 + { 1.136 + m_entryBSIndex[length-1] = 1; 1.137 + m_entries[length-1] = newEntries; 1.138 + } 1.139 + m_entryCounts[length-1] += 1; 1.140 + new (m_entries[length-1] + insertPos) 1.141 + SegCacheEntry(cmapGlyphs, length, seg, charOffset, totalAccessCount); 1.142 + return m_entries[length-1] + insertPos; 1.143 + } 1.144 + uint32 purge(unsigned long long minAccessCount, unsigned long long oldAccessTime, 1.145 + unsigned long long currentTime); 1.146 + CLASS_NEW_DELETE 1.147 +private: 1.148 + uint16 findPosition(const uint16 * cmapGlyphs, uint16 length, SegCacheEntry ** entry) const 1.149 + { 1.150 + int dir = 0; 1.151 + if (m_entryCounts[length-1] == 0) 1.152 + { 1.153 + if (entry) *entry = NULL; 1.154 + return 0; 1.155 + } 1.156 + else if (m_entryCounts[length-1] == 1) 1.157 + { 1.158 + // optimize single entry case 1.159 + for (int i = ePrefixLength; i < length; i++) 1.160 + { 1.161 + if (cmapGlyphs[i] > m_entries[length-1][0].m_unicode[i]) 1.162 + { 1.163 + return 1; 1.164 + } 1.165 + else if (cmapGlyphs[i] < m_entries[length-1][0].m_unicode[i]) 1.166 + { 1.167 + return 0; 1.168 + } 1.169 + } 1.170 + if (entry) 1.171 + *entry = m_entries[length-1]; 1.172 + return 0; 1.173 + } 1.174 + uint16 searchIndex = m_entryBSIndex[length-1] - 1; 1.175 + uint16 stepSize = m_entryBSIndex[length-1] >> 1; 1.176 + size_t prevIndex = searchIndex; 1.177 + do 1.178 + { 1.179 + dir = 0; 1.180 + if (searchIndex >= m_entryCounts[length-1]) 1.181 + { 1.182 + dir = -1; 1.183 + searchIndex -= stepSize; 1.184 + stepSize >>= 1; 1.185 + } 1.186 + else 1.187 + { 1.188 + for (int i = ePrefixLength; i < length; i++) 1.189 + { 1.190 + if (cmapGlyphs[i] > m_entries[length-1][searchIndex].m_unicode[i]) 1.191 + { 1.192 + dir = 1; 1.193 + searchIndex += stepSize; 1.194 + stepSize >>= 1; 1.195 + break; 1.196 + } 1.197 + else if (cmapGlyphs[i] < m_entries[length-1][searchIndex].m_unicode[i]) 1.198 + { 1.199 + dir = -1; 1.200 + searchIndex -= stepSize; 1.201 + stepSize >>= 1; 1.202 + break; 1.203 + } 1.204 + } 1.205 + } 1.206 + if (prevIndex == searchIndex) 1.207 + break; 1.208 + prevIndex = searchIndex; 1.209 + } while (dir != 0); 1.210 + if (entry) 1.211 + { 1.212 + if (dir == 0) 1.213 + *entry = m_entries[length-1] + searchIndex; 1.214 + else 1.215 + *entry = NULL; 1.216 + } 1.217 + else 1.218 + { 1.219 + // if entry is null, then this is for inserting a new value, which 1.220 + // shouldn't already be in the cache 1.221 + assert(dir != 0); 1.222 + if (dir > 0) 1.223 + ++searchIndex; 1.224 + } 1.225 + return searchIndex; 1.226 + } 1.227 + /** m_entries is a null terminated list of entries */ 1.228 + uint16 m_entryCounts[eMaxSpliceSize]; 1.229 + uint16 m_entryBSIndex[eMaxSpliceSize]; 1.230 + SegCacheEntry * m_entries[eMaxSpliceSize]; 1.231 + unsigned long long m_lastPurge; 1.232 +}; 1.233 + 1.234 + 1.235 +#define SEG_CACHE_MIN_INDEX (store->maxCmapGid()) 1.236 +#define SEG_CACHE_MAX_INDEX (store->maxCmapGid()+1u) 1.237 +#define SEG_CACHE_UNSET_INDEX (store->maxCmapGid()+2u) 1.238 + 1.239 +union SegCachePrefixArray 1.240 +{ 1.241 + void ** raw; 1.242 + SegCachePrefixArray * array; 1.243 + SegCachePrefixEntry ** prefixEntries; 1.244 + uintptr * range; 1.245 +}; 1.246 + 1.247 +class SegCache 1.248 +{ 1.249 +public: 1.250 + SegCache(const SegCacheStore * store, const Features& features); 1.251 + ~SegCache(); 1.252 + 1.253 + const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const; 1.254 + SegCacheEntry * cache(SegCacheStore * store, const uint16 * cmapGlyphs, size_t length, Segment * seg, size_t charOffset); 1.255 + void purge(SegCacheStore * store); 1.256 + 1.257 + long long totalAccessCount() const { return m_totalAccessCount; } 1.258 + size_t segmentCount() const { return m_segmentCount; } 1.259 + const Features & features() const { return m_features; } 1.260 + void clear(SegCacheStore * store); 1.261 + 1.262 + CLASS_NEW_DELETE 1.263 +private: 1.264 + void freeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level); 1.265 + void purgeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level, 1.266 + unsigned long long minAccessCount, unsigned long long oldAccessTime); 1.267 + 1.268 + uint16 m_prefixLength; 1.269 + uint16 m_maxCachedSegLength; 1.270 + size_t m_segmentCount; 1.271 + SegCachePrefixArray m_prefixes; 1.272 + Features m_features; 1.273 + mutable unsigned long long m_totalAccessCount; 1.274 + mutable unsigned long long m_totalMisses; 1.275 + float m_purgeFactor; 1.276 +}; 1.277 + 1.278 +inline const SegCacheEntry * SegCache::find(const uint16 * cmapGlyphs, size_t length) const 1.279 +{ 1.280 + uint16 pos = 0; 1.281 + if (!length || length > eMaxSpliceSize) return NULL; 1.282 + SegCachePrefixArray pEntry = m_prefixes.array[cmapGlyphs[0]]; 1.283 + while (++pos < m_prefixLength - 1) 1.284 + { 1.285 + if (!pEntry.raw) 1.286 + { 1.287 + ++m_totalMisses; 1.288 + return NULL; 1.289 + } 1.290 + pEntry = pEntry.array[(pos < length)? cmapGlyphs[pos] : 0]; 1.291 + } 1.292 + if (!pEntry.raw) 1.293 + { 1.294 + ++m_totalMisses; 1.295 + return NULL; 1.296 + } 1.297 + SegCachePrefixEntry * prefixEntry = pEntry.prefixEntries[(pos < length)? cmapGlyphs[pos] : 0]; 1.298 + if (!prefixEntry) 1.299 + { 1.300 + ++m_totalMisses; 1.301 + return NULL; 1.302 + } 1.303 + const SegCacheEntry * entry = prefixEntry->find(cmapGlyphs, length); 1.304 + if (entry) 1.305 + { 1.306 + ++m_totalAccessCount; 1.307 + entry->accessed(m_totalAccessCount); 1.308 + } 1.309 + else 1.310 + { 1.311 + ++m_totalMisses; 1.312 + } 1.313 + return entry; 1.314 +} 1.315 + 1.316 +} // namespace graphite2 1.317 + 1.318 +#endif 1.319 +