michael@0: /* GRAPHITE2 LICENSING michael@0: michael@0: Copyright 2010, SIL International michael@0: All rights reserved. michael@0: michael@0: This library is free software; you can redistribute it and/or modify michael@0: it under the terms of the GNU Lesser General Public License as published michael@0: by the Free Software Foundation; either version 2.1 of License, or michael@0: (at your option) any later version. michael@0: michael@0: This program is distributed in the hope that it will be useful, michael@0: but WITHOUT ANY WARRANTY; without even the implied warranty of michael@0: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU michael@0: Lesser General Public License for more details. michael@0: michael@0: You should also have received a copy of the GNU Lesser General Public michael@0: License along with this library in the file named "LICENSE". michael@0: If not, write to the Free Software Foundation, 51 Franklin Street, michael@0: Suite 500, Boston, MA 02110-1335, USA or visit their web page on the michael@0: internet at http://www.fsf.org/licenses/lgpl.html. michael@0: michael@0: Alternatively, the contents of this file may be used under the terms of the michael@0: Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public michael@0: License, as published by the Free Software Foundation, either version 2 michael@0: of the License or (at your option) any later version. michael@0: */ michael@0: #pragma once michael@0: michael@0: #ifndef GRAPHITE2_NSEGCACHE michael@0: michael@0: #include michael@0: #include "inc/Main.h" michael@0: #include "inc/Slot.h" michael@0: #include "inc/FeatureVal.h" michael@0: #include "inc/SegCacheEntry.h" michael@0: #include "inc/Segment.h" michael@0: michael@0: namespace graphite2 { michael@0: michael@0: class SegCache; michael@0: class SegCacheEntry; michael@0: class SegCacheStore; michael@0: michael@0: /** michael@0: * SegPrefixEntry stores lists of word/syllable segments michael@0: * with one list for each word length. The prefix size should be chosen so that michael@0: * these list sizes stay small since they will be searched iteratively. michael@0: */ michael@0: class SegCachePrefixEntry michael@0: { michael@0: SegCachePrefixEntry(const SegCachePrefixEntry &); michael@0: SegCachePrefixEntry & operator = (const SegCachePrefixEntry &); michael@0: michael@0: public: michael@0: SegCachePrefixEntry() : m_lastPurge(0) michael@0: { michael@0: memset(m_entryCounts, 0, sizeof m_entryCounts); michael@0: memset(m_entryBSIndex, 0, sizeof m_entryBSIndex); michael@0: memset(m_entries, 0, sizeof m_entries); michael@0: } michael@0: michael@0: ~SegCachePrefixEntry() michael@0: { michael@0: for (size_t j = 0; j < eMaxSpliceSize; j++) michael@0: { michael@0: if (m_entryCounts[j]) michael@0: { michael@0: assert(m_entries[j]); michael@0: for (size_t k = 0; k < m_entryCounts[j]; k++) michael@0: m_entries[j][k].clear(); michael@0: michael@0: free(m_entries[j]); michael@0: } michael@0: } michael@0: } michael@0: const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const michael@0: { michael@0: if (length <= ePrefixLength) michael@0: { michael@0: assert(m_entryCounts[length-1] <= 1); michael@0: if (m_entries[length-1]) michael@0: return m_entries[length-1]; michael@0: return NULL; michael@0: } michael@0: SegCacheEntry * entry = NULL; michael@0: findPosition(cmapGlyphs, length, &entry); michael@0: return entry; michael@0: } michael@0: SegCacheEntry * cache(const uint16* cmapGlyphs, size_t length, Segment * seg, size_t charOffset, unsigned long long totalAccessCount) michael@0: { michael@0: size_t listSize = m_entryBSIndex[length-1]? (m_entryBSIndex[length-1] << 1) - 1 : 0; michael@0: SegCacheEntry * newEntries = NULL; michael@0: if (m_entryCounts[length-1] + 1u > listSize) michael@0: { michael@0: if (m_entryCounts[length-1] == 0) michael@0: listSize = 1; michael@0: else michael@0: { michael@0: // the problem comes when you get incremental numeric ids in a large doc michael@0: if (listSize >= eMaxSuffixCount) michael@0: return NULL; michael@0: listSize = (m_entryBSIndex[length-1] << 2) - 1; michael@0: } michael@0: newEntries = gralloc(listSize); michael@0: if (!newEntries) michael@0: return NULL; michael@0: } michael@0: michael@0: uint16 insertPos = 0; michael@0: if (m_entryCounts[length-1] > 0) michael@0: { michael@0: insertPos = findPosition(cmapGlyphs, length, NULL); michael@0: if (!newEntries) michael@0: { michael@0: // same buffer, shift entries up michael@0: memmove(m_entries[length-1] + insertPos + 1, m_entries[length-1] + insertPos, michael@0: sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos)); michael@0: } michael@0: else michael@0: { michael@0: memcpy(newEntries, m_entries[length-1], sizeof(SegCacheEntry) * (insertPos)); michael@0: memcpy(newEntries + insertPos + 1, m_entries[length-1] + insertPos, michael@0: sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos)); michael@0: michael@0: free(m_entries[length-1]); michael@0: m_entries[length-1] = newEntries; michael@0: assert (m_entryBSIndex[length-1]); michael@0: m_entryBSIndex[length-1] <<= 1; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: m_entryBSIndex[length-1] = 1; michael@0: m_entries[length-1] = newEntries; michael@0: } michael@0: m_entryCounts[length-1] += 1; michael@0: new (m_entries[length-1] + insertPos) michael@0: SegCacheEntry(cmapGlyphs, length, seg, charOffset, totalAccessCount); michael@0: return m_entries[length-1] + insertPos; michael@0: } michael@0: uint32 purge(unsigned long long minAccessCount, unsigned long long oldAccessTime, michael@0: unsigned long long currentTime); michael@0: CLASS_NEW_DELETE michael@0: private: michael@0: uint16 findPosition(const uint16 * cmapGlyphs, uint16 length, SegCacheEntry ** entry) const michael@0: { michael@0: int dir = 0; michael@0: if (m_entryCounts[length-1] == 0) michael@0: { michael@0: if (entry) *entry = NULL; michael@0: return 0; michael@0: } michael@0: else if (m_entryCounts[length-1] == 1) michael@0: { michael@0: // optimize single entry case michael@0: for (int i = ePrefixLength; i < length; i++) michael@0: { michael@0: if (cmapGlyphs[i] > m_entries[length-1][0].m_unicode[i]) michael@0: { michael@0: return 1; michael@0: } michael@0: else if (cmapGlyphs[i] < m_entries[length-1][0].m_unicode[i]) michael@0: { michael@0: return 0; michael@0: } michael@0: } michael@0: if (entry) michael@0: *entry = m_entries[length-1]; michael@0: return 0; michael@0: } michael@0: uint16 searchIndex = m_entryBSIndex[length-1] - 1; michael@0: uint16 stepSize = m_entryBSIndex[length-1] >> 1; michael@0: size_t prevIndex = searchIndex; michael@0: do michael@0: { michael@0: dir = 0; michael@0: if (searchIndex >= m_entryCounts[length-1]) michael@0: { michael@0: dir = -1; michael@0: searchIndex -= stepSize; michael@0: stepSize >>= 1; michael@0: } michael@0: else michael@0: { michael@0: for (int i = ePrefixLength; i < length; i++) michael@0: { michael@0: if (cmapGlyphs[i] > m_entries[length-1][searchIndex].m_unicode[i]) michael@0: { michael@0: dir = 1; michael@0: searchIndex += stepSize; michael@0: stepSize >>= 1; michael@0: break; michael@0: } michael@0: else if (cmapGlyphs[i] < m_entries[length-1][searchIndex].m_unicode[i]) michael@0: { michael@0: dir = -1; michael@0: searchIndex -= stepSize; michael@0: stepSize >>= 1; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: if (prevIndex == searchIndex) michael@0: break; michael@0: prevIndex = searchIndex; michael@0: } while (dir != 0); michael@0: if (entry) michael@0: { michael@0: if (dir == 0) michael@0: *entry = m_entries[length-1] + searchIndex; michael@0: else michael@0: *entry = NULL; michael@0: } michael@0: else michael@0: { michael@0: // if entry is null, then this is for inserting a new value, which michael@0: // shouldn't already be in the cache michael@0: assert(dir != 0); michael@0: if (dir > 0) michael@0: ++searchIndex; michael@0: } michael@0: return searchIndex; michael@0: } michael@0: /** m_entries is a null terminated list of entries */ michael@0: uint16 m_entryCounts[eMaxSpliceSize]; michael@0: uint16 m_entryBSIndex[eMaxSpliceSize]; michael@0: SegCacheEntry * m_entries[eMaxSpliceSize]; michael@0: unsigned long long m_lastPurge; michael@0: }; michael@0: michael@0: michael@0: #define SEG_CACHE_MIN_INDEX (store->maxCmapGid()) michael@0: #define SEG_CACHE_MAX_INDEX (store->maxCmapGid()+1u) michael@0: #define SEG_CACHE_UNSET_INDEX (store->maxCmapGid()+2u) michael@0: michael@0: union SegCachePrefixArray michael@0: { michael@0: void ** raw; michael@0: SegCachePrefixArray * array; michael@0: SegCachePrefixEntry ** prefixEntries; michael@0: uintptr * range; michael@0: }; michael@0: michael@0: class SegCache michael@0: { michael@0: public: michael@0: SegCache(const SegCacheStore * store, const Features& features); michael@0: ~SegCache(); michael@0: michael@0: const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const; michael@0: SegCacheEntry * cache(SegCacheStore * store, const uint16 * cmapGlyphs, size_t length, Segment * seg, size_t charOffset); michael@0: void purge(SegCacheStore * store); michael@0: michael@0: long long totalAccessCount() const { return m_totalAccessCount; } michael@0: size_t segmentCount() const { return m_segmentCount; } michael@0: const Features & features() const { return m_features; } michael@0: void clear(SegCacheStore * store); michael@0: michael@0: CLASS_NEW_DELETE michael@0: private: michael@0: void freeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level); michael@0: void purgeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level, michael@0: unsigned long long minAccessCount, unsigned long long oldAccessTime); michael@0: michael@0: uint16 m_prefixLength; michael@0: uint16 m_maxCachedSegLength; michael@0: size_t m_segmentCount; michael@0: SegCachePrefixArray m_prefixes; michael@0: Features m_features; michael@0: mutable unsigned long long m_totalAccessCount; michael@0: mutable unsigned long long m_totalMisses; michael@0: float m_purgeFactor; michael@0: }; michael@0: michael@0: inline const SegCacheEntry * SegCache::find(const uint16 * cmapGlyphs, size_t length) const michael@0: { michael@0: uint16 pos = 0; michael@0: if (!length || length > eMaxSpliceSize) return NULL; michael@0: SegCachePrefixArray pEntry = m_prefixes.array[cmapGlyphs[0]]; michael@0: while (++pos < m_prefixLength - 1) michael@0: { michael@0: if (!pEntry.raw) michael@0: { michael@0: ++m_totalMisses; michael@0: return NULL; michael@0: } michael@0: pEntry = pEntry.array[(pos < length)? cmapGlyphs[pos] : 0]; michael@0: } michael@0: if (!pEntry.raw) michael@0: { michael@0: ++m_totalMisses; michael@0: return NULL; michael@0: } michael@0: SegCachePrefixEntry * prefixEntry = pEntry.prefixEntries[(pos < length)? cmapGlyphs[pos] : 0]; michael@0: if (!prefixEntry) michael@0: { michael@0: ++m_totalMisses; michael@0: return NULL; michael@0: } michael@0: const SegCacheEntry * entry = prefixEntry->find(cmapGlyphs, length); michael@0: if (entry) michael@0: { michael@0: ++m_totalAccessCount; michael@0: entry->accessed(m_totalAccessCount); michael@0: } michael@0: else michael@0: { michael@0: ++m_totalMisses; michael@0: } michael@0: return entry; michael@0: } michael@0: michael@0: } // namespace graphite2 michael@0: michael@0: #endif michael@0: