michael@0: /*  GRAPHITE2 LICENSING
michael@0: 
michael@0:     Copyright 2010, SIL International
michael@0:     All rights reserved.
michael@0: 
michael@0:     This library is free software; you can redistribute it and/or modify
michael@0:     it under the terms of the GNU Lesser General Public License as published
michael@0:     by the Free Software Foundation; either version 2.1 of License, or
michael@0:     (at your option) any later version.
michael@0: 
michael@0:     This program is distributed in the hope that it will be useful,
michael@0:     but WITHOUT ANY WARRANTY; without even the implied warranty of
michael@0:     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
michael@0:     Lesser General Public License for more details.
michael@0: 
michael@0:     You should also have received a copy of the GNU Lesser General Public
michael@0:     License along with this library in the file named "LICENSE".
michael@0:     If not, write to the Free Software Foundation, 51 Franklin Street,
michael@0:     Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
michael@0:     internet at http://www.fsf.org/licenses/lgpl.html.
michael@0: 
michael@0: Alternatively, the contents of this file may be used under the terms of the
michael@0: Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
michael@0: License, as published by the Free Software Foundation, either version 2
michael@0: of the License or (at your option) any later version.
michael@0: */
michael@0: #pragma once
michael@0: 
michael@0: #ifndef GRAPHITE2_NSEGCACHE
michael@0: 
michael@0: #include <graphite2/Segment.h>
michael@0: #include "inc/Main.h"
michael@0: #include "inc/Slot.h"
michael@0: #include "inc/FeatureVal.h"
michael@0: #include "inc/SegCacheEntry.h"
michael@0: #include "inc/Segment.h"
michael@0: 
michael@0: namespace graphite2 {
michael@0: 
michael@0: class SegCache;
michael@0: class SegCacheEntry;
michael@0: class SegCacheStore;
michael@0: 
michael@0: /**
michael@0:  * SegPrefixEntry stores lists of word/syllable segments
michael@0:  * with one list for each word length. The prefix size should be chosen so that
michael@0:  * these list sizes stay small since they will be searched iteratively.
michael@0:  */
michael@0: class SegCachePrefixEntry
michael@0: {
michael@0:     SegCachePrefixEntry(const SegCachePrefixEntry &);
michael@0:     SegCachePrefixEntry & operator = (const SegCachePrefixEntry &);
michael@0: 
michael@0: public:
michael@0:     SegCachePrefixEntry() : m_lastPurge(0)
michael@0:     {
michael@0:         memset(m_entryCounts, 0, sizeof m_entryCounts);
michael@0:         memset(m_entryBSIndex, 0, sizeof m_entryBSIndex);
michael@0:         memset(m_entries, 0, sizeof m_entries);
michael@0:     }
michael@0: 
michael@0:     ~SegCachePrefixEntry()
michael@0:     {
michael@0:         for (size_t j = 0; j < eMaxSpliceSize; j++)
michael@0:         {
michael@0:             if (m_entryCounts[j])
michael@0:             {
michael@0:                 assert(m_entries[j]);
michael@0:                 for (size_t k = 0; k < m_entryCounts[j]; k++)
michael@0:                     m_entries[j][k].clear();
michael@0: 
michael@0:                 free(m_entries[j]);
michael@0:             }
michael@0:         }
michael@0:     }
michael@0:     const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const
michael@0:     {
michael@0:         if (length <= ePrefixLength)
michael@0:         {
michael@0:             assert(m_entryCounts[length-1] <= 1);
michael@0:             if (m_entries[length-1])
michael@0:                 return m_entries[length-1];
michael@0:             return NULL;
michael@0:         }
michael@0:         SegCacheEntry * entry = NULL;
michael@0:         findPosition(cmapGlyphs, length, &entry);
michael@0:         return entry;
michael@0:     }
michael@0:     SegCacheEntry * cache(const uint16* cmapGlyphs, size_t length, Segment * seg, size_t charOffset, unsigned long long totalAccessCount)
michael@0:     {
michael@0:         size_t listSize = m_entryBSIndex[length-1]? (m_entryBSIndex[length-1] << 1) - 1 : 0;
michael@0:         SegCacheEntry * newEntries = NULL;
michael@0:         if (m_entryCounts[length-1] + 1u > listSize)
michael@0:         {
michael@0:             if (m_entryCounts[length-1] == 0)
michael@0:                 listSize = 1;
michael@0:             else
michael@0:             {
michael@0:                 // the problem comes when you get incremental numeric ids in a large doc
michael@0:                 if (listSize >= eMaxSuffixCount)
michael@0:                     return NULL;
michael@0:                 listSize = (m_entryBSIndex[length-1] << 2) - 1;
michael@0:             }
michael@0:             newEntries = gralloc<SegCacheEntry>(listSize);
michael@0:             if (!newEntries)
michael@0:                 return NULL;
michael@0:         }        
michael@0: 
michael@0:         uint16 insertPos = 0;
michael@0:         if (m_entryCounts[length-1] > 0)
michael@0:         {
michael@0:             insertPos = findPosition(cmapGlyphs, length, NULL);
michael@0:             if (!newEntries)
michael@0:             {
michael@0:                 // same buffer, shift entries up
michael@0:                 memmove(m_entries[length-1] + insertPos + 1, m_entries[length-1] + insertPos,
michael@0:                     sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos));
michael@0:             }
michael@0:             else
michael@0:             {
michael@0:                 memcpy(newEntries, m_entries[length-1], sizeof(SegCacheEntry) * (insertPos));
michael@0:                 memcpy(newEntries + insertPos + 1, m_entries[length-1] + insertPos,
michael@0:                    sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos));
michael@0:                 
michael@0:                 free(m_entries[length-1]);
michael@0:                 m_entries[length-1] = newEntries;
michael@0:                 assert (m_entryBSIndex[length-1]);
michael@0:                 m_entryBSIndex[length-1] <<= 1;
michael@0:             }
michael@0:         } 
michael@0:         else
michael@0:         {
michael@0:             m_entryBSIndex[length-1] = 1;
michael@0:             m_entries[length-1] = newEntries;
michael@0:         }
michael@0:         m_entryCounts[length-1] += 1;
michael@0:         new (m_entries[length-1] + insertPos)
michael@0:             SegCacheEntry(cmapGlyphs, length, seg, charOffset, totalAccessCount);
michael@0:         return m_entries[length-1]  + insertPos;
michael@0:     }
michael@0:     uint32 purge(unsigned long long minAccessCount, unsigned long long oldAccessTime,
michael@0:         unsigned long long currentTime);
michael@0:     CLASS_NEW_DELETE
michael@0: private:
michael@0:     uint16 findPosition(const uint16 * cmapGlyphs, uint16 length, SegCacheEntry ** entry) const
michael@0:     {
michael@0:         int dir = 0;
michael@0:         if (m_entryCounts[length-1] == 0)
michael@0:         {
michael@0:             if (entry) *entry = NULL;
michael@0:             return 0;
michael@0:         }
michael@0:         else if (m_entryCounts[length-1] == 1)
michael@0:         {
michael@0:             // optimize single entry case
michael@0:             for (int i = ePrefixLength; i < length; i++)
michael@0:             {
michael@0:                 if (cmapGlyphs[i] > m_entries[length-1][0].m_unicode[i])
michael@0:                 {
michael@0:                     return 1;
michael@0:                 }
michael@0:                 else if (cmapGlyphs[i] < m_entries[length-1][0].m_unicode[i])
michael@0:                 {
michael@0:                     return 0;
michael@0:                 }
michael@0:             }
michael@0:             if (entry)
michael@0:                 *entry = m_entries[length-1];
michael@0:             return 0;
michael@0:         }
michael@0:         uint16 searchIndex = m_entryBSIndex[length-1] - 1;
michael@0:         uint16 stepSize = m_entryBSIndex[length-1] >> 1;
michael@0:         size_t prevIndex = searchIndex;
michael@0:         do
michael@0:         {
michael@0:             dir = 0;
michael@0:             if (searchIndex >= m_entryCounts[length-1])
michael@0:             {
michael@0:                 dir = -1;
michael@0:                 searchIndex -= stepSize;
michael@0:                 stepSize >>= 1;
michael@0:             }
michael@0:             else
michael@0:             {
michael@0:                 for (int i = ePrefixLength; i < length; i++)
michael@0:                 {
michael@0:                     if (cmapGlyphs[i] > m_entries[length-1][searchIndex].m_unicode[i])
michael@0:                     {
michael@0:                         dir = 1;
michael@0:                         searchIndex += stepSize;
michael@0:                         stepSize >>= 1;
michael@0:                         break;
michael@0:                     }
michael@0:                     else if (cmapGlyphs[i] < m_entries[length-1][searchIndex].m_unicode[i])
michael@0:                     {
michael@0:                         dir = -1;
michael@0:                         searchIndex -= stepSize;
michael@0:                         stepSize >>= 1;
michael@0:                         break;
michael@0:                     }
michael@0:                 }
michael@0:             }
michael@0:             if (prevIndex == searchIndex)
michael@0:                 break;
michael@0:             prevIndex = searchIndex;
michael@0:         } while (dir != 0);
michael@0:         if (entry)
michael@0:         {
michael@0:             if (dir == 0)
michael@0:                 *entry = m_entries[length-1] + searchIndex;
michael@0:             else
michael@0:                 *entry = NULL;
michael@0:         }
michael@0:         else
michael@0:         {
michael@0:             // if entry is null, then this is for inserting a new value, which
michael@0:             // shouldn't already be in the cache
michael@0:             assert(dir != 0);
michael@0:             if (dir > 0)
michael@0:                 ++searchIndex;
michael@0:         }
michael@0:         return searchIndex;
michael@0:     }
michael@0:     /** m_entries is a null terminated list of entries */
michael@0:     uint16 m_entryCounts[eMaxSpliceSize];
michael@0:     uint16 m_entryBSIndex[eMaxSpliceSize];
michael@0:     SegCacheEntry * m_entries[eMaxSpliceSize];
michael@0:     unsigned long long m_lastPurge;
michael@0: };
michael@0: 
michael@0: 
michael@0: #define SEG_CACHE_MIN_INDEX (store->maxCmapGid())
michael@0: #define SEG_CACHE_MAX_INDEX (store->maxCmapGid()+1u)
michael@0: #define SEG_CACHE_UNSET_INDEX (store->maxCmapGid()+2u)
michael@0: 
michael@0: union SegCachePrefixArray
michael@0: {
michael@0:     void ** raw;
michael@0:     SegCachePrefixArray * array;
michael@0:     SegCachePrefixEntry ** prefixEntries;
michael@0:     uintptr * range;
michael@0: };
michael@0: 
michael@0: class SegCache
michael@0: {
michael@0: public:
michael@0:     SegCache(const SegCacheStore * store, const Features& features);
michael@0:     ~SegCache();
michael@0: 
michael@0:     const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const;
michael@0:     SegCacheEntry * cache(SegCacheStore * store, const uint16 * cmapGlyphs, size_t length, Segment * seg, size_t charOffset);
michael@0:     void purge(SegCacheStore * store);
michael@0: 
michael@0:     long long totalAccessCount() const { return m_totalAccessCount; }
michael@0:     size_t segmentCount() const { return m_segmentCount; }
michael@0:     const Features & features() const { return m_features; }
michael@0:     void clear(SegCacheStore * store);
michael@0: 
michael@0:     CLASS_NEW_DELETE
michael@0: private:
michael@0:     void freeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level);
michael@0:     void purgeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level,
michael@0:                     unsigned long long minAccessCount, unsigned long long oldAccessTime);
michael@0: 
michael@0:     uint16 m_prefixLength;
michael@0:     uint16 m_maxCachedSegLength;
michael@0:     size_t m_segmentCount;
michael@0:     SegCachePrefixArray m_prefixes;
michael@0:     Features m_features;
michael@0:     mutable unsigned long long m_totalAccessCount;
michael@0:     mutable unsigned long long m_totalMisses;
michael@0:     float m_purgeFactor;
michael@0: };
michael@0: 
michael@0: inline const SegCacheEntry * SegCache::find(const uint16 * cmapGlyphs, size_t length) const
michael@0: {
michael@0:     uint16 pos = 0;
michael@0:     if (!length || length > eMaxSpliceSize) return NULL;
michael@0:     SegCachePrefixArray pEntry = m_prefixes.array[cmapGlyphs[0]];
michael@0:     while (++pos < m_prefixLength - 1)
michael@0:     {
michael@0:         if (!pEntry.raw)
michael@0:         {
michael@0:             ++m_totalMisses;
michael@0:             return NULL;
michael@0:         }
michael@0:         pEntry = pEntry.array[(pos < length)? cmapGlyphs[pos] : 0];
michael@0:     }
michael@0:     if (!pEntry.raw)
michael@0:     {
michael@0:         ++m_totalMisses;
michael@0:         return NULL;
michael@0:     }
michael@0:     SegCachePrefixEntry * prefixEntry = pEntry.prefixEntries[(pos < length)? cmapGlyphs[pos] : 0];
michael@0:     if (!prefixEntry)
michael@0:     {
michael@0:         ++m_totalMisses;
michael@0:         return NULL;
michael@0:     }
michael@0:     const SegCacheEntry * entry = prefixEntry->find(cmapGlyphs, length);
michael@0:     if (entry)
michael@0:     {
michael@0:         ++m_totalAccessCount;
michael@0:         entry->accessed(m_totalAccessCount);
michael@0:     }
michael@0:     else
michael@0:     {
michael@0:         ++m_totalMisses;
michael@0:     }   
michael@0:     return entry;
michael@0: }
michael@0:     
michael@0: } // namespace graphite2
michael@0: 
michael@0: #endif
michael@0: