gfx/graphite2/src/inc/SegCache.h

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:67424506117c
1 /* GRAPHITE2 LICENSING
2
3 Copyright 2010, SIL International
4 All rights reserved.
5
6 This library is free software; you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published
8 by the Free Software Foundation; either version 2.1 of License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should also have received a copy of the GNU Lesser General Public
17 License along with this library in the file named "LICENSE".
18 If not, write to the Free Software Foundation, 51 Franklin Street,
19 Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
20 internet at http://www.fsf.org/licenses/lgpl.html.
21
22 Alternatively, the contents of this file may be used under the terms of the
23 Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
24 License, as published by the Free Software Foundation, either version 2
25 of the License or (at your option) any later version.
26 */
27 #pragma once
28
29 #ifndef GRAPHITE2_NSEGCACHE
30
31 #include <graphite2/Segment.h>
32 #include "inc/Main.h"
33 #include "inc/Slot.h"
34 #include "inc/FeatureVal.h"
35 #include "inc/SegCacheEntry.h"
36 #include "inc/Segment.h"
37
38 namespace graphite2 {
39
40 class SegCache;
41 class SegCacheEntry;
42 class SegCacheStore;
43
44 /**
45 * SegPrefixEntry stores lists of word/syllable segments
46 * with one list for each word length. The prefix size should be chosen so that
47 * these list sizes stay small since they will be searched iteratively.
48 */
49 class SegCachePrefixEntry
50 {
51 SegCachePrefixEntry(const SegCachePrefixEntry &);
52 SegCachePrefixEntry & operator = (const SegCachePrefixEntry &);
53
54 public:
55 SegCachePrefixEntry() : m_lastPurge(0)
56 {
57 memset(m_entryCounts, 0, sizeof m_entryCounts);
58 memset(m_entryBSIndex, 0, sizeof m_entryBSIndex);
59 memset(m_entries, 0, sizeof m_entries);
60 }
61
62 ~SegCachePrefixEntry()
63 {
64 for (size_t j = 0; j < eMaxSpliceSize; j++)
65 {
66 if (m_entryCounts[j])
67 {
68 assert(m_entries[j]);
69 for (size_t k = 0; k < m_entryCounts[j]; k++)
70 m_entries[j][k].clear();
71
72 free(m_entries[j]);
73 }
74 }
75 }
76 const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const
77 {
78 if (length <= ePrefixLength)
79 {
80 assert(m_entryCounts[length-1] <= 1);
81 if (m_entries[length-1])
82 return m_entries[length-1];
83 return NULL;
84 }
85 SegCacheEntry * entry = NULL;
86 findPosition(cmapGlyphs, length, &entry);
87 return entry;
88 }
89 SegCacheEntry * cache(const uint16* cmapGlyphs, size_t length, Segment * seg, size_t charOffset, unsigned long long totalAccessCount)
90 {
91 size_t listSize = m_entryBSIndex[length-1]? (m_entryBSIndex[length-1] << 1) - 1 : 0;
92 SegCacheEntry * newEntries = NULL;
93 if (m_entryCounts[length-1] + 1u > listSize)
94 {
95 if (m_entryCounts[length-1] == 0)
96 listSize = 1;
97 else
98 {
99 // the problem comes when you get incremental numeric ids in a large doc
100 if (listSize >= eMaxSuffixCount)
101 return NULL;
102 listSize = (m_entryBSIndex[length-1] << 2) - 1;
103 }
104 newEntries = gralloc<SegCacheEntry>(listSize);
105 if (!newEntries)
106 return NULL;
107 }
108
109 uint16 insertPos = 0;
110 if (m_entryCounts[length-1] > 0)
111 {
112 insertPos = findPosition(cmapGlyphs, length, NULL);
113 if (!newEntries)
114 {
115 // same buffer, shift entries up
116 memmove(m_entries[length-1] + insertPos + 1, m_entries[length-1] + insertPos,
117 sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos));
118 }
119 else
120 {
121 memcpy(newEntries, m_entries[length-1], sizeof(SegCacheEntry) * (insertPos));
122 memcpy(newEntries + insertPos + 1, m_entries[length-1] + insertPos,
123 sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos));
124
125 free(m_entries[length-1]);
126 m_entries[length-1] = newEntries;
127 assert (m_entryBSIndex[length-1]);
128 m_entryBSIndex[length-1] <<= 1;
129 }
130 }
131 else
132 {
133 m_entryBSIndex[length-1] = 1;
134 m_entries[length-1] = newEntries;
135 }
136 m_entryCounts[length-1] += 1;
137 new (m_entries[length-1] + insertPos)
138 SegCacheEntry(cmapGlyphs, length, seg, charOffset, totalAccessCount);
139 return m_entries[length-1] + insertPos;
140 }
141 uint32 purge(unsigned long long minAccessCount, unsigned long long oldAccessTime,
142 unsigned long long currentTime);
143 CLASS_NEW_DELETE
144 private:
145 uint16 findPosition(const uint16 * cmapGlyphs, uint16 length, SegCacheEntry ** entry) const
146 {
147 int dir = 0;
148 if (m_entryCounts[length-1] == 0)
149 {
150 if (entry) *entry = NULL;
151 return 0;
152 }
153 else if (m_entryCounts[length-1] == 1)
154 {
155 // optimize single entry case
156 for (int i = ePrefixLength; i < length; i++)
157 {
158 if (cmapGlyphs[i] > m_entries[length-1][0].m_unicode[i])
159 {
160 return 1;
161 }
162 else if (cmapGlyphs[i] < m_entries[length-1][0].m_unicode[i])
163 {
164 return 0;
165 }
166 }
167 if (entry)
168 *entry = m_entries[length-1];
169 return 0;
170 }
171 uint16 searchIndex = m_entryBSIndex[length-1] - 1;
172 uint16 stepSize = m_entryBSIndex[length-1] >> 1;
173 size_t prevIndex = searchIndex;
174 do
175 {
176 dir = 0;
177 if (searchIndex >= m_entryCounts[length-1])
178 {
179 dir = -1;
180 searchIndex -= stepSize;
181 stepSize >>= 1;
182 }
183 else
184 {
185 for (int i = ePrefixLength; i < length; i++)
186 {
187 if (cmapGlyphs[i] > m_entries[length-1][searchIndex].m_unicode[i])
188 {
189 dir = 1;
190 searchIndex += stepSize;
191 stepSize >>= 1;
192 break;
193 }
194 else if (cmapGlyphs[i] < m_entries[length-1][searchIndex].m_unicode[i])
195 {
196 dir = -1;
197 searchIndex -= stepSize;
198 stepSize >>= 1;
199 break;
200 }
201 }
202 }
203 if (prevIndex == searchIndex)
204 break;
205 prevIndex = searchIndex;
206 } while (dir != 0);
207 if (entry)
208 {
209 if (dir == 0)
210 *entry = m_entries[length-1] + searchIndex;
211 else
212 *entry = NULL;
213 }
214 else
215 {
216 // if entry is null, then this is for inserting a new value, which
217 // shouldn't already be in the cache
218 assert(dir != 0);
219 if (dir > 0)
220 ++searchIndex;
221 }
222 return searchIndex;
223 }
224 /** m_entries is a null terminated list of entries */
225 uint16 m_entryCounts[eMaxSpliceSize];
226 uint16 m_entryBSIndex[eMaxSpliceSize];
227 SegCacheEntry * m_entries[eMaxSpliceSize];
228 unsigned long long m_lastPurge;
229 };
230
231
232 #define SEG_CACHE_MIN_INDEX (store->maxCmapGid())
233 #define SEG_CACHE_MAX_INDEX (store->maxCmapGid()+1u)
234 #define SEG_CACHE_UNSET_INDEX (store->maxCmapGid()+2u)
235
236 union SegCachePrefixArray
237 {
238 void ** raw;
239 SegCachePrefixArray * array;
240 SegCachePrefixEntry ** prefixEntries;
241 uintptr * range;
242 };
243
244 class SegCache
245 {
246 public:
247 SegCache(const SegCacheStore * store, const Features& features);
248 ~SegCache();
249
250 const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const;
251 SegCacheEntry * cache(SegCacheStore * store, const uint16 * cmapGlyphs, size_t length, Segment * seg, size_t charOffset);
252 void purge(SegCacheStore * store);
253
254 long long totalAccessCount() const { return m_totalAccessCount; }
255 size_t segmentCount() const { return m_segmentCount; }
256 const Features & features() const { return m_features; }
257 void clear(SegCacheStore * store);
258
259 CLASS_NEW_DELETE
260 private:
261 void freeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level);
262 void purgeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level,
263 unsigned long long minAccessCount, unsigned long long oldAccessTime);
264
265 uint16 m_prefixLength;
266 uint16 m_maxCachedSegLength;
267 size_t m_segmentCount;
268 SegCachePrefixArray m_prefixes;
269 Features m_features;
270 mutable unsigned long long m_totalAccessCount;
271 mutable unsigned long long m_totalMisses;
272 float m_purgeFactor;
273 };
274
275 inline const SegCacheEntry * SegCache::find(const uint16 * cmapGlyphs, size_t length) const
276 {
277 uint16 pos = 0;
278 if (!length || length > eMaxSpliceSize) return NULL;
279 SegCachePrefixArray pEntry = m_prefixes.array[cmapGlyphs[0]];
280 while (++pos < m_prefixLength - 1)
281 {
282 if (!pEntry.raw)
283 {
284 ++m_totalMisses;
285 return NULL;
286 }
287 pEntry = pEntry.array[(pos < length)? cmapGlyphs[pos] : 0];
288 }
289 if (!pEntry.raw)
290 {
291 ++m_totalMisses;
292 return NULL;
293 }
294 SegCachePrefixEntry * prefixEntry = pEntry.prefixEntries[(pos < length)? cmapGlyphs[pos] : 0];
295 if (!prefixEntry)
296 {
297 ++m_totalMisses;
298 return NULL;
299 }
300 const SegCacheEntry * entry = prefixEntry->find(cmapGlyphs, length);
301 if (entry)
302 {
303 ++m_totalAccessCount;
304 entry->accessed(m_totalAccessCount);
305 }
306 else
307 {
308 ++m_totalMisses;
309 }
310 return entry;
311 }
312
313 } // namespace graphite2
314
315 #endif
316

mercurial