Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* GRAPHITE2 LICENSING |
michael@0 | 2 | |
michael@0 | 3 | Copyright 2010, SIL International |
michael@0 | 4 | All rights reserved. |
michael@0 | 5 | |
michael@0 | 6 | This library is free software; you can redistribute it and/or modify |
michael@0 | 7 | it under the terms of the GNU Lesser General Public License as published |
michael@0 | 8 | by the Free Software Foundation; either version 2.1 of License, or |
michael@0 | 9 | (at your option) any later version. |
michael@0 | 10 | |
michael@0 | 11 | This program is distributed in the hope that it will be useful, |
michael@0 | 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
michael@0 | 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
michael@0 | 14 | Lesser General Public License for more details. |
michael@0 | 15 | |
michael@0 | 16 | You should also have received a copy of the GNU Lesser General Public |
michael@0 | 17 | License along with this library in the file named "LICENSE". |
michael@0 | 18 | If not, write to the Free Software Foundation, 51 Franklin Street, |
michael@0 | 19 | Suite 500, Boston, MA 02110-1335, USA or visit their web page on the |
michael@0 | 20 | internet at http://www.fsf.org/licenses/lgpl.html. |
michael@0 | 21 | |
michael@0 | 22 | Alternatively, the contents of this file may be used under the terms of the |
michael@0 | 23 | Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public |
michael@0 | 24 | License, as published by the Free Software Foundation, either version 2 |
michael@0 | 25 | of the License or (at your option) any later version. |
michael@0 | 26 | */ |
michael@0 | 27 | #pragma once |
michael@0 | 28 | |
michael@0 | 29 | #ifndef GRAPHITE2_NSEGCACHE |
michael@0 | 30 | |
michael@0 | 31 | #include <graphite2/Segment.h> |
michael@0 | 32 | #include "inc/Main.h" |
michael@0 | 33 | #include "inc/Slot.h" |
michael@0 | 34 | #include "inc/FeatureVal.h" |
michael@0 | 35 | #include "inc/SegCacheEntry.h" |
michael@0 | 36 | #include "inc/Segment.h" |
michael@0 | 37 | |
michael@0 | 38 | namespace graphite2 { |
michael@0 | 39 | |
michael@0 | 40 | class SegCache; |
michael@0 | 41 | class SegCacheEntry; |
michael@0 | 42 | class SegCacheStore; |
michael@0 | 43 | |
michael@0 | 44 | /** |
michael@0 | 45 | * SegPrefixEntry stores lists of word/syllable segments |
michael@0 | 46 | * with one list for each word length. The prefix size should be chosen so that |
michael@0 | 47 | * these list sizes stay small since they will be searched iteratively. |
michael@0 | 48 | */ |
michael@0 | 49 | class SegCachePrefixEntry |
michael@0 | 50 | { |
michael@0 | 51 | SegCachePrefixEntry(const SegCachePrefixEntry &); |
michael@0 | 52 | SegCachePrefixEntry & operator = (const SegCachePrefixEntry &); |
michael@0 | 53 | |
michael@0 | 54 | public: |
michael@0 | 55 | SegCachePrefixEntry() : m_lastPurge(0) |
michael@0 | 56 | { |
michael@0 | 57 | memset(m_entryCounts, 0, sizeof m_entryCounts); |
michael@0 | 58 | memset(m_entryBSIndex, 0, sizeof m_entryBSIndex); |
michael@0 | 59 | memset(m_entries, 0, sizeof m_entries); |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | ~SegCachePrefixEntry() |
michael@0 | 63 | { |
michael@0 | 64 | for (size_t j = 0; j < eMaxSpliceSize; j++) |
michael@0 | 65 | { |
michael@0 | 66 | if (m_entryCounts[j]) |
michael@0 | 67 | { |
michael@0 | 68 | assert(m_entries[j]); |
michael@0 | 69 | for (size_t k = 0; k < m_entryCounts[j]; k++) |
michael@0 | 70 | m_entries[j][k].clear(); |
michael@0 | 71 | |
michael@0 | 72 | free(m_entries[j]); |
michael@0 | 73 | } |
michael@0 | 74 | } |
michael@0 | 75 | } |
michael@0 | 76 | const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const |
michael@0 | 77 | { |
michael@0 | 78 | if (length <= ePrefixLength) |
michael@0 | 79 | { |
michael@0 | 80 | assert(m_entryCounts[length-1] <= 1); |
michael@0 | 81 | if (m_entries[length-1]) |
michael@0 | 82 | return m_entries[length-1]; |
michael@0 | 83 | return NULL; |
michael@0 | 84 | } |
michael@0 | 85 | SegCacheEntry * entry = NULL; |
michael@0 | 86 | findPosition(cmapGlyphs, length, &entry); |
michael@0 | 87 | return entry; |
michael@0 | 88 | } |
michael@0 | 89 | SegCacheEntry * cache(const uint16* cmapGlyphs, size_t length, Segment * seg, size_t charOffset, unsigned long long totalAccessCount) |
michael@0 | 90 | { |
michael@0 | 91 | size_t listSize = m_entryBSIndex[length-1]? (m_entryBSIndex[length-1] << 1) - 1 : 0; |
michael@0 | 92 | SegCacheEntry * newEntries = NULL; |
michael@0 | 93 | if (m_entryCounts[length-1] + 1u > listSize) |
michael@0 | 94 | { |
michael@0 | 95 | if (m_entryCounts[length-1] == 0) |
michael@0 | 96 | listSize = 1; |
michael@0 | 97 | else |
michael@0 | 98 | { |
michael@0 | 99 | // the problem comes when you get incremental numeric ids in a large doc |
michael@0 | 100 | if (listSize >= eMaxSuffixCount) |
michael@0 | 101 | return NULL; |
michael@0 | 102 | listSize = (m_entryBSIndex[length-1] << 2) - 1; |
michael@0 | 103 | } |
michael@0 | 104 | newEntries = gralloc<SegCacheEntry>(listSize); |
michael@0 | 105 | if (!newEntries) |
michael@0 | 106 | return NULL; |
michael@0 | 107 | } |
michael@0 | 108 | |
michael@0 | 109 | uint16 insertPos = 0; |
michael@0 | 110 | if (m_entryCounts[length-1] > 0) |
michael@0 | 111 | { |
michael@0 | 112 | insertPos = findPosition(cmapGlyphs, length, NULL); |
michael@0 | 113 | if (!newEntries) |
michael@0 | 114 | { |
michael@0 | 115 | // same buffer, shift entries up |
michael@0 | 116 | memmove(m_entries[length-1] + insertPos + 1, m_entries[length-1] + insertPos, |
michael@0 | 117 | sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos)); |
michael@0 | 118 | } |
michael@0 | 119 | else |
michael@0 | 120 | { |
michael@0 | 121 | memcpy(newEntries, m_entries[length-1], sizeof(SegCacheEntry) * (insertPos)); |
michael@0 | 122 | memcpy(newEntries + insertPos + 1, m_entries[length-1] + insertPos, |
michael@0 | 123 | sizeof(SegCacheEntry) * (m_entryCounts[length-1] - insertPos)); |
michael@0 | 124 | |
michael@0 | 125 | free(m_entries[length-1]); |
michael@0 | 126 | m_entries[length-1] = newEntries; |
michael@0 | 127 | assert (m_entryBSIndex[length-1]); |
michael@0 | 128 | m_entryBSIndex[length-1] <<= 1; |
michael@0 | 129 | } |
michael@0 | 130 | } |
michael@0 | 131 | else |
michael@0 | 132 | { |
michael@0 | 133 | m_entryBSIndex[length-1] = 1; |
michael@0 | 134 | m_entries[length-1] = newEntries; |
michael@0 | 135 | } |
michael@0 | 136 | m_entryCounts[length-1] += 1; |
michael@0 | 137 | new (m_entries[length-1] + insertPos) |
michael@0 | 138 | SegCacheEntry(cmapGlyphs, length, seg, charOffset, totalAccessCount); |
michael@0 | 139 | return m_entries[length-1] + insertPos; |
michael@0 | 140 | } |
michael@0 | 141 | uint32 purge(unsigned long long minAccessCount, unsigned long long oldAccessTime, |
michael@0 | 142 | unsigned long long currentTime); |
michael@0 | 143 | CLASS_NEW_DELETE |
michael@0 | 144 | private: |
michael@0 | 145 | uint16 findPosition(const uint16 * cmapGlyphs, uint16 length, SegCacheEntry ** entry) const |
michael@0 | 146 | { |
michael@0 | 147 | int dir = 0; |
michael@0 | 148 | if (m_entryCounts[length-1] == 0) |
michael@0 | 149 | { |
michael@0 | 150 | if (entry) *entry = NULL; |
michael@0 | 151 | return 0; |
michael@0 | 152 | } |
michael@0 | 153 | else if (m_entryCounts[length-1] == 1) |
michael@0 | 154 | { |
michael@0 | 155 | // optimize single entry case |
michael@0 | 156 | for (int i = ePrefixLength; i < length; i++) |
michael@0 | 157 | { |
michael@0 | 158 | if (cmapGlyphs[i] > m_entries[length-1][0].m_unicode[i]) |
michael@0 | 159 | { |
michael@0 | 160 | return 1; |
michael@0 | 161 | } |
michael@0 | 162 | else if (cmapGlyphs[i] < m_entries[length-1][0].m_unicode[i]) |
michael@0 | 163 | { |
michael@0 | 164 | return 0; |
michael@0 | 165 | } |
michael@0 | 166 | } |
michael@0 | 167 | if (entry) |
michael@0 | 168 | *entry = m_entries[length-1]; |
michael@0 | 169 | return 0; |
michael@0 | 170 | } |
michael@0 | 171 | uint16 searchIndex = m_entryBSIndex[length-1] - 1; |
michael@0 | 172 | uint16 stepSize = m_entryBSIndex[length-1] >> 1; |
michael@0 | 173 | size_t prevIndex = searchIndex; |
michael@0 | 174 | do |
michael@0 | 175 | { |
michael@0 | 176 | dir = 0; |
michael@0 | 177 | if (searchIndex >= m_entryCounts[length-1]) |
michael@0 | 178 | { |
michael@0 | 179 | dir = -1; |
michael@0 | 180 | searchIndex -= stepSize; |
michael@0 | 181 | stepSize >>= 1; |
michael@0 | 182 | } |
michael@0 | 183 | else |
michael@0 | 184 | { |
michael@0 | 185 | for (int i = ePrefixLength; i < length; i++) |
michael@0 | 186 | { |
michael@0 | 187 | if (cmapGlyphs[i] > m_entries[length-1][searchIndex].m_unicode[i]) |
michael@0 | 188 | { |
michael@0 | 189 | dir = 1; |
michael@0 | 190 | searchIndex += stepSize; |
michael@0 | 191 | stepSize >>= 1; |
michael@0 | 192 | break; |
michael@0 | 193 | } |
michael@0 | 194 | else if (cmapGlyphs[i] < m_entries[length-1][searchIndex].m_unicode[i]) |
michael@0 | 195 | { |
michael@0 | 196 | dir = -1; |
michael@0 | 197 | searchIndex -= stepSize; |
michael@0 | 198 | stepSize >>= 1; |
michael@0 | 199 | break; |
michael@0 | 200 | } |
michael@0 | 201 | } |
michael@0 | 202 | } |
michael@0 | 203 | if (prevIndex == searchIndex) |
michael@0 | 204 | break; |
michael@0 | 205 | prevIndex = searchIndex; |
michael@0 | 206 | } while (dir != 0); |
michael@0 | 207 | if (entry) |
michael@0 | 208 | { |
michael@0 | 209 | if (dir == 0) |
michael@0 | 210 | *entry = m_entries[length-1] + searchIndex; |
michael@0 | 211 | else |
michael@0 | 212 | *entry = NULL; |
michael@0 | 213 | } |
michael@0 | 214 | else |
michael@0 | 215 | { |
michael@0 | 216 | // if entry is null, then this is for inserting a new value, which |
michael@0 | 217 | // shouldn't already be in the cache |
michael@0 | 218 | assert(dir != 0); |
michael@0 | 219 | if (dir > 0) |
michael@0 | 220 | ++searchIndex; |
michael@0 | 221 | } |
michael@0 | 222 | return searchIndex; |
michael@0 | 223 | } |
michael@0 | 224 | /** m_entries is a null terminated list of entries */ |
michael@0 | 225 | uint16 m_entryCounts[eMaxSpliceSize]; |
michael@0 | 226 | uint16 m_entryBSIndex[eMaxSpliceSize]; |
michael@0 | 227 | SegCacheEntry * m_entries[eMaxSpliceSize]; |
michael@0 | 228 | unsigned long long m_lastPurge; |
michael@0 | 229 | }; |
michael@0 | 230 | |
michael@0 | 231 | |
michael@0 | 232 | #define SEG_CACHE_MIN_INDEX (store->maxCmapGid()) |
michael@0 | 233 | #define SEG_CACHE_MAX_INDEX (store->maxCmapGid()+1u) |
michael@0 | 234 | #define SEG_CACHE_UNSET_INDEX (store->maxCmapGid()+2u) |
michael@0 | 235 | |
michael@0 | 236 | union SegCachePrefixArray |
michael@0 | 237 | { |
michael@0 | 238 | void ** raw; |
michael@0 | 239 | SegCachePrefixArray * array; |
michael@0 | 240 | SegCachePrefixEntry ** prefixEntries; |
michael@0 | 241 | uintptr * range; |
michael@0 | 242 | }; |
michael@0 | 243 | |
michael@0 | 244 | class SegCache |
michael@0 | 245 | { |
michael@0 | 246 | public: |
michael@0 | 247 | SegCache(const SegCacheStore * store, const Features& features); |
michael@0 | 248 | ~SegCache(); |
michael@0 | 249 | |
michael@0 | 250 | const SegCacheEntry * find(const uint16 * cmapGlyphs, size_t length) const; |
michael@0 | 251 | SegCacheEntry * cache(SegCacheStore * store, const uint16 * cmapGlyphs, size_t length, Segment * seg, size_t charOffset); |
michael@0 | 252 | void purge(SegCacheStore * store); |
michael@0 | 253 | |
michael@0 | 254 | long long totalAccessCount() const { return m_totalAccessCount; } |
michael@0 | 255 | size_t segmentCount() const { return m_segmentCount; } |
michael@0 | 256 | const Features & features() const { return m_features; } |
michael@0 | 257 | void clear(SegCacheStore * store); |
michael@0 | 258 | |
michael@0 | 259 | CLASS_NEW_DELETE |
michael@0 | 260 | private: |
michael@0 | 261 | void freeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level); |
michael@0 | 262 | void purgeLevel(SegCacheStore * store, SegCachePrefixArray prefixes, size_t level, |
michael@0 | 263 | unsigned long long minAccessCount, unsigned long long oldAccessTime); |
michael@0 | 264 | |
michael@0 | 265 | uint16 m_prefixLength; |
michael@0 | 266 | uint16 m_maxCachedSegLength; |
michael@0 | 267 | size_t m_segmentCount; |
michael@0 | 268 | SegCachePrefixArray m_prefixes; |
michael@0 | 269 | Features m_features; |
michael@0 | 270 | mutable unsigned long long m_totalAccessCount; |
michael@0 | 271 | mutable unsigned long long m_totalMisses; |
michael@0 | 272 | float m_purgeFactor; |
michael@0 | 273 | }; |
michael@0 | 274 | |
michael@0 | 275 | inline const SegCacheEntry * SegCache::find(const uint16 * cmapGlyphs, size_t length) const |
michael@0 | 276 | { |
michael@0 | 277 | uint16 pos = 0; |
michael@0 | 278 | if (!length || length > eMaxSpliceSize) return NULL; |
michael@0 | 279 | SegCachePrefixArray pEntry = m_prefixes.array[cmapGlyphs[0]]; |
michael@0 | 280 | while (++pos < m_prefixLength - 1) |
michael@0 | 281 | { |
michael@0 | 282 | if (!pEntry.raw) |
michael@0 | 283 | { |
michael@0 | 284 | ++m_totalMisses; |
michael@0 | 285 | return NULL; |
michael@0 | 286 | } |
michael@0 | 287 | pEntry = pEntry.array[(pos < length)? cmapGlyphs[pos] : 0]; |
michael@0 | 288 | } |
michael@0 | 289 | if (!pEntry.raw) |
michael@0 | 290 | { |
michael@0 | 291 | ++m_totalMisses; |
michael@0 | 292 | return NULL; |
michael@0 | 293 | } |
michael@0 | 294 | SegCachePrefixEntry * prefixEntry = pEntry.prefixEntries[(pos < length)? cmapGlyphs[pos] : 0]; |
michael@0 | 295 | if (!prefixEntry) |
michael@0 | 296 | { |
michael@0 | 297 | ++m_totalMisses; |
michael@0 | 298 | return NULL; |
michael@0 | 299 | } |
michael@0 | 300 | const SegCacheEntry * entry = prefixEntry->find(cmapGlyphs, length); |
michael@0 | 301 | if (entry) |
michael@0 | 302 | { |
michael@0 | 303 | ++m_totalAccessCount; |
michael@0 | 304 | entry->accessed(m_totalAccessCount); |
michael@0 | 305 | } |
michael@0 | 306 | else |
michael@0 | 307 | { |
michael@0 | 308 | ++m_totalMisses; |
michael@0 | 309 | } |
michael@0 | 310 | return entry; |
michael@0 | 311 | } |
michael@0 | 312 | |
michael@0 | 313 | } // namespace graphite2 |
michael@0 | 314 | |
michael@0 | 315 | #endif |
michael@0 | 316 |