Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include "WebGLElementArrayCache.h" |
michael@0 | 7 | |
michael@0 | 8 | #include "nsTArray.h" |
michael@0 | 9 | #include "mozilla/Assertions.h" |
michael@0 | 10 | #include "mozilla/MemoryReporting.h" |
michael@0 | 11 | |
michael@0 | 12 | #include <cstdlib> |
michael@0 | 13 | #include <cstring> |
michael@0 | 14 | #include <limits> |
michael@0 | 15 | #include <algorithm> |
michael@0 | 16 | |
michael@0 | 17 | namespace mozilla { |
michael@0 | 18 | |
michael@0 | 19 | static void |
michael@0 | 20 | SetUpperBound(uint32_t* out_upperBound, uint32_t newBound) |
michael@0 | 21 | { |
michael@0 | 22 | if (!out_upperBound) |
michael@0 | 23 | return; |
michael@0 | 24 | |
michael@0 | 25 | *out_upperBound = newBound; |
michael@0 | 26 | } |
michael@0 | 27 | |
michael@0 | 28 | static void |
michael@0 | 29 | UpdateUpperBound(uint32_t* out_upperBound, uint32_t newBound) |
michael@0 | 30 | { |
michael@0 | 31 | if (!out_upperBound) |
michael@0 | 32 | return; |
michael@0 | 33 | |
michael@0 | 34 | *out_upperBound = std::max(*out_upperBound, newBound); |
michael@0 | 35 | } |
michael@0 | 36 | |
michael@0 | 37 | /* |
michael@0 | 38 | * WebGLElementArrayCacheTree contains most of the implementation of WebGLElementArrayCache, |
michael@0 | 39 | * which performs WebGL element array buffer validation for drawElements. |
michael@0 | 40 | * |
michael@0 | 41 | * Attention: Here lie nontrivial data structures, bug-prone algorithms, and non-canonical tweaks! |
michael@0 | 42 | * Whence the explanatory comments, and compiled unit test. |
michael@0 | 43 | * |
michael@0 | 44 | * *** What problem are we solving here? *** |
michael@0 | 45 | * |
michael@0 | 46 | * WebGL::DrawElements has to validate that the elements are in range wrt the current vertex attribs. |
michael@0 | 47 | * This boils down to the problem, given an array of integers, of computing the maximum in an arbitrary |
michael@0 | 48 | * sub-array. The naive algorithm has linear complexity; this has been a major performance problem, |
michael@0 | 49 | * see bug 569431. In that bug, we took the approach of caching the max for the whole array, which |
michael@0 | 50 | * does cover most cases (DrawElements typically consumes the whole element array buffer) but doesn't |
michael@0 | 51 | * help in other use cases: |
michael@0 | 52 | * - when doing "partial DrawElements" i.e. consuming only part of the element array buffer |
michael@0 | 53 | * - when doing frequent "partial buffer updates" i.e. bufferSubData calls updating parts of the |
michael@0 | 54 | * element array buffer |
michael@0 | 55 | * |
michael@0 | 56 | * *** The solution: a binary tree *** |
michael@0 | 57 | * |
michael@0 | 58 | * The solution implemented here is to use a binary tree as the cache data structure. Each tree node |
michael@0 | 59 | * contains the max of its two children nodes. In this way, finding the maximum in any contiguous sub-array |
michael@0 | 60 | * has log complexity instead of linear complexity. |
michael@0 | 61 | * |
michael@0 | 62 | * Simplistically, if the element array is |
michael@0 | 63 | * |
michael@0 | 64 | * 1 4 3 2 |
michael@0 | 65 | * |
michael@0 | 66 | * then the corresponding tree is |
michael@0 | 67 | * |
michael@0 | 68 | * 4 |
michael@0 | 69 | * _/ \_ |
michael@0 | 70 | * 4 3 |
michael@0 | 71 | * / \ / \ |
michael@0 | 72 | * 1 4 3 2 |
michael@0 | 73 | * |
michael@0 | 74 | * In practice, the bottom-most levels of the tree are both the largest to store (because they |
michael@0 | 75 | * have more nodes), and the least useful performance-wise (because each node in the bottom |
michael@0 | 76 | * levels concerns only few entries in the elements array buffer, it is cheap to compute). |
michael@0 | 77 | * |
michael@0 | 78 | * For this reason, we stop the tree a few levels above, so that each tree leaf actually corresponds |
michael@0 | 79 | * to more than one element array entry. |
michael@0 | 80 | * |
michael@0 | 81 | * The number of levels that we "drop" is |sSkippedBottomTreeLevels| and the number of element array entries |
michael@0 | 82 | * that each leaf corresponds to, is |sElementsPerLeaf|. This being a binary tree, we have |
michael@0 | 83 | * |
michael@0 | 84 | * sElementsPerLeaf = 2 ^ sSkippedBottomTreeLevels. |
michael@0 | 85 | * |
michael@0 | 86 | * *** Storage layout of the binary tree *** |
michael@0 | 87 | * |
michael@0 | 88 | * We take advantage of the specifics of the situation to avoid generalist tree storage and instead |
michael@0 | 89 | * store the tree entries in a vector, mTreeData. |
michael@0 | 90 | * |
michael@0 | 91 | * The number of leaves is given by mNumLeaves, and mTreeData is always a vector of length |
michael@0 | 92 | * |
michael@0 | 93 | * 2 * mNumLeaves. |
michael@0 | 94 | * |
michael@0 | 95 | * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the root node, |
michael@0 | 96 | * then at offsets 2..3 is the tree level immediately below the root node, then at offsets 4..7 |
michael@0 | 97 | * is the tree level below that, etc. |
michael@0 | 98 | * |
michael@0 | 99 | * The figure below illustrates this by writing at each tree node the offset into mTreeData at |
michael@0 | 100 | * which it is stored: |
michael@0 | 101 | * |
michael@0 | 102 | * 1 |
michael@0 | 103 | * _/ \_ |
michael@0 | 104 | * 2 3 |
michael@0 | 105 | * / \ / \ |
michael@0 | 106 | * 4 5 6 7 |
michael@0 | 107 | * ... |
michael@0 | 108 | * |
michael@0 | 109 | * Thus, under the convention that the root level is level 0, we see that level N is stored at offsets |
michael@0 | 110 | * |
michael@0 | 111 | * [ 2^n .. 2^(n+1) - 1 ] |
michael@0 | 112 | * |
michael@0 | 113 | * in mTreeData. Likewise, all the usual tree operations have simple mathematical expressions in |
michael@0 | 114 | * terms of mTreeData offsets, see all the methods such as ParentNode, LeftChildNode, etc. |
michael@0 | 115 | * |
michael@0 | 116 | * *** Design constraint: element types aren't known at buffer-update time *** |
michael@0 | 117 | * |
michael@0 | 118 | * Note that a key constraint that we're operating under, is that we don't know the types of the elements |
michael@0 | 119 | * by the time WebGL bufferData/bufferSubData methods are called. The type of elements is only |
michael@0 | 120 | * specified in the drawElements call. This means that we may potentially have to store caches for |
michael@0 | 121 | * multiple element types, for the same element array buffer. Since we don't know yet how many |
michael@0 | 122 | * element types we'll eventually support (extensions add more), the concern about memory usage is serious. |
michael@0 | 123 | * This is addressed by sSkippedBottomTreeLevels as explained above. Of course, in the typical |
michael@0 | 124 | * case where each element array buffer is only ever used with one type, this is also addressed |
michael@0 | 125 | * by having WebGLElementArrayCache lazily create trees for each type only upon first use. |
michael@0 | 126 | * |
michael@0 | 127 | * Another consequence of this constraint is that when invalidating the trees, we have to invalidate |
michael@0 | 128 | * all existing trees. So if trees for types uint8_t, uint16_t and uint32_t have ever been constructed for this buffer, |
michael@0 | 129 | * every subsequent invalidation will have to invalidate all trees even if one of the types is never |
michael@0 | 130 | * used again. This implies that it is important to minimize the cost of invalidation i.e. |
michael@0 | 131 | * do lazy updates upon use as opposed to immediately updating invalidated trees. This poses a problem: |
michael@0 | 132 | * it is nontrivial to keep track of the part of the tree that's invalidated. The current solution |
michael@0 | 133 | * can only keep track of an invalidated interval, from |mFirstInvalidatedLeaf| to |mLastInvalidatedLeaf|. |
michael@0 | 134 | * The problem is that if one does two small, far-apart partial buffer updates, the resulting invalidated |
michael@0 | 135 | * area is very large even though only a small part of the array really needed to be invalidated. |
michael@0 | 136 | * The real solution to this problem would be to use a smarter data structure to keep track of the |
michael@0 | 137 | * invalidated area, probably an interval tree. Meanwhile, we can probably live with the current situation |
michael@0 | 138 | * as the unfavorable case seems to be a small corner case: in order to run into performance issues, |
michael@0 | 139 | * the number of bufferSubData in between two consecutive draws must be small but greater than 1, and |
michael@0 | 140 | * the partial buffer updates must be small and far apart. Anything else than this corner case |
michael@0 | 141 | * should run fast in the current setting. |
michael@0 | 142 | */ |
michael@0 | 143 | template<typename T> |
michael@0 | 144 | struct WebGLElementArrayCacheTree |
michael@0 | 145 | { |
michael@0 | 146 | // A too-high sSkippedBottomTreeLevels would harm the performance of small drawElements calls |
michael@0 | 147 | // A too-low sSkippedBottomTreeLevels would cause undue memory usage. |
michael@0 | 148 | // The current value has been validated by some benchmarking. See bug 732660. |
michael@0 | 149 | static const size_t sSkippedBottomTreeLevels = 3; |
michael@0 | 150 | static const size_t sElementsPerLeaf = 1 << sSkippedBottomTreeLevels; |
michael@0 | 151 | static const size_t sElementsPerLeafMask = sElementsPerLeaf - 1; // sElementsPerLeaf is POT |
michael@0 | 152 | |
michael@0 | 153 | private: |
michael@0 | 154 | WebGLElementArrayCache& mParent; |
michael@0 | 155 | FallibleTArray<T> mTreeData; |
michael@0 | 156 | size_t mNumLeaves; |
michael@0 | 157 | bool mInvalidated; |
michael@0 | 158 | size_t mFirstInvalidatedLeaf; |
michael@0 | 159 | size_t mLastInvalidatedLeaf; |
michael@0 | 160 | |
michael@0 | 161 | public: |
michael@0 | 162 | WebGLElementArrayCacheTree(WebGLElementArrayCache& p) |
michael@0 | 163 | : mParent(p) |
michael@0 | 164 | , mNumLeaves(0) |
michael@0 | 165 | , mInvalidated(false) |
michael@0 | 166 | , mFirstInvalidatedLeaf(0) |
michael@0 | 167 | , mLastInvalidatedLeaf(0) |
michael@0 | 168 | { |
michael@0 | 169 | ResizeToParentSize(); |
michael@0 | 170 | } |
michael@0 | 171 | |
michael@0 | 172 | T GlobalMaximum() const { |
michael@0 | 173 | MOZ_ASSERT(!mInvalidated); |
michael@0 | 174 | return mTreeData[1]; |
michael@0 | 175 | } |
michael@0 | 176 | |
michael@0 | 177 | // returns the index of the parent node; if treeIndex=1 (the root node), |
michael@0 | 178 | // the return value is 0. |
michael@0 | 179 | static size_t ParentNode(size_t treeIndex) { |
michael@0 | 180 | MOZ_ASSERT(treeIndex > 1); |
michael@0 | 181 | return treeIndex >> 1; |
michael@0 | 182 | } |
michael@0 | 183 | |
michael@0 | 184 | static bool IsRightNode(size_t treeIndex) { |
michael@0 | 185 | MOZ_ASSERT(treeIndex > 1); |
michael@0 | 186 | return treeIndex & 1; |
michael@0 | 187 | } |
michael@0 | 188 | |
michael@0 | 189 | static bool IsLeftNode(size_t treeIndex) { |
michael@0 | 190 | MOZ_ASSERT(treeIndex > 1); |
michael@0 | 191 | return !IsRightNode(treeIndex); |
michael@0 | 192 | } |
michael@0 | 193 | |
michael@0 | 194 | static size_t SiblingNode(size_t treeIndex) { |
michael@0 | 195 | MOZ_ASSERT(treeIndex > 1); |
michael@0 | 196 | return treeIndex ^ 1; |
michael@0 | 197 | } |
michael@0 | 198 | |
michael@0 | 199 | static size_t LeftChildNode(size_t treeIndex) { |
michael@0 | 200 | MOZ_ASSERT(treeIndex); |
michael@0 | 201 | return treeIndex << 1; |
michael@0 | 202 | } |
michael@0 | 203 | |
michael@0 | 204 | static size_t RightChildNode(size_t treeIndex) { |
michael@0 | 205 | MOZ_ASSERT(treeIndex); |
michael@0 | 206 | return SiblingNode(LeftChildNode(treeIndex)); |
michael@0 | 207 | } |
michael@0 | 208 | |
michael@0 | 209 | static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) { |
michael@0 | 210 | MOZ_ASSERT(treeIndex > 1); |
michael@0 | 211 | return treeIndex - distance; |
michael@0 | 212 | } |
michael@0 | 213 | |
michael@0 | 214 | static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) { |
michael@0 | 215 | MOZ_ASSERT(treeIndex > 1); |
michael@0 | 216 | return treeIndex + distance; |
michael@0 | 217 | } |
michael@0 | 218 | |
michael@0 | 219 | size_t LeafForElement(size_t element) { |
michael@0 | 220 | size_t leaf = element / sElementsPerLeaf; |
michael@0 | 221 | MOZ_ASSERT(leaf < mNumLeaves); |
michael@0 | 222 | return leaf; |
michael@0 | 223 | } |
michael@0 | 224 | |
michael@0 | 225 | size_t LeafForByte(size_t byte) { |
michael@0 | 226 | return LeafForElement(byte / sizeof(T)); |
michael@0 | 227 | } |
michael@0 | 228 | |
michael@0 | 229 | // Returns the index, into the tree storage, where a given leaf is stored |
michael@0 | 230 | size_t TreeIndexForLeaf(size_t leaf) { |
michael@0 | 231 | // See above class comment. The tree storage is an array of length 2*mNumLeaves. |
michael@0 | 232 | // The leaves are stored in its second half. |
michael@0 | 233 | return leaf + mNumLeaves; |
michael@0 | 234 | } |
michael@0 | 235 | |
michael@0 | 236 | static size_t LastElementUnderSameLeaf(size_t element) { |
michael@0 | 237 | return element | sElementsPerLeafMask; |
michael@0 | 238 | } |
michael@0 | 239 | |
michael@0 | 240 | static size_t FirstElementUnderSameLeaf(size_t element) { |
michael@0 | 241 | return element & ~sElementsPerLeafMask; |
michael@0 | 242 | } |
michael@0 | 243 | |
michael@0 | 244 | static size_t NextMultipleOfElementsPerLeaf(size_t numElements) { |
michael@0 | 245 | return ((numElements - 1) | sElementsPerLeafMask) + 1; |
michael@0 | 246 | } |
michael@0 | 247 | |
michael@0 | 248 | bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf, |
michael@0 | 249 | uint32_t* out_upperBound) |
michael@0 | 250 | { |
michael@0 | 251 | MOZ_ASSERT(!mInvalidated); |
michael@0 | 252 | |
michael@0 | 253 | size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf); |
michael@0 | 254 | size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf); |
michael@0 | 255 | |
michael@0 | 256 | while (true) { |
michael@0 | 257 | // given that we tweak these values in nontrivial ways, it doesn't hurt to do |
michael@0 | 258 | // this sanity check |
michael@0 | 259 | MOZ_ASSERT(firstTreeIndex <= lastTreeIndex); |
michael@0 | 260 | |
michael@0 | 261 | // final case where there is only 1 node to validate at the current tree level |
michael@0 | 262 | if (lastTreeIndex == firstTreeIndex) { |
michael@0 | 263 | const T& curData = mTreeData[firstTreeIndex]; |
michael@0 | 264 | UpdateUpperBound(out_upperBound, curData); |
michael@0 | 265 | return curData <= maxAllowed; |
michael@0 | 266 | } |
michael@0 | 267 | |
michael@0 | 268 | // if the first node at current tree level is a right node, handle it individually |
michael@0 | 269 | // and replace it with its right neighbor, which is a left node |
michael@0 | 270 | if (IsRightNode(firstTreeIndex)) { |
michael@0 | 271 | const T& curData = mTreeData[firstTreeIndex]; |
michael@0 | 272 | UpdateUpperBound(out_upperBound, curData); |
michael@0 | 273 | if (curData > maxAllowed) |
michael@0 | 274 | return false; |
michael@0 | 275 | firstTreeIndex = RightNeighborNode(firstTreeIndex); |
michael@0 | 276 | } |
michael@0 | 277 | |
michael@0 | 278 | // if the last node at current tree level is a left node, handle it individually |
michael@0 | 279 | // and replace it with its left neighbor, which is a right node |
michael@0 | 280 | if (IsLeftNode(lastTreeIndex)) { |
michael@0 | 281 | const T& curData = mTreeData[lastTreeIndex]; |
michael@0 | 282 | UpdateUpperBound(out_upperBound, curData); |
michael@0 | 283 | if (curData > maxAllowed) |
michael@0 | 284 | return false; |
michael@0 | 285 | lastTreeIndex = LeftNeighborNode(lastTreeIndex); |
michael@0 | 286 | } |
michael@0 | 287 | |
michael@0 | 288 | // at this point it can happen that firstTreeIndex and lastTreeIndex "crossed" each |
michael@0 | 289 | // other. That happens if firstTreeIndex was a right node and lastTreeIndex was its |
michael@0 | 290 | // right neighor: in that case, both above tweaks happened and as a result, they ended |
michael@0 | 291 | // up being swapped: lastTreeIndex is now the _left_ neighbor of firstTreeIndex. |
michael@0 | 292 | // When that happens, there is nothing left to validate. |
michael@0 | 293 | if (lastTreeIndex == LeftNeighborNode(firstTreeIndex)) { |
michael@0 | 294 | return true; |
michael@0 | 295 | } |
michael@0 | 296 | |
michael@0 | 297 | // walk up 1 level |
michael@0 | 298 | firstTreeIndex = ParentNode(firstTreeIndex); |
michael@0 | 299 | lastTreeIndex = ParentNode(lastTreeIndex); |
michael@0 | 300 | } |
michael@0 | 301 | } |
michael@0 | 302 | |
michael@0 | 303 | template<typename U> |
michael@0 | 304 | static U NextPowerOfTwo(U x) { |
michael@0 | 305 | U result = 1; |
michael@0 | 306 | while (result < x) |
michael@0 | 307 | result <<= 1; |
michael@0 | 308 | MOZ_ASSERT(result >= x); |
michael@0 | 309 | MOZ_ASSERT((result & (result - 1)) == 0); |
michael@0 | 310 | return result; |
michael@0 | 311 | } |
michael@0 | 312 | |
michael@0 | 313 | bool ResizeToParentSize() |
michael@0 | 314 | { |
michael@0 | 315 | size_t numberOfElements = mParent.ByteSize() / sizeof(T); |
michael@0 | 316 | size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf; |
michael@0 | 317 | |
michael@0 | 318 | size_t oldNumLeaves = mNumLeaves; |
michael@0 | 319 | mNumLeaves = NextPowerOfTwo(requiredNumLeaves); |
michael@0 | 320 | Invalidate(0, mParent.ByteSize() - 1); |
michael@0 | 321 | |
michael@0 | 322 | // see class comment for why we the tree storage size is 2 * mNumLeaves |
michael@0 | 323 | if (!mTreeData.SetLength(2 * mNumLeaves)) { |
michael@0 | 324 | return false; |
michael@0 | 325 | } |
michael@0 | 326 | if (mNumLeaves != oldNumLeaves) { |
michael@0 | 327 | memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(mTreeData[0])); |
michael@0 | 328 | } |
michael@0 | 329 | return true; |
michael@0 | 330 | } |
michael@0 | 331 | |
michael@0 | 332 | void Invalidate(size_t firstByte, size_t lastByte); |
michael@0 | 333 | |
michael@0 | 334 | void Update(); |
michael@0 | 335 | |
michael@0 | 336 | size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const |
michael@0 | 337 | { |
michael@0 | 338 | return aMallocSizeOf(this) + mTreeData.SizeOfExcludingThis(aMallocSizeOf); |
michael@0 | 339 | } |
michael@0 | 340 | }; |
michael@0 | 341 | |
michael@0 | 342 | // TreeForType: just a template helper to select the right tree object for a given |
michael@0 | 343 | // element type. |
michael@0 | 344 | template<typename T> |
michael@0 | 345 | struct TreeForType {}; |
michael@0 | 346 | |
michael@0 | 347 | template<> |
michael@0 | 348 | struct TreeForType<uint8_t> |
michael@0 | 349 | { |
michael@0 | 350 | static WebGLElementArrayCacheTree<uint8_t>*& Run(WebGLElementArrayCache *b) { return b->mUint8Tree; } |
michael@0 | 351 | }; |
michael@0 | 352 | |
michael@0 | 353 | template<> |
michael@0 | 354 | struct TreeForType<uint16_t> |
michael@0 | 355 | { |
michael@0 | 356 | static WebGLElementArrayCacheTree<uint16_t>*& Run(WebGLElementArrayCache *b) { return b->mUint16Tree; } |
michael@0 | 357 | }; |
michael@0 | 358 | |
michael@0 | 359 | template<> |
michael@0 | 360 | struct TreeForType<uint32_t> |
michael@0 | 361 | { |
michael@0 | 362 | static WebGLElementArrayCacheTree<uint32_t>*& Run(WebGLElementArrayCache *b) { return b->mUint32Tree; } |
michael@0 | 363 | }; |
michael@0 | 364 | |
michael@0 | 365 | // When the buffer gets updated from firstByte to lastByte, |
michael@0 | 366 | // calling this method will notify the tree accordingly |
michael@0 | 367 | template<typename T> |
michael@0 | 368 | void WebGLElementArrayCacheTree<T>::Invalidate(size_t firstByte, size_t lastByte) |
michael@0 | 369 | { |
michael@0 | 370 | lastByte = std::min(lastByte, mNumLeaves * sElementsPerLeaf * sizeof(T) - 1); |
michael@0 | 371 | if (firstByte > lastByte) { |
michael@0 | 372 | return; |
michael@0 | 373 | } |
michael@0 | 374 | |
michael@0 | 375 | size_t firstLeaf = LeafForByte(firstByte); |
michael@0 | 376 | size_t lastLeaf = LeafForByte(lastByte); |
michael@0 | 377 | |
michael@0 | 378 | if (mInvalidated) { |
michael@0 | 379 | mFirstInvalidatedLeaf = std::min(firstLeaf, mFirstInvalidatedLeaf); |
michael@0 | 380 | mLastInvalidatedLeaf = std::max(lastLeaf, mLastInvalidatedLeaf); |
michael@0 | 381 | } else { |
michael@0 | 382 | mInvalidated = true; |
michael@0 | 383 | mFirstInvalidatedLeaf = firstLeaf; |
michael@0 | 384 | mLastInvalidatedLeaf = lastLeaf; |
michael@0 | 385 | } |
michael@0 | 386 | } |
michael@0 | 387 | |
michael@0 | 388 | |
michael@0 | 389 | // When tree has been partially invalidated, from mFirstInvalidatedLeaf to |
michael@0 | 390 | // mLastInvalidatedLeaf, calling this method will 1) update the leaves in this interval |
michael@0 | 391 | // from the raw buffer data, and 2) propagate this update up the tree |
michael@0 | 392 | template<typename T> |
michael@0 | 393 | void WebGLElementArrayCacheTree<T>::Update() |
michael@0 | 394 | { |
michael@0 | 395 | if (!mInvalidated) { |
michael@0 | 396 | return; |
michael@0 | 397 | } |
michael@0 | 398 | |
michael@0 | 399 | MOZ_ASSERT(mLastInvalidatedLeaf < mNumLeaves); |
michael@0 | 400 | |
michael@0 | 401 | size_t firstTreeIndex = TreeIndexForLeaf(mFirstInvalidatedLeaf); |
michael@0 | 402 | size_t lastTreeIndex = TreeIndexForLeaf(mLastInvalidatedLeaf); |
michael@0 | 403 | |
michael@0 | 404 | // Step #1: initialize the tree leaves from plain buffer data. |
michael@0 | 405 | // That is, each tree leaf must be set to the max of the |sElementsPerLeaf| corresponding |
michael@0 | 406 | // buffer entries. |
michael@0 | 407 | // condition-less scope to prevent leaking this scope's variables into the code below |
michael@0 | 408 | { |
michael@0 | 409 | // treeIndex is the index of the tree leaf we're writing, i.e. the destination index |
michael@0 | 410 | size_t treeIndex = firstTreeIndex; |
michael@0 | 411 | // srcIndex is the index in the source buffer |
michael@0 | 412 | size_t srcIndex = mFirstInvalidatedLeaf * sElementsPerLeaf; |
michael@0 | 413 | size_t numberOfElements = mParent.ByteSize() / sizeof(T); |
michael@0 | 414 | while (treeIndex <= lastTreeIndex) { |
michael@0 | 415 | T m = 0; |
michael@0 | 416 | size_t a = srcIndex; |
michael@0 | 417 | size_t srcIndexNextLeaf = std::min(a + sElementsPerLeaf, numberOfElements); |
michael@0 | 418 | for (; srcIndex < srcIndexNextLeaf; srcIndex++) { |
michael@0 | 419 | m = std::max(m, mParent.Element<T>(srcIndex)); |
michael@0 | 420 | } |
michael@0 | 421 | mTreeData[treeIndex] = m; |
michael@0 | 422 | treeIndex++; |
michael@0 | 423 | } |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | // Step #2: propagate the values up the tree. This is simply a matter of walking up |
michael@0 | 427 | // the tree and setting each node to the max of its two children. |
michael@0 | 428 | while (firstTreeIndex > 1) { |
michael@0 | 429 | |
michael@0 | 430 | // move up 1 level |
michael@0 | 431 | firstTreeIndex = ParentNode(firstTreeIndex); |
michael@0 | 432 | lastTreeIndex = ParentNode(lastTreeIndex); |
michael@0 | 433 | |
michael@0 | 434 | // fast-exit case where only one node is invalidated at the current level |
michael@0 | 435 | if (firstTreeIndex == lastTreeIndex) { |
michael@0 | 436 | mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]); |
michael@0 | 437 | continue; |
michael@0 | 438 | } |
michael@0 | 439 | |
michael@0 | 440 | // initialize local iteration variables: child and parent. |
michael@0 | 441 | size_t child = LeftChildNode(firstTreeIndex); |
michael@0 | 442 | size_t parent = firstTreeIndex; |
michael@0 | 443 | |
michael@0 | 444 | // the unrolling makes this look more complicated than it is; the plain non-unrolled |
michael@0 | 445 | // version is in the second while loop below |
michael@0 | 446 | const int unrollSize = 8; |
michael@0 | 447 | while (RightNeighborNode(parent, unrollSize - 1) <= lastTreeIndex) |
michael@0 | 448 | { |
michael@0 | 449 | for (int unroll = 0; unroll < unrollSize; unroll++) |
michael@0 | 450 | { |
michael@0 | 451 | T a = mTreeData[child]; |
michael@0 | 452 | child = RightNeighborNode(child); |
michael@0 | 453 | T b = mTreeData[child]; |
michael@0 | 454 | child = RightNeighborNode(child); |
michael@0 | 455 | mTreeData[parent] = std::max(a, b); |
michael@0 | 456 | parent = RightNeighborNode(parent); |
michael@0 | 457 | } |
michael@0 | 458 | } |
michael@0 | 459 | // plain non-unrolled version, used to terminate the job after the last unrolled iteration |
michael@0 | 460 | while (parent <= lastTreeIndex) |
michael@0 | 461 | { |
michael@0 | 462 | T a = mTreeData[child]; |
michael@0 | 463 | child = RightNeighborNode(child); |
michael@0 | 464 | T b = mTreeData[child]; |
michael@0 | 465 | child = RightNeighborNode(child); |
michael@0 | 466 | mTreeData[parent] = std::max(a, b); |
michael@0 | 467 | parent = RightNeighborNode(parent); |
michael@0 | 468 | } |
michael@0 | 469 | } |
michael@0 | 470 | |
michael@0 | 471 | mInvalidated = false; |
michael@0 | 472 | } |
michael@0 | 473 | |
michael@0 | 474 | WebGLElementArrayCache::~WebGLElementArrayCache() { |
michael@0 | 475 | delete mUint8Tree; |
michael@0 | 476 | delete mUint16Tree; |
michael@0 | 477 | delete mUint32Tree; |
michael@0 | 478 | free(mUntypedData); |
michael@0 | 479 | } |
michael@0 | 480 | |
michael@0 | 481 | bool WebGLElementArrayCache::BufferData(const void* ptr, size_t byteSize) { |
michael@0 | 482 | mByteSize = byteSize; |
michael@0 | 483 | if (mUint8Tree) |
michael@0 | 484 | if (!mUint8Tree->ResizeToParentSize()) |
michael@0 | 485 | return false; |
michael@0 | 486 | if (mUint16Tree) |
michael@0 | 487 | if (!mUint16Tree->ResizeToParentSize()) |
michael@0 | 488 | return false; |
michael@0 | 489 | if (mUint32Tree) |
michael@0 | 490 | if (!mUint32Tree->ResizeToParentSize()) |
michael@0 | 491 | return false; |
michael@0 | 492 | mUntypedData = realloc(mUntypedData, byteSize); |
michael@0 | 493 | if (!mUntypedData) |
michael@0 | 494 | return false; |
michael@0 | 495 | BufferSubData(0, ptr, byteSize); |
michael@0 | 496 | return true; |
michael@0 | 497 | } |
michael@0 | 498 | |
michael@0 | 499 | void WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) { |
michael@0 | 500 | if (!updateByteSize) return; |
michael@0 | 501 | if (ptr) |
michael@0 | 502 | memcpy(static_cast<uint8_t*>(mUntypedData) + pos, ptr, updateByteSize); |
michael@0 | 503 | else |
michael@0 | 504 | memset(static_cast<uint8_t*>(mUntypedData) + pos, 0, updateByteSize); |
michael@0 | 505 | InvalidateTrees(pos, pos + updateByteSize - 1); |
michael@0 | 506 | } |
michael@0 | 507 | |
michael@0 | 508 | void WebGLElementArrayCache::InvalidateTrees(size_t firstByte, size_t lastByte) |
michael@0 | 509 | { |
michael@0 | 510 | if (mUint8Tree) |
michael@0 | 511 | mUint8Tree->Invalidate(firstByte, lastByte); |
michael@0 | 512 | if (mUint16Tree) |
michael@0 | 513 | mUint16Tree->Invalidate(firstByte, lastByte); |
michael@0 | 514 | if (mUint32Tree) |
michael@0 | 515 | mUint32Tree->Invalidate(firstByte, lastByte); |
michael@0 | 516 | } |
michael@0 | 517 | |
michael@0 | 518 | template<typename T> |
michael@0 | 519 | bool |
michael@0 | 520 | WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement, |
michael@0 | 521 | size_t countElements, uint32_t* out_upperBound) |
michael@0 | 522 | { |
michael@0 | 523 | SetUpperBound(out_upperBound, 0); |
michael@0 | 524 | |
michael@0 | 525 | // if maxAllowed is >= the max T value, then there is no way that a T index could be invalid |
michael@0 | 526 | uint32_t maxTSize = std::numeric_limits<T>::max(); |
michael@0 | 527 | if (maxAllowed >= maxTSize) { |
michael@0 | 528 | SetUpperBound(out_upperBound, maxTSize); |
michael@0 | 529 | return true; |
michael@0 | 530 | } |
michael@0 | 531 | |
michael@0 | 532 | T maxAllowedT(maxAllowed); |
michael@0 | 533 | |
michael@0 | 534 | // integer overflow must have been handled earlier, so we assert that maxAllowedT |
michael@0 | 535 | // is exactly the max allowed value. |
michael@0 | 536 | MOZ_ASSERT(uint32_t(maxAllowedT) == maxAllowed); |
michael@0 | 537 | |
michael@0 | 538 | if (!mByteSize || !countElements) |
michael@0 | 539 | return true; |
michael@0 | 540 | |
michael@0 | 541 | WebGLElementArrayCacheTree<T>*& tree = TreeForType<T>::Run(this); |
michael@0 | 542 | if (!tree) { |
michael@0 | 543 | tree = new WebGLElementArrayCacheTree<T>(*this); |
michael@0 | 544 | } |
michael@0 | 545 | |
michael@0 | 546 | size_t lastElement = firstElement + countElements - 1; |
michael@0 | 547 | |
michael@0 | 548 | tree->Update(); |
michael@0 | 549 | |
michael@0 | 550 | // fast exit path when the global maximum for the whole element array buffer |
michael@0 | 551 | // falls in the allowed range |
michael@0 | 552 | T globalMax = tree->GlobalMaximum(); |
michael@0 | 553 | if (globalMax <= maxAllowedT) |
michael@0 | 554 | { |
michael@0 | 555 | SetUpperBound(out_upperBound, globalMax); |
michael@0 | 556 | return true; |
michael@0 | 557 | } |
michael@0 | 558 | |
michael@0 | 559 | const T* elements = Elements<T>(); |
michael@0 | 560 | |
michael@0 | 561 | // before calling tree->Validate, we have to validate ourselves the boundaries of the elements span, |
michael@0 | 562 | // to round them to the nearest multiple of sElementsPerLeaf. |
michael@0 | 563 | size_t firstElementAdjustmentEnd = std::min(lastElement, |
michael@0 | 564 | tree->LastElementUnderSameLeaf(firstElement)); |
michael@0 | 565 | while (firstElement <= firstElementAdjustmentEnd) { |
michael@0 | 566 | const T& curData = elements[firstElement]; |
michael@0 | 567 | UpdateUpperBound(out_upperBound, curData); |
michael@0 | 568 | if (curData > maxAllowedT) |
michael@0 | 569 | return false; |
michael@0 | 570 | firstElement++; |
michael@0 | 571 | } |
michael@0 | 572 | size_t lastElementAdjustmentEnd = std::max(firstElement, |
michael@0 | 573 | tree->FirstElementUnderSameLeaf(lastElement)); |
michael@0 | 574 | while (lastElement >= lastElementAdjustmentEnd) { |
michael@0 | 575 | const T& curData = elements[lastElement]; |
michael@0 | 576 | UpdateUpperBound(out_upperBound, curData); |
michael@0 | 577 | if (curData > maxAllowedT) |
michael@0 | 578 | return false; |
michael@0 | 579 | lastElement--; |
michael@0 | 580 | } |
michael@0 | 581 | |
michael@0 | 582 | // at this point, for many tiny validations, we're already done. |
michael@0 | 583 | if (firstElement > lastElement) |
michael@0 | 584 | return true; |
michael@0 | 585 | |
michael@0 | 586 | // general case |
michael@0 | 587 | return tree->Validate(maxAllowedT, |
michael@0 | 588 | tree->LeafForElement(firstElement), |
michael@0 | 589 | tree->LeafForElement(lastElement), |
michael@0 | 590 | out_upperBound); |
michael@0 | 591 | } |
michael@0 | 592 | |
michael@0 | 593 | bool |
michael@0 | 594 | WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed, |
michael@0 | 595 | size_t firstElement, size_t countElements, |
michael@0 | 596 | uint32_t* out_upperBound) |
michael@0 | 597 | { |
michael@0 | 598 | if (type == LOCAL_GL_UNSIGNED_BYTE) |
michael@0 | 599 | return Validate<uint8_t>(maxAllowed, firstElement, countElements, out_upperBound); |
michael@0 | 600 | if (type == LOCAL_GL_UNSIGNED_SHORT) |
michael@0 | 601 | return Validate<uint16_t>(maxAllowed, firstElement, countElements, out_upperBound); |
michael@0 | 602 | if (type == LOCAL_GL_UNSIGNED_INT) |
michael@0 | 603 | return Validate<uint32_t>(maxAllowed, firstElement, countElements, out_upperBound); |
michael@0 | 604 | |
michael@0 | 605 | MOZ_ASSERT(false, "Invalid type."); |
michael@0 | 606 | return false; |
michael@0 | 607 | } |
michael@0 | 608 | |
michael@0 | 609 | size_t |
michael@0 | 610 | WebGLElementArrayCache::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const |
michael@0 | 611 | { |
michael@0 | 612 | size_t uint8TreeSize = mUint8Tree ? mUint8Tree->SizeOfIncludingThis(aMallocSizeOf) : 0; |
michael@0 | 613 | size_t uint16TreeSize = mUint16Tree ? mUint16Tree->SizeOfIncludingThis(aMallocSizeOf) : 0; |
michael@0 | 614 | size_t uint32TreeSize = mUint32Tree ? mUint32Tree->SizeOfIncludingThis(aMallocSizeOf) : 0; |
michael@0 | 615 | return aMallocSizeOf(this) + |
michael@0 | 616 | mByteSize + |
michael@0 | 617 | uint8TreeSize + |
michael@0 | 618 | uint16TreeSize + |
michael@0 | 619 | uint32TreeSize; |
michael@0 | 620 | } |
michael@0 | 621 | |
michael@0 | 622 | } // end namespace mozilla |