content/canvas/src/WebGLElementArrayCache.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/canvas/src/WebGLElementArrayCache.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,622 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "WebGLElementArrayCache.h"
    1.10 +
    1.11 +#include "nsTArray.h"
    1.12 +#include "mozilla/Assertions.h"
    1.13 +#include "mozilla/MemoryReporting.h"
    1.14 +
    1.15 +#include <cstdlib>
    1.16 +#include <cstring>
    1.17 +#include <limits>
    1.18 +#include <algorithm>
    1.19 +
    1.20 +namespace mozilla {
    1.21 +
    1.22 +static void
    1.23 +SetUpperBound(uint32_t* out_upperBound, uint32_t newBound)
    1.24 +{
    1.25 +  if (!out_upperBound)
    1.26 +      return;
    1.27 +
    1.28 +  *out_upperBound = newBound;
    1.29 +}
    1.30 +
    1.31 +static void
    1.32 +UpdateUpperBound(uint32_t* out_upperBound, uint32_t newBound)
    1.33 +{
    1.34 +  if (!out_upperBound)
    1.35 +      return;
    1.36 +
    1.37 +  *out_upperBound = std::max(*out_upperBound, newBound);
    1.38 +}
    1.39 +
    1.40 +/*
    1.41 + * WebGLElementArrayCacheTree contains most of the implementation of WebGLElementArrayCache,
    1.42 + * which performs WebGL element array buffer validation for drawElements.
    1.43 + *
    1.44 + * Attention: Here lie nontrivial data structures, bug-prone algorithms, and non-canonical tweaks!
    1.45 + * Whence the explanatory comments, and compiled unit test.
    1.46 + *
    1.47 + * *** What problem are we solving here? ***
    1.48 + *
    1.49 + * WebGL::DrawElements has to validate that the elements are in range wrt the current vertex attribs.
    1.50 + * This boils down to the problem, given an array of integers, of computing the maximum in an arbitrary
    1.51 + * sub-array. The naive algorithm has linear complexity; this has been a major performance problem,
    1.52 + * see bug 569431. In that bug, we took the approach of caching the max for the whole array, which
    1.53 + * does cover most cases (DrawElements typically consumes the whole element array buffer) but doesn't
    1.54 + * help in other use cases:
    1.55 + *  - when doing "partial DrawElements" i.e. consuming only part of the element array buffer
    1.56 + *  - when doing frequent "partial buffer updates" i.e. bufferSubData calls updating parts of the
    1.57 + *    element array buffer
    1.58 + *
    1.59 + * *** The solution: a binary tree ***
    1.60 + *
    1.61 + * The solution implemented here is to use a binary tree as the cache data structure. Each tree node
    1.62 + * contains the max of its two children nodes. In this way, finding the maximum in any contiguous sub-array
    1.63 + * has log complexity instead of linear complexity.
    1.64 + *
    1.65 + * Simplistically, if the element array is
    1.66 + *
    1.67 + *     1   4   3   2
    1.68 + *
    1.69 + * then the corresponding tree is
    1.70 + *
    1.71 + *           4
    1.72 + *         _/ \_
    1.73 + *       4       3
    1.74 + *      / \     / \
    1.75 + *     1   4   3   2
    1.76 + *
    1.77 + * In practice, the bottom-most levels of the tree are both the largest to store (because they
    1.78 + * have more nodes), and the least useful performance-wise (because each node in the bottom
    1.79 + * levels concerns only few entries in the elements array buffer, it is cheap to compute).
    1.80 + *
    1.81 + * For this reason, we stop the tree a few levels above, so that each tree leaf actually corresponds
    1.82 + * to more than one element array entry.
    1.83 + *
    1.84 + * The number of levels that we "drop" is |sSkippedBottomTreeLevels| and the number of element array entries
    1.85 + * that each leaf corresponds to, is |sElementsPerLeaf|. This being a binary tree, we have
    1.86 + *
    1.87 + *   sElementsPerLeaf = 2 ^ sSkippedBottomTreeLevels.
    1.88 + *
    1.89 + * *** Storage layout of the binary tree ***
    1.90 + *
    1.91 + * We take advantage of the specifics of the situation to avoid generalist tree storage and instead
    1.92 + * store the tree entries in a vector, mTreeData.
    1.93 + *
    1.94 + * The number of leaves is given by mNumLeaves, and mTreeData is always a vector of length
    1.95 + *
    1.96 + *    2 * mNumLeaves.
    1.97 + *
    1.98 + * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the root node,
    1.99 + * then at offsets 2..3 is the tree level immediately below the root node, then at offsets 4..7
   1.100 + * is the tree level below that, etc.
   1.101 + *
   1.102 + * The figure below illustrates this by writing at each tree node the offset into mTreeData at
   1.103 + * which it is stored:
   1.104 + *
   1.105 + *           1
   1.106 + *         _/ \_
   1.107 + *       2       3
   1.108 + *      / \     / \
   1.109 + *     4   5   6   7
   1.110 + *    ...
   1.111 + *
   1.112 + * Thus, under the convention that the root level is level 0, we see that level N is stored at offsets
   1.113 + *
   1.114 + *    [ 2^n .. 2^(n+1) - 1 ]
   1.115 + *
   1.116 + * in mTreeData. Likewise, all the usual tree operations have simple mathematical expressions in
   1.117 + * terms of mTreeData offsets, see all the methods such as ParentNode, LeftChildNode, etc.
   1.118 + *
   1.119 + * *** Design constraint: element types aren't known at buffer-update time ***
   1.120 + *
   1.121 + * Note that a key constraint that we're operating under, is that we don't know the types of the elements
   1.122 + * by the time WebGL bufferData/bufferSubData methods are called. The type of elements is only
   1.123 + * specified in the drawElements call. This means that we may potentially have to store caches for
   1.124 + * multiple element types, for the same element array buffer. Since we don't know yet how many
   1.125 + * element types we'll eventually support (extensions add more), the concern about memory usage is serious.
   1.126 + * This is addressed by sSkippedBottomTreeLevels as explained above. Of course, in the typical
   1.127 + * case where each element array buffer is only ever used with one type, this is also addressed
   1.128 + * by having WebGLElementArrayCache lazily create trees for each type only upon first use.
   1.129 + *
   1.130 + * Another consequence of this constraint is that when invalidating the trees, we have to invalidate
   1.131 + * all existing trees. So if trees for types uint8_t, uint16_t and uint32_t have ever been constructed for this buffer,
   1.132 + * every subsequent invalidation will have to invalidate all trees even if one of the types is never
   1.133 + * used again. This implies that it is important to minimize the cost of invalidation i.e.
   1.134 + * do lazy updates upon use as opposed to immediately updating invalidated trees. This poses a problem:
   1.135 + * it is nontrivial to keep track of the part of the tree that's invalidated. The current solution
   1.136 + * can only keep track of an invalidated interval, from |mFirstInvalidatedLeaf| to |mLastInvalidatedLeaf|.
   1.137 + * The problem is that if one does two small, far-apart partial buffer updates, the resulting invalidated
   1.138 + * area is very large even though only a small part of the array really needed to be invalidated.
   1.139 + * The real solution to this problem would be to use a smarter data structure to keep track of the
   1.140 + * invalidated area, probably an interval tree. Meanwhile, we can probably live with the current situation
   1.141 + * as the unfavorable case seems to be a small corner case: in order to run into performance issues,
   1.142 + * the number of bufferSubData in between two consecutive draws must be small but greater than 1, and
   1.143 + * the partial buffer updates must be small and far apart. Anything else than this corner case
   1.144 + * should run fast in the current setting.
   1.145 + */
   1.146 +template<typename T>
   1.147 +struct WebGLElementArrayCacheTree
   1.148 +{
   1.149 +  // A too-high sSkippedBottomTreeLevels would harm the performance of small drawElements calls
   1.150 +  // A too-low sSkippedBottomTreeLevels would cause undue memory usage.
   1.151 +  // The current value has been validated by some benchmarking. See bug 732660.
   1.152 +  static const size_t sSkippedBottomTreeLevels = 3;
   1.153 +  static const size_t sElementsPerLeaf = 1 << sSkippedBottomTreeLevels;
   1.154 +  static const size_t sElementsPerLeafMask = sElementsPerLeaf - 1; // sElementsPerLeaf is POT
   1.155 +
   1.156 +private:
   1.157 +  WebGLElementArrayCache& mParent;
   1.158 +  FallibleTArray<T> mTreeData;
   1.159 +  size_t mNumLeaves;
   1.160 +  bool mInvalidated;
   1.161 +  size_t mFirstInvalidatedLeaf;
   1.162 +  size_t mLastInvalidatedLeaf;
   1.163 +
   1.164 +public:
   1.165 +  WebGLElementArrayCacheTree(WebGLElementArrayCache& p)
   1.166 +    : mParent(p)
   1.167 +    , mNumLeaves(0)
   1.168 +    , mInvalidated(false)
   1.169 +    , mFirstInvalidatedLeaf(0)
   1.170 +    , mLastInvalidatedLeaf(0)
   1.171 +  {
   1.172 +    ResizeToParentSize();
   1.173 +  }
   1.174 +
   1.175 +  T GlobalMaximum() const {
   1.176 +    MOZ_ASSERT(!mInvalidated);
   1.177 +    return mTreeData[1];
   1.178 +  }
   1.179 +
   1.180 +  // returns the index of the parent node; if treeIndex=1 (the root node),
   1.181 +  // the return value is 0.
   1.182 +  static size_t ParentNode(size_t treeIndex) {
   1.183 +    MOZ_ASSERT(treeIndex > 1);
   1.184 +    return treeIndex >> 1;
   1.185 +  }
   1.186 +
   1.187 +  static bool IsRightNode(size_t treeIndex) {
   1.188 +    MOZ_ASSERT(treeIndex > 1);
   1.189 +    return treeIndex & 1;
   1.190 +  }
   1.191 +
   1.192 +  static bool IsLeftNode(size_t treeIndex) {
   1.193 +    MOZ_ASSERT(treeIndex > 1);
   1.194 +    return !IsRightNode(treeIndex);
   1.195 +  }
   1.196 +
   1.197 +  static size_t SiblingNode(size_t treeIndex) {
   1.198 +    MOZ_ASSERT(treeIndex > 1);
   1.199 +    return treeIndex ^ 1;
   1.200 +  }
   1.201 +
   1.202 +  static size_t LeftChildNode(size_t treeIndex) {
   1.203 +    MOZ_ASSERT(treeIndex);
   1.204 +    return treeIndex << 1;
   1.205 +  }
   1.206 +
   1.207 +  static size_t RightChildNode(size_t treeIndex) {
   1.208 +    MOZ_ASSERT(treeIndex);
   1.209 +    return SiblingNode(LeftChildNode(treeIndex));
   1.210 +  }
   1.211 +
   1.212 +  static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) {
   1.213 +    MOZ_ASSERT(treeIndex > 1);
   1.214 +    return treeIndex - distance;
   1.215 +  }
   1.216 +
   1.217 +  static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) {
   1.218 +    MOZ_ASSERT(treeIndex > 1);
   1.219 +    return treeIndex + distance;
   1.220 +  }
   1.221 +
   1.222 +  size_t LeafForElement(size_t element) {
   1.223 +    size_t leaf = element / sElementsPerLeaf;
   1.224 +    MOZ_ASSERT(leaf < mNumLeaves);
   1.225 +    return leaf;
   1.226 +  }
   1.227 +
   1.228 +  size_t LeafForByte(size_t byte) {
   1.229 +    return LeafForElement(byte / sizeof(T));
   1.230 +  }
   1.231 +
   1.232 +  // Returns the index, into the tree storage, where a given leaf is stored
   1.233 +  size_t TreeIndexForLeaf(size_t leaf) {
   1.234 +    // See above class comment. The tree storage is an array of length 2*mNumLeaves.
   1.235 +    // The leaves are stored in its second half.
   1.236 +    return leaf + mNumLeaves;
   1.237 +  }
   1.238 +
   1.239 +  static size_t LastElementUnderSameLeaf(size_t element) {
   1.240 +    return element | sElementsPerLeafMask;
   1.241 +  }
   1.242 +
   1.243 +  static size_t FirstElementUnderSameLeaf(size_t element) {
   1.244 +    return element & ~sElementsPerLeafMask;
   1.245 +  }
   1.246 +
   1.247 +  static size_t NextMultipleOfElementsPerLeaf(size_t numElements) {
   1.248 +    return ((numElements - 1) | sElementsPerLeafMask) + 1;
   1.249 +  }
   1.250 +
   1.251 +  bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf,
   1.252 +                uint32_t* out_upperBound)
   1.253 +  {
   1.254 +    MOZ_ASSERT(!mInvalidated);
   1.255 +
   1.256 +    size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
   1.257 +    size_t lastTreeIndex  = TreeIndexForLeaf(lastLeaf);
   1.258 +
   1.259 +    while (true) {
   1.260 +      // given that we tweak these values in nontrivial ways, it doesn't hurt to do
   1.261 +      // this sanity check
   1.262 +      MOZ_ASSERT(firstTreeIndex <= lastTreeIndex);
   1.263 +
   1.264 +      // final case where there is only 1 node to validate at the current tree level
   1.265 +      if (lastTreeIndex == firstTreeIndex) {
   1.266 +        const T& curData = mTreeData[firstTreeIndex];
   1.267 +        UpdateUpperBound(out_upperBound, curData);
   1.268 +        return curData <= maxAllowed;
   1.269 +      }
   1.270 +
   1.271 +      // if the first node at current tree level is a right node, handle it individually
   1.272 +      // and replace it with its right neighbor, which is a left node
   1.273 +      if (IsRightNode(firstTreeIndex)) {
   1.274 +        const T& curData = mTreeData[firstTreeIndex];
   1.275 +        UpdateUpperBound(out_upperBound, curData);
   1.276 +        if (curData > maxAllowed)
   1.277 +          return false;
   1.278 +        firstTreeIndex = RightNeighborNode(firstTreeIndex);
   1.279 +      }
   1.280 +
   1.281 +      // if the last node at current tree level is a left node, handle it individually
   1.282 +      // and replace it with its left neighbor, which is a right node
   1.283 +      if (IsLeftNode(lastTreeIndex)) {
   1.284 +        const T& curData = mTreeData[lastTreeIndex];
   1.285 +        UpdateUpperBound(out_upperBound, curData);
   1.286 +        if (curData > maxAllowed)
   1.287 +          return false;
   1.288 +        lastTreeIndex = LeftNeighborNode(lastTreeIndex);
   1.289 +      }
   1.290 +
   1.291 +      // at this point it can happen that firstTreeIndex and lastTreeIndex "crossed" each
   1.292 +      // other. That happens if firstTreeIndex was a right node and lastTreeIndex was its
   1.293 +      // right neighor: in that case, both above tweaks happened and as a result, they ended
   1.294 +      // up being swapped: lastTreeIndex is now the _left_ neighbor of firstTreeIndex.
   1.295 +      // When that happens, there is nothing left to validate.
   1.296 +      if (lastTreeIndex == LeftNeighborNode(firstTreeIndex)) {
   1.297 +        return true;
   1.298 +      }
   1.299 +
   1.300 +      // walk up 1 level
   1.301 +      firstTreeIndex = ParentNode(firstTreeIndex);
   1.302 +      lastTreeIndex = ParentNode(lastTreeIndex);
   1.303 +    }
   1.304 +  }
   1.305 +
   1.306 +  template<typename U>
   1.307 +  static U NextPowerOfTwo(U x) {
   1.308 +    U result = 1;
   1.309 +    while (result < x)
   1.310 +      result <<= 1;
   1.311 +    MOZ_ASSERT(result >= x);
   1.312 +    MOZ_ASSERT((result & (result - 1)) == 0);
   1.313 +    return result;
   1.314 +  }
   1.315 +
   1.316 +  bool ResizeToParentSize()
   1.317 +  {
   1.318 +    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
   1.319 +    size_t requiredNumLeaves = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf;
   1.320 +
   1.321 +    size_t oldNumLeaves = mNumLeaves;
   1.322 +    mNumLeaves = NextPowerOfTwo(requiredNumLeaves);
   1.323 +    Invalidate(0, mParent.ByteSize() - 1);
   1.324 +
   1.325 +    // see class comment for why we the tree storage size is 2 * mNumLeaves
   1.326 +    if (!mTreeData.SetLength(2 * mNumLeaves)) {
   1.327 +      return false;
   1.328 +    }
   1.329 +    if (mNumLeaves != oldNumLeaves) {
   1.330 +      memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(mTreeData[0]));
   1.331 +    }
   1.332 +    return true;
   1.333 +  }
   1.334 +
   1.335 +  void Invalidate(size_t firstByte, size_t lastByte);
   1.336 +
   1.337 +  void Update();
   1.338 +
   1.339 +  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
   1.340 +  {
   1.341 +    return aMallocSizeOf(this) + mTreeData.SizeOfExcludingThis(aMallocSizeOf);
   1.342 +  }
   1.343 +};
   1.344 +
   1.345 +// TreeForType: just a template helper to select the right tree object for a given
   1.346 +// element type.
   1.347 +template<typename T>
   1.348 +struct TreeForType {};
   1.349 +
   1.350 +template<>
   1.351 +struct TreeForType<uint8_t>
   1.352 +{
   1.353 +  static WebGLElementArrayCacheTree<uint8_t>*& Run(WebGLElementArrayCache *b) { return b->mUint8Tree; }
   1.354 +};
   1.355 +
   1.356 +template<>
   1.357 +struct TreeForType<uint16_t>
   1.358 +{
   1.359 +  static WebGLElementArrayCacheTree<uint16_t>*& Run(WebGLElementArrayCache *b) { return b->mUint16Tree; }
   1.360 +};
   1.361 +
   1.362 +template<>
   1.363 +struct TreeForType<uint32_t>
   1.364 +{
   1.365 +  static WebGLElementArrayCacheTree<uint32_t>*& Run(WebGLElementArrayCache *b) { return b->mUint32Tree; }
   1.366 +};
   1.367 +
   1.368 +// When the buffer gets updated from firstByte to lastByte,
   1.369 +// calling this method will notify the tree accordingly
   1.370 +template<typename T>
   1.371 +void WebGLElementArrayCacheTree<T>::Invalidate(size_t firstByte, size_t lastByte)
   1.372 +{
   1.373 +  lastByte = std::min(lastByte, mNumLeaves * sElementsPerLeaf * sizeof(T) - 1);
   1.374 +  if (firstByte > lastByte) {
   1.375 +    return;
   1.376 +  }
   1.377 +
   1.378 +  size_t firstLeaf = LeafForByte(firstByte);
   1.379 +  size_t lastLeaf = LeafForByte(lastByte);
   1.380 +
   1.381 +  if (mInvalidated) {
   1.382 +    mFirstInvalidatedLeaf = std::min(firstLeaf, mFirstInvalidatedLeaf);
   1.383 +    mLastInvalidatedLeaf = std::max(lastLeaf, mLastInvalidatedLeaf);
   1.384 +  } else {
   1.385 +    mInvalidated = true;
   1.386 +    mFirstInvalidatedLeaf = firstLeaf;
   1.387 +    mLastInvalidatedLeaf = lastLeaf;
   1.388 +  }
   1.389 +}
   1.390 +
   1.391 +
   1.392 +// When tree has been partially invalidated, from mFirstInvalidatedLeaf to
   1.393 +// mLastInvalidatedLeaf, calling this method will 1) update the leaves in this interval
   1.394 +// from the raw buffer data, and 2) propagate this update up the tree
   1.395 +template<typename T>
   1.396 +void WebGLElementArrayCacheTree<T>::Update()
   1.397 +{
   1.398 +  if (!mInvalidated) {
   1.399 +    return;
   1.400 +  }
   1.401 +
   1.402 +  MOZ_ASSERT(mLastInvalidatedLeaf < mNumLeaves);
   1.403 +
   1.404 +  size_t firstTreeIndex = TreeIndexForLeaf(mFirstInvalidatedLeaf);
   1.405 +  size_t lastTreeIndex = TreeIndexForLeaf(mLastInvalidatedLeaf);
   1.406 +
   1.407 +  // Step #1: initialize the tree leaves from plain buffer data.
   1.408 +  // That is, each tree leaf must be set to the max of the |sElementsPerLeaf| corresponding
   1.409 +  // buffer entries.
   1.410 +  // condition-less scope to prevent leaking this scope's variables into the code below
   1.411 +  {
   1.412 +    // treeIndex is the index of the tree leaf we're writing, i.e. the destination index
   1.413 +    size_t treeIndex = firstTreeIndex;
   1.414 +    // srcIndex is the index in the source buffer
   1.415 +    size_t srcIndex = mFirstInvalidatedLeaf * sElementsPerLeaf;
   1.416 +    size_t numberOfElements = mParent.ByteSize() / sizeof(T);
   1.417 +    while (treeIndex <= lastTreeIndex) {
   1.418 +      T m = 0;
   1.419 +      size_t a = srcIndex;
   1.420 +      size_t srcIndexNextLeaf = std::min(a + sElementsPerLeaf, numberOfElements);
   1.421 +      for (; srcIndex < srcIndexNextLeaf; srcIndex++) {
   1.422 +        m = std::max(m, mParent.Element<T>(srcIndex));
   1.423 +      }
   1.424 +      mTreeData[treeIndex] = m;
   1.425 +      treeIndex++;
   1.426 +    }
   1.427 +  }
   1.428 +
   1.429 +  // Step #2: propagate the values up the tree. This is simply a matter of walking up
   1.430 +  // the tree and setting each node to the max of its two children.
   1.431 +  while (firstTreeIndex > 1) {
   1.432 +
   1.433 +    // move up 1 level
   1.434 +    firstTreeIndex = ParentNode(firstTreeIndex);
   1.435 +    lastTreeIndex = ParentNode(lastTreeIndex);
   1.436 +
   1.437 +    // fast-exit case where only one node is invalidated at the current level
   1.438 +    if (firstTreeIndex == lastTreeIndex) {
   1.439 +      mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]);
   1.440 +      continue;
   1.441 +    }
   1.442 +
   1.443 +    // initialize local iteration variables: child and parent.
   1.444 +    size_t child = LeftChildNode(firstTreeIndex);
   1.445 +    size_t parent = firstTreeIndex;
   1.446 +
   1.447 +    // the unrolling makes this look more complicated than it is; the plain non-unrolled
   1.448 +    // version is in the second while loop below
   1.449 +    const int unrollSize = 8;
   1.450 +    while (RightNeighborNode(parent, unrollSize - 1) <= lastTreeIndex)
   1.451 +    {
   1.452 +      for (int unroll = 0; unroll < unrollSize; unroll++)
   1.453 +      {
   1.454 +        T a = mTreeData[child];
   1.455 +        child = RightNeighborNode(child);
   1.456 +        T b = mTreeData[child];
   1.457 +        child = RightNeighborNode(child);
   1.458 +        mTreeData[parent] = std::max(a, b);
   1.459 +        parent = RightNeighborNode(parent);
   1.460 +      }
   1.461 +    }
   1.462 +    // plain non-unrolled version, used to terminate the job after the last unrolled iteration
   1.463 +    while (parent <= lastTreeIndex)
   1.464 +    {
   1.465 +      T a = mTreeData[child];
   1.466 +      child = RightNeighborNode(child);
   1.467 +      T b = mTreeData[child];
   1.468 +      child = RightNeighborNode(child);
   1.469 +      mTreeData[parent] = std::max(a, b);
   1.470 +      parent = RightNeighborNode(parent);
   1.471 +    }
   1.472 +  }
   1.473 +
   1.474 +  mInvalidated = false;
   1.475 +}
   1.476 +
   1.477 +WebGLElementArrayCache::~WebGLElementArrayCache() {
   1.478 +  delete mUint8Tree;
   1.479 +  delete mUint16Tree;
   1.480 +  delete mUint32Tree;
   1.481 +  free(mUntypedData);
   1.482 +}
   1.483 +
   1.484 +bool WebGLElementArrayCache::BufferData(const void* ptr, size_t byteSize) {
   1.485 +  mByteSize = byteSize;
   1.486 +  if (mUint8Tree)
   1.487 +    if (!mUint8Tree->ResizeToParentSize())
   1.488 +      return false;
   1.489 +  if (mUint16Tree)
   1.490 +    if (!mUint16Tree->ResizeToParentSize())
   1.491 +      return false;
   1.492 +  if (mUint32Tree)
   1.493 +    if (!mUint32Tree->ResizeToParentSize())
   1.494 +      return false;
   1.495 +  mUntypedData = realloc(mUntypedData, byteSize);
   1.496 +  if (!mUntypedData)
   1.497 +    return false;
   1.498 +  BufferSubData(0, ptr, byteSize);
   1.499 +  return true;
   1.500 +}
   1.501 +
   1.502 +void WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteSize) {
   1.503 +  if (!updateByteSize) return;
   1.504 +  if (ptr)
   1.505 +      memcpy(static_cast<uint8_t*>(mUntypedData) + pos, ptr, updateByteSize);
   1.506 +  else
   1.507 +      memset(static_cast<uint8_t*>(mUntypedData) + pos, 0, updateByteSize);
   1.508 +  InvalidateTrees(pos, pos + updateByteSize - 1);
   1.509 +}
   1.510 +
   1.511 +void WebGLElementArrayCache::InvalidateTrees(size_t firstByte, size_t lastByte)
   1.512 +{
   1.513 +  if (mUint8Tree)
   1.514 +    mUint8Tree->Invalidate(firstByte, lastByte);
   1.515 +  if (mUint16Tree)
   1.516 +    mUint16Tree->Invalidate(firstByte, lastByte);
   1.517 +  if (mUint32Tree)
   1.518 +    mUint32Tree->Invalidate(firstByte, lastByte);
   1.519 +}
   1.520 +
   1.521 +template<typename T>
   1.522 +bool
   1.523 +WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement,
   1.524 +                                 size_t countElements, uint32_t* out_upperBound)
   1.525 +{
   1.526 +  SetUpperBound(out_upperBound, 0);
   1.527 +
   1.528 +  // if maxAllowed is >= the max T value, then there is no way that a T index could be invalid
   1.529 +  uint32_t maxTSize = std::numeric_limits<T>::max();
   1.530 +  if (maxAllowed >= maxTSize) {
   1.531 +    SetUpperBound(out_upperBound, maxTSize);
   1.532 +    return true;
   1.533 +  }
   1.534 +
   1.535 +  T maxAllowedT(maxAllowed);
   1.536 +
   1.537 +  // integer overflow must have been handled earlier, so we assert that maxAllowedT
   1.538 +  // is exactly the max allowed value.
   1.539 +  MOZ_ASSERT(uint32_t(maxAllowedT) == maxAllowed);
   1.540 +
   1.541 +  if (!mByteSize || !countElements)
   1.542 +    return true;
   1.543 +
   1.544 +  WebGLElementArrayCacheTree<T>*& tree = TreeForType<T>::Run(this);
   1.545 +  if (!tree) {
   1.546 +    tree = new WebGLElementArrayCacheTree<T>(*this);
   1.547 +  }
   1.548 +
   1.549 +  size_t lastElement = firstElement + countElements - 1;
   1.550 +
   1.551 +  tree->Update();
   1.552 +
   1.553 +  // fast exit path when the global maximum for the whole element array buffer
   1.554 +  // falls in the allowed range
   1.555 +  T globalMax = tree->GlobalMaximum();
   1.556 +  if (globalMax <= maxAllowedT)
   1.557 +  {
   1.558 +    SetUpperBound(out_upperBound, globalMax);
   1.559 +    return true;
   1.560 +  }
   1.561 +
   1.562 +  const T* elements = Elements<T>();
   1.563 +
   1.564 +  // before calling tree->Validate, we have to validate ourselves the boundaries of the elements span,
   1.565 +  // to round them to the nearest multiple of sElementsPerLeaf.
   1.566 +  size_t firstElementAdjustmentEnd = std::min(lastElement,
   1.567 +                                            tree->LastElementUnderSameLeaf(firstElement));
   1.568 +  while (firstElement <= firstElementAdjustmentEnd) {
   1.569 +    const T& curData = elements[firstElement];
   1.570 +    UpdateUpperBound(out_upperBound, curData);
   1.571 +    if (curData > maxAllowedT)
   1.572 +      return false;
   1.573 +    firstElement++;
   1.574 +  }
   1.575 +  size_t lastElementAdjustmentEnd = std::max(firstElement,
   1.576 +                                           tree->FirstElementUnderSameLeaf(lastElement));
   1.577 +  while (lastElement >= lastElementAdjustmentEnd) {
   1.578 +    const T& curData = elements[lastElement];
   1.579 +    UpdateUpperBound(out_upperBound, curData);
   1.580 +    if (curData > maxAllowedT)
   1.581 +      return false;
   1.582 +    lastElement--;
   1.583 +  }
   1.584 +
   1.585 +  // at this point, for many tiny validations, we're already done.
   1.586 +  if (firstElement > lastElement)
   1.587 +    return true;
   1.588 +
   1.589 +  // general case
   1.590 +  return tree->Validate(maxAllowedT,
   1.591 +                        tree->LeafForElement(firstElement),
   1.592 +                        tree->LeafForElement(lastElement),
   1.593 +                        out_upperBound);
   1.594 +}
   1.595 +
   1.596 +bool
   1.597 +WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed,
   1.598 +                                 size_t firstElement, size_t countElements,
   1.599 +                                 uint32_t* out_upperBound)
   1.600 +{
   1.601 +  if (type == LOCAL_GL_UNSIGNED_BYTE)
   1.602 +    return Validate<uint8_t>(maxAllowed, firstElement, countElements, out_upperBound);
   1.603 +  if (type == LOCAL_GL_UNSIGNED_SHORT)
   1.604 +    return Validate<uint16_t>(maxAllowed, firstElement, countElements, out_upperBound);
   1.605 +  if (type == LOCAL_GL_UNSIGNED_INT)
   1.606 +    return Validate<uint32_t>(maxAllowed, firstElement, countElements, out_upperBound);
   1.607 +
   1.608 +  MOZ_ASSERT(false, "Invalid type.");
   1.609 +  return false;
   1.610 +}
   1.611 +
   1.612 +size_t
   1.613 +WebGLElementArrayCache::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
   1.614 +{
   1.615 +  size_t uint8TreeSize  = mUint8Tree  ? mUint8Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
   1.616 +  size_t uint16TreeSize = mUint16Tree ? mUint16Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
   1.617 +  size_t uint32TreeSize = mUint32Tree ? mUint32Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
   1.618 +  return aMallocSizeOf(this) +
   1.619 +          mByteSize +
   1.620 +          uint8TreeSize +
   1.621 +          uint16TreeSize +
   1.622 +          uint32TreeSize;
   1.623 +}
   1.624 +
   1.625 +} // end namespace mozilla

mercurial