The Tor Browser: gfx/ycbcr/win64.patch@97036ab72558 (annotated)

gfx/ycbcr/win64.patch@97036ab72558 (annotated)

gfx/ycbcr/win64.patch

Tue, 06 Jan 2015 21:39:09 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Tue, 06 Jan 2015 21:39:09 +0100
branch: TOR_BUG_9701
changeset 8: 97036ab72558
permissions: -rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance policy documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

 diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp
 new file mode 100644
 --- /dev/null
 +++ b/gfx/ycbcr/yuv_row_win64.cpp
@@ -0,0 +1,205 @@
 +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 +// Use of this source code is governed by a BSD-style license that can be
 +// found in the LICENSE file.
 +
 +#include "yuv_row.h"
 +
 +extern "C" {
 +
 +// The x64 compiler supports neither MMX nor inline assembler, so use SSE2 intrinsics.
 +// U and V tables are addressed 2048 and 4096 bytes past kCoefficientsRgbY (2048 = 256 entries of 8 bytes).
 +#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)
 +#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)
 +
 +#include <emmintrin.h>
 +// Converts one row of YUV samples to 4-byte pixels through the coefficient tables, without scaling.
 +static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,  // Y samples; one read per pixel
 +                                          const uint8* u_buf,  // U samples; one read per pixel pair
 +                                          const uint8* v_buf,  // V samples; one read per pixel pair
 +                                          uint8* rgb_buf,      // output; 4 bytes written per pixel
 +                                          int width) {         // number of output pixels
 +  __m128i xmm0, xmmY1, xmmY2;
 +  __m128  xmmY;
 +  // Main loop: two pixels per iteration, sharing a single U and a single V sample.
 +  while (width >= 2) {
 +    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),
 +                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));
 +    // Each pixel is its 8-byte Y table entry plus the shared U+V sum (saturating 16-bit adds).
 +    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
 +    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
 +    // Second pixel of the pair: next Y sample, same chroma sum.
 +    xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
 +    xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
 +    // 0x44 keeps the low 64 bits of each operand: first pixel in the low half, second in the high half.
 +    xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
 +                          0x44);
 +    xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);  // arithmetic shift of every 16-bit lane
 +    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);             // saturate each lane to an unsigned byte
 +    // Write the low 8 bytes, i.e. both 4-byte pixels.
 +    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
 +    rgb_buf += 8;
 +    width -= 2;
 +  }
 +  // Leftover pixel. NOTE(review): `if (width)` is also true for a negative width.
 +  if (width) {
 +    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
 +                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
 +    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
 +    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
 +    xmmY1 = _mm_srai_epi16(xmmY1, 6);
 +    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
 +    *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);  // one pixel as a 32-bit store
 +  }
 +}
 +// Converts a YUV row to 4-byte pixels while resampling: each pixel uses the sample at a stepped position.
 +static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,  // Y samples, indexed by x >> 16
 +                                    const uint8* u_buf,  // U samples, indexed by x >> 17
 +                                    const uint8* v_buf,  // V samples, indexed by x >> 17
 +                                    uint8* rgb_buf,      // output; 4 bytes written per pixel
 +                                    int width,           // number of output pixels
 +                                    int source_dx) {     // added to position x after every pixel
 +  __m128i xmm0, xmmY1, xmmY2;
 +  __m128  xmmY;
 +  uint8 u, v, y;
 +  int x = 0;  // source position; the bits above bit 16 form the Y index
 +  // Two pixels per iteration; U and V are sampled once, at the first pixel's position.
 +  while (width >= 2) {
 +    u = u_buf[x >> 17];  // chroma index is half the Y index
 +    v = v_buf[x >> 17];
 +    y = y_buf[x >> 16];
 +    x += source_dx;
 +    // Chroma sum shared by both pixels; the first pixel's Y entry is then added on top.
 +    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
 +                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
 +    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
 +    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
 +    // Second pixel: only a new Y sample is fetched, at the advanced position.
 +    y = y_buf[x >> 16];
 +    x += source_dx;
 +
 +    xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
 +    xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
 +    // 0x44 keeps the low 64 bits of each operand: first pixel in the low half, second in the high half.
 +    xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
 +                          0x44);
 +    xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);  // arithmetic shift of every 16-bit lane
 +    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);             // saturate each lane to an unsigned byte
 +    // Write the low 8 bytes, i.e. both 4-byte pixels.
 +    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
 +    rgb_buf += 8;
 +    width -= 2;
 +  }
 +  // Leftover pixel. NOTE(review): `if (width)` is also true for a negative width.
 +  if (width) {
 +    u = u_buf[x >> 17];
 +    v = v_buf[x >> 17];
 +    y = y_buf[x >> 16];
 +    // Single-pixel version of the loop body; the result goes out as one 32-bit store.
 +    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
 +                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
 +    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
 +    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
 +    xmmY1 = _mm_srai_epi16(xmmY1, 6);
 +    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
 +    *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
 +  }
 +}
 +// Resampling converter: pixels produced in the two-at-a-time loop blend adjacent source samples linearly.
 +static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,  // Y samples, indexed by x >> 16
 +                                          const uint8* u_buf,  // U samples, indexed by x >> 17
 +                                          const uint8* v_buf,  // V samples, indexed by x >> 17
 +                                          uint8* rgb_buf,      // output; 4 bytes written per pixel
 +                                          int width,           // number of output pixels
 +                                          int source_dx) {     // added to position x after every pixel
 +  __m128i xmm0, xmmY1, xmmY2;
 +  __m128  xmmY;
 +  uint8 u0, u1, v0, v1, y0, y1;
 +  uint32 uv_frac, y_frac, u, v, y;
 +  int x = 0;
 +  // With a step of 0x20000 or more (two Y samples per pixel), start at 32768, half a Y sample in.
 +  if (source_dx >= 0x20000) {
 +    x = 32768;
 +  }
 +  // Two pixels per iteration; the blended U and V are computed once and shared by both.
 +  while(width >= 2) {
 +    u0 = u_buf[x >> 17];
 +    u1 = u_buf[(x >> 17) + 1];  // NOTE(review): reads index + 1; assumes it is readable - TODO confirm
 +    v0 = v_buf[x >> 17];
 +    v1 = v_buf[(x >> 17) + 1];
 +    y0 = y_buf[x >> 16];
 +    y1 = y_buf[(x >> 16) + 1];
 +    uv_frac = (x & 0x1fffe);  // weight of the second chroma sample
 +    y_frac = (x & 0xffff);    // weight of the second Y sample
 +    u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;  // the two weights always sum to 0x1fffe
 +    v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
 +    y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;     // the two weights always sum to 0xffff
 +    x += source_dx;
 +    // The blended values index the coefficient tables; the U+V sum is shared by both pixels.
 +    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
 +                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
 +    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
 +    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
 +    // Second pixel: only Y is re-sampled and re-blended, at the advanced position.
 +    y0 = y_buf[x >> 16];
 +    y1 = y_buf[(x >> 16) + 1];
 +    y_frac = (x & 0xffff);
 +    y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
 +    x += source_dx;
 +
 +    xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
 +    xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
 +    // 0x44 keeps the low 64 bits of each operand: first pixel in the low half, second in the high half.
 +    xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
 +                          0x44);
 +    xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);  // arithmetic shift of every 16-bit lane
 +    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);             // saturate each lane to an unsigned byte
 +    // Write the low 8 bytes, i.e. both 4-byte pixels.
 +    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
 +    rgb_buf += 8;
 +    width -= 2;
 +  }
 +  // Leftover pixel: taken directly from the samples at the current position, with no blending.
 +  if (width) {
 +    u = u_buf[x >> 17];
 +    v = v_buf[x >> 17];
 +    y = y_buf[x >> 16];
 +
 +    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
 +                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
 +    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
 +
 +    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
 +    xmmY1 = _mm_srai_epi16(xmmY1, 6);
 +    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
 +    *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);  // one pixel as a 32-bit store
 +  }
 +}
 +// Non-static, C-linkage entry point; forwards every argument to FastConvertYUVToRGB32Row_SSE2.
 +void FastConvertYUVToRGB32Row(const uint8* y_buf,
 +                              const uint8* u_buf,
 +                              const uint8* v_buf,
 +                              uint8* rgb_buf,
 +                              int width) {
 +  FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
 +}
 +// Non-static, C-linkage entry point; forwards every argument to ScaleYUVToRGB32Row_SSE2.
 +void ScaleYUVToRGB32Row(const uint8* y_buf,
 +                        const uint8* u_buf,
 +                        const uint8* v_buf,
 +                        uint8* rgb_buf,
 +                        int width,
 +                        int source_dx) {
 +  ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
 +}
 +// Non-static, C-linkage entry point; forwards every argument to LinearScaleYUVToRGB32Row_SSE2.
 +void LinearScaleYUVToRGB32Row(const uint8* y_buf,
 +                              const uint8* u_buf,
 +                              const uint8* v_buf,
 +                              uint8* rgb_buf,
 +                              int width,
 +                              int source_dx) {
 +  LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
 +                                source_dx);
 +}
 +
 +} // extern "C"

The Tor Browser / annotate

gfx/ycbcr/win64.patch@97036ab72558 (annotated)

gfx/ycbcr/win64.patch