gfx/ycbcr/win64.patch

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp
michael@0 2 new file mode 100644
michael@0 3 --- /dev/null
michael@0 4 +++ b/gfx/ycbcr/yuv_row_win64.cpp
michael@0 5 @@ -0,0 +1,205 @@
michael@0 6 +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
michael@0 7 +// Use of this source code is governed by a BSD-style license that can be
michael@0 8 +// found in the LICENSE file.
michael@0 9 +
michael@0 10 +#include "yuv_row.h"
michael@0 11 +
michael@0 12 +extern "C" {
michael@0 13 +
michael@0 14 +// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics.
michael@0 15 +
michael@0 16 +#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)  // byte offset 2048 (= 256 * 8) from kCoefficientsRgbY; entries are addressed in 8-byte steps below
michael@0 17 +#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)  // byte offset 4096 (= 512 * 8) from kCoefficientsRgbY; entries are addressed in 8-byte steps below
michael@0 18 +
michael@0 19 +#include <emmintrin.h>
michael@0 20 +
michael@0 21 +static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,  // Converts `width` pixels of one row via table lookups; writes 4 bytes per pixel to rgb_buf.
michael@0 22 + const uint8* u_buf,  // one U sample is consumed per pair of Y samples
michael@0 23 + const uint8* v_buf,  // one V sample is consumed per pair of Y samples
michael@0 24 + uint8* rgb_buf,  // output; advanced by 8 bytes per pixel pair
michael@0 25 + int width) {  // number of output pixels
michael@0 26 + __m128i xmm0, xmmY1, xmmY2;
michael@0 27 + __m128 xmmY;
michael@0 28 +
michael@0 29 + while (width >= 2) {  // main loop: two output pixels per iteration, sharing one U/V pair
michael@0 30 + xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),  // 8-byte U-table entry ...
michael@0 31 + _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));  // ... plus 8-byte V-table entry (saturating 16-bit add)
michael@0 32 +
michael@0 33 + xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));  // Y-table entry for the first pixel
michael@0 34 + xmmY1 = _mm_adds_epi16(xmmY1, xmm0);  // add the shared U+V contribution
michael@0 35 +
michael@0 36 + xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));  // Y-table entry for the second pixel
michael@0 37 + xmmY2 = _mm_adds_epi16(xmmY2, xmm0);  // same U+V contribution as the first pixel
michael@0 38 +
michael@0 39 + xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),  // 0x44 selects the low 64 bits of xmmY1 followed by
michael@0 40 + 0x44);  // the low 64 bits of xmmY2, so both pixels sit in one register
michael@0 41 + xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);  // arithmetic right shift of every 16-bit lane by 6
michael@0 42 + xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);  // saturate each 16-bit lane to an unsigned byte
michael@0 43 +
michael@0 44 + _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);  // store the low 8 bytes: two 4-byte pixels
michael@0 45 + rgb_buf += 8;
michael@0 46 + width -= 2;
michael@0 47 + }
michael@0 48 +
michael@0 49 + if (width) {  // one pixel left over when width was odd
michael@0 50 + xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
michael@0 51 + _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
michael@0 52 + xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
michael@0 53 + xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
michael@0 54 + xmmY1 = _mm_srai_epi16(xmmY1, 6);
michael@0 55 + xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
michael@0 56 + *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);  // write only the low 32 bits: a single 4-byte pixel
michael@0 57 + }
michael@0 58 +}
michael@0 59 +
michael@0 60 +static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,  // Like the unscaled conversion, but picks source samples by stepping a position x by source_dx per output pixel.
michael@0 61 + const uint8* u_buf,  // indexed with x >> 17
michael@0 62 + const uint8* v_buf,  // indexed with x >> 17
michael@0 63 + uint8* rgb_buf,  // output; 4 bytes are written per pixel
michael@0 64 + int width,  // number of output pixels
michael@0 65 + int source_dx) {  // amount added to x after each output pixel
michael@0 66 + __m128i xmm0, xmmY1, xmmY2;
michael@0 67 + __m128 xmmY;
michael@0 68 + uint8 u, v, y;
michael@0 69 + int x = 0;  // source position; x >> 16 is the Y index, x >> 17 the U/V index
michael@0 70 +
michael@0 71 + while (width >= 2) {  // two output pixels per iteration
michael@0 72 + u = u_buf[x >> 17];  // U/V are sampled once per pair, at the first pixel's position
michael@0 73 + v = v_buf[x >> 17];
michael@0 74 + y = y_buf[x >> 16];
michael@0 75 + x += source_dx;
michael@0 76 +
michael@0 77 + xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),  // U-table entry plus
michael@0 78 + _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));  // V-table entry (saturating 16-bit add)
michael@0 79 + xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
michael@0 80 + xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
michael@0 81 +
michael@0 82 + y = y_buf[x >> 16];  // second pixel: new Y sample, U/V reused from above
michael@0 83 + x += source_dx;
michael@0 84 +
michael@0 85 + xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
michael@0 86 + xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
michael@0 87 +
michael@0 88 + xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),  // 0x44: low 64 bits of xmmY1, then low 64 bits of xmmY2
michael@0 89 + 0x44);
michael@0 90 + xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);  // arithmetic right shift of every 16-bit lane by 6
michael@0 91 + xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);  // saturate each 16-bit lane to an unsigned byte
michael@0 92 +
michael@0 93 + _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);  // store 8 bytes: two 4-byte pixels
michael@0 94 + rgb_buf += 8;
michael@0 95 + width -= 2;
michael@0 96 + }
michael@0 97 +
michael@0 98 + if (width) {  // one pixel left over when width was odd
michael@0 99 + u = u_buf[x >> 17];
michael@0 100 + v = v_buf[x >> 17];
michael@0 101 + y = y_buf[x >> 16];
michael@0 102 +
michael@0 103 + xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
michael@0 104 + _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
michael@0 105 + xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
michael@0 106 + xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
michael@0 107 + xmmY1 = _mm_srai_epi16(xmmY1, 6);
michael@0 108 + xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
michael@0 109 + *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);  // write only the low 32 bits: a single 4-byte pixel
michael@0 110 + }
michael@0 111 +}
michael@0 112 +
michael@0 113 +static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,  // Scaled conversion that blends each sample with its right-hand neighbour before the table lookup.
michael@0 114 + const uint8* u_buf,  // read at x >> 17 and (x >> 17) + 1
michael@0 115 + const uint8* v_buf,  // read at x >> 17 and (x >> 17) + 1
michael@0 116 + uint8* rgb_buf,  // output; 4 bytes are written per pixel
michael@0 117 + int width,  // number of output pixels
michael@0 118 + int source_dx) {  // amount added to x after each output pixel
michael@0 119 + __m128i xmm0, xmmY1, xmmY2;
michael@0 120 + __m128 xmmY;
michael@0 121 + uint8 u0, u1, v0, v1, y0, y1;
michael@0 122 + uint32 uv_frac, y_frac, u, v, y;
michael@0 123 + int x = 0;  // source position; low 16 bits are the blend fraction for Y, x >> 16 is the Y index
michael@0 124 +
michael@0 125 + if (source_dx >= 0x20000) {  // step of at least 2 Y samples per output pixel
michael@0 126 + x = 32768;  // start half a Y sample in; NOTE(review): presumably to centre sampling when downscaling - confirm
michael@0 127 + }
michael@0 128 +
michael@0 129 + while(width >= 2) {  // two output pixels per iteration
michael@0 130 + u0 = u_buf[x >> 17];
michael@0 131 + u1 = u_buf[(x >> 17) + 1];  // NOTE(review): reads one element past the current sample; assumes the caller keeps it readable - confirm
michael@0 132 + v0 = v_buf[x >> 17];
michael@0 133 + v1 = v_buf[(x >> 17) + 1];
michael@0 134 + y0 = y_buf[x >> 16];
michael@0 135 + y1 = y_buf[(x >> 16) + 1];
michael@0 136 + uv_frac = (x & 0x1fffe);  // U/V blend weight, range 0..0x1fffe
michael@0 137 + y_frac = (x & 0xffff);  // Y blend weight, range 0..0xffff
michael@0 138 + u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;  // the XOR equals 0x1fffe - uv_frac, so this is a weighted blend of u0 and u1
michael@0 139 + v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
michael@0 140 + y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;  // the XOR equals 0xffff - y_frac, so this is a weighted blend of y0 and y1
michael@0 141 + x += source_dx;
michael@0 142 +
michael@0 143 + xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),  // U-table entry plus
michael@0 144 + _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));  // V-table entry (saturating 16-bit add)
michael@0 145 + xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
michael@0 146 + xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
michael@0 147 +
michael@0 148 + y0 = y_buf[x >> 16];  // second pixel: only Y is re-sampled and blended; the U+V sum in xmm0 is reused
michael@0 149 + y1 = y_buf[(x >> 16) + 1];
michael@0 150 + y_frac = (x & 0xffff);
michael@0 151 + y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
michael@0 152 + x += source_dx;
michael@0 153 +
michael@0 154 + xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
michael@0 155 + xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
michael@0 156 +
michael@0 157 + xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),  // 0x44: low 64 bits of xmmY1, then low 64 bits of xmmY2
michael@0 158 + 0x44);
michael@0 159 + xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);  // arithmetic right shift of every 16-bit lane by 6
michael@0 160 + xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);  // saturate each 16-bit lane to an unsigned byte
michael@0 161 +
michael@0 162 + _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);  // store 8 bytes: two 4-byte pixels
michael@0 163 + rgb_buf += 8;
michael@0 164 + width -= 2;
michael@0 165 + }
michael@0 166 +
michael@0 167 + if (width) {  // one pixel left over when width was odd; it uses the nearest samples with no blending
michael@0 168 + u = u_buf[x >> 17];
michael@0 169 + v = v_buf[x >> 17];
michael@0 170 + y = y_buf[x >> 16];
michael@0 171 +
michael@0 172 + xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
michael@0 173 + _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
michael@0 174 + xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
michael@0 175 +
michael@0 176 + xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
michael@0 177 + xmmY1 = _mm_srai_epi16(xmmY1, 6);
michael@0 178 + xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
michael@0 179 + *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);  // write only the low 32 bits: a single 4-byte pixel
michael@0 180 + }
michael@0 181 +}
michael@0 182 +
michael@0 183 +void FastConvertYUVToRGB32Row(const uint8* y_buf,  // extern "C" entry point for the unscaled row conversion
michael@0 184 + const uint8* u_buf,
michael@0 185 + const uint8* v_buf,
michael@0 186 + uint8* rgb_buf,
michael@0 187 + int width) {
michael@0 188 + FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);  // forwards all arguments unchanged to the SSE2 implementation
michael@0 189 +}
michael@0 190 +
michael@0 191 +void ScaleYUVToRGB32Row(const uint8* y_buf,  // extern "C" entry point for the scaled row conversion
michael@0 192 + const uint8* u_buf,
michael@0 193 + const uint8* v_buf,
michael@0 194 + uint8* rgb_buf,
michael@0 195 + int width,
michael@0 196 + int source_dx) {
michael@0 197 + ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);  // forwards all arguments unchanged to the SSE2 implementation
michael@0 198 +}
michael@0 199 +
michael@0 200 +void LinearScaleYUVToRGB32Row(const uint8* y_buf,  // extern "C" entry point for the blended (linear) scaled row conversion
michael@0 201 + const uint8* u_buf,
michael@0 202 + const uint8* v_buf,
michael@0 203 + uint8* rgb_buf,
michael@0 204 + int width,
michael@0 205 + int source_dx) {
michael@0 206 + LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,  // forwards all arguments unchanged to the SSE2 implementation
michael@0 207 + source_dx);
michael@0 208 +}
michael@0 209 +
michael@0 210 +} // extern "C"

mercurial