The Tor Browser: gfx/ycbcr/yuv_row_win64.cpp@97036ab72558

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirements documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

     2 // Use of this source code is governed by a BSD-style license that can be

     3 // found in the LICENSE file.

     5 #include "yuv_row.h"

     7 extern "C" {

// The x64 compiler supports neither MMX nor inline assembler, so this file
// uses SSE2 intrinsics instead.

// Start of the U coefficient entries, addressed as a byte offset from
// kCoefficientsRgbY. The lookups in this file index each table with an 8-bit
// sample value scaled by 8 bytes per entry, so 2048 bytes (256 * 8) spans one
// full table.
#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)

// Start of the V coefficient entries: a further 2048 bytes past the U entries
// (4096 bytes from kCoefficientsRgbY).
#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)

    14 #include <emmintrin.h>

    16 static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,

    17                                           const uint8* u_buf,

    18                                           const uint8* v_buf,

    19                                           uint8* rgb_buf,

    20                                           int width) {

    21   __m128i xmm0, xmmY1, xmmY2;

    22   __m128  xmmY;

    24   while (width >= 2) {

    25     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),

    26                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));

    28     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));

    29     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);

    31     xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));

    32     xmmY2 = _mm_adds_epi16(xmmY2, xmm0);

    34     xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),

    35                           0x44);

    36     xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);

    37     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);

    39     _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);

    40     rgb_buf += 8;

    41     width -= 2;

    42   }

    44   if (width) {

    45     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),

    46                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));

    47     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));

    48     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);

    49     xmmY1 = _mm_srai_epi16(xmmY1, 6);

    50     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);

    51     *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);

    52   }

    53 }

    55 static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,

    56                                     const uint8* u_buf,

    57                                     const uint8* v_buf,

    58                                     uint8* rgb_buf,

    59                                     int width,

    60                                     int source_dx) {

    61   __m128i xmm0, xmmY1, xmmY2;

    62   __m128  xmmY;

    63   uint8 u, v, y;

    64   int x = 0;

    66   while (width >= 2) {

    67     u = u_buf[x >> 17];

    68     v = v_buf[x >> 17];

    69     y = y_buf[x >> 16];

    70     x += source_dx;

    72     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),

    73                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));

    74     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));

    75     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);

    77     y = y_buf[x >> 16];

    78     x += source_dx;

    80     xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));

    81     xmmY2 = _mm_adds_epi16(xmmY2, xmm0);

    83     xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),

    84                           0x44);

    85     xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);

    86     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);

    88     _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);

    89     rgb_buf += 8;

    90     width -= 2;

    91   }

    93   if (width) {

    94     u = u_buf[x >> 17];

    95     v = v_buf[x >> 17];

    96     y = y_buf[x >> 16];

    98     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),

    99                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));

   100     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));

   101     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);

   102     xmmY1 = _mm_srai_epi16(xmmY1, 6);

   103     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);

   104     *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);

   105   }

   106 }

   108 static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,

   109                                           const uint8* u_buf,

   110                                           const uint8* v_buf,

   111                                           uint8* rgb_buf,

   112                                           int width,

   113                                           int source_dx) {

   114   __m128i xmm0, xmmY1, xmmY2;

   115   __m128  xmmY;

   116   uint8 u0, u1, v0, v1, y0, y1;

   117   uint32 uv_frac, y_frac, u, v, y;

   118   int x = 0;

   120   if (source_dx >= 0x20000) {

   121     x = 32768;

   122   }

   124   while(width >= 2) {

   125     u0 = u_buf[x >> 17];

   126     u1 = u_buf[(x >> 17) + 1];

   127     v0 = v_buf[x >> 17];

   128     v1 = v_buf[(x >> 17) + 1];

   129     y0 = y_buf[x >> 16];

   130     y1 = y_buf[(x >> 16) + 1];

   131     uv_frac = (x & 0x1fffe);

   132     y_frac = (x & 0xffff);

   133     u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;

   134     v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;

   135     y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;

   136     x += source_dx;

   138     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),

   139                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));

   140     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));

   141     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);

   143     y0 = y_buf[x >> 16];

   144     y1 = y_buf[(x >> 16) + 1];

   145     y_frac = (x & 0xffff);

   146     y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;

   147     x += source_dx;

   149     xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));

   150     xmmY2 = _mm_adds_epi16(xmmY2, xmm0);

   152     xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),

   153                           0x44);

   154     xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);

   155     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);

   157     _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);

   158     rgb_buf += 8;

   159     width -= 2;

   160   }

   162   if (width) {

   163     u = u_buf[x >> 17];

   164     v = v_buf[x >> 17];

   165     y = y_buf[x >> 16];

   167     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),

   168                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));

   169     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));

   171     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);

   172     xmmY1 = _mm_srai_epi16(xmmY1, 6);

   173     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);

   174     *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);

   175   }

   176 }

   178 void FastConvertYUVToRGB32Row(const uint8* y_buf,

   179                               const uint8* u_buf,

   180                               const uint8* v_buf,

   181                               uint8* rgb_buf,

   182                               int width) {

   183   FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);

   184 }

   186 void ScaleYUVToRGB32Row(const uint8* y_buf,

   187                         const uint8* u_buf,

   188                         const uint8* v_buf,

   189                         uint8* rgb_buf,

   190                         int width,

   191                         int source_dx) {

   192   ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);

   193 }

   195 void LinearScaleYUVToRGB32Row(const uint8* y_buf,

   196                               const uint8* u_buf,

   197                               const uint8* v_buf,

   198                               uint8* rgb_buf,

   199                               int width,

   200                               int source_dx) {

   201   LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,

   202                                 source_dx);

   203 }

   205 } // extern "C"

The Tor Browser / file revision

gfx/ycbcr/yuv_row_win64.cpp@97036ab72558

gfx/ycbcr/yuv_row_win64.cpp