gfx/ycbcr/yuv_row_win64.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirements
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
     2 // Use of this source code is governed by a BSD-style license that can be
     3 // found in the LICENSE file.
     5 #include "yuv_row.h"
     7 extern "C" {
     9 // x64 compiler doesn't support MMX and inline assembler.  Use SSE2 intrinsics.
       //
       // Byte-offset views into the lookup table that starts at kCoefficientsRgbY
       // (declared outside this file; presumably via yuv_row.h -- confirm).
       // Every lookup in this file reads 8 bytes at `8 * index`, where the index
       // is derived from 8-bit samples, i.e. 256 * 8 = 2048 bytes per component.
       // That matches the offsets below: the U entries start at byte 2048 and the
       // V entries at byte 4096.
    11 #define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)
    12 #define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)
    14 #include <emmintrin.h>
    // Converts one row of Y/U/V samples to 4-byte output pixels, unscaled.
    //
    //   y_buf    luma samples; one is consumed per output pixel.
    //   u_buf,
    //   v_buf    chroma samples; one of each is consumed per pair of pixels.
    //   rgb_buf  destination; 4 bytes are written per pixel.
    //   width    number of pixels to produce.
    //
    // Each sample selects an 8-byte entry (four 16-bit lanes) from the
    // kCoefficientsRgb* tables; the Y, U and V entries are summed with signed
    // saturation, shifted right by 6 and packed to unsigned bytes.
    // NOTE(review): the channel order inside a pixel is determined by the
    // tables, which are not visible here -- confirm before relying on it.
    16 static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,
    17                                           const uint8* u_buf,
    18                                           const uint8* v_buf,
    19                                           uint8* rgb_buf,
    20                                           int width) {
    21   __m128i xmm0, xmmY1, xmmY2;
    22   __m128  xmmY;
         // Main loop: two output pixels per iteration, sharing one U/V pair.
    24   while (width >= 2) {
           // xmm0 = U entry + V entry (low 64 bits only), common to both pixels.
    25     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),
    26                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));
           // First pixel: Y entry + shared chroma contribution.
    28     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
    29     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
           // Second pixel: next Y entry + the same chroma contribution.
    31     xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
    32     xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
           // 0x44 picks 32-bit elements 0,1 of each operand: the low 64 bits of
           // xmmY1 land in the low half and the low 64 bits of xmmY2 in the high
           // half, so both pixels sit in one register.
    34     xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
    35                           0x44);
           // Arithmetic shift right by 6, then saturate each lane to 0..255.
    36     xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
    37     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
           // Store the low 8 bytes: two 4-byte pixels.
    39     _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
    40     rgb_buf += 8;
    41     width -= 2;
    42   }
         // Tail: one remaining pixel when width was odd. Any non-zero remainder
         // takes this branch; the source pointers are read but not advanced.
    44   if (width) {
    45     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
    46                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
    47     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
    48     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
    49     xmmY1 = _mm_srai_epi16(xmmY1, 6);
    50     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
           // Only the low 32 bits (a single pixel) are written.
    51     *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
    52   }
    53 }
    // Converts one row of Y/U/V samples to 4-byte output pixels while
    // resampling horizontally by picking the nearest source sample.
    //
    //   y_buf, u_buf, v_buf  source sample rows.
    //   rgb_buf              destination; 4 bytes are written per pixel.
    //   width                number of output pixels to produce.
    //   source_dx            amount added to the source position `x` after
    //                        each output pixel.
    //
    // `x` starts at 0 and is used as a fixed-point position: luma is read at
    // index x >> 16 and chroma at index x >> 17, i.e. the chroma index advances
    // at half the luma rate.
    55 static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
    56                                     const uint8* u_buf,
    57                                     const uint8* v_buf,
    58                                     uint8* rgb_buf,
    59                                     int width,
    60                                     int source_dx) {
    61   __m128i xmm0, xmmY1, xmmY2;
    62   __m128  xmmY;
    63   uint8 u, v, y;
    64   int x = 0;
         // Main loop: two output pixels per iteration. Chroma is sampled once,
         // at the first pixel's position, and reused for the second pixel.
    66   while (width >= 2) {
    67     u = u_buf[x >> 17];
    68     v = v_buf[x >> 17];
    69     y = y_buf[x >> 16];
    70     x += source_dx;
           // xmm0 = U entry + V entry; xmmY1 = first pixel (Y entry + chroma).
    72     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
    73                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
    74     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
    75     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
           // Second pixel: new luma sample at the advanced position.
    77     y = y_buf[x >> 16];
    78     x += source_dx;
    80     xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
    81     xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
           // 0x44 places the low 64 bits of xmmY1 in the low half and the low
           // 64 bits of xmmY2 in the high half of the result.
    83     xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
    84                           0x44);
           // Arithmetic shift right by 6, then saturate each lane to 0..255.
    85     xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
    86     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
           // Store the low 8 bytes: two 4-byte pixels.
    88     _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
    89     rgb_buf += 8;
    90     width -= 2;
    91   }
         // Tail: one remaining pixel when width was odd.
    93   if (width) {
    94     u = u_buf[x >> 17];
    95     v = v_buf[x >> 17];
    96     y = y_buf[x >> 16];
    98     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
    99                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
   100     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
   101     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
   102     xmmY1 = _mm_srai_epi16(xmmY1, 6);
   103     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
           // Only the low 32 bits (a single pixel) are written.
   104     *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
   105   }
   106 }
   // Converts one row of Y/U/V samples to 4-byte output pixels while
   // resampling horizontally with linear interpolation between neighbouring
   // source samples.
   //
   //   y_buf, u_buf, v_buf  source sample rows.
   //   rgb_buf              destination; 4 bytes are written per pixel.
   //   width                number of output pixels to produce.
   //   source_dx            amount added to the source position `x` after
   //                        each output pixel.
   //
   // `x` is a fixed-point position: luma uses index x >> 16 with the low 16
   // bits as the blend weight; chroma uses index x >> 17 with bits 1..16
   // (x & 0x1fffe) as the blend weight.
   //
   // The main loop also reads the sample at index + 1 in every plane, so the
   // source rows must be readable one element past the last index sampled.
   // NOTE(review): whether callers guarantee that padding is not visible here.
   108 static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
   109                                           const uint8* u_buf,
   110                                           const uint8* v_buf,
   111                                           uint8* rgb_buf,
   112                                           int width,
   113                                           int source_dx) {
   114   __m128i xmm0, xmmY1, xmmY2;
   115   __m128  xmmY;
   116   uint8 u0, u1, v0, v1, y0, y1;
   117   uint32 uv_frac, y_frac, u, v, y;
   118   int x = 0;
         // When the step is at least 0x20000 (two luma samples per output
         // pixel), start at 32768, i.e. half a luma sample into the row.
   120   if (source_dx >= 0x20000) {
   121     x = 32768;
   122   }
         // Main loop: two output pixels per iteration. Chroma is interpolated
         // once, at the first pixel's position, and reused for the second pixel.
   124   while(width >= 2) {
   125     u0 = u_buf[x >> 17];
   126     u1 = u_buf[(x >> 17) + 1];
   127     v0 = v_buf[x >> 17];
   128     v1 = v_buf[(x >> 17) + 1];
   129     y0 = y_buf[x >> 16];
   130     y1 = y_buf[(x >> 16) + 1];
   131     uv_frac = (x & 0x1fffe);
   132     y_frac = (x & 0xffff);
           // frac ^ mask equals mask - frac because frac only has bits inside
           // the mask, so each line is a weighted average of the two samples.
   133     u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;
   134     v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
   135     y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
   136     x += source_dx;
           // xmm0 = U entry + V entry; xmmY1 = first pixel (Y entry + chroma).
   138     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
   139                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
   140     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
   141     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
           // Second pixel: interpolate luma again at the advanced position.
   143     y0 = y_buf[x >> 16];
   144     y1 = y_buf[(x >> 16) + 1];
   145     y_frac = (x & 0xffff);
   146     y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
   147     x += source_dx;
   149     xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
   150     xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
           // 0x44 places the low 64 bits of xmmY1 in the low half and the low
           // 64 bits of xmmY2 in the high half of the result.
   152     xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
   153                           0x44);
           // Arithmetic shift right by 6, then saturate each lane to 0..255.
   154     xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
   155     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
           // Store the low 8 bytes: two 4-byte pixels.
   157     _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
   158     rgb_buf += 8;
   159     width -= 2;
   160   }
         // Tail: one remaining pixel when width was odd. Unlike the main loop it
         // takes the single nearest sample from each plane (no interpolation),
         // so it performs no index + 1 reads.
   162   if (width) {
   163     u = u_buf[x >> 17];
   164     v = v_buf[x >> 17];
   165     y = y_buf[x >> 16];
   167     xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
   168                           _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
   169     xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
   171     xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
   172     xmmY1 = _mm_srai_epi16(xmmY1, 6);
   173     xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
           // Only the low 32 bits (a single pixel) are written.
   174     *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
   175   }
   176 }
   // Externally visible entry point for the unscaled row conversion; it lies
   // inside the extern "C" block and forwards every argument unchanged to
   // FastConvertYUVToRGB32Row_SSE2.
   178 void FastConvertYUVToRGB32Row(const uint8* y_buf,
   179                               const uint8* u_buf,
   180                               const uint8* v_buf,
   181                               uint8* rgb_buf,
   182                               int width) {
   183   FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
   184 }
   // Externally visible entry point for the nearest-sample scaled row
   // conversion; it lies inside the extern "C" block and forwards every
   // argument unchanged to ScaleYUVToRGB32Row_SSE2.
   186 void ScaleYUVToRGB32Row(const uint8* y_buf,
   187                         const uint8* u_buf,
   188                         const uint8* v_buf,
   189                         uint8* rgb_buf,
   190                         int width,
   191                         int source_dx) {
   192   ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
   193 }
   // Externally visible entry point for the linearly interpolated scaled row
   // conversion; it lies inside the extern "C" block and forwards every
   // argument unchanged to LinearScaleYUVToRGB32Row_SSE2.
   195 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
   196                               const uint8* u_buf,
   197                               const uint8* v_buf,
   198                               uint8* rgb_buf,
   199                               int width,
   200                               int source_dx) {
   201   LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
   202                                 source_dx);
   203 }
   205 } // extern "C"

mercurial