gfx/thebes/gfxAlphaRecoverySSE2.cpp

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Tue, 06 Jan 2015 21:39:09 +0100
branch       TOR_BUG_9701
changeset    8:97036ab72558
permissions  -rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This resolves Tor bug #9701, complying with the disk-avoidance requirement
documented at https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "gfxAlphaRecovery.h"
#include "gfxImageSurface.h"
#include "nsRect.h"
#include <emmintrin.h>

// This file should only be compiled on x86 and x64 systems.  Additionally,
// you'll need to compile it with -msse2 if you're using GCC on x86.
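// (Illustrative only, not the actual Mozilla build rule: a hypothetical
//  standalone compile with 32-bit GCC would pass something like
//      g++ -m32 -msse2 -O2 -c gfxAlphaRecoverySSE2.cpp
//  whereas on x86-64 SSE2 is part of the baseline ISA and needs no extra flag.)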

#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
__declspec(align(16)) static uint32_t greenMaski[] =
    { 0x0000ff00, 0x0000ff00, 0x0000ff00, 0x0000ff00 };
__declspec(align(16)) static uint32_t alphaMaski[] =
    { 0xff000000, 0xff000000, 0xff000000, 0xff000000 };
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
static uint32_t greenMaski[] __attribute__ ((aligned (16))) =
    { 0x0000ff00, 0x0000ff00, 0x0000ff00, 0x0000ff00 };
static uint32_t alphaMaski[] __attribute__ ((aligned (16))) =
    { 0xff000000, 0xff000000, 0xff000000, 0xff000000 };
#elif defined(__SUNPRO_CC) && (defined(__i386) || defined(__x86_64__))
#pragma align 16 (greenMaski, alphaMaski)
static uint32_t greenMaski[] = { 0x0000ff00, 0x0000ff00, 0x0000ff00, 0x0000ff00 };
static uint32_t alphaMaski[] = { 0xff000000, 0xff000000, 0xff000000, 0xff000000 };
#endif

bool
gfxAlphaRecovery::RecoverAlphaSSE2(gfxImageSurface* blackSurf,
                                   const gfxImageSurface* whiteSurf)
{
    gfxIntSize size = blackSurf->GetSize();

    if (size != whiteSurf->GetSize() ||
        (blackSurf->Format() != gfxImageFormat::ARGB32 &&
         blackSurf->Format() != gfxImageFormat::RGB24) ||
        (whiteSurf->Format() != gfxImageFormat::ARGB32 &&
         whiteSurf->Format() != gfxImageFormat::RGB24))
        return false;

    blackSurf->Flush();
    whiteSurf->Flush();

    unsigned char* blackData = blackSurf->Data();
    unsigned char* whiteData = whiteSurf->Data();

    if ((NS_PTR_TO_UINT32(blackData) & 0xf) != (NS_PTR_TO_UINT32(whiteData) & 0xf) ||
        (blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
        // Cannot keep these in alignment.
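        // (That is: the per-row loop below advances blackData to a 16-byte
        //  boundary, and whiteData only stays 16-byte aligned alongside it if
        //  both pointers share the same offset modulo 16 and the two strides
        //  differ by a multiple of 16.)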
        return false;
    }

    __m128i greenMask = _mm_load_si128((__m128i*)greenMaski);
    __m128i alphaMask = _mm_load_si128((__m128i*)alphaMaski);

    for (int32_t i = 0; i < size.height; ++i) {
        int32_t j = 0;
        // Loop over single pixels until blackData reaches 16-byte alignment.
        while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
            *((uint32_t*)blackData) =
                RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
                             *reinterpret_cast<uint32_t*>(whiteData));
            blackData += 4;
            whiteData += 4;
            j++;
        }
        // This extra loop allows the compiler to do some more clever register
        // management and makes it about 5% faster than with only the
        // four-pixels-at-a-time loop.
        for (; j < size.width - 8; j += 8) {
            __m128i black1 = _mm_load_si128((__m128i*)blackData);
            __m128i white1 = _mm_load_si128((__m128i*)whiteData);
            __m128i black2 = _mm_load_si128((__m128i*)(blackData + 16));
            __m128i white2 = _mm_load_si128((__m128i*)(whiteData + 16));

            // Execute the same instructions as described in RecoverPixel, only
            // using an SSE2 packed saturated subtract.
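            // (For reference, a scalar sketch of the same math, assuming
            //  RecoverPixel applies the usual black/white recovery formula:
            //      alpha  = 0xff - (greenWhite - greenBlack);
            //      result = (alpha << 24) | (black & 0x00ffffff);
            //  The two saturated subtracts below leave 0xff - (Gw - Gb) in the
            //  green byte, and the later 2-byte shift moves that value into the
            //  alpha byte of the same pixel.)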
            white1 = _mm_subs_epu8(white1, black1);
            white2 = _mm_subs_epu8(white2, black2);
            white1 = _mm_subs_epu8(greenMask, white1);
            white2 = _mm_subs_epu8(greenMask, white2);
            // Producing the final black pixel in an XMM register and storing
            // that is actually faster than doing a masked store since that
            // does an unaligned store. We have the black pixel in a register
            // anyway.
            black1 = _mm_andnot_si128(alphaMask, black1);
            black2 = _mm_andnot_si128(alphaMask, black2);
            white1 = _mm_slli_si128(white1, 2);
            white2 = _mm_slli_si128(white2, 2);
            white1 = _mm_and_si128(alphaMask, white1);
            white2 = _mm_and_si128(alphaMask, white2);
            black1 = _mm_or_si128(white1, black1);
            black2 = _mm_or_si128(white2, black2);

            _mm_store_si128((__m128i*)blackData, black1);
            _mm_store_si128((__m128i*)(blackData + 16), black2);
            blackData += 32;
            whiteData += 32;
        }
        for (; j < size.width - 4; j += 4) {
            __m128i black = _mm_load_si128((__m128i*)blackData);
            __m128i white = _mm_load_si128((__m128i*)whiteData);

            white = _mm_subs_epu8(white, black);
            white = _mm_subs_epu8(greenMask, white);
            black = _mm_andnot_si128(alphaMask, black);
            white = _mm_slli_si128(white, 2);
            white = _mm_and_si128(alphaMask, white);
            black = _mm_or_si128(white, black);
            _mm_store_si128((__m128i*)blackData, black);
            blackData += 16;
            whiteData += 16;
        }
        // Loop single pixels until we're done.
        while (j < size.width) {
            *((uint32_t*)blackData) =
                RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
                             *reinterpret_cast<uint32_t*>(whiteData));
            blackData += 4;
            whiteData += 4;
            j++;
        }
        blackData += blackSurf->Stride() - j * 4;
        whiteData += whiteSurf->Stride() - j * 4;
    }

    blackSurf->MarkDirty();

    return true;
}

static int32_t
ByteAlignment(int32_t aAlignToLog2, int32_t aX, int32_t aY=0, int32_t aStride=1)
{
    return (aX + aStride * aY) & ((1 << aAlignToLog2) - 1);
}

/*static*/ nsIntRect
gfxAlphaRecovery::AlignRectForSubimageRecovery(const nsIntRect& aRect,
                                               gfxImageSurface* aSurface)
{
    NS_ASSERTION(gfxImageFormat::ARGB32 == aSurface->Format(),
                 "Thebes grew support for non-ARGB32 COLOR_ALPHA?");
    static const int32_t kByteAlignLog2 = GoodAlignmentLog2();
    static const int32_t bpp = 4;
    static const int32_t pixPerAlign = (1 << kByteAlignLog2) / bpp;
    //
    // We're going to create a subimage of the surface with size
    // <sw,sh> for alpha recovery, and want a SIMD fast-path.  The
    // rect <x,y, w,h> /needs/ to be redrawn, but it might not be
    // properly aligned for SIMD.  So we want to find a rect <x',y',
    // w',h'> that's a superset of what needs to be redrawn but is
    // properly aligned.  Proper alignment is
    //
    //   BPP * (x' + y' * sw) \cong 0         (mod ALIGN)
    //   BPP * w'             \cong BPP * sw  (mod ALIGN)
    //
    // (We assume the pixel at surface <0,0> is already ALIGN'd.)
    // That rect (obviously) has to fit within the surface bounds, and
    // we should also minimize the extra pixels redrawn only for
    // alignment's sake.  So we also want
    //
    //  minimize <x',y', w',h'>
    //   0 <= x' <= x
    //   0 <= y' <= y
    //   w <= w' <= sw
    //   h <= h' <= sh
    //
    // This is a messy integer non-linear programming problem, except
    // ... we can assume that ALIGN/BPP is a very small constant.  So,
    // brute force is viable.  The algorithm below will find a
    // solution if one exists, but isn't guaranteed to find the
    // minimum solution.  (For SSE2, ALIGN/BPP = 4, so it'll do at
    // most 64 iterations below).  In what's likely the common case,
    // an already-aligned rectangle, it only needs 1 iteration.
    //
    // Is this alignment worth doing?  Recovering alpha will take work
    // proportional to w*h (assuming alpha recovery computation isn't
    // memory bound).  This analysis can lead to O(w+h) extra work
    // (with small constants).  In exchange, we expect to shave off an
    // ALIGN/BPP constant by using SIMD-ized alpha recovery.  So as
    // w*h diverges from w+h, the win factor approaches ALIGN/BPP.  We
    // only really care about the w*h >> w+h case anyway; others
    // should be fast enough even with the overhead.  (Unless the cost
    // of repainting the expanded rect is high, but in that case
    // SIMD-ized alpha recovery won't make a difference so this code
    // shouldn't be called.)
    //
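    // (A purely illustrative worked instance: with BPP = 4 and ALIGN = 16 on
    //  a 16-pixel-wide ARGB32 surface, whose 64-byte stride is itself aligned,
    //  the constraints reduce to x' % 4 == 0 and w' % 4 == 0.  A dirty rect
    //  <3,0, 5,5> would then be expanded to <0,0, 8,5>: x' moves left to the
    //  previous multiple of 4, and w' grows just enough to keep the original
    //  right edge covered while remaining a multiple of 4.)
    //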
    gfxIntSize surfaceSize = aSurface->GetSize();
    const int32_t stride = bpp * surfaceSize.width;
    if (stride != aSurface->Stride()) {
        NS_WARNING("Unexpected stride, falling back on slow alpha recovery");
        return aRect;
    }

    const int32_t x = aRect.x, y = aRect.y, w = aRect.width, h = aRect.height;
    const int32_t r = x + w;
    const int32_t sw = surfaceSize.width;
    const int32_t strideAlign = ByteAlignment(kByteAlignLog2, stride);

    // The outer two loops below keep the rightmost (|r| above) and
    // bottommost pixels in |aRect| fixed wrt <x,y>, to ensure that we
    // return only a superset of the original rect.  These loops
    // search for an aligned top-left pixel by trying to expand <x,y>
    // left and up by <dx,dy> pixels, respectively.
    //
    // Then if a properly-aligned top-left pixel is found, the
    // innermost loop tries to find an aligned stride by moving the
    // rightmost pixel rightward by dr.
    int32_t dx, dy, dr;
    for (dy = 0; (dy < pixPerAlign) && (y - dy >= 0); ++dy) {
        for (dx = 0; (dx < pixPerAlign) && (x - dx >= 0); ++dx) {
            if (0 != ByteAlignment(kByteAlignLog2,
                                   bpp * (x - dx), y - dy, stride)) {
                continue;
            }
            for (dr = 0; (dr < pixPerAlign) && (r + dr <= sw); ++dr) {
                if (strideAlign == ByteAlignment(kByteAlignLog2,
                                                 bpp * (w + dr + dx))) {
                    goto FOUND_SOLUTION;
                }
            }
        }
    }

    // Didn't find a solution.
    return aRect;

FOUND_SOLUTION:
    nsIntRect solution = nsIntRect(x - dx, y - dy, w + dr + dx, h + dy);
    NS_ABORT_IF_FALSE(nsIntRect(0, 0, sw, surfaceSize.height).Contains(solution),
                      "'Solution' extends outside surface bounds!");
    return solution;
}
