1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/angle/src/libGLESv2/renderer/ImageSSE2.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,100 @@ 1.4 +#include "precompiled.h" 1.5 +// 1.6 +// Copyright (c) 2002-2012 The ANGLE Project Authors. All rights reserved. 1.7 +// Use of this source code is governed by a BSD-style license that can be 1.8 +// found in the LICENSE file. 1.9 +// 1.10 + 1.11 +// ImageSSE2.cpp: Implements SSE2-based functions of rx::Image class. It's 1.12 +// in a separated file for GCC, which can enable SSE usage only per-file, 1.13 +// not for code blocks that use SSE2 explicitly. 1.14 + 1.15 +#include "libGLESv2/Texture.h" 1.16 +#include "libGLESv2/renderer/Image.h" 1.17 + 1.18 +namespace rx 1.19 +{ 1.20 + 1.21 +void Image::loadRGBAUByteDataToBGRASSE2(GLsizei width, GLsizei height, 1.22 + int inputPitch, const void *input, size_t outputPitch, void *output) 1.23 +{ 1.24 + const unsigned int *source = NULL; 1.25 + unsigned int *dest = NULL; 1.26 + __m128i brMask = _mm_set1_epi32(0x00ff00ff); 1.27 + 1.28 + for (int y = 0; y < height; y++) 1.29 + { 1.30 + source = reinterpret_cast<const unsigned int*>(static_cast<const unsigned char*>(input) + y * inputPitch); 1.31 + dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + y * outputPitch); 1.32 + int x = 0; 1.33 + 1.34 + // Make output writes aligned 1.35 + for (x = 0; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++) 1.36 + { 1.37 + unsigned int rgba = source[x]; 1.38 + dest[x] = (_rotl(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00); 1.39 + } 1.40 + 1.41 + for (; x + 3 < width; x += 4) 1.42 + { 1.43 + __m128i sourceData = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source[x])); 1.44 + // Mask out g and a, which don't change 1.45 + __m128i gaComponents = _mm_andnot_si128(brMask, sourceData); 1.46 + // Mask out b and r 1.47 + __m128i brComponents = _mm_and_si128(sourceData, brMask); 1.48 + // Swap b and r 1.49 + __m128i brSwapped = _mm_shufflehi_epi16(_mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); 1.50 + __m128i result = _mm_or_si128(gaComponents, brSwapped); 1.51 + _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x]), result); 1.52 + } 1.53 + 1.54 + // Perform leftover writes 1.55 + for (; x < width; x++) 1.56 + { 1.57 + unsigned int rgba = source[x]; 1.58 + dest[x] = (_rotl(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00); 1.59 + } 1.60 + } 1.61 +} 1.62 + 1.63 +void Image::loadAlphaDataToBGRASSE2(GLsizei width, GLsizei height, 1.64 + int inputPitch, const void *input, size_t outputPitch, void *output) 1.65 +{ 1.66 + const unsigned char *source = NULL; 1.67 + unsigned int *dest = NULL; 1.68 + __m128i zeroWide = _mm_setzero_si128(); 1.69 + 1.70 + for (int y = 0; y < height; y++) 1.71 + { 1.72 + source = static_cast<const unsigned char*>(input) + y * inputPitch; 1.73 + dest = reinterpret_cast<unsigned int*>(static_cast<unsigned char*>(output) + y * outputPitch); 1.74 + 1.75 + int x; 1.76 + // Make output writes aligned 1.77 + for (x = 0; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++) 1.78 + { 1.79 + dest[x] = static_cast<unsigned int>(source[x]) << 24; 1.80 + } 1.81 + 1.82 + for (; x + 7 < width; x += 8) 1.83 + { 1.84 + __m128i sourceData = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&source[x])); 1.85 + // Interleave each byte to 16bit, make the lower byte to zero 1.86 + sourceData = _mm_unpacklo_epi8(zeroWide, sourceData); 1.87 + // Interleave each 16bit to 32bit, make the lower 16bit to zero 1.88 + __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData); 1.89 + __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData); 1.90 + 1.91 + _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x]), lo); 1.92 + _mm_store_si128(reinterpret_cast<__m128i*>(&dest[x + 4]), hi); 1.93 + } 1.94 + 1.95 + // Handle the remainder 1.96 + for (; x < width; x++) 1.97 + { 1.98 + dest[x] = static_cast<unsigned int>(source[x]) << 24; 1.99 + } 1.100 + } 1.101 +} 1.102 + 1.103 +}