1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/ycbcr/yuv_convert.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,357 @@ 1.4 +// Copyright (c) 2010 The Chromium Authors. All rights reserved. 1.5 +// Use of this source code is governed by a BSD-style license that can be 1.6 +// found in the LICENSE file. 1.7 + 1.8 +// This webpage shows layout of YV12 and other YUV formats 1.9 +// http://www.fourcc.org/yuv.php 1.10 +// The actual conversion is best described here 1.11 +// http://en.wikipedia.org/wiki/YUV 1.12 +// An article on optimizing YUV conversion using tables instead of multiplies 1.13 +// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf 1.14 +// 1.15 +// YV12 is a full plane of Y and a half height, half width chroma planes 1.16 +// YV16 is a full plane of Y and a full height, half width chroma planes 1.17 +// YV24 is a full plane of Y and a full height, full width chroma planes 1.18 +// 1.19 +// ARGB pixel format is output, which on little endian is stored as BGRA. 1.20 +// The alpha is set to 255, allowing the application to use RGBA or RGB32. 1.21 + 1.22 +#include "yuv_convert.h" 1.23 + 1.24 +// Header for low level row functions. 1.25 +#include "yuv_row.h" 1.26 +#include "mozilla/SSE.h" 1.27 + 1.28 +namespace mozilla { 1.29 + 1.30 +namespace gfx { 1.31 + 1.32 +// 16.16 fixed point arithmetic 1.33 +const int kFractionBits = 16; 1.34 +const int kFractionMax = 1 << kFractionBits; 1.35 +const int kFractionMask = ((1 << kFractionBits) - 1); 1.36 + 1.37 +NS_GFX_(YUVType) TypeFromSize(int ywidth, 1.38 + int yheight, 1.39 + int cbcrwidth, 1.40 + int cbcrheight) 1.41 +{ 1.42 + if (ywidth == cbcrwidth && yheight == cbcrheight) { 1.43 + return YV24; 1.44 + } 1.45 + else if (ywidth / 2 == cbcrwidth && yheight == cbcrheight) { 1.46 + return YV16; 1.47 + } 1.48 + else { 1.49 + return YV12; 1.50 + } 1.51 +} 1.52 + 1.53 +// Convert a frame of YUV to 32 bit ARGB. 1.54 +NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf, 1.55 + const uint8* u_buf, 1.56 + const uint8* v_buf, 1.57 + uint8* rgb_buf, 1.58 + int pic_x, 1.59 + int pic_y, 1.60 + int pic_width, 1.61 + int pic_height, 1.62 + int y_pitch, 1.63 + int uv_pitch, 1.64 + int rgb_pitch, 1.65 + YUVType yuv_type) { 1.66 + unsigned int y_shift = yuv_type == YV12 ? 1 : 0; 1.67 + unsigned int x_shift = yuv_type == YV24 ? 0 : 1; 1.68 + // Test for SSE because the optimized code uses movntq, which is not part of MMX. 1.69 + bool has_sse = supports_mmx() && supports_sse(); 1.70 + // There is no optimized YV24 SSE routine so we check for this and 1.71 + // fall back to the C code. 1.72 + has_sse &= yuv_type != YV24; 1.73 + bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0; 1.74 + int x_width = odd_pic_x ? pic_width - 1 : pic_width; 1.75 + 1.76 + for (int y = pic_y; y < pic_height + pic_y; ++y) { 1.77 + uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch; 1.78 + const uint8* y_ptr = y_buf + y * y_pitch + pic_x; 1.79 + const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); 1.80 + const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); 1.81 + 1.82 + if (odd_pic_x) { 1.83 + // Handle the single odd pixel manually and use the 1.84 + // fast routines for the remaining. 1.85 + FastConvertYUVToRGB32Row_C(y_ptr++, 1.86 + u_ptr++, 1.87 + v_ptr++, 1.88 + rgb_row, 1.89 + 1, 1.90 + x_shift); 1.91 + rgb_row += 4; 1.92 + } 1.93 + 1.94 + if (has_sse) { 1.95 + FastConvertYUVToRGB32Row(y_ptr, 1.96 + u_ptr, 1.97 + v_ptr, 1.98 + rgb_row, 1.99 + x_width); 1.100 + } 1.101 + else { 1.102 + FastConvertYUVToRGB32Row_C(y_ptr, 1.103 + u_ptr, 1.104 + v_ptr, 1.105 + rgb_row, 1.106 + x_width, 1.107 + x_shift); 1.108 + } 1.109 + } 1.110 + 1.111 + // MMX used for FastConvertYUVToRGB32Row requires emms instruction. 1.112 + if (has_sse) 1.113 + EMMS(); 1.114 +} 1.115 + 1.116 +// C version does 8 at a time to mimic MMX code 1.117 +static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, 1.118 + int source_width, int source_y_fraction) { 1.119 + int y1_fraction = source_y_fraction; 1.120 + int y0_fraction = 256 - y1_fraction; 1.121 + uint8* end = ybuf + source_width; 1.122 + do { 1.123 + ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; 1.124 + ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8; 1.125 + ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8; 1.126 + ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8; 1.127 + ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8; 1.128 + ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8; 1.129 + ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8; 1.130 + ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8; 1.131 + y0_ptr += 8; 1.132 + y1_ptr += 8; 1.133 + ybuf += 8; 1.134 + } while (ybuf < end); 1.135 +} 1.136 + 1.137 +#ifdef MOZILLA_MAY_SUPPORT_MMX 1.138 +void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, 1.139 + int source_width, int source_y_fraction); 1.140 +#endif 1.141 + 1.142 +#ifdef MOZILLA_MAY_SUPPORT_SSE2 1.143 +void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, 1.144 + int source_width, int source_y_fraction); 1.145 +#endif 1.146 + 1.147 +static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr, 1.148 + const uint8* y1_ptr, int source_width, 1.149 + int source_y_fraction) { 1.150 +#ifdef MOZILLA_MAY_SUPPORT_SSE2 1.151 + if (mozilla::supports_sse2()) { 1.152 + FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); 1.153 + return; 1.154 + } 1.155 +#endif 1.156 + 1.157 +#ifdef MOZILLA_MAY_SUPPORT_MMX 1.158 + if (mozilla::supports_mmx()) { 1.159 + FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); 1.160 + return; 1.161 + } 1.162 +#endif 1.163 + 1.164 + FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); 1.165 +} 1.166 + 1.167 + 1.168 +// Scale a frame of YUV to 32 bit ARGB. 1.169 +NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf, 1.170 + const uint8* u_buf, 1.171 + const uint8* v_buf, 1.172 + uint8* rgb_buf, 1.173 + int source_width, 1.174 + int source_height, 1.175 + int width, 1.176 + int height, 1.177 + int y_pitch, 1.178 + int uv_pitch, 1.179 + int rgb_pitch, 1.180 + YUVType yuv_type, 1.181 + Rotate view_rotate, 1.182 + ScaleFilter filter) { 1.183 + bool has_mmx = supports_mmx(); 1.184 + 1.185 + // 4096 allows 3 buffers to fit in 12k. 1.186 + // Helps performance on CPU with 16K L1 cache. 1.187 + // Large enough for 3830x2160 and 30" displays which are 2560x1600. 1.188 + const int kFilterBufferSize = 4096; 1.189 + // Disable filtering if the screen is too big (to avoid buffer overflows). 1.190 + // This should never happen to regular users: they don't have monitors 1.191 + // wider than 4096 pixels. 1.192 + // TODO(fbarchard): Allow rotated videos to filter. 1.193 + if (source_width > kFilterBufferSize || view_rotate) 1.194 + filter = FILTER_NONE; 1.195 + 1.196 + unsigned int y_shift = yuv_type == YV12 ? 1 : 0; 1.197 + // Diagram showing origin and direction of source sampling. 1.198 + // ->0 4<- 1.199 + // 7 3 1.200 + // 1.201 + // 6 5 1.202 + // ->1 2<- 1.203 + // Rotations that start at right side of image. 1.204 + if ((view_rotate == ROTATE_180) || 1.205 + (view_rotate == ROTATE_270) || 1.206 + (view_rotate == MIRROR_ROTATE_0) || 1.207 + (view_rotate == MIRROR_ROTATE_90)) { 1.208 + y_buf += source_width - 1; 1.209 + u_buf += source_width / 2 - 1; 1.210 + v_buf += source_width / 2 - 1; 1.211 + source_width = -source_width; 1.212 + } 1.213 + // Rotations that start at bottom of image. 1.214 + if ((view_rotate == ROTATE_90) || 1.215 + (view_rotate == ROTATE_180) || 1.216 + (view_rotate == MIRROR_ROTATE_90) || 1.217 + (view_rotate == MIRROR_ROTATE_180)) { 1.218 + y_buf += (source_height - 1) * y_pitch; 1.219 + u_buf += ((source_height >> y_shift) - 1) * uv_pitch; 1.220 + v_buf += ((source_height >> y_shift) - 1) * uv_pitch; 1.221 + source_height = -source_height; 1.222 + } 1.223 + 1.224 + // Handle zero sized destination. 1.225 + if (width == 0 || height == 0) 1.226 + return; 1.227 + int source_dx = source_width * kFractionMax / width; 1.228 + int source_dy = source_height * kFractionMax / height; 1.229 + int source_dx_uv = source_dx; 1.230 + 1.231 + if ((view_rotate == ROTATE_90) || 1.232 + (view_rotate == ROTATE_270)) { 1.233 + int tmp = height; 1.234 + height = width; 1.235 + width = tmp; 1.236 + tmp = source_height; 1.237 + source_height = source_width; 1.238 + source_width = tmp; 1.239 + int original_dx = source_dx; 1.240 + int original_dy = source_dy; 1.241 + source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits; 1.242 + source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits; 1.243 + source_dy = original_dx; 1.244 + if (view_rotate == ROTATE_90) { 1.245 + y_pitch = -1; 1.246 + uv_pitch = -1; 1.247 + source_height = -source_height; 1.248 + } else { 1.249 + y_pitch = 1; 1.250 + uv_pitch = 1; 1.251 + } 1.252 + } 1.253 + 1.254 + // Need padding because FilterRows() will write 1 to 16 extra pixels 1.255 + // after the end for SSE2 version. 1.256 + uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16]; 1.257 + uint8* ybuf = 1.258 + reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); 1.259 + uint8* ubuf = ybuf + kFilterBufferSize; 1.260 + uint8* vbuf = ubuf + kFilterBufferSize; 1.261 + // TODO(fbarchard): Fixed point math is off by 1 on negatives. 1.262 + int yscale_fixed = (source_height << kFractionBits) / height; 1.263 + 1.264 + // TODO(fbarchard): Split this into separate function for better efficiency. 1.265 + for (int y = 0; y < height; ++y) { 1.266 + uint8* dest_pixel = rgb_buf + y * rgb_pitch; 1.267 + int source_y_subpixel = (y * yscale_fixed); 1.268 + if (yscale_fixed >= (kFractionMax * 2)) { 1.269 + source_y_subpixel += kFractionMax / 2; // For 1/2 or less, center filter. 1.270 + } 1.271 + int source_y = source_y_subpixel >> kFractionBits; 1.272 + 1.273 + const uint8* y0_ptr = y_buf + source_y * y_pitch; 1.274 + const uint8* y1_ptr = y0_ptr + y_pitch; 1.275 + 1.276 + const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch; 1.277 + const uint8* u1_ptr = u0_ptr + uv_pitch; 1.278 + const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch; 1.279 + const uint8* v1_ptr = v0_ptr + uv_pitch; 1.280 + 1.281 + // vertical scaler uses 16.8 fixed point 1.282 + int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; 1.283 + int source_uv_fraction = 1.284 + ((source_y_subpixel >> y_shift) & kFractionMask) >> 8; 1.285 + 1.286 + const uint8* y_ptr = y0_ptr; 1.287 + const uint8* u_ptr = u0_ptr; 1.288 + const uint8* v_ptr = v0_ptr; 1.289 + // Apply vertical filtering if necessary. 1.290 + // TODO(fbarchard): Remove memcpy when not necessary. 1.291 + if (filter & mozilla::gfx::FILTER_BILINEAR_V) { 1.292 + if (yscale_fixed != kFractionMax && 1.293 + source_y_fraction && ((source_y + 1) < source_height)) { 1.294 + FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); 1.295 + } else { 1.296 + memcpy(ybuf, y0_ptr, source_width); 1.297 + } 1.298 + y_ptr = ybuf; 1.299 + ybuf[source_width] = ybuf[source_width-1]; 1.300 + int uv_source_width = (source_width + 1) / 2; 1.301 + if (yscale_fixed != kFractionMax && 1.302 + source_uv_fraction && 1.303 + (((source_y >> y_shift) + 1) < (source_height >> y_shift))) { 1.304 + FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); 1.305 + FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); 1.306 + } else { 1.307 + memcpy(ubuf, u0_ptr, uv_source_width); 1.308 + memcpy(vbuf, v0_ptr, uv_source_width); 1.309 + } 1.310 + u_ptr = ubuf; 1.311 + v_ptr = vbuf; 1.312 + ubuf[uv_source_width] = ubuf[uv_source_width - 1]; 1.313 + vbuf[uv_source_width] = vbuf[uv_source_width - 1]; 1.314 + } 1.315 + if (source_dx == kFractionMax) { // Not scaled 1.316 + FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, 1.317 + dest_pixel, width); 1.318 + } else if (filter & FILTER_BILINEAR_H) { 1.319 + LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, 1.320 + dest_pixel, width, source_dx); 1.321 + } else { 1.322 +// Specialized scalers and rotation. 1.323 +#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86) 1.324 + if(mozilla::supports_sse()) { 1.325 + if (width == (source_width * 2)) { 1.326 + DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, 1.327 + dest_pixel, width); 1.328 + } else if ((source_dx & kFractionMask) == 0) { 1.329 + // Scaling by integer scale factor. ie half. 1.330 + ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, 1.331 + dest_pixel, width, 1.332 + source_dx >> kFractionBits); 1.333 + } else if (source_dx_uv == source_dx) { // Not rotated. 1.334 + ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, 1.335 + dest_pixel, width, source_dx); 1.336 + } else { 1.337 + RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, 1.338 + dest_pixel, width, 1.339 + source_dx >> kFractionBits, 1.340 + source_dx_uv >> kFractionBits); 1.341 + } 1.342 + } 1.343 + else { 1.344 + ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, 1.345 + dest_pixel, width, source_dx); 1.346 + } 1.347 +#else 1.348 + (void)source_dx_uv; 1.349 + ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, 1.350 + dest_pixel, width, source_dx); 1.351 +#endif 1.352 + } 1.353 + } 1.354 + // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. 1.355 + if (has_mmx) 1.356 + EMMS(); 1.357 +} 1.358 + 1.359 +} // namespace gfx 1.360 +} // namespace mozilla