gfx/ycbcr/yuv_row_win.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
michael@0 2 // Use of this source code is governed by a BSD-style license that can be
michael@0 3 // found in the LICENSE file.
michael@0 4
michael@0 5 #include "yuv_row.h"
michael@0 6 #include "mozilla/SSE.h"
michael@0 7
michael@0 8 #define kCoefficientsRgbU kCoefficientsRgbY + 2048
michael@0 9 #define kCoefficientsRgbV kCoefficientsRgbY + 4096
michael@0 10
michael@0 11 extern "C" {
michael@0 12
michael@0 13 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
// Converts `width` pixels of one row from separate Y, U and V byte planes
// into 4-byte output pixels at rgb_buf, using MMX registers and the
// kCoefficientsRgb* lookup tables.
//
// The main loop produces two pixels per iteration: one U byte and one V byte
// are shared by two consecutive Y bytes. If `width` is odd, one extra pixel is
// produced after the loop with a 4-byte store.
//
// The function is __declspec(naked): it saves and restores the general
// registers itself (pushad/popad) and reads its arguments directly from the
// stack.
// NOTE(review): mm0-mm2 are modified and no emms is executed in this routine;
// presumably the caller is expected to issue it -- confirm.
michael@0 14 __declspec(naked)
michael@0 15 void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 16 const uint8* u_buf,
michael@0 17 const uint8* v_buf,
michael@0 18 uint8* rgb_buf,
michael@0 19 int width) {
michael@0 20 __asm {
michael@0 21 pushad // pushes 8 registers (32 bytes); first argument is now at esp + 32 + 4
michael@0 22 mov edx, [esp + 32 + 4] // Y
michael@0 23 mov edi, [esp + 32 + 8] // U
michael@0 24 mov esi, [esp + 32 + 12] // V
michael@0 25 mov ebp, [esp + 32 + 16] // rgb
michael@0 26 mov ecx, [esp + 32 + 20] // width
michael@0 27 jmp convertend // check the remaining count before the first iteration
michael@0 28
michael@0 29 convertloop :
michael@0 30 movzx eax, byte ptr [edi] // eax = U sample
michael@0 31 add edi, 1
michael@0 32 movzx ebx, byte ptr [esi] // ebx = V sample
michael@0 33 add esi, 1
michael@0 34 movq mm0, [kCoefficientsRgbU + 8 * eax] // mm0 = 8-byte U table entry
michael@0 35 movzx eax, byte ptr [edx] // eax = first Y sample
michael@0 36 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] // mm0 += V table entry (signed saturating words)
michael@0 37 movzx ebx, byte ptr [edx + 1] // ebx = second Y sample
michael@0 38 movq mm1, [kCoefficientsRgbY + 8 * eax] // mm1 = Y entry for pixel 0
michael@0 39 add edx, 2
michael@0 40 movq mm2, [kCoefficientsRgbY + 8 * ebx] // mm2 = Y entry for pixel 1
michael@0 41 paddsw mm1, mm0 // combine luma with the shared chroma contribution
michael@0 42 paddsw mm2, mm0
michael@0 43 psraw mm1, 6 // arithmetic shift of each 16-bit lane right by 6
michael@0 44 psraw mm2, 6
michael@0 45 packuswb mm1, mm2 // saturate 8 words to 8 unsigned bytes: pixel 0 low, pixel 1 high
michael@0 46 movntq [ebp], mm1 // 8-byte non-temporal store of both pixels
michael@0 47 add ebp, 8
michael@0 48 convertend :
michael@0 49 sub ecx, 2
michael@0 50 jns convertloop // loop while at least two pixels remained before the subtract
michael@0 51
michael@0 52 and ecx, 1 // odd number of pixels?
michael@0 53 jz convertdone
michael@0 54
michael@0 55 movzx eax, byte ptr [edi] // trailing pixel: same lookup sequence for a single Y sample
michael@0 56 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 57 movzx eax, byte ptr [esi]
michael@0 58 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
michael@0 59 movzx eax, byte ptr [edx]
michael@0 60 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 61 paddsw mm1, mm0
michael@0 62 psraw mm1, 6
michael@0 63 packuswb mm1, mm1
michael@0 64 movd [ebp], mm1 // 4-byte store: exactly one pixel
michael@0 65 convertdone :
michael@0 66
michael@0 67 popad // restore the registers saved by pushad
michael@0 68 ret // naked function: explicit return, arguments are left on the stack
michael@0 69 }
michael@0 70 }
michael@0 71
// Same table-driven conversion as FastConvertYUVToRGB32Row_SSE, but the source
// pointers advance by `step` bytes instead of 1: per loop iteration the U and
// V pointers each advance by `step` once, and the Y pointer advances by `step`
// twice (once per output pixel).
//
// Two pixels are written per iteration (8 bytes); if `width` is odd a final
// single pixel is written with a 4-byte store.
//
// Naked function: registers are preserved with pushad/popad and arguments are
// read from the stack at esp + 32 + offset.
// NOTE(review): uses mm0-mm2 without executing emms; presumably left to the
// caller -- confirm.
michael@0 72 __declspec(naked)
michael@0 73 void ConvertYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 74 const uint8* u_buf,
michael@0 75 const uint8* v_buf,
michael@0 76 uint8* rgb_buf,
michael@0 77 int width,
michael@0 78 int step) {
michael@0 79 __asm {
michael@0 80 pushad // 32 bytes of saved registers sit between esp and the return address
michael@0 81 mov edx, [esp + 32 + 4] // Y
michael@0 82 mov edi, [esp + 32 + 8] // U
michael@0 83 mov esi, [esp + 32 + 12] // V
michael@0 84 mov ebp, [esp + 32 + 16] // rgb
michael@0 85 mov ecx, [esp + 32 + 20] // width
michael@0 86 mov ebx, [esp + 32 + 24] // step
michael@0 87 jmp wend // check the remaining count before the first iteration
michael@0 88
michael@0 89 wloop :
michael@0 90 movzx eax, byte ptr [edi] // U sample
michael@0 91 add edi, ebx // U pointer += step
michael@0 92 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 93 movzx eax, byte ptr [esi] // V sample
michael@0 94 add esi, ebx // V pointer += step
michael@0 95 paddsw mm0, [kCoefficientsRgbV + 8 * eax] // mm0 = U entry + V entry (signed saturating)
michael@0 96 movzx eax, byte ptr [edx] // Y sample for pixel 0
michael@0 97 add edx, ebx // Y pointer += step
michael@0 98 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 99 movzx eax, byte ptr [edx] // Y sample for pixel 1
michael@0 100 add edx, ebx // Y pointer += step again
michael@0 101 movq mm2, [kCoefficientsRgbY + 8 * eax]
michael@0 102 paddsw mm1, mm0
michael@0 103 paddsw mm2, mm0
michael@0 104 psraw mm1, 6 // arithmetic shift of each 16-bit lane right by 6
michael@0 105 psraw mm2, 6
michael@0 106 packuswb mm1, mm2 // saturate to bytes: pixel 0 in the low half, pixel 1 in the high half
michael@0 107 movntq [ebp], mm1 // 8-byte non-temporal store
michael@0 108 add ebp, 8
michael@0 109 wend :
michael@0 110 sub ecx, 2
michael@0 111 jns wloop // continue while at least two pixels remained
michael@0 112
michael@0 113 and ecx, 1 // odd number of pixels?
michael@0 114 jz wdone
michael@0 115
michael@0 116 movzx eax, byte ptr [edi] // trailing pixel: pointers are read but no longer advanced
michael@0 117 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 118 movzx eax, byte ptr [esi]
michael@0 119 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
michael@0 120 movzx eax, byte ptr [edx]
michael@0 121 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 122 paddsw mm1, mm0
michael@0 123 psraw mm1, 6
michael@0 124 packuswb mm1, mm1
michael@0 125 movd [ebp], mm1 // 4-byte store: one pixel
michael@0 126 wdone :
michael@0 127
michael@0 128 popad
michael@0 129 ret
michael@0 130 }
michael@0 131 }
michael@0 132
// Table-driven YUV -> 4-byte pixel conversion with independent source strides:
// the Y pointer advances by `ystep` bytes per output pixel, while the U and V
// pointers advance by `uvstep` bytes once per pair of output pixels.
//
// ebx is the only spare register, so it is reloaded from the stack inside the
// loop: first with uvstep, then with ystep.
//
// Two pixels are written per iteration; an odd trailing pixel is written with
// a 4-byte store. Naked function: pushad/popad preserve registers and
// arguments are read from esp + 32 + offset.
// NOTE(review): uses mm0-mm2 without executing emms; presumably left to the
// caller -- confirm.
michael@0 133 __declspec(naked)
michael@0 134 void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 135 const uint8* u_buf,
michael@0 136 const uint8* v_buf,
michael@0 137 uint8* rgb_buf,
michael@0 138 int width,
michael@0 139 int ystep,
michael@0 140 int uvstep) {
michael@0 141 __asm {
michael@0 142 pushad // 32 bytes of saved registers; arguments start at esp + 32 + 4
michael@0 143 mov edx, [esp + 32 + 4] // Y
michael@0 144 mov edi, [esp + 32 + 8] // U
michael@0 145 mov esi, [esp + 32 + 12] // V
michael@0 146 mov ebp, [esp + 32 + 16] // rgb
michael@0 147 mov ecx, [esp + 32 + 20] // width
michael@0 148 jmp wend // check the remaining count before the first iteration
michael@0 149
michael@0 150 wloop :
michael@0 151 movzx eax, byte ptr [edi] // U sample
michael@0 152 mov ebx, [esp + 32 + 28] // uvstep
michael@0 153 add edi, ebx // U pointer += uvstep
michael@0 154 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 155 movzx eax, byte ptr [esi] // V sample
michael@0 156 add esi, ebx // V pointer += uvstep
michael@0 157 paddsw mm0, [kCoefficientsRgbV + 8 * eax] // mm0 = U entry + V entry (signed saturating)
michael@0 158 movzx eax, byte ptr [edx] // Y sample for pixel 0
michael@0 159 mov ebx, [esp + 32 + 24] // ystep
michael@0 160 add edx, ebx // Y pointer += ystep
michael@0 161 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 162 movzx eax, byte ptr [edx] // Y sample for pixel 1
michael@0 163 add edx, ebx // Y pointer += ystep again
michael@0 164 movq mm2, [kCoefficientsRgbY + 8 * eax]
michael@0 165 paddsw mm1, mm0
michael@0 166 paddsw mm2, mm0
michael@0 167 psraw mm1, 6 // arithmetic shift of each 16-bit lane right by 6
michael@0 168 psraw mm2, 6
michael@0 169 packuswb mm1, mm2 // saturate to bytes: pixel 0 low half, pixel 1 high half
michael@0 170 movntq [ebp], mm1 // 8-byte non-temporal store
michael@0 171 add ebp, 8
michael@0 172 wend :
michael@0 173 sub ecx, 2
michael@0 174 jns wloop // continue while at least two pixels remained
michael@0 175
michael@0 176 and ecx, 1 // odd number of pixels?
michael@0 177 jz wdone
michael@0 178
michael@0 179 movzx eax, byte ptr [edi] // trailing pixel from the current U, V and Y positions
michael@0 180 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 181 movzx eax, byte ptr [esi]
michael@0 182 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
michael@0 183 movzx eax, byte ptr [edx]
michael@0 184 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 185 paddsw mm1, mm0
michael@0 186 psraw mm1, 6
michael@0 187 packuswb mm1, mm1
michael@0 188 movd [ebp], mm1 // 4-byte store: one pixel
michael@0 189 wdone :
michael@0 190
michael@0 191 popad
michael@0 192 ret
michael@0 193 }
michael@0 194 }
michael@0 195
// Converts a YUV row while doubling it horizontally: every source Y sample is
// converted once and the resulting 4-byte pixel is stored twice.
//
// `width` is counted in output pixels. Each main-loop iteration consumes one
// U byte, one V byte and two Y bytes, and writes four output pixels (16 bytes),
// decrementing the count by 4.
//
// Remainder (1..3 output pixels left): one more pixel is computed from the
// current U, V and Y positions and that same pixel is stored once per
// remaining output pixel.
//
// Naked function: pushad/popad preserve registers and arguments are read from
// esp + 32 + offset.
// NOTE(review): uses mm0/mm1 without executing emms; presumably left to the
// caller -- confirm.
michael@0 196 __declspec(naked)
michael@0 197 void DoubleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 198 const uint8* u_buf,
michael@0 199 const uint8* v_buf,
michael@0 200 uint8* rgb_buf,
michael@0 201 int width) {
michael@0 202 __asm {
michael@0 203 pushad // 32 bytes of saved registers; arguments start at esp + 32 + 4
michael@0 204 mov edx, [esp + 32 + 4] // Y
michael@0 205 mov edi, [esp + 32 + 8] // U
michael@0 206 mov esi, [esp + 32 + 12] // V
michael@0 207 mov ebp, [esp + 32 + 16] // rgb
michael@0 208 mov ecx, [esp + 32 + 20] // width
michael@0 209 jmp wend // check the remaining count before the first iteration
michael@0 210
michael@0 211 wloop :
michael@0 212 movzx eax, byte ptr [edi] // U sample
michael@0 213 add edi, 1
michael@0 214 movzx ebx, byte ptr [esi] // V sample
michael@0 215 add esi, 1
michael@0 216 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 217 movzx eax, byte ptr [edx] // first Y sample
michael@0 218 paddsw mm0, [kCoefficientsRgbV + 8 * ebx] // mm0 = U entry + V entry (signed saturating)
michael@0 219 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 220 paddsw mm1, mm0
michael@0 221 psraw mm1, 6 // arithmetic shift of each 16-bit lane right by 6
michael@0 222 packuswb mm1, mm1 // saturate to bytes; the pixel is in the low 4 bytes
michael@0 223 punpckldq mm1, mm1 // duplicate the low dword into the high dword: same pixel twice
michael@0 224 movntq [ebp], mm1 // output pixels 0 and 1
michael@0 225
michael@0 226 movzx ebx, byte ptr [edx + 1] // second Y sample
michael@0 227 add edx, 2
michael@0 228 paddsw mm0, [kCoefficientsRgbY + 8 * ebx] // reuse mm0 (chroma sum) as the accumulator for pixel 1
michael@0 229 psraw mm0, 6
michael@0 230 packuswb mm0, mm0
michael@0 231 punpckldq mm0, mm0
michael@0 232 movntq [ebp+8], mm0 // output pixels 2 and 3
michael@0 233 add ebp, 16
michael@0 234 wend :
michael@0 235 sub ecx, 4
michael@0 236 jns wloop // continue while at least four output pixels remained
michael@0 237
michael@0 238 add ecx, 4 // undo the last subtract: ecx = output pixels still to write
michael@0 239 jz wdone
michael@0 240
michael@0 241 movzx eax, byte ptr [edi] // compute a single pixel for the remainder
michael@0 242 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 243 movzx eax, byte ptr [esi]
michael@0 244 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
michael@0 245 movzx eax, byte ptr [edx]
michael@0 246 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 247 paddsw mm1, mm0
michael@0 248 psraw mm1, 6
michael@0 249 packuswb mm1, mm1
michael@0 250 jmp wend1
michael@0 251
michael@0 252 wloop1 :
michael@0 253 movd [ebp], mm1 // store that same pixel once per remaining output pixel
michael@0 254 add ebp, 4
michael@0 255 wend1 :
michael@0 256 sub ecx, 1
michael@0 257 jns wloop1
michael@0 258 wdone :
michael@0 259 popad
michael@0 260 ret
michael@0 261 }
michael@0 262 }
michael@0 263
// Point-sampled horizontal scaling combined with YUV -> 4-byte pixel
// conversion. A source position x (kept in ebx, starting at 0) is advanced by
// source_dx after every output pixel; the Y sample is read at index x >> 16
// and the U and V samples at index x >> 17, so x carries 16 fractional bits.
// For each pair of output pixels the chroma is sampled once, at the first
// pixel's x. An odd trailing pixel is written with a 4-byte store.
//
// Naked function: pushad/popad preserve registers and arguments are read from
// esp + 32 + offset.
// NOTE(review): uses mm0-mm2 without executing emms; presumably left to the
// caller -- confirm.
michael@0 264 // This version does general purpose scaling by any amount, up or down.
michael@0 265 // The only thing it cannot do is rotation by 90 or 270.
michael@0 266 // For performance the chroma is under-sampled, reducing cost of a 3x
michael@0 267 // 1080p scale from 8.4 ms to 5.4 ms.
michael@0 268 __declspec(naked)
michael@0 269 void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 270 const uint8* u_buf,
michael@0 271 const uint8* v_buf,
michael@0 272 uint8* rgb_buf,
michael@0 273 int width,
michael@0 274 int source_dx) {
michael@0 275 __asm {
michael@0 276 pushad // 32 bytes of saved registers; arguments start at esp + 32 + 4
michael@0 277 mov edx, [esp + 32 + 4] // Y
michael@0 278 mov edi, [esp + 32 + 8] // U
michael@0 279 mov esi, [esp + 32 + 12] // V
michael@0 280 mov ebp, [esp + 32 + 16] // rgb
michael@0 281 mov ecx, [esp + 32 + 20] // width
michael@0 282 xor ebx, ebx // x
michael@0 283 jmp scaleend // check the remaining count before the first iteration
michael@0 284
michael@0 285 scaleloop :
michael@0 286 mov eax, ebx
michael@0 287 sar eax, 17 // chroma index = x >> 17
michael@0 288 movzx eax, byte ptr [edi + eax] // U sample
michael@0 289 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 290 mov eax, ebx
michael@0 291 sar eax, 17
michael@0 292 movzx eax, byte ptr [esi + eax] // V sample
michael@0 293 paddsw mm0, [kCoefficientsRgbV + 8 * eax] // mm0 = U entry + V entry (signed saturating)
michael@0 294 mov eax, ebx // keep the pre-increment x for the Y lookup
michael@0 295 add ebx, [esp + 32 + 24] // x += source_dx
michael@0 296 sar eax, 16 // luma index = x >> 16
michael@0 297 movzx eax, byte ptr [edx + eax] // Y sample for pixel 0
michael@0 298 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 299 mov eax, ebx
michael@0 300 add ebx, [esp + 32 + 24] // x += source_dx
michael@0 301 sar eax, 16
michael@0 302 movzx eax, byte ptr [edx + eax] // Y sample for pixel 1
michael@0 303 movq mm2, [kCoefficientsRgbY + 8 * eax]
michael@0 304 paddsw mm1, mm0
michael@0 305 paddsw mm2, mm0
michael@0 306 psraw mm1, 6 // arithmetic shift of each 16-bit lane right by 6
michael@0 307 psraw mm2, 6
michael@0 308 packuswb mm1, mm2 // saturate to bytes: pixel 0 low half, pixel 1 high half
michael@0 309 movntq [ebp], mm1 // 8-byte non-temporal store
michael@0 310 add ebp, 8
michael@0 311 scaleend :
michael@0 312 sub ecx, 2
michael@0 313 jns scaleloop // continue while at least two pixels remained
michael@0 314
michael@0 315 and ecx, 1 // odd number of pixels?
michael@0 316 jz scaledone
michael@0 317
michael@0 318 mov eax, ebx // trailing pixel, sampled at the current x
michael@0 319 sar eax, 17
michael@0 320 movzx eax, byte ptr [edi + eax]
michael@0 321 movq mm0, [kCoefficientsRgbU + 8 * eax]
michael@0 322 mov eax, ebx
michael@0 323 sar eax, 17
michael@0 324 movzx eax, byte ptr [esi + eax]
michael@0 325 paddsw mm0, [kCoefficientsRgbV + 8 * eax]
michael@0 326 mov eax, ebx
michael@0 327 sar eax, 16
michael@0 328 movzx eax, byte ptr [edx + eax]
michael@0 329 movq mm1, [kCoefficientsRgbY + 8 * eax]
michael@0 330 paddsw mm1, mm0
michael@0 331 psraw mm1, 6
michael@0 332 packuswb mm1, mm1
michael@0 333 movd [ebp], mm1 // 4-byte store: one pixel
michael@0 334
michael@0 335 scaledone :
michael@0 336 popad
michael@0 337 ret
michael@0 338 }
michael@0 339 }
michael@0 340
// Horizontal scaling with linear interpolation, combined with YUV -> 4-byte
// pixel conversion.
//
// A source position x (ebx, 16 fractional bits) is advanced by source_dx per
// output pixel until it reaches width * source_dx. That bound is computed on
// entry and written back over the `width` argument slot on the stack, because
// no register is free to hold it.
//
// For each sample the two neighbouring source bytes are blended:
//   Y:    index = x >> 16, frac = x & 0xffff,
//         value = (y[index] * (frac ^ 0xffff) + y[index + 1] * frac) >> 16
//   U, V: index = x >> 17, frac = x & 0x1fffe,
//         value = (c[index] * (frac ^ 0x1fffe) + c[index + 1] * frac) >> 17
// Chroma is interpolated once per pair of output pixels.
//
// esi is used as a scratch register, so the V pointer is reloaded from the
// stack inside the loop instead of being kept in a register.
//
// There are two exits: the normal one after the loop, and lscalelastpixel,
// taken when x reaches the bound after the first pixel of a pair (that pixel
// alone is then written with a 4-byte store).
//
// NOTE(review): every lookup also reads the byte at index + 1, so the byte
// after the last sampled index of each plane is read; presumably the caller
// guarantees it is readable -- confirm.
// NOTE(review): uses mm0-mm2 without executing emms; presumably left to the
// caller -- confirm.
michael@0 341 __declspec(naked)
michael@0 342 void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 343 const uint8* u_buf,
michael@0 344 const uint8* v_buf,
michael@0 345 uint8* rgb_buf,
michael@0 346 int width,
michael@0 347 int source_dx) {
michael@0 348 __asm {
michael@0 349 pushad // 32 bytes of saved registers; arguments start at esp + 32 + 4
michael@0 350 mov edx, [esp + 32 + 4] // Y
michael@0 351 mov edi, [esp + 32 + 8] // U
michael@0 352 // [esp + 32 + 12] // V
michael@0 353 mov ebp, [esp + 32 + 16] // rgb
michael@0 354 mov ecx, [esp + 32 + 20] // width
michael@0 355 imul ecx, [esp + 32 + 24] // source_dx
michael@0 356 mov [esp + 32 + 20], ecx // source_width = width * source_dx
michael@0 357 mov ecx, [esp + 32 + 24] // source_dx
michael@0 358 xor ebx, ebx // x = 0
michael@0 359 cmp ecx, 0x20000
michael@0 360 jl lscaleend // source_dx < 0x20000: start at x = 0
michael@0 361 mov ebx, 0x8000 // x = 0.5 for 1/2 or less
michael@0 362 jmp lscaleend
michael@0 363 lscaleloop:
michael@0 364 mov eax, ebx
michael@0 365 sar eax, 0x11 // chroma index = x >> 17
michael@0 366
michael@0 367 movzx ecx, byte ptr [edi + eax] // left U neighbour
michael@0 368 movzx esi, byte ptr [edi + eax + 1] // right U neighbour
michael@0 369 mov eax, ebx
michael@0 370 and eax, 0x1fffe // weight of the right neighbour
michael@0 371 imul esi, eax
michael@0 372 xor eax, 0x1fffe // complementary weight for the left neighbour
michael@0 373 imul ecx, eax
michael@0 374 add ecx, esi
michael@0 375 shr ecx, 17 // blended U value, used as table index
michael@0 376 movq mm0, [kCoefficientsRgbU + 8 * ecx]
michael@0 377
michael@0 378 mov esi, [esp + 32 + 12] // reload the V pointer (esi was clobbered above)
michael@0 379 mov eax, ebx
michael@0 380 sar eax, 0x11
michael@0 381
michael@0 382 movzx ecx, byte ptr [esi + eax] // left V neighbour
michael@0 383 movzx esi, byte ptr [esi + eax + 1] // right V neighbour (overwrites the pointer)
michael@0 384 mov eax, ebx
michael@0 385 and eax, 0x1fffe
michael@0 386 imul esi, eax
michael@0 387 xor eax, 0x1fffe
michael@0 388 imul ecx, eax
michael@0 389 add ecx, esi
michael@0 390 shr ecx, 17 // blended V value
michael@0 391 paddsw mm0, [kCoefficientsRgbV + 8 * ecx] // mm0 = U entry + V entry (signed saturating)
michael@0 392
michael@0 393 mov eax, ebx
michael@0 394 sar eax, 0x10 // luma index = x >> 16
michael@0 395 movzx ecx, byte ptr [edx + eax] // left Y neighbour
michael@0 396 movzx esi, byte ptr [1 + edx + eax] // right Y neighbour
michael@0 397 mov eax, ebx
michael@0 398 add ebx, [esp + 32 + 24] // x += source_dx
michael@0 399 and eax, 0xffff // weight of the right neighbour (from the pre-increment x)
michael@0 400 imul esi, eax
michael@0 401 xor eax, 0xffff // complementary weight for the left neighbour
michael@0 402 imul ecx, eax
michael@0 403 add ecx, esi
michael@0 404 shr ecx, 16 // blended Y value for pixel 0
michael@0 405 movq mm1, [kCoefficientsRgbY + 8 * ecx]
michael@0 406
michael@0 407 cmp ebx, [esp + 32 + 20] // reached source_width after the first pixel?
michael@0 408 jge lscalelastpixel // yes: emit just this one pixel and return
michael@0 409
michael@0 410 mov eax, ebx
michael@0 411 sar eax, 0x10
michael@0 412 movzx ecx, byte ptr [edx + eax]
michael@0 413 movzx esi, byte ptr [edx + eax + 1]
michael@0 414 mov eax, ebx
michael@0 415 add ebx, [esp + 32 + 24] // x += source_dx
michael@0 416 and eax, 0xffff
michael@0 417 imul esi, eax
michael@0 418 xor eax, 0xffff
michael@0 419 imul ecx, eax
michael@0 420 add ecx, esi
michael@0 421 shr ecx, 16 // blended Y value for pixel 1
michael@0 422 movq mm2, [kCoefficientsRgbY + 8 * ecx]
michael@0 423
michael@0 424 paddsw mm1, mm0
michael@0 425 paddsw mm2, mm0
michael@0 426 psraw mm1, 0x6 // arithmetic shift of each 16-bit lane right by 6
michael@0 427 psraw mm2, 0x6
michael@0 428 packuswb mm1, mm2 // saturate to bytes: pixel 0 low half, pixel 1 high half
michael@0 429 movntq [ebp], mm1 // 8-byte non-temporal store
michael@0 430 add ebp, 0x8
michael@0 431
michael@0 432 lscaleend:
michael@0 433 cmp ebx, [esp + 32 + 20] // loop while x < source_width
michael@0 434 jl lscaleloop
michael@0 435 popad
michael@0 436 ret
michael@0 437
michael@0 438 lscalelastpixel:
michael@0 439 paddsw mm1, mm0
michael@0 440 psraw mm1, 6
michael@0 441 packuswb mm1, mm1
michael@0 442 movd [ebp], mm1 // 4-byte store: one pixel
michael@0 443 popad
michael@0 444 ret
michael@0 445 };
michael@0 446 }
michael@0 447 #endif // if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
michael@0 448
// Public entry point for unscaled row conversion.
//
// When the build defines both MOZILLA_MAY_SUPPORT_SSE and _M_IX86 and
// mozilla::supports_sse() returns true at run time, the work is done by
// FastConvertYUVToRGB32Row_SSE. In every other case it falls back to
// FastConvertYUVToRGB32Row_C.
// NOTE(review): the fallback is passed an extra trailing argument of 1 whose
// meaning is not visible in this file -- confirm against its declaration.
michael@0 449 void FastConvertYUVToRGB32Row(const uint8* y_buf,
michael@0 450 const uint8* u_buf,
michael@0 451 const uint8* v_buf,
michael@0 452 uint8* rgb_buf,
michael@0 453 int width) {
michael@0 454 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
michael@0 455 if (mozilla::supports_sse()) { // run-time CPU feature check
michael@0 456 FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
michael@0 457 return;
michael@0 458 }
michael@0 459 #endif
michael@0 460
michael@0 461 FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
michael@0 462 }
michael@0 463
// Public entry point for point-sampled scaled row conversion.
//
// When the build defines both MOZILLA_MAY_SUPPORT_SSE and _M_IX86 and
// mozilla::supports_sse() returns true at run time, the work is done by
// ScaleYUVToRGB32Row_SSE. In every other case it falls back to
// ScaleYUVToRGB32Row_C, called with the same arguments.
michael@0 464 void ScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 465 const uint8* u_buf,
michael@0 466 const uint8* v_buf,
michael@0 467 uint8* rgb_buf,
michael@0 468 int width,
michael@0 469 int source_dx) {
michael@0 470
michael@0 471 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
michael@0 472 if (mozilla::supports_sse()) { // run-time CPU feature check
michael@0 473 ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 474 return;
michael@0 475 }
michael@0 476 #endif
michael@0 477
michael@0 478 ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 479 }
michael@0 480
// Public entry point for linearly interpolated scaled row conversion.
//
// When the build defines both MOZILLA_MAY_SUPPORT_SSE and _M_IX86 and
// mozilla::supports_sse() returns true at run time, the work is done by
// LinearScaleYUVToRGB32Row_SSE. In every other case it falls back to
// LinearScaleYUVToRGB32Row_C, called with the same arguments.
michael@0 481 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 482 const uint8* u_buf,
michael@0 483 const uint8* v_buf,
michael@0 484 uint8* rgb_buf,
michael@0 485 int width,
michael@0 486 int source_dx) {
michael@0 487 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86)
michael@0 488 if (mozilla::supports_sse()) { // run-time CPU feature check
michael@0 489 LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
michael@0 490 source_dx);
michael@0 491 return;
michael@0 492 }
michael@0 493 #endif
michael@0 494
michael@0 495 LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 496 }
michael@0 497
michael@0 498 } // extern "C"

mercurial