gfx/ycbcr/yuv_row_posix.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirements
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
michael@0 2 // Use of this source code is governed by a BSD-style license that can be
michael@0 3 // found in the LICENSE file.
michael@0 4
michael@0 5 #include "yuv_row.h"
michael@0 6 #include "mozilla/SSE.h"
michael@0 7
michael@0 8 #define DCHECK(a)
michael@0 9
michael@0 10 extern "C" {
michael@0 11
michael@0 12 #if defined(ARCH_CPU_X86_64)
michael@0 13
michael@0 14 // We don't need CPUID guards here, since x86-64 implies SSE2.
michael@0 15
// Converts a row of planar YUV samples to 4-byte output pixels with the
// kCoefficientsRgbY lookup table (x86-64 inline assembly).
//
// Main loop (label 0): consumes one U byte, one V byte and two Y bytes, and
// stores 8 bytes (two pixels) to rgb_buf. Tail (after label 2): if one pixel
// is left, it is converted alone and stored as 4 bytes with movd.
//
// Table layout used here: 8-byte entries, Y terms at byte offset 0, U terms
// at 2048 and V terms at 4096. The terms are summed with saturating 16-bit
// adds (paddsw), shifted right arithmetically by 6 (psraw) and packed to
// unsigned bytes (packuswb).
//
// NOTE(review): operands %0-%4 are modified inside the asm but are declared
// as input-only "r" operands, which GCC's Extended Asm rules do not allow.
// Switching them to "+r" would also require `asm volatile` -- confirm.
michael@0 16 // AMD64 ABI uses register parameters.
michael@0 17 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
michael@0 18 const uint8* u_buf, // rsi
michael@0 19 const uint8* v_buf, // rdx
michael@0 20 uint8* rgb_buf, // rcx
michael@0 21 int width) { // r8
michael@0 22 asm(
// Enter at the loop test so that width < 2 skips the two-pixel body.
michael@0 23 "jmp 1f\n"
// Two-pixel body: r10/r11 first hold the U and V bytes, then the two Y bytes.
michael@0 24 "0:"
michael@0 25 "movzb (%1),%%r10\n"
michael@0 26 "add $0x1,%1\n"
michael@0 27 "movzb (%2),%%r11\n"
michael@0 28 "add $0x1,%2\n"
michael@0 29 "movq 2048(%5,%%r10,8),%%xmm0\n"
michael@0 30 "movzb (%0),%%r10\n"
michael@0 31 "movq 4096(%5,%%r11,8),%%xmm1\n"
michael@0 32 "movzb 0x1(%0),%%r11\n"
michael@0 33 "paddsw %%xmm1,%%xmm0\n"
michael@0 34 "movq (%5,%%r10,8),%%xmm2\n"
michael@0 35 "add $0x2,%0\n"
michael@0 36 "movq (%5,%%r11,8),%%xmm3\n"
michael@0 37 "paddsw %%xmm0,%%xmm2\n"
michael@0 38 "paddsw %%xmm0,%%xmm3\n"
// shufps $0x44 places the low 64 bits of xmm3 in the high half of xmm2.
michael@0 39 "shufps $0x44,%%xmm3,%%xmm2\n"
michael@0 40 "psraw $0x6,%%xmm2\n"
michael@0 41 "packuswb %%xmm2,%%xmm2\n"
michael@0 42 "movq %%xmm2,0x0(%3)\n"
michael@0 43 "add $0x8,%3\n"
michael@0 44 "1:"
michael@0 45 "sub $0x2,%4\n"
michael@0 46 "jns 0b\n"
michael@0 47
// width is now -1 (one pixel left) or -2 (none); after +1 only the latter
// is negative and skips the single-pixel tail.
michael@0 48 "2:"
michael@0 49 "add $0x1,%4\n"
michael@0 50 "js 3f\n"
michael@0 51
michael@0 52 "movzb (%1),%%r10\n"
michael@0 53 "movq 2048(%5,%%r10,8),%%xmm0\n"
michael@0 54 "movzb (%2),%%r10\n"
michael@0 55 "movq 4096(%5,%%r10,8),%%xmm1\n"
michael@0 56 "paddsw %%xmm1,%%xmm0\n"
michael@0 57 "movzb (%0),%%r10\n"
michael@0 58 "movq (%5,%%r10,8),%%xmm1\n"
michael@0 59 "paddsw %%xmm0,%%xmm1\n"
michael@0 60 "psraw $0x6,%%xmm1\n"
michael@0 61 "packuswb %%xmm1,%%xmm1\n"
michael@0 62 "movd %%xmm1,0x0(%3)\n"
michael@0 63 "3:"
michael@0 64 :
michael@0 65 : "r"(y_buf), // %0
michael@0 66 "r"(u_buf), // %1
michael@0 67 "r"(v_buf), // %2
michael@0 68 "r"(rgb_buf), // %3
michael@0 69 "r"(width), // %4
michael@0 70 "r" (kCoefficientsRgbY) // %5
michael@0 71 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
michael@0 72 );
michael@0 73 }
michael@0 74
// Converts a row of YUV samples to 4-byte output pixels while stepping
// through the source by source_dx per output pixel (x86-64 inline assembly,
// nearest-sample selection -- no blending between neighbouring samples).
//
// r11 holds the running source position x, starting at 0. The Y sample is
// read at index x >> 16 and the U/V samples at index x >> 17.
// NOTE(review): this implies source_dx has 16 fractional bits (0x20000 is
// annotated as 2.0 in LinearScaleYUVToRGB32Row) -- confirm against callers.
//
// Main loop (label 0) emits two pixels (8 bytes) per iteration, sharing one
// U/V lookup; the tail after label 1 emits one 4-byte pixel if one is left.
//
// NOTE(review): operands %3 and %4 are modified inside the asm but are
// declared as input-only "r" operands, which GCC's Extended Asm rules do not
// allow -- confirm.
michael@0 75 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
michael@0 76 const uint8* u_buf, // rsi
michael@0 77 const uint8* v_buf, // rdx
michael@0 78 uint8* rgb_buf, // rcx
michael@0 79 int width, // r8
michael@0 80 int source_dx) { // r9
michael@0 81 asm(
// x = 0; skip the two-pixel loop when width < 2.
michael@0 82 "xor %%r11,%%r11\n"
michael@0 83 "sub $0x2,%4\n"
michael@0 84 "js 1f\n"
michael@0 85
michael@0 86 "0:"
// Chroma lookup at x >> 17 (rax is the scratch register for sample bytes).
michael@0 87 "mov %%r11,%%r10\n"
michael@0 88 "sar $0x11,%%r10\n"
michael@0 89 "movzb (%1,%%r10,1),%%rax\n"
michael@0 90 "movq 2048(%5,%%rax,8),%%xmm0\n"
michael@0 91 "movzb (%2,%%r10,1),%%rax\n"
michael@0 92 "movq 4096(%5,%%rax,8),%%xmm1\n"
// r10 = x + source_dx (position of the second pixel); first luma at x >> 16.
michael@0 93 "lea (%%r11,%6),%%r10\n"
michael@0 94 "sar $0x10,%%r11\n"
michael@0 95 "movzb (%0,%%r11,1),%%rax\n"
michael@0 96 "paddsw %%xmm1,%%xmm0\n"
michael@0 97 "movq (%5,%%rax,8),%%xmm1\n"
// r11 = position for the next iteration; second luma at r10 >> 16.
michael@0 98 "lea (%%r10,%6),%%r11\n"
michael@0 99 "sar $0x10,%%r10\n"
michael@0 100 "movzb (%0,%%r10,1),%%rax\n"
michael@0 101 "movq (%5,%%rax,8),%%xmm2\n"
michael@0 102 "paddsw %%xmm0,%%xmm1\n"
michael@0 103 "paddsw %%xmm0,%%xmm2\n"
michael@0 104 "shufps $0x44,%%xmm2,%%xmm1\n"
michael@0 105 "psraw $0x6,%%xmm1\n"
michael@0 106 "packuswb %%xmm1,%%xmm1\n"
michael@0 107 "movq %%xmm1,0x0(%3)\n"
michael@0 108 "add $0x8,%3\n"
michael@0 109 "sub $0x2,%4\n"
michael@0 110 "jns 0b\n"
michael@0 111
// Tail: runs only when exactly one pixel remains.
michael@0 112 "1:"
michael@0 113 "add $0x1,%4\n"
michael@0 114 "js 2f\n"
michael@0 115
michael@0 116 "mov %%r11,%%r10\n"
michael@0 117 "sar $0x11,%%r10\n"
michael@0 118 "movzb (%1,%%r10,1),%%rax\n"
michael@0 119 "movq 2048(%5,%%rax,8),%%xmm0\n"
michael@0 120 "movzb (%2,%%r10,1),%%rax\n"
michael@0 121 "movq 4096(%5,%%rax,8),%%xmm1\n"
michael@0 122 "paddsw %%xmm1,%%xmm0\n"
michael@0 123 "sar $0x10,%%r11\n"
michael@0 124 "movzb (%0,%%r11,1),%%rax\n"
michael@0 125 "movq (%5,%%rax,8),%%xmm1\n"
michael@0 126 "paddsw %%xmm0,%%xmm1\n"
michael@0 127 "psraw $0x6,%%xmm1\n"
michael@0 128 "packuswb %%xmm1,%%xmm1\n"
michael@0 129 "movd %%xmm1,0x0(%3)\n"
michael@0 130
michael@0 131 "2:"
michael@0 132 :
michael@0 133 : "r"(y_buf), // %0
michael@0 134 "r"(u_buf), // %1
michael@0 135 "r"(v_buf), // %2
michael@0 136 "r"(rgb_buf), // %3
michael@0 137 "r"(width), // %4
michael@0 138 "r" (kCoefficientsRgbY), // %5
michael@0 139 "r"(static_cast<long>(source_dx)) // %6
michael@0 140 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
michael@0 141 );
michael@0 142 }
michael@0 143
// Converts a row of YUV samples to 4-byte output pixels while stepping
// through the source by source_dx, blending each sample with its right-hand
// neighbour according to the fractional part of the position (x86-64 inline
// assembly).
//
// r11 holds the running source position x. For each lookup, r13 and r14 are
// loaded with the sample at the integer index and the one at index + 1; they
// are weighted by the complement of the fraction and by the fraction, summed
// and shifted back down. Chroma uses x >> 17 as index with a 17-bit fraction
// mask (0x1fffe); luma uses x >> 16 with a 16-bit mask (0xffff).
//
// Main loop (label 1) emits two pixels (8 bytes) per iteration. The tail
// after label 2 emits a single 4-byte pixel and does not blend.
//
// NOTE(review): the loop reads the byte at index + 1 of each plane, i.e. one
// byte beyond the sample being converted -- callers presumably guarantee
// that byte is readable; confirm.
// NOTE(review): operands %3 and %4 are modified inside the asm but are
// declared as input-only "r" operands, which GCC's Extended Asm rules do not
// allow -- confirm.
michael@0 144 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 145 const uint8* u_buf,
michael@0 146 const uint8* v_buf,
michael@0 147 uint8* rgb_buf,
michael@0 148 int width,
michael@0 149 int source_dx) {
michael@0 150 asm(
michael@0 151 "xor %%r11,%%r11\n" // x = 0
michael@0 152 "sub $0x2,%4\n"
michael@0 153 "js 2f\n"
michael@0 154 "cmp $0x20000,%6\n" // if source_dx >= 2.0
michael@0 155 "jl 0f\n"
michael@0 156 "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
michael@0 157 "0:"
michael@0 158
michael@0 159 "1:"
michael@0 160 "mov %%r11,%%r10\n"
michael@0 161 "sar $0x11,%%r10\n"
michael@0 162
// Blend the U samples at index r10 and r10 + 1, then look up the U term.
michael@0 163 "movzb (%1, %%r10, 1), %%r13 \n"
michael@0 164 "movzb 1(%1, %%r10, 1), %%r14 \n"
michael@0 165 "mov %%r11, %%rax \n"
michael@0 166 "and $0x1fffe, %%rax \n"
michael@0 167 "imul %%rax, %%r14 \n"
michael@0 168 "xor $0x1fffe, %%rax \n"
michael@0 169 "imul %%rax, %%r13 \n"
michael@0 170 "add %%r14, %%r13 \n"
michael@0 171 "shr $17, %%r13 \n"
michael@0 172 "movq 2048(%5,%%r13,8), %%xmm0\n"
michael@0 173
// Same blend for the V samples.
michael@0 174 "movzb (%2, %%r10, 1), %%r13 \n"
michael@0 175 "movzb 1(%2, %%r10, 1), %%r14 \n"
michael@0 176 "mov %%r11, %%rax \n"
michael@0 177 "and $0x1fffe, %%rax \n"
michael@0 178 "imul %%rax, %%r14 \n"
michael@0 179 "xor $0x1fffe, %%rax \n"
michael@0 180 "imul %%rax, %%r13 \n"
michael@0 181 "add %%r14, %%r13 \n"
michael@0 182 "shr $17, %%r13 \n"
michael@0 183 "movq 4096(%5,%%r13,8), %%xmm1\n"
michael@0 184
// rax keeps x for the fraction; r10 = x + source_dx; r11 becomes the Y index.
michael@0 185 "mov %%r11, %%rax \n"
michael@0 186 "lea (%%r11,%6),%%r10\n"
michael@0 187 "sar $0x10,%%r11\n"
michael@0 188 "paddsw %%xmm1,%%xmm0\n"
michael@0 189
// First output pixel: blended Y sample.
michael@0 190 "movzb (%0, %%r11, 1), %%r13 \n"
michael@0 191 "movzb 1(%0, %%r11, 1), %%r14 \n"
michael@0 192 "and $0xffff, %%rax \n"
michael@0 193 "imul %%rax, %%r14 \n"
michael@0 194 "xor $0xffff, %%rax \n"
michael@0 195 "imul %%rax, %%r13 \n"
michael@0 196 "add %%r14, %%r13 \n"
michael@0 197 "shr $16, %%r13 \n"
michael@0 198 "movq (%5,%%r13,8),%%xmm1\n"
michael@0 199
// Second output pixel: r11 = position for the next iteration.
michael@0 200 "mov %%r10, %%rax \n"
michael@0 201 "lea (%%r10,%6),%%r11\n"
michael@0 202 "sar $0x10,%%r10\n"
michael@0 203
michael@0 204 "movzb (%0,%%r10,1), %%r13 \n"
michael@0 205 "movzb 1(%0,%%r10,1), %%r14 \n"
michael@0 206 "and $0xffff, %%rax \n"
michael@0 207 "imul %%rax, %%r14 \n"
michael@0 208 "xor $0xffff, %%rax \n"
michael@0 209 "imul %%rax, %%r13 \n"
michael@0 210 "add %%r14, %%r13 \n"
michael@0 211 "shr $16, %%r13 \n"
michael@0 212 "movq (%5,%%r13,8),%%xmm2\n"
michael@0 213
michael@0 214 "paddsw %%xmm0,%%xmm1\n"
michael@0 215 "paddsw %%xmm0,%%xmm2\n"
michael@0 216 "shufps $0x44,%%xmm2,%%xmm1\n"
michael@0 217 "psraw $0x6,%%xmm1\n"
michael@0 218 "packuswb %%xmm1,%%xmm1\n"
michael@0 219 "movq %%xmm1,0x0(%3)\n"
michael@0 220 "add $0x8,%3\n"
michael@0 221 "sub $0x2,%4\n"
michael@0 222 "jns 1b\n"
michael@0 223
// Tail: one remaining pixel, converted from the unblended samples.
michael@0 224 "2:"
michael@0 225 "add $0x1,%4\n"
michael@0 226 "js 3f\n"
michael@0 227
michael@0 228 "mov %%r11,%%r10\n"
michael@0 229 "sar $0x11,%%r10\n"
michael@0 230
michael@0 231 "movzb (%1,%%r10,1), %%r13 \n"
michael@0 232 "movq 2048(%5,%%r13,8),%%xmm0\n"
michael@0 233
michael@0 234 "movzb (%2,%%r10,1), %%r13 \n"
michael@0 235 "movq 4096(%5,%%r13,8),%%xmm1\n"
michael@0 236
michael@0 237 "paddsw %%xmm1,%%xmm0\n"
michael@0 238 "sar $0x10,%%r11\n"
michael@0 239
michael@0 240 "movzb (%0,%%r11,1), %%r13 \n"
michael@0 241 "movq (%5,%%r13,8),%%xmm1\n"
michael@0 242
michael@0 243 "paddsw %%xmm0,%%xmm1\n"
michael@0 244 "psraw $0x6,%%xmm1\n"
michael@0 245 "packuswb %%xmm1,%%xmm1\n"
michael@0 246 "movd %%xmm1,0x0(%3)\n"
michael@0 247
michael@0 248 "3:"
michael@0 249 :
michael@0 250 : "r"(y_buf), // %0
michael@0 251 "r"(u_buf), // %1
michael@0 252 "r"(v_buf), // %2
michael@0 253 "r"(rgb_buf), // %3
michael@0 254 "r"(width), // %4
michael@0 255 "r" (kCoefficientsRgbY), // %5
michael@0 256 "r"(static_cast<long>(source_dx)) // %6
michael@0 257 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
michael@0 258 );
michael@0 259 }
michael@0 260
michael@0 261 #elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)
michael@0 262
// FastConvertYUVToRGB32Row_SSE: hand-written 32-bit x86 routine defined in a
// top-level asm block; the C prototype below only declares it.
//
// pusha pushes eight 32-bit registers, so the stack arguments start at
// 0x24(%esp). They are loaded in prototype order into edx (y_buf),
// edi (u_buf), esi (v_buf), ebp (rgb_buf) and ecx (width).
// NOTE(review): this assumes all arguments are passed on the stack -- confirm.
//
// The loop converts two pixels per iteration through the kCoefficientsRgbY
// table (absolute addressing, hence non-PIC only) using MMX registers and
// writes them with the non-temporal store movntq; an odd trailing pixel is
// written with movd.
//
// NOTE(review): no `emms` is executed before `ret`, so MMX state is left
// active on return -- confirm that callers handle this.
michael@0 263 // PIC version is slower because fewer registers are available, so
michael@0 264 // non-PIC is used on platforms where it is possible.
michael@0 265 void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 266 const uint8* u_buf,
michael@0 267 const uint8* v_buf,
michael@0 268 uint8* rgb_buf,
michael@0 269 int width);
michael@0 270 asm(
michael@0 271 ".text\n"
michael@0 272 ".global FastConvertYUVToRGB32Row_SSE\n"
michael@0 273 ".type FastConvertYUVToRGB32Row_SSE, @function\n"
michael@0 274 "FastConvertYUVToRGB32Row_SSE:\n"
michael@0 275 "pusha\n"
michael@0 276 "mov 0x24(%esp),%edx\n"
michael@0 277 "mov 0x28(%esp),%edi\n"
michael@0 278 "mov 0x2c(%esp),%esi\n"
michael@0 279 "mov 0x30(%esp),%ebp\n"
michael@0 280 "mov 0x34(%esp),%ecx\n"
michael@0 281 "jmp 1f\n"
michael@0 282
// Two-pixel body: eax/ebx hold the U and V bytes, then the two Y bytes.
michael@0 283 "0:"
michael@0 284 "movzbl (%edi),%eax\n"
michael@0 285 "add $0x1,%edi\n"
michael@0 286 "movzbl (%esi),%ebx\n"
michael@0 287 "add $0x1,%esi\n"
michael@0 288 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
michael@0 289 "movzbl (%edx),%eax\n"
michael@0 290 "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
michael@0 291 "movzbl 0x1(%edx),%ebx\n"
michael@0 292 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
michael@0 293 "add $0x2,%edx\n"
michael@0 294 "movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
michael@0 295 "paddsw %mm0,%mm1\n"
michael@0 296 "paddsw %mm0,%mm2\n"
michael@0 297 "psraw $0x6,%mm1\n"
michael@0 298 "psraw $0x6,%mm2\n"
michael@0 299 "packuswb %mm2,%mm1\n"
michael@0 300 "movntq %mm1,0x0(%ebp)\n"
michael@0 301 "add $0x8,%ebp\n"
michael@0 302 "1:"
michael@0 303 "sub $0x2,%ecx\n"
michael@0 304 "jns 0b\n"
michael@0 305
// ecx is -1 when one pixel is left and -2 when none is; bit 0 tells them apart.
michael@0 306 "and $0x1,%ecx\n"
michael@0 307 "je 2f\n"
michael@0 308
michael@0 309 "movzbl (%edi),%eax\n"
michael@0 310 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
michael@0 311 "movzbl (%esi),%eax\n"
michael@0 312 "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
michael@0 313 "movzbl (%edx),%eax\n"
michael@0 314 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
michael@0 315 "paddsw %mm0,%mm1\n"
michael@0 316 "psraw $0x6,%mm1\n"
michael@0 317 "packuswb %mm1,%mm1\n"
michael@0 318 "movd %mm1,0x0(%ebp)\n"
michael@0 319 "2:"
michael@0 320 "popa\n"
michael@0 321 "ret\n"
michael@0 322 #if !defined(XP_MACOSX)
michael@0 323 ".previous\n"
michael@0 324 #endif
michael@0 325 );
michael@0 326
michael@0 327 void FastConvertYUVToRGB32Row(const uint8* y_buf,
michael@0 328 const uint8* u_buf,
michael@0 329 const uint8* v_buf,
michael@0 330 uint8* rgb_buf,
michael@0 331 int width)
michael@0 332 {
michael@0 333 if (mozilla::supports_sse()) {
michael@0 334 FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width);
michael@0 335 return;
michael@0 336 }
michael@0 337
michael@0 338 FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
michael@0 339 }
michael@0 340
michael@0 341
// ScaleYUVToRGB32Row_SSE: hand-written 32-bit x86 routine defined in a
// top-level asm block; the C prototype below only declares it.
//
// After pusha the stack arguments start at 0x24(%esp): edx = y_buf,
// edi = u_buf, esi = v_buf, ebp = rgb_buf, ecx = width. source_dx is not
// kept in a register; it is added from its stack slot 0x38(%esp).
// NOTE(review): this assumes all arguments are passed on the stack -- confirm.
//
// ebx holds the running source position x, starting at 0. Y is read at index
// x >> 16 and U/V at x >> 17 (nearest sample, no blending). Two pixels are
// produced per loop iteration and written with movntq; an odd trailing pixel
// is written with movd.
//
// NOTE(review): no `emms` is executed before `ret`, so MMX state is left
// active on return -- confirm that callers handle this.
michael@0 342 void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 343 const uint8* u_buf,
michael@0 344 const uint8* v_buf,
michael@0 345 uint8* rgb_buf,
michael@0 346 int width,
michael@0 347 int source_dx);
michael@0 348 asm(
michael@0 349 ".text\n"
michael@0 350 ".global ScaleYUVToRGB32Row_SSE\n"
michael@0 351 ".type ScaleYUVToRGB32Row_SSE, @function\n"
michael@0 352 "ScaleYUVToRGB32Row_SSE:\n"
michael@0 353 "pusha\n"
michael@0 354 "mov 0x24(%esp),%edx\n"
michael@0 355 "mov 0x28(%esp),%edi\n"
michael@0 356 "mov 0x2c(%esp),%esi\n"
michael@0 357 "mov 0x30(%esp),%ebp\n"
michael@0 358 "mov 0x34(%esp),%ecx\n"
michael@0 359 "xor %ebx,%ebx\n"
michael@0 360 "jmp 1f\n"
michael@0 361
michael@0 362 "0:"
// U and V terms, both indexed by x >> 17.
michael@0 363 "mov %ebx,%eax\n"
michael@0 364 "sar $0x11,%eax\n"
michael@0 365 "movzbl (%edi,%eax,1),%eax\n"
michael@0 366 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
michael@0 367 "mov %ebx,%eax\n"
michael@0 368 "sar $0x11,%eax\n"
michael@0 369 "movzbl (%esi,%eax,1),%eax\n"
michael@0 370 "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
// First Y term at x >> 16; x then advances by source_dx.
michael@0 371 "mov %ebx,%eax\n"
michael@0 372 "add 0x38(%esp),%ebx\n"
michael@0 373 "sar $0x10,%eax\n"
michael@0 374 "movzbl (%edx,%eax,1),%eax\n"
michael@0 375 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
// Second Y term, followed by another advance of x.
michael@0 376 "mov %ebx,%eax\n"
michael@0 377 "add 0x38(%esp),%ebx\n"
michael@0 378 "sar $0x10,%eax\n"
michael@0 379 "movzbl (%edx,%eax,1),%eax\n"
michael@0 380 "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
michael@0 381 "paddsw %mm0,%mm1\n"
michael@0 382 "paddsw %mm0,%mm2\n"
michael@0 383 "psraw $0x6,%mm1\n"
michael@0 384 "psraw $0x6,%mm2\n"
michael@0 385 "packuswb %mm2,%mm1\n"
michael@0 386 "movntq %mm1,0x0(%ebp)\n"
michael@0 387 "add $0x8,%ebp\n"
michael@0 388 "1:"
michael@0 389 "sub $0x2,%ecx\n"
michael@0 390 "jns 0b\n"
michael@0 391
// ecx is -1 when one pixel is left and -2 when none is; bit 0 tells them apart.
michael@0 392 "and $0x1,%ecx\n"
michael@0 393 "je 2f\n"
michael@0 394
michael@0 395 "mov %ebx,%eax\n"
michael@0 396 "sar $0x11,%eax\n"
michael@0 397 "movzbl (%edi,%eax,1),%eax\n"
michael@0 398 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
michael@0 399 "mov %ebx,%eax\n"
michael@0 400 "sar $0x11,%eax\n"
michael@0 401 "movzbl (%esi,%eax,1),%eax\n"
michael@0 402 "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
michael@0 403 "mov %ebx,%eax\n"
michael@0 404 "sar $0x10,%eax\n"
michael@0 405 "movzbl (%edx,%eax,1),%eax\n"
michael@0 406 "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
michael@0 407 "paddsw %mm0,%mm1\n"
michael@0 408 "psraw $0x6,%mm1\n"
michael@0 409 "packuswb %mm1,%mm1\n"
michael@0 410 "movd %mm1,0x0(%ebp)\n"
michael@0 411
michael@0 412 "2:"
michael@0 413 "popa\n"
michael@0 414 "ret\n"
michael@0 415 #if !defined(XP_MACOSX)
michael@0 416 ".previous\n"
michael@0 417 #endif
michael@0 418 );
michael@0 419
michael@0 420 void ScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 421 const uint8* u_buf,
michael@0 422 const uint8* v_buf,
michael@0 423 uint8* rgb_buf,
michael@0 424 int width,
michael@0 425 int source_dx)
michael@0 426 {
michael@0 427 if (mozilla::supports_sse()) {
michael@0 428 ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
michael@0 429 width, source_dx);
michael@0 430 }
michael@0 431
michael@0 432 ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
michael@0 433 width, source_dx);
michael@0 434 }
michael@0 435
// LinearScaleYUVToRGB32Row_SSE: hand-written 32-bit x86 routine defined in a
// top-level asm block; the C prototype below only declares it.
//
// After pusha the stack arguments start at 0x24(%esp): edx = y_buf,
// edi = u_buf, ebp = rgb_buf. esi is used as scratch, so v_buf is reloaded
// from 0x2c(%esp) in every iteration. The width slot 0x34(%esp) is
// overwritten with width * source_dx and serves as the loop bound for the
// running source position in ebx.
// NOTE(review): this assumes all arguments are passed on the stack -- confirm.
//
// Each lookup blends the sample at the integer index with the sample at
// index + 1, weighted by the fractional bits of ebx (17-bit mask and shift
// for U/V, 16-bit for Y). Two pixels are written per iteration with movntq;
// label 2 handles the case where the bound is reached after the first pixel
// and writes that one pixel with movd.
//
// NOTE(review): the loop reads the byte at index + 1 of each plane --
// callers presumably guarantee it is readable; confirm.
// NOTE(review): no `emms` is executed before either `ret` -- confirm that
// callers handle this.
michael@0 436 void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 437 const uint8* u_buf,
michael@0 438 const uint8* v_buf,
michael@0 439 uint8* rgb_buf,
michael@0 440 int width,
michael@0 441 int source_dx);
michael@0 442 asm(
michael@0 443 ".text\n"
michael@0 444 ".global LinearScaleYUVToRGB32Row_SSE\n"
michael@0 445 ".type LinearScaleYUVToRGB32Row_SSE, @function\n"
michael@0 446 "LinearScaleYUVToRGB32Row_SSE:\n"
michael@0 447 "pusha\n"
michael@0 448 "mov 0x24(%esp),%edx\n"
michael@0 449 "mov 0x28(%esp),%edi\n"
michael@0 450 "mov 0x30(%esp),%ebp\n"
michael@0 451
// NOTE(review): the code below stores width * source_dx only; the "+ ebx"
// in the next comment is not applied, so the bound does not move when x
// starts at 0x8000 -- confirm this is intended.
michael@0 452 // source_width = width * source_dx + ebx
michael@0 453 "mov 0x34(%esp), %ecx\n"
michael@0 454 "imull 0x38(%esp), %ecx\n"
michael@0 455 "mov %ecx, 0x34(%esp)\n"
michael@0 456
michael@0 457 "mov 0x38(%esp), %ecx\n"
michael@0 458 "xor %ebx,%ebx\n" // x = 0
michael@0 459 "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
michael@0 460 "jl 1f\n"
michael@0 461 "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
michael@0 462 "jmp 1f\n"
michael@0 463
michael@0 464 "0:"
michael@0 465 "mov %ebx,%eax\n"
michael@0 466 "sar $0x11,%eax\n"
michael@0 467
// Blend the two neighbouring U samples, then look up the U term.
michael@0 468 "movzbl (%edi,%eax,1),%ecx\n"
michael@0 469 "movzbl 1(%edi,%eax,1),%esi\n"
michael@0 470 "mov %ebx,%eax\n"
michael@0 471 "andl $0x1fffe, %eax \n"
michael@0 472 "imul %eax, %esi \n"
michael@0 473 "xorl $0x1fffe, %eax \n"
michael@0 474 "imul %eax, %ecx \n"
michael@0 475 "addl %esi, %ecx \n"
michael@0 476 "shrl $17, %ecx \n"
michael@0 477 "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
michael@0 478
// esi was clobbered above, so v_buf is fetched from the stack again.
michael@0 479 "mov 0x2c(%esp),%esi\n"
michael@0 480 "mov %ebx,%eax\n"
michael@0 481 "sar $0x11,%eax\n"
michael@0 482
michael@0 483 "movzbl (%esi,%eax,1),%ecx\n"
michael@0 484 "movzbl 1(%esi,%eax,1),%esi\n"
michael@0 485 "mov %ebx,%eax\n"
michael@0 486 "andl $0x1fffe, %eax \n"
michael@0 487 "imul %eax, %esi \n"
michael@0 488 "xorl $0x1fffe, %eax \n"
michael@0 489 "imul %eax, %ecx \n"
michael@0 490 "addl %esi, %ecx \n"
michael@0 491 "shrl $17, %ecx \n"
michael@0 492 "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
michael@0 493
// First pixel: blended Y sample; x advances by source_dx.
michael@0 494 "mov %ebx,%eax\n"
michael@0 495 "sar $0x10,%eax\n"
michael@0 496 "movzbl (%edx,%eax,1),%ecx\n"
michael@0 497 "movzbl 1(%edx,%eax,1),%esi\n"
michael@0 498 "mov %ebx,%eax\n"
michael@0 499 "add 0x38(%esp),%ebx\n"
michael@0 500 "andl $0xffff, %eax \n"
michael@0 501 "imul %eax, %esi \n"
michael@0 502 "xorl $0xffff, %eax \n"
michael@0 503 "imul %eax, %ecx \n"
michael@0 504 "addl %esi, %ecx \n"
michael@0 505 "shrl $16, %ecx \n"
michael@0 506 "movq kCoefficientsRgbY(,%ecx,8),%mm1\n"
michael@0 507
// Bound reached after the first pixel: finish it alone at label 2.
michael@0 508 "cmp 0x34(%esp), %ebx\n"
michael@0 509 "jge 2f\n"
michael@0 510
// Second pixel: blended Y sample; x advances again.
michael@0 511 "mov %ebx,%eax\n"
michael@0 512 "sar $0x10,%eax\n"
michael@0 513 "movzbl (%edx,%eax,1),%ecx\n"
michael@0 514 "movzbl 1(%edx,%eax,1),%esi\n"
michael@0 515 "mov %ebx,%eax\n"
michael@0 516 "add 0x38(%esp),%ebx\n"
michael@0 517 "andl $0xffff, %eax \n"
michael@0 518 "imul %eax, %esi \n"
michael@0 519 "xorl $0xffff, %eax \n"
michael@0 520 "imul %eax, %ecx \n"
michael@0 521 "addl %esi, %ecx \n"
michael@0 522 "shrl $16, %ecx \n"
michael@0 523 "movq kCoefficientsRgbY(,%ecx,8),%mm2\n"
michael@0 524
michael@0 525 "paddsw %mm0,%mm1\n"
michael@0 526 "paddsw %mm0,%mm2\n"
michael@0 527 "psraw $0x6,%mm1\n"
michael@0 528 "psraw $0x6,%mm2\n"
michael@0 529 "packuswb %mm2,%mm1\n"
michael@0 530 "movntq %mm1,0x0(%ebp)\n"
michael@0 531 "add $0x8,%ebp\n"
michael@0 532
// Loop while x (ebx) is below the bound stored in 0x34(%esp).
michael@0 533 "1:"
michael@0 534 "cmp 0x34(%esp), %ebx\n"
michael@0 535 "jl 0b\n"
michael@0 536 "popa\n"
michael@0 537 "ret\n"
michael@0 538
// Single trailing pixel: mm1 holds its Y term, mm0 the shared U+V term.
michael@0 539 "2:"
michael@0 540 "paddsw %mm0, %mm1\n"
michael@0 541 "psraw $6, %mm1\n"
michael@0 542 "packuswb %mm1, %mm1\n"
michael@0 543 "movd %mm1, (%ebp)\n"
michael@0 544 "popa\n"
michael@0 545 "ret\n"
michael@0 546 #if !defined(XP_MACOSX)
michael@0 547 ".previous\n"
michael@0 548 #endif
michael@0 549 );
michael@0 550
michael@0 551 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 552 const uint8* u_buf,
michael@0 553 const uint8* v_buf,
michael@0 554 uint8* rgb_buf,
michael@0 555 int width,
michael@0 556 int source_dx)
michael@0 557 {
michael@0 558 if (mozilla::supports_sse()) {
michael@0 559 LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf,
michael@0 560 width, source_dx);
michael@0 561 }
michael@0 562
michael@0 563 LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf,
michael@0 564 width, source_dx);
michael@0 565 }
michael@0 566
michael@0 567 #elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__)
michael@0 568
// PICConvertYUVToRGB32Row_SSE: position-independent variant of the unscaled
// row conversion, defined in a top-level asm block; the C prototype below
// only declares it. The coefficient table is passed in as the last argument
// instead of being addressed by symbol.
//
// After pusha the stack arguments start at 0x24(%esp): edx = y_buf,
// edi = u_buf, esi = v_buf, ebp = rgb_buf, ecx = table pointer (0x38). With
// ecx taken by the table, width stays in its stack slot 0x34(%esp) and is
// decremented there.
// NOTE(review): this assumes all arguments are passed on the stack -- confirm.
//
// No `.global` directive is emitted, so the label is only visible inside
// this object file; the macOS variant carries a leading underscore.
//
// NOTE(review): no `emms` is executed before `ret` -- confirm that callers
// handle this.
michael@0 569 void PICConvertYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 570 const uint8* u_buf,
michael@0 571 const uint8* v_buf,
michael@0 572 uint8* rgb_buf,
michael@0 573 int width,
michael@0 574 int16 *kCoefficientsRgbY);
michael@0 575
michael@0 576 asm(
michael@0 577 ".text\n"
michael@0 578 #if defined(XP_MACOSX)
michael@0 579 "_PICConvertYUVToRGB32Row_SSE:\n"
michael@0 580 #else
michael@0 581 "PICConvertYUVToRGB32Row_SSE:\n"
michael@0 582 #endif
michael@0 583 "pusha\n"
michael@0 584 "mov 0x24(%esp),%edx\n"
michael@0 585 "mov 0x28(%esp),%edi\n"
michael@0 586 "mov 0x2c(%esp),%esi\n"
michael@0 587 "mov 0x30(%esp),%ebp\n"
michael@0 588 "mov 0x38(%esp),%ecx\n"
michael@0 589
michael@0 590 "jmp 1f\n"
michael@0 591
// Two-pixel body: table entries are addressed relative to ecx.
michael@0 592 "0:"
michael@0 593 "movzbl (%edi),%eax\n"
michael@0 594 "add $0x1,%edi\n"
michael@0 595 "movzbl (%esi),%ebx\n"
michael@0 596 "add $0x1,%esi\n"
michael@0 597 "movq 2048(%ecx,%eax,8),%mm0\n"
michael@0 598 "movzbl (%edx),%eax\n"
michael@0 599 "paddsw 4096(%ecx,%ebx,8),%mm0\n"
michael@0 600 "movzbl 0x1(%edx),%ebx\n"
michael@0 601 "movq 0(%ecx,%eax,8),%mm1\n"
michael@0 602 "add $0x2,%edx\n"
michael@0 603 "movq 0(%ecx,%ebx,8),%mm2\n"
michael@0 604 "paddsw %mm0,%mm1\n"
michael@0 605 "paddsw %mm0,%mm2\n"
michael@0 606 "psraw $0x6,%mm1\n"
michael@0 607 "psraw $0x6,%mm2\n"
michael@0 608 "packuswb %mm2,%mm1\n"
michael@0 609 "movntq %mm1,0x0(%ebp)\n"
michael@0 610 "add $0x8,%ebp\n"
michael@0 611 "1:"
michael@0 612 "subl $0x2,0x34(%esp)\n"
michael@0 613 "jns 0b\n"
michael@0 614
// The width slot is -1 when one pixel is left and -2 when none is.
michael@0 615 "andl $0x1,0x34(%esp)\n"
michael@0 616 "je 2f\n"
michael@0 617
michael@0 618 "movzbl (%edi),%eax\n"
michael@0 619 "movq 2048(%ecx,%eax,8),%mm0\n"
michael@0 620 "movzbl (%esi),%eax\n"
michael@0 621 "paddsw 4096(%ecx,%eax,8),%mm0\n"
michael@0 622 "movzbl (%edx),%eax\n"
michael@0 623 "movq 0(%ecx,%eax,8),%mm1\n"
michael@0 624 "paddsw %mm0,%mm1\n"
michael@0 625 "psraw $0x6,%mm1\n"
michael@0 626 "packuswb %mm1,%mm1\n"
michael@0 627 "movd %mm1,0x0(%ebp)\n"
michael@0 628 "2:"
michael@0 629 "popa\n"
michael@0 630 "ret\n"
michael@0 631 #if !defined(XP_MACOSX)
michael@0 632 ".previous\n"
michael@0 633 #endif
michael@0 634 );
michael@0 635
michael@0 636 void FastConvertYUVToRGB32Row(const uint8* y_buf,
michael@0 637 const uint8* u_buf,
michael@0 638 const uint8* v_buf,
michael@0 639 uint8* rgb_buf,
michael@0 640 int width)
michael@0 641 {
michael@0 642 if (mozilla::supports_sse()) {
michael@0 643 PICConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
michael@0 644 &kCoefficientsRgbY[0][0]);
michael@0 645 return;
michael@0 646 }
michael@0 647
michael@0 648 FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
michael@0 649 }
michael@0 650
// PICScaleYUVToRGB32Row_SSE: position-independent variant of the
// point-sampled scaled row conversion, defined in a top-level asm block; the
// C prototype below only declares it. The coefficient table is passed in as
// the last argument.
//
// After pusha the stack arguments start at 0x24(%esp): edx = y_buf,
// edi = u_buf, esi = v_buf, ebp = rgb_buf, ecx = table pointer (0x3c).
// width (0x34) is decremented in its stack slot and source_dx (0x38) is
// added from the stack. ebx holds the running source position x; Y is read
// at x >> 16 and U/V at x >> 17.
// NOTE(review): this assumes all arguments are passed on the stack -- confirm.
//
// No `.global` directive is emitted, so the label is only visible inside
// this object file; the macOS variant carries a leading underscore.
//
// NOTE(review): no `emms` is executed before `ret` -- confirm that callers
// handle this.
michael@0 651 void PICScaleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 652 const uint8* u_buf,
michael@0 653 const uint8* v_buf,
michael@0 654 uint8* rgb_buf,
michael@0 655 int width,
michael@0 656 int source_dx,
michael@0 657 int16 *kCoefficientsRgbY);
michael@0 658
michael@0 659 asm(
michael@0 660 ".text\n"
michael@0 661 #if defined(XP_MACOSX)
michael@0 662 "_PICScaleYUVToRGB32Row_SSE:\n"
michael@0 663 #else
michael@0 664 "PICScaleYUVToRGB32Row_SSE:\n"
michael@0 665 #endif
michael@0 666 "pusha\n"
michael@0 667 "mov 0x24(%esp),%edx\n"
michael@0 668 "mov 0x28(%esp),%edi\n"
michael@0 669 "mov 0x2c(%esp),%esi\n"
michael@0 670 "mov 0x30(%esp),%ebp\n"
michael@0 671 "mov 0x3c(%esp),%ecx\n"
michael@0 672 "xor %ebx,%ebx\n"
michael@0 673 "jmp 1f\n"
michael@0 674
michael@0 675 "0:"
// U and V terms, both indexed by x >> 17.
michael@0 676 "mov %ebx,%eax\n"
michael@0 677 "sar $0x11,%eax\n"
michael@0 678 "movzbl (%edi,%eax,1),%eax\n"
michael@0 679 "movq 2048(%ecx,%eax,8),%mm0\n"
michael@0 680 "mov %ebx,%eax\n"
michael@0 681 "sar $0x11,%eax\n"
michael@0 682 "movzbl (%esi,%eax,1),%eax\n"
michael@0 683 "paddsw 4096(%ecx,%eax,8),%mm0\n"
// First Y term at x >> 16; x then advances by source_dx.
michael@0 684 "mov %ebx,%eax\n"
michael@0 685 "add 0x38(%esp),%ebx\n"
michael@0 686 "sar $0x10,%eax\n"
michael@0 687 "movzbl (%edx,%eax,1),%eax\n"
michael@0 688 "movq 0(%ecx,%eax,8),%mm1\n"
// Second Y term, followed by another advance of x.
michael@0 689 "mov %ebx,%eax\n"
michael@0 690 "add 0x38(%esp),%ebx\n"
michael@0 691 "sar $0x10,%eax\n"
michael@0 692 "movzbl (%edx,%eax,1),%eax\n"
michael@0 693 "movq 0(%ecx,%eax,8),%mm2\n"
michael@0 694 "paddsw %mm0,%mm1\n"
michael@0 695 "paddsw %mm0,%mm2\n"
michael@0 696 "psraw $0x6,%mm1\n"
michael@0 697 "psraw $0x6,%mm2\n"
michael@0 698 "packuswb %mm2,%mm1\n"
michael@0 699 "movntq %mm1,0x0(%ebp)\n"
michael@0 700 "add $0x8,%ebp\n"
michael@0 701 "1:"
michael@0 702 "subl $0x2,0x34(%esp)\n"
michael@0 703 "jns 0b\n"
michael@0 704
// The width slot is -1 when one pixel is left and -2 when none is.
michael@0 705 "andl $0x1,0x34(%esp)\n"
michael@0 706 "je 2f\n"
michael@0 707
michael@0 708 "mov %ebx,%eax\n"
michael@0 709 "sar $0x11,%eax\n"
michael@0 710 "movzbl (%edi,%eax,1),%eax\n"
michael@0 711 "movq 2048(%ecx,%eax,8),%mm0\n"
michael@0 712 "mov %ebx,%eax\n"
michael@0 713 "sar $0x11,%eax\n"
michael@0 714 "movzbl (%esi,%eax,1),%eax\n"
michael@0 715 "paddsw 4096(%ecx,%eax,8),%mm0\n"
michael@0 716 "mov %ebx,%eax\n"
michael@0 717 "sar $0x10,%eax\n"
michael@0 718 "movzbl (%edx,%eax,1),%eax\n"
michael@0 719 "movq 0(%ecx,%eax,8),%mm1\n"
michael@0 720 "paddsw %mm0,%mm1\n"
michael@0 721 "psraw $0x6,%mm1\n"
michael@0 722 "packuswb %mm1,%mm1\n"
michael@0 723 "movd %mm1,0x0(%ebp)\n"
michael@0 724
michael@0 725 "2:"
michael@0 726 "popa\n"
michael@0 727 "ret\n"
michael@0 728 #if !defined(XP_MACOSX)
michael@0 729 ".previous\n"
michael@0 730 #endif
michael@0 731 );
michael@0 732
michael@0 733 void ScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 734 const uint8* u_buf,
michael@0 735 const uint8* v_buf,
michael@0 736 uint8* rgb_buf,
michael@0 737 int width,
michael@0 738 int source_dx)
michael@0 739 {
michael@0 740 if (mozilla::supports_sse()) {
michael@0 741 PICScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
michael@0 742 &kCoefficientsRgbY[0][0]);
michael@0 743 return;
michael@0 744 }
michael@0 745
michael@0 746 ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 747 }
michael@0 748
// PICLinearScaleYUVToRGB32Row_SSE: position-independent variant of the
// linearly filtered scaled row conversion, defined in a top-level asm block;
// the C prototype below only declares it. The coefficient table is passed in
// as the last argument.
//
// After pusha the stack arguments start at 0x24(%esp): edx = y_buf,
// ebp = rgb_buf, edi = table pointer (0x3c). u_buf (0x28) and v_buf (0x2c)
// are reloaded into esi in every iteration because esi doubles as scratch.
// The width slot 0x34(%esp) is overwritten with width * source_dx and serves
// as the loop bound for the running source position in ebx.
// NOTE(review): this assumes all arguments are passed on the stack -- confirm.
//
// Each lookup blends the sample at the integer index with the sample at
// index + 1, weighted by the fractional bits of ebx (17-bit mask and shift
// for U/V, 16-bit for Y). Two pixels are written per iteration with movntq;
// label 2 writes a single trailing pixel with movd.
//
// No `.global` directive is emitted, so the label is only visible inside
// this object file; the macOS variant carries a leading underscore.
//
// NOTE(review): the loop reads the byte at index + 1 of each plane --
// callers presumably guarantee it is readable; confirm.
// NOTE(review): no `emms` is executed before either `ret` -- confirm that
// callers handle this.
michael@0 749 void PICLinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
michael@0 750 const uint8* u_buf,
michael@0 751 const uint8* v_buf,
michael@0 752 uint8* rgb_buf,
michael@0 753 int width,
michael@0 754 int source_dx,
michael@0 755 int16 *kCoefficientsRgbY);
michael@0 756
michael@0 757 asm(
michael@0 758 ".text\n"
michael@0 759 #if defined(XP_MACOSX)
michael@0 760 "_PICLinearScaleYUVToRGB32Row_SSE:\n"
michael@0 761 #else
michael@0 762 "PICLinearScaleYUVToRGB32Row_SSE:\n"
michael@0 763 #endif
michael@0 764 "pusha\n"
michael@0 765 "mov 0x24(%esp),%edx\n"
michael@0 766 "mov 0x30(%esp),%ebp\n"
// NOTE(review): this load of ecx and the xor of ebx two lines down are
// repeated verbatim a few instructions later, before either value is used.
michael@0 767 "mov 0x34(%esp),%ecx\n"
michael@0 768 "mov 0x3c(%esp),%edi\n"
michael@0 769 "xor %ebx,%ebx\n"
michael@0 770
// NOTE(review): the code below stores width * source_dx only; the "+ ebx"
// in the next comment is not applied -- confirm this is intended.
michael@0 771 // source_width = width * source_dx + ebx
michael@0 772 "mov 0x34(%esp), %ecx\n"
michael@0 773 "imull 0x38(%esp), %ecx\n"
michael@0 774 "mov %ecx, 0x34(%esp)\n"
michael@0 775
michael@0 776 "mov 0x38(%esp), %ecx\n"
michael@0 777 "xor %ebx,%ebx\n" // x = 0
michael@0 778 "cmp $0x20000,%ecx\n" // if source_dx >= 2.0
michael@0 779 "jl 1f\n"
michael@0 780 "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
michael@0 781 "jmp 1f\n"
michael@0 782
michael@0 783 "0:"
// u_buf is fetched from the stack; blend the two neighbouring U samples.
michael@0 784 "mov 0x28(%esp),%esi\n"
michael@0 785 "mov %ebx,%eax\n"
michael@0 786 "sar $0x11,%eax\n"
michael@0 787
michael@0 788 "movzbl (%esi,%eax,1),%ecx\n"
michael@0 789 "movzbl 1(%esi,%eax,1),%esi\n"
michael@0 790 "mov %ebx,%eax\n"
michael@0 791 "andl $0x1fffe, %eax \n"
michael@0 792 "imul %eax, %esi \n"
michael@0 793 "xorl $0x1fffe, %eax \n"
michael@0 794 "imul %eax, %ecx \n"
michael@0 795 "addl %esi, %ecx \n"
michael@0 796 "shrl $17, %ecx \n"
michael@0 797 "movq 2048(%edi,%ecx,8),%mm0\n"
michael@0 798
// v_buf is fetched from the stack; same blend for the V samples.
michael@0 799 "mov 0x2c(%esp),%esi\n"
michael@0 800 "mov %ebx,%eax\n"
michael@0 801 "sar $0x11,%eax\n"
michael@0 802
michael@0 803 "movzbl (%esi,%eax,1),%ecx\n"
michael@0 804 "movzbl 1(%esi,%eax,1),%esi\n"
michael@0 805 "mov %ebx,%eax\n"
michael@0 806 "andl $0x1fffe, %eax \n"
michael@0 807 "imul %eax, %esi \n"
michael@0 808 "xorl $0x1fffe, %eax \n"
michael@0 809 "imul %eax, %ecx \n"
michael@0 810 "addl %esi, %ecx \n"
michael@0 811 "shrl $17, %ecx \n"
michael@0 812 "paddsw 4096(%edi,%ecx,8),%mm0\n"
michael@0 813
// First pixel: blended Y sample; x advances by source_dx.
michael@0 814 "mov %ebx,%eax\n"
michael@0 815 "sar $0x10,%eax\n"
michael@0 816 "movzbl (%edx,%eax,1),%ecx\n"
michael@0 817 "movzbl 1(%edx,%eax,1),%esi\n"
michael@0 818 "mov %ebx,%eax\n"
michael@0 819 "add 0x38(%esp),%ebx\n"
michael@0 820 "andl $0xffff, %eax \n"
michael@0 821 "imul %eax, %esi \n"
michael@0 822 "xorl $0xffff, %eax \n"
michael@0 823 "imul %eax, %ecx \n"
michael@0 824 "addl %esi, %ecx \n"
michael@0 825 "shrl $16, %ecx \n"
michael@0 826 "movq (%edi,%ecx,8),%mm1\n"
michael@0 827
// Bound reached after the first pixel: finish it alone at label 2.
michael@0 828 "cmp 0x34(%esp), %ebx\n"
michael@0 829 "jge 2f\n"
michael@0 830
// Second pixel: blended Y sample; x advances again.
michael@0 831 "mov %ebx,%eax\n"
michael@0 832 "sar $0x10,%eax\n"
michael@0 833 "movzbl (%edx,%eax,1),%ecx\n"
michael@0 834 "movzbl 1(%edx,%eax,1),%esi\n"
michael@0 835 "mov %ebx,%eax\n"
michael@0 836 "add 0x38(%esp),%ebx\n"
michael@0 837 "andl $0xffff, %eax \n"
michael@0 838 "imul %eax, %esi \n"
michael@0 839 "xorl $0xffff, %eax \n"
michael@0 840 "imul %eax, %ecx \n"
michael@0 841 "addl %esi, %ecx \n"
michael@0 842 "shrl $16, %ecx \n"
michael@0 843 "movq (%edi,%ecx,8),%mm2\n"
michael@0 844
michael@0 845 "paddsw %mm0,%mm1\n"
michael@0 846 "paddsw %mm0,%mm2\n"
michael@0 847 "psraw $0x6,%mm1\n"
michael@0 848 "psraw $0x6,%mm2\n"
michael@0 849 "packuswb %mm2,%mm1\n"
michael@0 850 "movntq %mm1,0x0(%ebp)\n"
michael@0 851 "add $0x8,%ebp\n"
michael@0 852
// Loop while the bound in 0x34(%esp) is greater than x (ebx).
michael@0 853 "1:"
michael@0 854 "cmp %ebx, 0x34(%esp)\n"
michael@0 855 "jg 0b\n"
michael@0 856 "popa\n"
michael@0 857 "ret\n"
michael@0 858
// Single trailing pixel: mm1 holds its Y term, mm0 the shared U+V term.
michael@0 859 "2:"
michael@0 860 "paddsw %mm0, %mm1\n"
michael@0 861 "psraw $6, %mm1\n"
michael@0 862 "packuswb %mm1, %mm1\n"
michael@0 863 "movd %mm1, (%ebp)\n"
michael@0 864 "popa\n"
michael@0 865 "ret\n"
michael@0 866 #if !defined(XP_MACOSX)
michael@0 867 ".previous\n"
michael@0 868 #endif
michael@0 869 );
michael@0 870
michael@0 871
michael@0 872 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 873 const uint8* u_buf,
michael@0 874 const uint8* v_buf,
michael@0 875 uint8* rgb_buf,
michael@0 876 int width,
michael@0 877 int source_dx)
michael@0 878 {
michael@0 879 if (mozilla::supports_sse()) {
michael@0 880 PICLinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width,
michael@0 881 source_dx, &kCoefficientsRgbY[0][0]);
michael@0 882 return;
michael@0 883 }
michael@0 884
michael@0 885 LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 886 }
michael@0 887 #else
michael@0 888 void FastConvertYUVToRGB32Row(const uint8* y_buf,
michael@0 889 const uint8* u_buf,
michael@0 890 const uint8* v_buf,
michael@0 891 uint8* rgb_buf,
michael@0 892 int width) {
michael@0 893 FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
michael@0 894 }
michael@0 895
michael@0 896 void ScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 897 const uint8* u_buf,
michael@0 898 const uint8* v_buf,
michael@0 899 uint8* rgb_buf,
michael@0 900 int width,
michael@0 901 int source_dx) {
michael@0 902 ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 903 }
michael@0 904
michael@0 905 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
michael@0 906 const uint8* u_buf,
michael@0 907 const uint8* v_buf,
michael@0 908 uint8* rgb_buf,
michael@0 909 int width,
michael@0 910 int source_dx) {
michael@0 911 LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
michael@0 912 }
michael@0 913 #endif
michael@0 914
michael@0 915 }

mercurial