media/libyuv/source/rotate.cc

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Thu, 22 Jan 2015 13:21:57 +0100
branch      TOR_BUG_9701
changeset   15:b8a032363ba2
permissions -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate.h"

#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#if defined(__APPLE__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
    ".text \n" \
    ".private_extern _" #name " \n" \
    ".align 4,0x90 \n" \
    "_" #name ": \n"
#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
#define DECLARE_FUNCTION(name) \
    ".text \n" \
    ".align 4,0x90 \n" \
    "_" #name ": \n"
#else
#define DECLARE_FUNCTION(name) \
    ".text \n" \
    ".align 4,0x90 \n" \
    #name ": \n"
#endif
#endif
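
// Note on the DECLARE_FUNCTION macro above: it emits the assembler
// directives that open the text section and define the label for a
// hand-written assembly entry point. For example,
// DECLARE_FUNCTION(TransposeUVWx8_SSE2) expands on Mac i386 to:
//   .text
//   .private_extern _TransposeUVWx8_SSE2
//   .align 4,0x90
//   _TransposeUVWx8_SSE2:
// The leading underscore and the .private_extern directive are the only
// differences between the three platform variants.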

#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
#define HAS_MIRRORROW_UV_NEON
void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWX8_NEON
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width);
#endif  // defined(__ARM_NEON__)

#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
    defined(__mips__) && \
    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width);

void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
                                  uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                               uint8* dst_a, int dst_stride_a,
                               uint8* dst_b, int dst_stride_b,
                               int width);
#endif  // defined(__mips__)

#if !defined(LIBYUV_DISABLE_X86) && \
    defined(_M_IX86) && defined(_MSC_VER)
#define HAS_TRANSPOSE_WX8_SSSE3
__declspec(naked) __declspec(align(16))
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                               uint8* dst, int dst_stride, int width) {
  __asm {
    push      edi
    push      esi
    push      ebp
    mov       eax, [esp + 12 + 4]   // src
    mov       edi, [esp + 12 + 8]   // src_stride
    mov       edx, [esp + 12 + 12]  // dst
    mov       esi, [esp + 12 + 16]  // dst_stride
    mov       ecx, [esp + 12 + 20]  // width

    // Read in the data from the source pointer.
    // First round of bit swap.
    align      4
 convertloop:
    movq      xmm0, qword ptr [eax]
    lea       ebp, [eax + 8]
    movq      xmm1, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    punpcklbw xmm0, xmm1
    movq      xmm2, qword ptr [eax]
    movdqa    xmm1, xmm0
    palignr   xmm1, xmm1, 8
    movq      xmm3, qword ptr [eax + edi]
    lea       eax, [eax + 2 * edi]
    punpcklbw xmm2, xmm3
    movdqa    xmm3, xmm2
    movq      xmm4, qword ptr [eax]
    palignr   xmm3, xmm3, 8
    movq      xmm5, qword ptr [eax + edi]
    punpcklbw xmm4, xmm5
    lea       eax, [eax + 2 * edi]
    movdqa    xmm5, xmm4
    movq      xmm6, qword ptr [eax]
    palignr   xmm5, xmm5, 8
    movq      xmm7, qword ptr [eax + edi]
    punpcklbw xmm6, xmm7
    mov       eax, ebp
    movdqa    xmm7, xmm6
    palignr   xmm7, xmm7, 8
    // Second round of bit swap.
    punpcklwd xmm0, xmm2
    punpcklwd xmm1, xmm3
    movdqa    xmm2, xmm0
    movdqa    xmm3, xmm1
    palignr   xmm2, xmm2, 8
    palignr   xmm3, xmm3, 8
    punpcklwd xmm4, xmm6
    punpcklwd xmm5, xmm7
    movdqa    xmm6, xmm4
    movdqa    xmm7, xmm5
    palignr   xmm6, xmm6, 8
    palignr   xmm7, xmm7, 8
    // Third round of bit swap.
    // Write to the destination pointer.
    punpckldq xmm0, xmm4
    movq      qword ptr [edx], xmm0
    movdqa    xmm4, xmm0
    palignr   xmm4, xmm4, 8
    movq      qword ptr [edx + esi], xmm4
    lea       edx, [edx + 2 * esi]
    punpckldq xmm2, xmm6
    movdqa    xmm6, xmm2
    palignr   xmm6, xmm6, 8
    movq      qword ptr [edx], xmm2
    punpckldq xmm1, xmm5
    movq      qword ptr [edx + esi], xmm6
    lea       edx, [edx + 2 * esi]
    movdqa    xmm5, xmm1
    movq      qword ptr [edx], xmm1
    palignr   xmm5, xmm5, 8
    punpckldq xmm3, xmm7
    movq      qword ptr [edx + esi], xmm5
    lea       edx, [edx + 2 * esi]
    movq      qword ptr [edx], xmm3
    movdqa    xmm7, xmm3
    palignr   xmm7, xmm7, 8
    sub       ecx, 8
    movq      qword ptr [edx + esi], xmm7
    lea       edx, [edx + 2 * esi]
    jg        convertloop

    pop       ebp
    pop       esi
    pop       edi
    ret
  }
}

#define HAS_TRANSPOSE_UVWX8_SSE2
__declspec(naked) __declspec(align(16))
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                uint8* dst_a, int dst_stride_a,
                                uint8* dst_b, int dst_stride_b,
                                int w) {
  __asm {
    push      ebx
    push      esi
    push      edi
    push      ebp
    mov       eax, [esp + 16 + 4]   // src
    mov       edi, [esp + 16 + 8]   // src_stride
    mov       edx, [esp + 16 + 12]  // dst_a
    mov       esi, [esp + 16 + 16]  // dst_stride_a
    mov       ebx, [esp + 16 + 20]  // dst_b
    mov       ebp, [esp + 16 + 24]  // dst_stride_b
    mov       ecx, esp
    sub       esp, 4 + 16
    and       esp, ~15
    mov       [esp + 16], ecx
    mov       ecx, [ecx + 16 + 28]  // w

    align      4
 convertloop:
    // Read in the data from the source pointer.
    // First round of bit swap.
    movdqa    xmm0, [eax]
    movdqa    xmm1, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm0   // use xmm7 as temp register.
    punpcklbw xmm0, xmm1
    punpckhbw xmm7, xmm1
    movdqa    xmm1, xmm7
    movdqa    xmm2, [eax]
    movdqa    xmm3, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm2
    punpcklbw xmm2, xmm3
    punpckhbw xmm7, xmm3
    movdqa    xmm3, xmm7
    movdqa    xmm4, [eax]
    movdqa    xmm5, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    xmm7, xmm4
    punpcklbw xmm4, xmm5
    punpckhbw xmm7, xmm5
    movdqa    xmm5, xmm7
    movdqa    xmm6, [eax]
    movdqa    xmm7, [eax + edi]
    lea       eax, [eax + 2 * edi]
    movdqa    [esp], xmm5  // backup xmm5
    neg       edi
    movdqa    xmm5, xmm6   // use xmm5 as temp register.
    punpcklbw xmm6, xmm7
    punpckhbw xmm5, xmm7
    movdqa    xmm7, xmm5
    lea       eax, [eax + 8 * edi + 16]
    neg       edi
    // Second round of bit swap.
    movdqa    xmm5, xmm0
    punpcklwd xmm0, xmm2
    punpckhwd xmm5, xmm2
    movdqa    xmm2, xmm5
    movdqa    xmm5, xmm1
    punpcklwd xmm1, xmm3
    punpckhwd xmm5, xmm3
    movdqa    xmm3, xmm5
    movdqa    xmm5, xmm4
    punpcklwd xmm4, xmm6
    punpckhwd xmm5, xmm6
    movdqa    xmm6, xmm5
    movdqa    xmm5, [esp]  // restore xmm5
    movdqa    [esp], xmm6  // backup xmm6
    movdqa    xmm6, xmm5   // use xmm6 as temp register.
    punpcklwd xmm5, xmm7
    punpckhwd xmm6, xmm7
    movdqa    xmm7, xmm6
    // Third round of bit swap.
    // Write to the destination pointer.
    movdqa    xmm6, xmm0
    punpckldq xmm0, xmm4
    punpckhdq xmm6, xmm4
    movdqa    xmm4, xmm6
    movdqa    xmm6, [esp]  // restore xmm6
    movlpd    qword ptr [edx], xmm0
    movhpd    qword ptr [ebx], xmm0
    movlpd    qword ptr [edx + esi], xmm4
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm4
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm2   // use xmm0 as the temp register.
    punpckldq xmm2, xmm6
    movlpd    qword ptr [edx], xmm2
    movhpd    qword ptr [ebx], xmm2
    punpckhdq xmm0, xmm6
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm1   // use xmm0 as the temp register.
    punpckldq xmm1, xmm5
    movlpd    qword ptr [edx], xmm1
    movhpd    qword ptr [ebx], xmm1
    punpckhdq xmm0, xmm5
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    movdqa    xmm0, xmm3   // use xmm0 as the temp register.
    punpckldq xmm3, xmm7
    movlpd    qword ptr [edx], xmm3
    movhpd    qword ptr [ebx], xmm3
    punpckhdq xmm0, xmm7
    sub       ecx, 8
    movlpd    qword ptr [edx + esi], xmm0
    lea       edx, [edx + 2 * esi]
    movhpd    qword ptr [ebx + ebp], xmm0
    lea       ebx, [ebx + 2 * ebp]
    jg        convertloop

    mov       esp, [esp + 16]
    pop       ebp
    pop       edi
    pop       esi
    pop       ebx
    ret
  }
}
#elif !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
#define HAS_TRANSPOSE_WX8_SSSE3
static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                               uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
  "1: \n"
    "movq (%0),%%xmm0 \n"
    "movq (%0,%3),%%xmm1 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "movq (%0),%%xmm2 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "palignr $0x8,%%xmm1,%%xmm1 \n"
    "movq (%0,%3),%%xmm3 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm3 \n"
    "movq (%0),%%xmm4 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "movq (%0,%3),%%xmm5 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "movdqa %%xmm4,%%xmm5 \n"
    "movq (%0),%%xmm6 \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq (%0,%3),%%xmm7 \n"
    "lea (%0,%3,2),%0 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "neg %3 \n"
    "movdqa %%xmm6,%%xmm7 \n"
    "lea 0x8(%0,%3,8),%0 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "neg %3 \n"
    // Second round of bit swap.
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm0,%%xmm2 \n"
    "movdqa %%xmm1,%%xmm3 \n"
    "palignr $0x8,%%xmm2,%%xmm2 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm4,%%xmm6 \n"
    "movdqa %%xmm5,%%xmm7 \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq %%xmm4,%%xmm0 \n"
    "movq %%xmm0,(%1) \n"
    "movdqa %%xmm0,%%xmm4 \n"
    "palignr $0x8,%%xmm4,%%xmm4 \n"
    "movq %%xmm4,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "movq %%xmm2,(%1) \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movq %%xmm6,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm1,%%xmm5 \n"
    "movq %%xmm1,(%1) \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq %%xmm5,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movq %%xmm3,(%1) \n"
    "movdqa %%xmm3,%%xmm7 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "sub $0x8,%2 \n"
    "movq %%xmm7,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "jg 1b \n"
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(width)   // %2
  : "r"((intptr_t)(src_stride)),  // %3
    "r"((intptr_t)(dst_stride))   // %4
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
  );
}

#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)
#define HAS_TRANSPOSE_UVWX8_SSE2
extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                    uint8* dst_a, int dst_stride_a,
                                    uint8* dst_b, int dst_stride_b,
                                    int w);
asm (
  DECLARE_FUNCTION(TransposeUVWx8_SSE2)
    "push %ebx \n"
    "push %esi \n"
    "push %edi \n"
    "push %ebp \n"
    "mov 0x14(%esp),%eax \n"
    "mov 0x18(%esp),%edi \n"
    "mov 0x1c(%esp),%edx \n"
    "mov 0x20(%esp),%esi \n"
    "mov 0x24(%esp),%ebx \n"
    "mov 0x28(%esp),%ebp \n"
    "mov %esp,%ecx \n"
    "sub $0x14,%esp \n"
    "and $0xfffffff0,%esp \n"
    "mov %ecx,0x10(%esp) \n"
    "mov 0x2c(%ecx),%ecx \n"

  "1: \n"
    "movdqa (%eax),%xmm0 \n"
    "movdqa (%eax,%edi,1),%xmm1 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm0,%xmm7 \n"
    "punpcklbw %xmm1,%xmm0 \n"
    "punpckhbw %xmm1,%xmm7 \n"
    "movdqa %xmm7,%xmm1 \n"
    "movdqa (%eax),%xmm2 \n"
    "movdqa (%eax,%edi,1),%xmm3 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm2,%xmm7 \n"
    "punpcklbw %xmm3,%xmm2 \n"
    "punpckhbw %xmm3,%xmm7 \n"
    "movdqa %xmm7,%xmm3 \n"
    "movdqa (%eax),%xmm4 \n"
    "movdqa (%eax,%edi,1),%xmm5 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm4,%xmm7 \n"
    "punpcklbw %xmm5,%xmm4 \n"
    "punpckhbw %xmm5,%xmm7 \n"
    "movdqa %xmm7,%xmm5 \n"
    "movdqa (%eax),%xmm6 \n"
    "movdqa (%eax,%edi,1),%xmm7 \n"
    "lea (%eax,%edi,2),%eax \n"
    "movdqa %xmm5,(%esp) \n"
    "neg %edi \n"
    "movdqa %xmm6,%xmm5 \n"
    "punpcklbw %xmm7,%xmm6 \n"
    "punpckhbw %xmm7,%xmm5 \n"
    "movdqa %xmm5,%xmm7 \n"
    "lea 0x10(%eax,%edi,8),%eax \n"
    "neg %edi \n"
    "movdqa %xmm0,%xmm5 \n"
    "punpcklwd %xmm2,%xmm0 \n"
    "punpckhwd %xmm2,%xmm5 \n"
    "movdqa %xmm5,%xmm2 \n"
    "movdqa %xmm1,%xmm5 \n"
    "punpcklwd %xmm3,%xmm1 \n"
    "punpckhwd %xmm3,%xmm5 \n"
    "movdqa %xmm5,%xmm3 \n"
    "movdqa %xmm4,%xmm5 \n"
    "punpcklwd %xmm6,%xmm4 \n"
    "punpckhwd %xmm6,%xmm5 \n"
    "movdqa %xmm5,%xmm6 \n"
    "movdqa (%esp),%xmm5 \n"
    "movdqa %xmm6,(%esp) \n"
    "movdqa %xmm5,%xmm6 \n"
    "punpcklwd %xmm7,%xmm5 \n"
    "punpckhwd %xmm7,%xmm6 \n"
    "movdqa %xmm6,%xmm7 \n"
    "movdqa %xmm0,%xmm6 \n"
    "punpckldq %xmm4,%xmm0 \n"
    "punpckhdq %xmm4,%xmm6 \n"
    "movdqa %xmm6,%xmm4 \n"
    "movdqa (%esp),%xmm6 \n"
    "movlpd %xmm0,(%edx) \n"
    "movhpd %xmm0,(%ebx) \n"
    "movlpd %xmm4,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm4,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "movdqa %xmm2,%xmm0 \n"
    "punpckldq %xmm6,%xmm2 \n"
    "movlpd %xmm2,(%edx) \n"
    "movhpd %xmm2,(%ebx) \n"
    "punpckhdq %xmm6,%xmm0 \n"
    "movlpd %xmm0,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm0,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "movdqa %xmm1,%xmm0 \n"
    "punpckldq %xmm5,%xmm1 \n"
    "movlpd %xmm1,(%edx) \n"
    "movhpd %xmm1,(%ebx) \n"
    "punpckhdq %xmm5,%xmm0 \n"
    "movlpd %xmm0,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm0,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "movdqa %xmm3,%xmm0 \n"
    "punpckldq %xmm7,%xmm3 \n"
    "movlpd %xmm3,(%edx) \n"
    "movhpd %xmm3,(%ebx) \n"
    "punpckhdq %xmm7,%xmm0 \n"
    "sub $0x8,%ecx \n"
    "movlpd %xmm0,(%edx,%esi,1) \n"
    "lea (%edx,%esi,2),%edx \n"
    "movhpd %xmm0,(%ebx,%ebp,1) \n"
    "lea (%ebx,%ebp,2),%ebx \n"
    "jg 1b \n"
    "mov 0x10(%esp),%esp \n"
    "pop %ebp \n"
    "pop %edi \n"
    "pop %esi \n"
    "pop %ebx \n"
#if defined(__native_client__)
    "pop %ecx \n"
    "and $0xffffffe0,%ecx \n"
    "jmp *%ecx \n"
#else
    "ret \n"
#endif
);
#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
    defined(__x86_64__)
// 64 bit version has enough registers to do 16x8 to 8x16 at a time.
#define HAS_TRANSPOSE_WX8_FAST_SSSE3
static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
                                    uint8* dst, int dst_stride, int width) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
  "1: \n"
    "movdqa (%0),%%xmm0 \n"
    "movdqa (%0,%3),%%xmm1 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm0,%%xmm8 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "punpckhbw %%xmm1,%%xmm8 \n"
    "movdqa (%0),%%xmm2 \n"
    "movdqa %%xmm0,%%xmm1 \n"
    "movdqa %%xmm8,%%xmm9 \n"
    "palignr $0x8,%%xmm1,%%xmm1 \n"
    "palignr $0x8,%%xmm9,%%xmm9 \n"
    "movdqa (%0,%3),%%xmm3 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm2,%%xmm10 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "punpckhbw %%xmm3,%%xmm10 \n"
    "movdqa %%xmm2,%%xmm3 \n"
    "movdqa %%xmm10,%%xmm11 \n"
    "movdqa (%0),%%xmm4 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "palignr $0x8,%%xmm11,%%xmm11 \n"
    "movdqa (%0,%3),%%xmm5 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm4,%%xmm12 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "punpckhbw %%xmm5,%%xmm12 \n"
    "movdqa %%xmm4,%%xmm5 \n"
    "movdqa %%xmm12,%%xmm13 \n"
    "movdqa (%0),%%xmm6 \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "palignr $0x8,%%xmm13,%%xmm13 \n"
    "movdqa (%0,%3),%%xmm7 \n"
    "lea (%0,%3,2),%0 \n"
    "movdqa %%xmm6,%%xmm14 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "punpckhbw %%xmm7,%%xmm14 \n"
    "neg %3 \n"
    "movdqa %%xmm6,%%xmm7 \n"
    "movdqa %%xmm14,%%xmm15 \n"
    "lea 0x10(%0,%3,8),%0 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    "neg %3 \n"
    // Second round of bit swap.
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm0,%%xmm2 \n"
    "movdqa %%xmm1,%%xmm3 \n"
    "palignr $0x8,%%xmm2,%%xmm2 \n"
    "palignr $0x8,%%xmm3,%%xmm3 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm4,%%xmm6 \n"
    "movdqa %%xmm5,%%xmm7 \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "punpcklwd %%xmm10,%%xmm8 \n"
    "punpcklwd %%xmm11,%%xmm9 \n"
    "movdqa %%xmm8,%%xmm10 \n"
    "movdqa %%xmm9,%%xmm11 \n"
    "palignr $0x8,%%xmm10,%%xmm10 \n"
    "palignr $0x8,%%xmm11,%%xmm11 \n"
    "punpcklwd %%xmm14,%%xmm12 \n"
    "punpcklwd %%xmm15,%%xmm13 \n"
    "movdqa %%xmm12,%%xmm14 \n"
    "movdqa %%xmm13,%%xmm15 \n"
    "palignr $0x8,%%xmm14,%%xmm14 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "punpckldq %%xmm4,%%xmm0 \n"
    "movq %%xmm0,(%1) \n"
    "movdqa %%xmm0,%%xmm4 \n"
    "palignr $0x8,%%xmm4,%%xmm4 \n"
    "movq %%xmm4,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movdqa %%xmm2,%%xmm6 \n"
    "movq %%xmm2,(%1) \n"
    "palignr $0x8,%%xmm6,%%xmm6 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movq %%xmm6,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm1,%%xmm5 \n"
    "movq %%xmm1,(%1) \n"
    "palignr $0x8,%%xmm5,%%xmm5 \n"
    "movq %%xmm5,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movq %%xmm3,(%1) \n"
    "movdqa %%xmm3,%%xmm7 \n"
    "palignr $0x8,%%xmm7,%%xmm7 \n"
    "movq %%xmm7,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm12,%%xmm8 \n"
    "movq %%xmm8,(%1) \n"
    "movdqa %%xmm8,%%xmm12 \n"
    "palignr $0x8,%%xmm12,%%xmm12 \n"
    "movq %%xmm12,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm14,%%xmm10 \n"
    "movdqa %%xmm10,%%xmm14 \n"
    "movq %%xmm10,(%1) \n"
    "palignr $0x8,%%xmm14,%%xmm14 \n"
    "punpckldq %%xmm13,%%xmm9 \n"
    "movq %%xmm14,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "movdqa %%xmm9,%%xmm13 \n"
    "movq %%xmm9,(%1) \n"
    "palignr $0x8,%%xmm13,%%xmm13 \n"
    "movq %%xmm13,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "punpckldq %%xmm15,%%xmm11 \n"
    "movq %%xmm11,(%1) \n"
    "movdqa %%xmm11,%%xmm15 \n"
    "palignr $0x8,%%xmm15,%%xmm15 \n"
    "sub $0x10,%2 \n"
    "movq %%xmm15,(%1,%4) \n"
    "lea (%1,%4,2),%1 \n"
    "jg 1b \n"
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(width)   // %2
  : "r"((intptr_t)(src_stride)),  // %3
    "r"((intptr_t)(dst_stride))   // %4
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
  );
}

#define HAS_TRANSPOSE_UVWX8_SSE2
static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
                                uint8* dst_a, int dst_stride_a,
                                uint8* dst_b, int dst_stride_b,
                                int w) {
  asm volatile (
    // Read in the data from the source pointer.
    // First round of bit swap.
    ".p2align 2 \n"
  "1: \n"
    "movdqa (%0),%%xmm0 \n"
    "movdqa (%0,%4),%%xmm1 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm0,%%xmm8 \n"
    "punpcklbw %%xmm1,%%xmm0 \n"
    "punpckhbw %%xmm1,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm1 \n"
    "movdqa (%0),%%xmm2 \n"
    "movdqa (%0,%4),%%xmm3 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm2,%%xmm8 \n"
    "punpcklbw %%xmm3,%%xmm2 \n"
    "punpckhbw %%xmm3,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm3 \n"
    "movdqa (%0),%%xmm4 \n"
    "movdqa (%0,%4),%%xmm5 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm4,%%xmm8 \n"
    "punpcklbw %%xmm5,%%xmm4 \n"
    "punpckhbw %%xmm5,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm5 \n"
    "movdqa (%0),%%xmm6 \n"
    "movdqa (%0,%4),%%xmm7 \n"
    "lea (%0,%4,2),%0 \n"
    "movdqa %%xmm6,%%xmm8 \n"
    "punpcklbw %%xmm7,%%xmm6 \n"
    "neg %4 \n"
    "lea 0x10(%0,%4,8),%0 \n"
    "punpckhbw %%xmm7,%%xmm8 \n"
    "movdqa %%xmm8,%%xmm7 \n"
    "neg %4 \n"
    // Second round of bit swap.
    "movdqa %%xmm0,%%xmm8 \n"
    "movdqa %%xmm1,%%xmm9 \n"
    "punpckhwd %%xmm2,%%xmm8 \n"
    "punpckhwd %%xmm3,%%xmm9 \n"
    "punpcklwd %%xmm2,%%xmm0 \n"
    "punpcklwd %%xmm3,%%xmm1 \n"
    "movdqa %%xmm8,%%xmm2 \n"
    "movdqa %%xmm9,%%xmm3 \n"
    "movdqa %%xmm4,%%xmm8 \n"
    "movdqa %%xmm5,%%xmm9 \n"
    "punpckhwd %%xmm6,%%xmm8 \n"
    "punpckhwd %%xmm7,%%xmm9 \n"
    "punpcklwd %%xmm6,%%xmm4 \n"
    "punpcklwd %%xmm7,%%xmm5 \n"
    "movdqa %%xmm8,%%xmm6 \n"
    "movdqa %%xmm9,%%xmm7 \n"
    // Third round of bit swap.
    // Write to the destination pointer.
    "movdqa %%xmm0,%%xmm8 \n"
    "punpckldq %%xmm4,%%xmm0 \n"
    "movlpd %%xmm0,(%1) \n"  // Write back U channel
    "movhpd %%xmm0,(%2) \n"  // Write back V channel
    "punpckhdq %%xmm4,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm2,%%xmm8 \n"
    "punpckldq %%xmm6,%%xmm2 \n"
    "movlpd %%xmm2,(%1) \n"
    "movhpd %%xmm2,(%2) \n"
    "punpckhdq %%xmm6,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm1,%%xmm8 \n"
    "punpckldq %%xmm5,%%xmm1 \n"
    "movlpd %%xmm1,(%1) \n"
    "movhpd %%xmm1,(%2) \n"
    "punpckhdq %%xmm5,%%xmm8 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "movdqa %%xmm3,%%xmm8 \n"
    "punpckldq %%xmm7,%%xmm3 \n"
    "movlpd %%xmm3,(%1) \n"
    "movhpd %%xmm3,(%2) \n"
    "punpckhdq %%xmm7,%%xmm8 \n"
    "sub $0x8,%3 \n"
    "movlpd %%xmm8,(%1,%5) \n"
    "lea (%1,%5,2),%1 \n"
    "movhpd %%xmm8,(%2,%6) \n"
    "lea (%2,%6,2),%2 \n"
    "jg 1b \n"
  : "+r"(src),    // %0
    "+r"(dst_a),  // %1
    "+r"(dst_b),  // %2
    "+r"(w)       // %3
  : "r"((intptr_t)(src_stride)),    // %4
    "r"((intptr_t)(dst_stride_a)),  // %5
    "r"((intptr_t)(dst_stride_b))   // %6
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
    "xmm8", "xmm9"
  );
}
#endif
#endif

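// Portable C reference path: transpose a tile 8 rows high and 'width'
// columns wide. Each pass of the loop gathers one source column into one
// destination row; the SIMD variants above implement the same operation.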
static void TransposeWx8_C(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[0] = src[0 * src_stride];
    dst[1] = src[1 * src_stride];
    dst[2] = src[2 * src_stride];
    dst[3] = src[3 * src_stride];
    dst[4] = src[4 * src_stride];
    dst[5] = src[5 * src_stride];
    dst[6] = src[6 * src_stride];
    dst[7] = src[7 * src_stride];
    ++src;
    dst += dst_stride;
  }
}

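// Portable C transpose for an arbitrary width x height block. TransposePlane
// below uses it to finish the final (height % 8) rows that the 8-row tile
// loop does not cover.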
static void TransposeWxH_C(const uint8* src, int src_stride,
                           uint8* dst, int dst_stride,
                           int width, int height) {
  int i;
  for (i = 0; i < width; ++i) {
    int j;
    for (j = 0; j < height; ++j) {
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}

LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  int i = height;
  void (*TransposeWx8)(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride,
                       int width) = TransposeWx8_C;
#if defined(HAS_TRANSPOSE_WX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeWx8 = TransposeWx8_NEON;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    TransposeWx8 = TransposeWx8_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
    TransposeWx8 = TransposeWx8_FAST_SSSE3;
  }
#endif
#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
    if (IS_ALIGNED(width, 4) &&
        IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
      TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
    } else {
      TransposeWx8 = TransposeWx8_MIPS_DSPR2;
    }
  }
#endif

  // Work across the source in 8x8 tiles.
  while (i >= 8) {
    TransposeWx8(src, src_stride, dst, dst_stride, width);
    src += 8 * src_stride;  // Go down 8 rows.
    dst += 8;               // Move over 8 columns.
    i -= 8;
  }

  TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
}

LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
                   uint8* dst, int dst_stride,
                   int width, int height) {
  // Rotate by 90 is a transpose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
  src += src_stride * (height - 1);
  src_stride = -src_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
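
// Reading RotatePlane90 together with TransposeWxH_C, source pixel (x, y)
// lands at destination (height - 1 - y, x), i.e. a clockwise rotation.
// For example, a 2x3 plane becomes a 3x2 plane:
//
//   a b       e c a
//   c d   ->  f d b
//   e f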

LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Rotate by 270 is a transpose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
  dst += dst_stride * (width - 1);
  dst_stride = -dst_stride;
  TransposePlane(src, src_stride, dst, dst_stride, width, height);
}

LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
                    uint8* dst, int dst_stride,
                    int width, int height) {
  // Swap first and last row and mirror the content. Uses a temporary row.
  align_buffer_64(row, width);
  const uint8* src_bot = src + src_stride * (height - 1);
  uint8* dst_bot = dst + dst_stride * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
    MirrorRow = MirrorRow_SSE2;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
    MirrorRow = MirrorRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    MirrorRow = MirrorRow_AVX2;
  }
#endif
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
    MirrorRow = MirrorRow_MIPS_DSPR2;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_NEON;
  }
#endif
#if defined(HAS_COPYROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    CopyRow = CopyRow_X86;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
    CopyRow = CopyRow_SSE2;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Odd height will harmlessly mirror the middle row twice.
  for (y = 0; y < half_height; ++y) {
    MirrorRow(src, row, width);      // Mirror first row into a buffer
    src += src_stride;
    MirrorRow(src_bot, dst, width);  // Mirror last row into first row
    dst += dst_stride;
    CopyRow(row, dst_bot, width);    // Copy first mirrored row into last
    src_bot -= src_stride;
    dst_bot -= dst_stride;
  }
  free_aligned_buffer_64(row);
}

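// Portable C reference for the interleaved-UV transpose: each source row
// holds 'width' UV byte pairs, which are de-interleaved into the separate
// U (dst_a) and V (dst_b) planes while transposing.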
static void TransposeUVWx8_C(const uint8* src, int src_stride,
                             uint8* dst_a, int dst_stride_a,
                             uint8* dst_b, int dst_stride_b,
                             int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[0] = src[0 * src_stride + 0];
    dst_b[0] = src[0 * src_stride + 1];
    dst_a[1] = src[1 * src_stride + 0];
    dst_b[1] = src[1 * src_stride + 1];
    dst_a[2] = src[2 * src_stride + 0];
    dst_b[2] = src[2 * src_stride + 1];
    dst_a[3] = src[3 * src_stride + 0];
    dst_b[3] = src[3 * src_stride + 1];
    dst_a[4] = src[4 * src_stride + 0];
    dst_b[4] = src[4 * src_stride + 1];
    dst_a[5] = src[5 * src_stride + 0];
    dst_b[5] = src[5 * src_stride + 1];
    dst_a[6] = src[6 * src_stride + 0];
    dst_b[6] = src[6 * src_stride + 1];
    dst_a[7] = src[7 * src_stride + 0];
    dst_b[7] = src[7 * src_stride + 1];
    src += 2;
    dst_a += dst_stride_a;
    dst_b += dst_stride_b;
  }
}

static void TransposeUVWxH_C(const uint8* src, int src_stride,
                             uint8* dst_a, int dst_stride_a,
                             uint8* dst_b, int dst_stride_b,
                             int width, int height) {
  int i;
  for (i = 0; i < width * 2; i += 2) {
    int j;
    for (j = 0; j < height; ++j) {
      dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];
      dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];
    }
  }
}

LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int i = height;
  void (*TransposeUVWx8)(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) = TransposeUVWx8_C;
#if defined(HAS_TRANSPOSE_UVWX8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    TransposeUVWx8 = TransposeUVWx8_NEON;
  }
#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 8) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
    TransposeUVWx8 = TransposeUVWx8_SSE2;
  }
#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
  }
#endif

  // Work through the source in 8x8 tiles.
  while (i >= 8) {
    TransposeUVWx8(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width);
    src += 8 * src_stride;  // Go down 8 rows.
    dst_a += 8;             // Move over 8 columns.
    dst_b += 8;             // Move over 8 columns.
    i -= 8;
  }

  TransposeUVWxH_C(src, src_stride,
                   dst_a, dst_stride_a,
                   dst_b, dst_stride_b,
                   width, i);
}

LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
                uint8* dst_a, int dst_stride_a,
                uint8* dst_b, int dst_stride_b,
                int width, int height) {
  src += src_stride * (height - 1);
  src_stride = -src_stride;

  TransposeUV(src, src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}

LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  dst_a += dst_stride_a * (width - 1);
  dst_b += dst_stride_b * (width - 1);
  dst_stride_a = -dst_stride_a;
  dst_stride_b = -dst_stride_b;

  TransposeUV(src, src_stride,
              dst_a, dst_stride_a,
              dst_b, dst_stride_b,
              width, height);
}

// Rotate 180 is a horizontal and vertical flip.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
                 uint8* dst_a, int dst_stride_a,
                 uint8* dst_b, int dst_stride_b,
                 int width, int height) {
  int i;
  void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
      MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    MirrorRowUV = MirrorUVRow_NEON;
  }
#elif defined(HAS_MIRRORROW_UV_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
    MirrorRowUV = MirrorUVRow_SSSE3;
  }
#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
      IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
    MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
  }
#endif

  dst_a += dst_stride_a * (height - 1);
  dst_b += dst_stride_b * (height - 1);

  for (i = 0; i < height; ++i) {
    MirrorRowUV(src, dst_a, dst_b, width);
    src += src_stride;
    dst_a -= dst_stride_a;
    dst_b -= dst_stride_b;
  }
}

LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
                uint8* dst, int dst_stride,
                int width, int height,
                enum RotationMode mode) {
  if (!src || width <= 0 || height == 0 || !dst) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * src_stride;
    src_stride = -src_stride;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      CopyPlane(src, src_stride,
                dst, dst_stride,
                width, height);
      return 0;
    case kRotate90:
      RotatePlane90(src, src_stride,
                    dst, dst_stride,
                    width, height);
      return 0;
    case kRotate270:
      RotatePlane270(src, src_stride,
                     dst, dst_stride,
                     width, height);
      return 0;
    case kRotate180:
      RotatePlane180(src, src_stride,
                     dst, dst_stride,
                     width, height);
      return 0;
    default:
      break;
  }
  return -1;
}

LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height,
               enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
      !dst_y || !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return I420Copy(src_y, src_stride_y,
                      src_u, src_stride_u,
                      src_v, src_stride_v,
                      dst_y, dst_stride_y,
                      dst_u, dst_stride_u,
                      dst_v, dst_stride_v,
                      width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotatePlane90(src_u, src_stride_u,
                    dst_u, dst_stride_u,
                    halfwidth, halfheight);
      RotatePlane90(src_v, src_stride_v,
                    dst_v, dst_stride_v,
                    halfwidth, halfheight);
      return 0;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane270(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane270(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotatePlane180(src_u, src_stride_u,
                     dst_u, dst_stride_u,
                     halfwidth, halfheight);
      RotatePlane180(src_v, src_stride_v,
                     dst_v, dst_stride_v,
                     halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
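
// Illustrative call (not part of the original file): rotating a 640x480
// I420 frame 90 degrees clockwise into a 480x640 destination, assuming
// tightly packed planes (stride equal to the row width of each plane):
//
//   I420Rotate(src_y, 640, src_u, 320, src_v, 320,
//              dst_y, 480, dst_u, 240, dst_v, 240,
//              640, 480, kRotate90);
//
// Note the destination strides describe the rotated geometry: 480-wide
// luma rows and 240-wide chroma rows.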

LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
                     const uint8* src_uv, int src_stride_uv,
                     uint8* dst_y, int dst_stride_y,
                     uint8* dst_u, int dst_stride_u,
                     uint8* dst_v, int dst_stride_v,
                     int width, int height,
                     enum RotationMode mode) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_uv || width <= 0 || height == 0 ||
      !dst_y || !dst_u || !dst_v) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return NV12ToI420(src_y, src_stride_y,
                        src_uv, src_stride_uv,
                        dst_y, dst_stride_y,
                        dst_u, dst_stride_u,
                        dst_v, dst_stride_v,
                        width, height);
    case kRotate90:
      RotatePlane90(src_y, src_stride_y,
                    dst_y, dst_stride_y,
                    width, height);
      RotateUV90(src_uv, src_stride_uv,
                 dst_u, dst_stride_u,
                 dst_v, dst_stride_v,
                 halfwidth, halfheight);
      return 0;
    case kRotate270:
      RotatePlane270(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV270(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    case kRotate180:
      RotatePlane180(src_y, src_stride_y,
                     dst_y, dst_stride_y,
                     width, height);
      RotateUV180(src_uv, src_stride_uv,
                  dst_u, dst_stride_u,
                  dst_v, dst_stride_v,
                  halfwidth, halfheight);
      return 0;
    default:
      break;
  }
  return -1;
}
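
// NV12ToI420Rotate follows the same pattern as I420Rotate above, except the
// source chroma arrives as a single interleaved UV plane (src_uv) that the
// RotateUV* helpers split into separate U and V planes while rotating.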

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
