media/libyuv/source/row_mips.cc

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Thu, 22 Jan 2015 13:21:57 +0100
branch      TOR_BUG_9701
changeset   15:b8a032363ba2
permissions -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

/*
 * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// The following are available on MIPS platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)

#include <sgidefs.h>

#if (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS5)
#define HAS_MIPS_PREFETCH 1
#endif

#ifdef HAS_COPYROW_MIPS
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
  __asm__ __volatile__ (
    ".set noreorder \n"
    ".set noat \n"
    "slti $at, %[count], 8 \n"
    "bne $at, $zero, $last8 \n"
    "xor $t8, %[src], %[dst] \n"
    "andi $t8, $t8, 0x3 \n"

    "bne $t8, $zero, unaligned \n"
    "negu $a3, %[dst] \n"
    // make dst/src aligned
    "andi $a3, $a3, 0x3 \n"
    "beq $a3, $zero, $chk16w \n"
    // word-aligned now; count is the remaining byte count
    "subu %[count], %[count], $a3 \n"

    "lwr $t8, 0(%[src]) \n"
    "addu %[src], %[src], $a3 \n"
    "swr $t8, 0(%[dst]) \n"
    "addu %[dst], %[dst], $a3 \n"

    // Now the dst/src are mutually word-aligned with word-aligned addresses
    "$chk16w: \n"
    "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq %[count], $t8, chk8w \n"
    // There will be at most 1 32-byte chunk after it
    "subu $a3, %[count], $t8 \n" // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu $a3, %[dst], $a3 \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu $t0, %[dst], %[count] \n"
    // t0 is the "past the end" address

    // When the loop issues "pref 30, x(a1)", a1+x must not be past
    // the "t0-32" address
    // This means: for x=128 the last "safe" a1 address is "t0-160"
    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
    // We will use "pref 30, 128(a1)", so "t0-160" is the limit
    "subu $t9, $t0, 160 \n"
#ifdef HAS_MIPS_PREFETCH
    // t9 is the "last safe pref 30, 128(a1)" address
    "pref 0, 0(%[src]) \n" // first line of src
    "pref 0, 32(%[src]) \n" // second line of src
    "pref 0, 64(%[src]) \n"
    "pref 30, 32(%[dst]) \n"
#endif
    // If a1 > t9, don't use "pref 30" at all
    "sgtu $v1, %[dst], $t9 \n"
    "bgtz $v1, $loop16w \n"
    "nop \n"
    // otherwise, start with using pref30
#ifdef HAS_MIPS_PREFETCH
    "pref 30, 64(%[dst]) \n"
#endif
    "$loop16w: \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 96(%[src]) \n"
#endif
    "lw $t0, 0(%[src]) \n"
    "bgtz $v1, $skip_pref30_96 \n" // skip
    "lw $t1, 4(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 30, 96(%[dst]) \n" // continue
#endif
    "$skip_pref30_96: \n"
    "lw $t2, 8(%[src]) \n"
    "lw $t3, 12(%[src]) \n"
    "lw $t4, 16(%[src]) \n"
    "lw $t5, 20(%[src]) \n"
    "lw $t6, 24(%[src]) \n"
    "lw $t7, 28(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 128(%[src]) \n"
#endif
    // bring the next lines of src, addr 128
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "lw $t0, 32(%[src]) \n"
    "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1)
    "lw $t1, 36(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 30, 128(%[dst]) \n" // set dest, addr 128
#endif
    "$skip_pref30_128: \n"
    "lw $t2, 40(%[src]) \n"
    "lw $t3, 44(%[src]) \n"
    "lw $t4, 48(%[src]) \n"
    "lw $t5, 52(%[src]) \n"
    "lw $t6, 56(%[src]) \n"
    "lw $t7, 60(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 160(%[src]) \n"
#endif
    // bring the next lines of src, addr 160
    "sw $t0, 32(%[dst]) \n"
    "sw $t1, 36(%[dst]) \n"
    "sw $t2, 40(%[dst]) \n"
    "sw $t3, 44(%[dst]) \n"
    "sw $t4, 48(%[dst]) \n"
    "sw $t5, 52(%[dst]) \n"
    "sw $t6, 56(%[dst]) \n"
    "sw $t7, 60(%[dst]) \n"

    "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
    "sgtu $v1, %[dst], $t9 \n"
    "bne %[dst], $a3, $loop16w \n"
    " addiu %[src], %[src], 64 \n" // adding 64 to src
    "move %[count], $t8 \n"

    // Here we have src and dest word-aligned but less than 64 bytes to go

    "chk8w: \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 0x0(%[src]) \n"
#endif
    "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
    // t8 is the remainder count past 32 bytes
    "beq %[count], $t8, chk1w \n"
    // count == t8: no 32-byte chunk
    " nop \n"

    "lw $t0, 0(%[src]) \n"
    "lw $t1, 4(%[src]) \n"
    "lw $t2, 8(%[src]) \n"
    "lw $t3, 12(%[src]) \n"
    "lw $t4, 16(%[src]) \n"
    "lw $t5, 20(%[src]) \n"
    "lw $t6, 24(%[src]) \n"
    "lw $t7, 28(%[src]) \n"
    "addiu %[src], %[src], 32 \n"

    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "addiu %[dst], %[dst], 32 \n"

    "chk1w: \n"
    "andi %[count], $t8, 0x3 \n"
    // now count is the remainder past 1w chunks
    "beq %[count], $t8, $last8 \n"
    " subu $a3, $t8, %[count] \n"
    // a3 is count of bytes in 1w chunks
    "addu $a3, %[dst], $a3 \n"
    // now a3 is the dst address past the 1w chunks
    // copying in words (4-byte chunks)
    "$wordCopy_loop: \n"
    "lw $t3, 0(%[src]) \n"
    // the first t3 may be equal to t0 ... optimize?
    "addiu %[src], %[src], 4 \n"
    "addiu %[dst], %[dst], 4 \n"
    "bne %[dst], $a3, $wordCopy_loop \n"
    " sw $t3, -4(%[dst]) \n"

    // For the last (<8) bytes
    "$last8: \n"
    "blez %[count], leave \n"
    " addu $a3, %[dst], %[count] \n" // a3 = last dst address
    "$last8loop: \n"
    "lb $v1, 0(%[src]) \n"
    "addiu %[src], %[src], 1 \n"
    "addiu %[dst], %[dst], 1 \n"
    "bne %[dst], $a3, $last8loop \n"
    " sb $v1, -1(%[dst]) \n"

    "leave: \n"
    " j $ra \n"
    " nop \n"

    //
    // UNALIGNED case
    //

    "unaligned: \n"
    // got here with a3="negu a1"
    "andi $a3, $a3, 0x3 \n" // a1 is word aligned?
    "beqz $a3, $ua_chk16w \n"
    " subu %[count], %[count], $a3 \n"
    // bytes left after initial a3 bytes
    "lwr $v1, 0(%[src]) \n"
    "lwl $v1, 3(%[src]) \n"
    "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3
    "swr $v1, 0(%[dst]) \n"
    "addu %[dst], %[dst], $a3 \n"
    // below the dst will be word aligned (NOTE1)
    "$ua_chk16w: \n"
    "andi $t8, %[count], 0x3f \n" // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq %[count], $t8, ua_chk8w \n"
    // if count==t8, no 64-byte chunks
    // There will be at most 1 32-byte chunk after it
    "subu $a3, %[count], $t8 \n" // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu $a3, %[dst], $a3 \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu $t0, %[dst], %[count] \n" // t0 "past the end"
    "subu $t9, $t0, 160 \n"
    // t9 is the "last safe pref 30, 128(a1)" address
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 0(%[src]) \n" // first line of src
    "pref 0, 32(%[src]) \n" // second line addr 32
    "pref 0, 64(%[src]) \n"
    "pref 30, 32(%[dst]) \n"
#endif
    // safe, as we have at least 64 bytes ahead
    // If a1 > t9, don't use "pref 30" at all
    "sgtu $v1, %[dst], $t9 \n"
    "bgtz $v1, $ua_loop16w \n"
    // skip "pref 30,64(a1)" for too short arrays
    " nop \n"
    // otherwise, start with using pref30
#ifdef HAS_MIPS_PREFETCH
    "pref 30, 64(%[dst]) \n"
#endif
    "$ua_loop16w: \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 96(%[src]) \n"
#endif
    "lwr $t0, 0(%[src]) \n"
    "lwl $t0, 3(%[src]) \n"
    "lwr $t1, 4(%[src]) \n"
    "bgtz $v1, $ua_skip_pref30_96 \n"
    " lwl $t1, 7(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 30, 96(%[dst]) \n"
#endif
    // continue setting up the dest, addr 96
    "$ua_skip_pref30_96: \n"
    "lwr $t2, 8(%[src]) \n"
    "lwl $t2, 11(%[src]) \n"
    "lwr $t3, 12(%[src]) \n"
    "lwl $t3, 15(%[src]) \n"
    "lwr $t4, 16(%[src]) \n"
    "lwl $t4, 19(%[src]) \n"
    "lwr $t5, 20(%[src]) \n"
    "lwl $t5, 23(%[src]) \n"
    "lwr $t6, 24(%[src]) \n"
    "lwl $t6, 27(%[src]) \n"
    "lwr $t7, 28(%[src]) \n"
    "lwl $t7, 31(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 128(%[src]) \n"
#endif
    // bring the next lines of src, addr 128
    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "lwr $t0, 32(%[src]) \n"
    "lwl $t0, 35(%[src]) \n"
    "lwr $t1, 36(%[src]) \n"
    "bgtz $v1, ua_skip_pref30_128 \n"
    " lwl $t1, 39(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 30, 128(%[dst]) \n"
#endif
    // continue setting up the dest, addr 128
    "ua_skip_pref30_128: \n"

    "lwr $t2, 40(%[src]) \n"
    "lwl $t2, 43(%[src]) \n"
    "lwr $t3, 44(%[src]) \n"
    "lwl $t3, 47(%[src]) \n"
    "lwr $t4, 48(%[src]) \n"
    "lwl $t4, 51(%[src]) \n"
    "lwr $t5, 52(%[src]) \n"
    "lwl $t5, 55(%[src]) \n"
    "lwr $t6, 56(%[src]) \n"
    "lwl $t6, 59(%[src]) \n"
    "lwr $t7, 60(%[src]) \n"
    "lwl $t7, 63(%[src]) \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 160(%[src]) \n"
#endif
    // bring the next lines of src, addr 160
    "sw $t0, 32(%[dst]) \n"
    "sw $t1, 36(%[dst]) \n"
    "sw $t2, 40(%[dst]) \n"
    "sw $t3, 44(%[dst]) \n"
    "sw $t4, 48(%[dst]) \n"
    "sw $t5, 52(%[dst]) \n"
    "sw $t6, 56(%[dst]) \n"
    "sw $t7, 60(%[dst]) \n"

    "addiu %[dst], %[dst], 64 \n" // adding 64 to dest
    "sgtu $v1, %[dst], $t9 \n"
    "bne %[dst], $a3, $ua_loop16w \n"
    " addiu %[src], %[src], 64 \n" // adding 64 to src
    "move %[count], $t8 \n"

    // Here we have src and dest word-aligned but less than 64 bytes to go

    "ua_chk8w: \n"
#ifdef HAS_MIPS_PREFETCH
    "pref 0, 0x0(%[src]) \n"
#endif
    "andi $t8, %[count], 0x1f \n" // 32-byte chunk?
    // t8 is the remainder count
    "beq %[count], $t8, $ua_chk1w \n"
    // when count==t8, no 32-byte chunk

    "lwr $t0, 0(%[src]) \n"
    "lwl $t0, 3(%[src]) \n"
    "lwr $t1, 4(%[src]) \n"
    "lwl $t1, 7(%[src]) \n"
    "lwr $t2, 8(%[src]) \n"
    "lwl $t2, 11(%[src]) \n"
    "lwr $t3, 12(%[src]) \n"
    "lwl $t3, 15(%[src]) \n"
    "lwr $t4, 16(%[src]) \n"
    "lwl $t4, 19(%[src]) \n"
    "lwr $t5, 20(%[src]) \n"
    "lwl $t5, 23(%[src]) \n"
    "lwr $t6, 24(%[src]) \n"
    "lwl $t6, 27(%[src]) \n"
    "lwr $t7, 28(%[src]) \n"
    "lwl $t7, 31(%[src]) \n"
    "addiu %[src], %[src], 32 \n"

    "sw $t0, 0(%[dst]) \n"
    "sw $t1, 4(%[dst]) \n"
    "sw $t2, 8(%[dst]) \n"
    "sw $t3, 12(%[dst]) \n"
    "sw $t4, 16(%[dst]) \n"
    "sw $t5, 20(%[dst]) \n"
    "sw $t6, 24(%[dst]) \n"
    "sw $t7, 28(%[dst]) \n"
    "addiu %[dst], %[dst], 32 \n"

    "$ua_chk1w: \n"
    "andi %[count], $t8, 0x3 \n"
    // now count is the remainder past 1w chunks
    "beq %[count], $t8, ua_smallCopy \n"
    " subu $a3, $t8, %[count] \n"
    // a3 is count of bytes in 1w chunks
    "addu $a3, %[dst], $a3 \n"
    // now a3 is the dst address past the 1w chunks

    // copying in words (4-byte chunks)
    "$ua_wordCopy_loop: \n"
    "lwr $v1, 0(%[src]) \n"
    "lwl $v1, 3(%[src]) \n"
    "addiu %[src], %[src], 4 \n"
    "addiu %[dst], %[dst], 4 \n"
    // note: dst=a1 is word aligned here, see NOTE1
    "bne %[dst], $a3, $ua_wordCopy_loop \n"
    " sw $v1, -4(%[dst]) \n"

    // Now less than 4 bytes (value in count) left to copy
    "ua_smallCopy: \n"
    "beqz %[count], leave \n"
    " addu $a3, %[dst], %[count] \n" // a3 = last dst address
    "$ua_smallCopy_loop: \n"
    "lb $v1, 0(%[src]) \n"
    "addiu %[src], %[src], 1 \n"
    "addiu %[dst], %[dst], 1 \n"
    "bne %[dst], $a3, $ua_smallCopy_loop \n"
    " sb $v1, -1(%[dst]) \n"

    "j $ra \n"
    " nop \n"
    ".set at \n"
    ".set reorder \n"
    : [dst] "+r" (dst), [src] "+r" (src),
      [count] "+r" (count)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
      "t8", "t9", "a3", "v1", "at"
  );
}
#endif // HAS_COPYROW_MIPS

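// For reference, the routine above is behaviorally a plain byte copy;
// the assembly only adds alignment handling, unrolling and prefetch.
// A minimal C sketch of the contract (illustrative only, not part of
// the build; libyuv's portable fallback is CopyRow_C):
//
//   void CopyRow_sketch(const uint8* src, uint8* dst, int count) {
//     int i;
//     for (i = 0; i < count; ++i) {
//       dst[i] = src[i];  // the assembly moves up to 64 bytes per iteration
//     }
//   }
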
// MIPS DSPR2 functions
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
    (__mips_dsp_rev >= 2)
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                           int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiples of 16
    "blez $t4, 2f \n"
    " andi %[width], %[width], 0xf \n" // residual

    ".p2align 2 \n"
    "1: \n"
    "addiu $t4, $t4, -1 \n"
    "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
    "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
    "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
    "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
    "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
    "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
    "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
    "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
    "addiu %[src_uv], %[src_uv], 32 \n"
    "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
    "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
    "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
    "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
    "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
    "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
    "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
    "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
    "sw $t9, 0(%[dst_v]) \n"
    "sw $t0, 0(%[dst_u]) \n"
    "sw $t1, 4(%[dst_v]) \n"
    "sw $t2, 4(%[dst_u]) \n"
    "sw $t3, 8(%[dst_v]) \n"
    "sw $t5, 8(%[dst_u]) \n"
    "sw $t6, 12(%[dst_v]) \n"
    "sw $t7, 12(%[dst_u]) \n"
    "addiu %[dst_v], %[dst_v], 16 \n"
    "bgtz $t4, 1b \n"
    " addiu %[dst_u], %[dst_u], 16 \n"

    "beqz %[width], 3f \n"
    " nop \n"

    "2: \n"
    "lbu $t0, 0(%[src_uv]) \n"
    "lbu $t1, 1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], 2 \n"
    "addiu %[width], %[width], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[width], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"

    "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [width] "+r" (width),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v)
    :
    : "t0", "t1", "t2", "t3",
      "t4", "t5", "t6", "t7", "t8", "t9"
  );
}

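// For reference, a C sketch of the deinterleave the routine above
// performs 16 pixels at a time (illustrative only, not part of the
// build; libyuv's portable version is SplitUVRow_C):
//
//   void SplitUVRow_sketch(const uint8* src_uv, uint8* dst_u,
//                          uint8* dst_v, int width) {
//     int x;
//     for (x = 0; x < width; ++x) {
//       dst_u[x] = src_uv[2 * x];      // even bytes are U
//       dst_v[x] = src_uv[2 * x + 1];  // odd bytes are V
//     }
//   }
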
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
                                     uint8* dst_v, int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "srl $t4, %[width], 4 \n" // multiples of 16
    "blez $t4, 2f \n"
    " andi %[width], %[width], 0xf \n" // residual

    ".p2align 2 \n"
    "1: \n"
    "addiu $t4, $t4, -1 \n"
    "lwr $t0, 0(%[src_uv]) \n"
    "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
    "lwr $t1, 4(%[src_uv]) \n"
    "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
    "lwr $t2, 8(%[src_uv]) \n"
    "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
    "lwr $t3, 12(%[src_uv]) \n"
    "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
    "lwr $t5, 16(%[src_uv]) \n"
    "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
    "lwr $t6, 20(%[src_uv]) \n"
    "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
    "lwr $t7, 24(%[src_uv]) \n"
    "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
    "lwr $t8, 28(%[src_uv]) \n"
    "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
    "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
    "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
    "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
    "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
    "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
    "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
    "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
    "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
    "addiu %[src_uv], %[src_uv], 32 \n"
    "swr $t9, 0(%[dst_v]) \n"
    "swl $t9, 3(%[dst_v]) \n"
    "swr $t0, 0(%[dst_u]) \n"
    "swl $t0, 3(%[dst_u]) \n"
    "swr $t1, 4(%[dst_v]) \n"
    "swl $t1, 7(%[dst_v]) \n"
    "swr $t2, 4(%[dst_u]) \n"
    "swl $t2, 7(%[dst_u]) \n"
    "swr $t3, 8(%[dst_v]) \n"
    "swl $t3, 11(%[dst_v]) \n"
    "swr $t5, 8(%[dst_u]) \n"
    "swl $t5, 11(%[dst_u]) \n"
    "swr $t6, 12(%[dst_v]) \n"
    "swl $t6, 15(%[dst_v]) \n"
    "swr $t7, 12(%[dst_u]) \n"
    "swl $t7, 15(%[dst_u]) \n"
    "addiu %[dst_u], %[dst_u], 16 \n"
    "bgtz $t4, 1b \n"
    " addiu %[dst_v], %[dst_v], 16 \n"

    "beqz %[width], 3f \n"
    " nop \n"

    "2: \n"
    "lbu $t0, 0(%[src_uv]) \n"
    "lbu $t1, 1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], 2 \n"
    "addiu %[width], %[width], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[width], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"

    "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [width] "+r" (width),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v)
    :
    : "t0", "t1", "t2", "t3",
      "t4", "t5", "t6", "t7", "t8", "t9"
  );
}

void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"

    "srl $t4, %[width], 4 \n" // multiples of 16
    "andi $t5, %[width], 0xf \n"
    "blez $t4, 2f \n"
    " addu %[src], %[src], %[width] \n" // src += width

    ".p2align 2 \n"
    "1: \n"
    "lw $t0, -16(%[src]) \n" // |3|2|1|0|
    "lw $t1, -12(%[src]) \n" // |7|6|5|4|
    "lw $t2, -8(%[src]) \n" // |11|10|9|8|
    "lw $t3, -4(%[src]) \n" // |15|14|13|12|
    "wsbh $t0, $t0 \n" // |2|3|0|1|
    "wsbh $t1, $t1 \n" // |6|7|4|5|
    "wsbh $t2, $t2 \n" // |10|11|8|9|
    "wsbh $t3, $t3 \n" // |14|15|12|13|
    "rotr $t0, $t0, 16 \n" // |0|1|2|3|
    "rotr $t1, $t1, 16 \n" // |4|5|6|7|
    "rotr $t2, $t2, 16 \n" // |8|9|10|11|
    "rotr $t3, $t3, 16 \n" // |12|13|14|15|
    "addiu %[src], %[src], -16 \n"
    "addiu $t4, $t4, -1 \n"
    "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
    "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
    "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
    "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
    "bgtz $t4, 1b \n"
    " addiu %[dst], %[dst], 16 \n"
    "beqz $t5, 3f \n"
    " nop \n"

    "2: \n"
    "lbu $t0, -1(%[src]) \n"
    "addiu $t5, $t5, -1 \n"
    "addiu %[src], %[src], -1 \n"
    "sb $t0, 0(%[dst]) \n"
    "bgez $t5, 2b \n"
    " addiu %[dst], %[dst], 1 \n"

    "3: \n"
    ".set pop \n"
    : [src] "+r" (src), [dst] "+r" (dst)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4", "t5"
  );
}

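// For reference, a C sketch of the byte reversal the routine above
// performs (illustrative only; libyuv's portable version is
// MirrorRow_C; the wsbh+rotr pair reverses four bytes at a time):
//
//   void MirrorRow_sketch(const uint8* src, uint8* dst, int width) {
//     int x;
//     for (x = 0; x < width; ++x) {
//       dst[x] = src[width - 1 - x];
//     }
//   }
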
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                            int width) {
  int x = 0;
  int y = 0;
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"

    "addu $t4, %[width], %[width] \n"
    "srl %[x], %[width], 4 \n"
    "andi %[y], %[width], 0xf \n"
    "blez %[x], 2f \n"
    " addu %[src_uv], %[src_uv], $t4 \n"

    ".p2align 2 \n"
    "1: \n"
    "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
    "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
    "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
    "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
    "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
    "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
    "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
    "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|

    "rotr $t0, $t0, 16 \n" // |1|0|3|2|
    "rotr $t1, $t1, 16 \n" // |5|4|7|6|
    "rotr $t2, $t2, 16 \n" // |9|8|11|10|
    "rotr $t3, $t3, 16 \n" // |13|12|15|14|
    "rotr $t4, $t4, 16 \n" // |17|16|19|18|
    "rotr $t6, $t6, 16 \n" // |21|20|23|22|
    "rotr $t7, $t7, 16 \n" // |25|24|27|26|
    "rotr $t8, $t8, 16 \n" // |29|28|31|30|
    "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
    "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
    "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
    "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
    "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
    "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
    "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
    "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
    "addiu %[src_uv], %[src_uv], -32 \n"
    "addiu %[x], %[x], -1 \n"
    "swr $t4, 0(%[dst_u]) \n"
    "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
    "swr $t6, 0(%[dst_v]) \n"
    "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
    "swr $t2, 4(%[dst_u]) \n"
    "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
    "swr $t3, 4(%[dst_v]) \n"
    "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
    "swr $t0, 8(%[dst_u]) \n"
    "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
    "swr $t1, 8(%[dst_v]) \n"
    "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
    "swr $t9, 12(%[dst_u]) \n"
    "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
    "swr $t5, 12(%[dst_v]) \n"
    "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
    "addiu %[dst_v], %[dst_v], 16 \n"
    "bgtz %[x], 1b \n"
    " addiu %[dst_u], %[dst_u], 16 \n"
    "beqz %[y], 3f \n"
    " nop \n"
    "b 2f \n"
    " nop \n"

    "2: \n"
    "lbu $t0, -2(%[src_uv]) \n"
    "lbu $t1, -1(%[src_uv]) \n"
    "addiu %[src_uv], %[src_uv], -2 \n"
    "addiu %[y], %[y], -1 \n"
    "sb $t0, 0(%[dst_u]) \n"
    "sb $t1, 0(%[dst_v]) \n"
    "addiu %[dst_u], %[dst_u], 1 \n"
    "bgtz %[y], 2b \n"
    " addiu %[dst_v], %[dst_v], 1 \n"

    "3: \n"
    ".set pop \n"
    : [src_uv] "+r" (src_uv),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v),
      [x] "=&r" (x),
      [y] "+r" (y)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4",
      "t5", "t6", "t7", "t8", "t9"
  );
}

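// For reference, a C sketch of the mirrored deinterleave the routine
// above performs (illustrative only; libyuv's portable version is
// MirrorUVRow_C):
//
//   void MirrorUVRow_sketch(const uint8* src_uv, uint8* dst_u,
//                           uint8* dst_v, int width) {
//     int x;
//     src_uv += (width - 1) * 2;  // start at the last UV pair
//     for (x = 0; x < width; ++x, src_uv -= 2) {
//       dst_u[x] = src_uv[0];
//       dst_v[x] = src_uv[1];
//     }
//   }
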
// Convert 4 pixels of I422 (4 Y plus 2 U and 2 V samples) and arrange the
// RGB values into
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
#define I422ToTransientMipsRGB \
  "lw $t0, 0(%[y_buf]) \n" \
  "lhu $t1, 0(%[u_buf]) \n" \
  "lhu $t2, 0(%[v_buf]) \n" \
  "preceu.ph.qbr $t1, $t1 \n" \
  "preceu.ph.qbr $t2, $t2 \n" \
  "preceu.ph.qbra $t3, $t0 \n" \
  "preceu.ph.qbla $t0, $t0 \n" \
  "subu.ph $t1, $t1, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t3, $t3, $s4 \n" \
  "subu.ph $t0, $t0, $s4 \n" \
  "mul.ph $t3, $t3, $s0 \n" \
  "mul.ph $t0, $t0, $s0 \n" \
  "shll.ph $t4, $t1, 0x7 \n" \
  "subu.ph $t4, $t4, $t1 \n" \
  "mul.ph $t6, $t1, $s1 \n" \
  "mul.ph $t1, $t2, $s2 \n" \
  "addq_s.ph $t5, $t4, $t3 \n" \
  "addq_s.ph $t4, $t4, $t0 \n" \
  "shra.ph $t5, $t5, 6 \n" \
  "shra.ph $t4, $t4, 6 \n" \
  "addiu %[u_buf], 2 \n" \
  "addiu %[v_buf], 2 \n" \
  "addu.ph $t6, $t6, $t1 \n" \
  "mul.ph $t1, $t2, $s3 \n" \
  "addu.ph $t9, $t6, $t3 \n" \
  "addu.ph $t8, $t6, $t0 \n" \
  "shra.ph $t9, $t9, 6 \n" \
  "shra.ph $t8, $t8, 6 \n" \
  "addu.ph $t2, $t1, $t3 \n" \
  "addu.ph $t1, $t1, $t0 \n" \
  "shra.ph $t2, $t2, 6 \n" \
  "shra.ph $t1, $t1, 6 \n" \
  "subu.ph $t5, $t5, $s5 \n" \
  "subu.ph $t4, $t4, $s5 \n" \
  "subu.ph $t9, $t9, $s5 \n" \
  "subu.ph $t8, $t8, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t1, $t1, $s5 \n" \
  "shll_s.ph $t5, $t5, 8 \n" \
  "shll_s.ph $t4, $t4, 8 \n" \
  "shll_s.ph $t9, $t9, 8 \n" \
  "shll_s.ph $t8, $t8, 8 \n" \
  "shll_s.ph $t2, $t2, 8 \n" \
  "shll_s.ph $t1, $t1, 8 \n" \
  "shra.ph $t5, $t5, 8 \n" \
  "shra.ph $t4, $t4, 8 \n" \
  "shra.ph $t9, $t9, 8 \n" \
  "shra.ph $t8, $t8, 8 \n" \
  "shra.ph $t2, $t2, 8 \n" \
  "shra.ph $t1, $t1, 8 \n" \
  "addu.ph $t5, $t5, $s5 \n" \
  "addu.ph $t4, $t4, $s5 \n" \
  "addu.ph $t9, $t9, $s5 \n" \
  "addu.ph $t8, $t8, $s5 \n" \
  "addu.ph $t2, $t2, $s5 \n" \
  "addu.ph $t1, $t1, $s5 \n"

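// Per pixel, the macro above amounts to the following fixed-point math,
// as read from the code and the constants the callers load below
// (YG = 74, UG = -25, VG = -52, VR = 102, and an effective UB of 127
// from the shll.ph/subu.ph pair), with 6 fractional bits; the trailing
// subu/shll_s/shra/addu sequence saturates each channel to [0, 255].
// A sketch (clamp255 saturates to the 0..255 range):
//
//   b = clamp255(((y - 16) * 74 + (u - 128) * 127) >> 6);
//   g = clamp255(((y - 16) * 74 - (u - 128) * 25 - (v - 128) * 52) >> 6);
//   r = clamp255(((y - 16) * 74 + (v - 128) * 102) >> 6);
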
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128| // clipping
    "lui $s6, 0xff00 \n"
    "ori $s6, 0xff00 \n" // |ff|00|ff|00|

    ".p2align 2 \n"
    "1: \n"
    I422ToTransientMipsRGB
    // Arranging into argb format
    "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
    "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
    "addiu %[width], -4 \n"
    "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
    "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
    "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|

    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
    "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
    "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
    "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
    "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
    "sll $t9, $t9, 16 \n"
    "sll $t8, $t8, 16 \n"
    "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
    "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    " addiu %[rgb_buf], 16 \n"
    "2: \n"
    ".set pop \n"
    : [y_buf] "+r" (y_buf),
      [u_buf] "+r" (u_buf),
      [v_buf] "+r" (v_buf),
      [width] "+r" (width),
      [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}

void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128|
    "lui $s6, 0xff00 \n"
    "ori $s6, 0xff00 \n" // |ff|00|ff|00|

    ".p2align 2 \n"
    "1: \n"
    I422ToTransientMipsRGB
    // Arranging into abgr format
    "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
    "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
    "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
    "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|

    "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
    "addiu %[width], -4 \n"
    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
    "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
    "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
    "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
    "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
    "sll $t9, $t9, 16 \n"
    "sll $t8, $t8, 16 \n"
    "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
    "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    " addiu %[rgb_buf], 16 \n"
    "2: \n"
    ".set pop \n"
    : [y_buf] "+r" (y_buf),
      [u_buf] "+r" (u_buf),
      [v_buf] "+r" (v_buf),
      [width] "+r" (width),
      [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}

void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"
    "beqz %[width], 2f \n"
    " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
    "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
    "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
    "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
    "repl.ph $s4, 16 \n" // |0|16|0|16|
    "repl.ph $s5, 128 \n" // |128|128|
    "lui $s6, 0xff \n"
    "ori $s6, 0xff \n" // |00|ff|00|ff|

    ".p2align 2 \n"
    "1: \n"
    I422ToTransientMipsRGB
    // Arranging into bgra format
    "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
    "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
    "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
    "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|

    "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
    "addiu %[width], -4 \n"
    "addiu %[y_buf], 4 \n"
    "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
    "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
    "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
    "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
    "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
    "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
    "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
    "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
    "sll $t1, $t1, 16 \n"
    "sll $t2, $t2, 16 \n"
    "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
    "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
    // Store results.
    "sw $t2, 0(%[rgb_buf]) \n"
    "sw $t0, 4(%[rgb_buf]) \n"
    "sw $t1, 8(%[rgb_buf]) \n"
    "sw $t3, 12(%[rgb_buf]) \n"
    "bnez %[width], 1b \n"
    " addiu %[rgb_buf], 16 \n"
    "2: \n"
    ".set pop \n"
    : [y_buf] "+r" (y_buf),
      [u_buf] "+r" (u_buf),
      [v_buf] "+r" (v_buf),
      [width] "+r" (width),
      [rgb_buf] "+r" (rgb_buf)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}

// Bilinear filter 8x2 -> 8x1
void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
                                ptrdiff_t src_stride, int dst_width,
                                int source_y_fraction) {
  int y0_fraction = 256 - source_y_fraction;
  const uint8* src_ptr1 = src_ptr + src_stride;

  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"

    "replv.ph $t0, %[y0_fraction] \n"
    "replv.ph $t1, %[source_y_fraction] \n"

    ".p2align 2 \n"
    "1: \n"
    "lw $t2, 0(%[src_ptr]) \n"
    "lw $t3, 0(%[src_ptr1]) \n"
    "lw $t4, 4(%[src_ptr]) \n"
    "lw $t5, 4(%[src_ptr1]) \n"
    "muleu_s.ph.qbl $t6, $t2, $t0 \n"
    "muleu_s.ph.qbr $t7, $t2, $t0 \n"
    "muleu_s.ph.qbl $t8, $t3, $t1 \n"
    "muleu_s.ph.qbr $t9, $t3, $t1 \n"
    "muleu_s.ph.qbl $t2, $t4, $t0 \n"
    "muleu_s.ph.qbr $t3, $t4, $t0 \n"
    "muleu_s.ph.qbl $t4, $t5, $t1 \n"
    "muleu_s.ph.qbr $t5, $t5, $t1 \n"
    "addq.ph $t6, $t6, $t8 \n"
    "addq.ph $t7, $t7, $t9 \n"
    "addq.ph $t2, $t2, $t4 \n"
    "addq.ph $t3, $t3, $t5 \n"
    "shra.ph $t6, $t6, 8 \n"
    "shra.ph $t7, $t7, 8 \n"
    "shra.ph $t2, $t2, 8 \n"
    "shra.ph $t3, $t3, 8 \n"
    "precr.qb.ph $t6, $t6, $t7 \n"
    "precr.qb.ph $t2, $t2, $t3 \n"
    "addiu %[src_ptr], %[src_ptr], 8 \n"
    "addiu %[src_ptr1], %[src_ptr1], 8 \n"
    "addiu %[dst_width], %[dst_width], -8 \n"
    "sw $t6, 0(%[dst_ptr]) \n"
    "sw $t2, 4(%[dst_ptr]) \n"
    "bgtz %[dst_width], 1b \n"
    " addiu %[dst_ptr], %[dst_ptr], 8 \n"

    ".set pop \n"
    : [dst_ptr] "+r" (dst_ptr),
      [src_ptr1] "+r" (src_ptr1),
      [src_ptr] "+r" (src_ptr),
      [dst_width] "+r" (dst_width)
    : [source_y_fraction] "r" (source_y_fraction),
      [y0_fraction] "r" (y0_fraction),
      [src_stride] "r" (src_stride)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9"
  );
}
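
// For reference, a C sketch of the weighted blend the routine above
// performs 8 pixels at a time (illustrative only; cf. libyuv's
// portable InterpolateRow_C):
//
//   void InterpolateRows_sketch(uint8* dst_ptr, const uint8* src_ptr,
//                               ptrdiff_t src_stride, int dst_width,
//                               int source_y_fraction) {
//     int y0_fraction = 256 - source_y_fraction;  // weight of row 0
//     const uint8* src_ptr1 = src_ptr + src_stride;
//     int x;
//     for (x = 0; x < dst_width; ++x) {
//       dst_ptr[x] = (uint8)((src_ptr[x] * y0_fraction +
//                             src_ptr1[x] * source_y_fraction) >> 8);
//     }
//   }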
#endif // __mips_dsp_rev >= 2

#endif // defined(__mips__)

#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
