media/libyuv/source/rotate_mips.cc

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Wed, 31 Dec 2014 06:09:35 +0100
changeset    0 6474c204b198
permissions  -rw-r--r--

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if !defined(LIBYUV_DISABLE_MIPS) && \
    defined(__mips_dsp) && (__mips_dsp_rev >= 2)

void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride,
                             int width) {
  __asm__ __volatile__ (
    ".set push                                  \n"
    ".set noreorder                             \n"
    "sll          $t2, %[src_stride], 0x1       \n"  // src_stride x 2
    "sll          $t4, %[src_stride], 0x2       \n"  // src_stride x 4
    "sll          $t9, %[src_stride], 0x3       \n"  // src_stride x 8
    "addu         $t3, $t2, %[src_stride]       \n"
    "addu         $t5, $t4, %[src_stride]       \n"
    "addu         $t6, $t2, $t4                 \n"
    "andi         $t0, %[dst], 0x3              \n"
    "andi         $t1, %[dst_stride], 0x3       \n"
    "or           $t0, $t0, $t1                 \n"
    "bnez         $t0, 11f                      \n"
    " subu        $t7, $t9, %[src_stride]       \n"
    // dst + dst_stride word aligned
    "1:                                         \n"
    "lbu          $t0, 0(%[src])                \n"
    "lbux         $t1, %[src_stride](%[src])    \n"
    "lbux         $t8, $t2(%[src])              \n"
    "lbux         $t9, $t3(%[src])              \n"
    "sll          $t1, $t1, 16                  \n"
    "sll          $t9, $t9, 16                  \n"
    "or           $t0, $t0, $t1                 \n"
    "or           $t8, $t8, $t9                 \n"
    "precr.qb.ph  $s0, $t8, $t0                 \n"
    "lbux         $t0, $t4(%[src])              \n"
    "lbux         $t1, $t5(%[src])              \n"
    "lbux         $t8, $t6(%[src])              \n"
    "lbux         $t9, $t7(%[src])              \n"
    "sll          $t1, $t1, 16                  \n"
    "sll          $t9, $t9, 16                  \n"
    "or           $t0, $t0, $t1                 \n"
    "or           $t8, $t8, $t9                 \n"
    "precr.qb.ph  $s1, $t8, $t0                 \n"
    "sw           $s0, 0(%[dst])                \n"
    "addiu        %[width], -1                  \n"
    "addiu        %[src], 1                     \n"
    "sw           $s1, 4(%[dst])                \n"
    "bnez         %[width], 1b                  \n"
    " addu        %[dst], %[dst], %[dst_stride] \n"
    "b            2f                            \n"
    // dst + dst_stride unaligned
    "11:                                        \n"
    "lbu          $t0, 0(%[src])                \n"
    "lbux         $t1, %[src_stride](%[src])    \n"
    "lbux         $t8, $t2(%[src])              \n"
    "lbux         $t9, $t3(%[src])              \n"
    "sll          $t1, $t1, 16                  \n"
    "sll          $t9, $t9, 16                  \n"
    "or           $t0, $t0, $t1                 \n"
    "or           $t8, $t8, $t9                 \n"
    "precr.qb.ph  $s0, $t8, $t0                 \n"
    "lbux         $t0, $t4(%[src])              \n"
    "lbux         $t1, $t5(%[src])              \n"
    "lbux         $t8, $t6(%[src])              \n"
    "lbux         $t9, $t7(%[src])              \n"
    "sll          $t1, $t1, 16                  \n"
    "sll          $t9, $t9, 16                  \n"
    "or           $t0, $t0, $t1                 \n"
    "or           $t8, $t8, $t9                 \n"
    "precr.qb.ph  $s1, $t8, $t0                 \n"
    "swr          $s0, 0(%[dst])                \n"
    "swl          $s0, 3(%[dst])                \n"
    "addiu        %[width], -1                  \n"
    "addiu        %[src], 1                     \n"
    "swr          $s1, 4(%[dst])                \n"
    "swl          $s1, 7(%[dst])                \n"
    "bnez         %[width], 11b                 \n"
    " addu        %[dst], %[dst], %[dst_stride] \n"
    "2:                                         \n"
    ".set pop                                   \n"
    : [src] "+r" (src),
      [dst] "+r" (dst),
      [width] "+r" (width)
    : [src_stride] "r" (src_stride),
      [dst_stride] "r" (dst_stride)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1"
  );
}
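// For readers unfamiliar with the DSPR2 byte-gather above, here is a minimal
// plain-C sketch of the same operation (the helper name is hypothetical, not
// part of libyuv's API): each pass of the loop collects one byte from each of
// 8 consecutive source rows and stores them as 8 contiguous bytes of a single
// destination row.
static inline void TransposeWx8_C_sketch(const uint8* src, int src_stride,
                                         uint8* dst, int dst_stride,
                                         int width) {
  int i, j;
  for (i = 0; i < width; ++i) {    // one destination row per source column
    for (j = 0; j < 8; ++j) {      // gather a column of 8 source bytes
      dst[i * dst_stride + j] = src[j * src_stride + i];
    }
  }
}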
michael@0 107
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
                                  uint8* dst, int dst_stride,
                                  int width) {
  __asm__ __volatile__ (
    ".set noat                                  \n"
    ".set push                                  \n"
    ".set noreorder                             \n"
    "beqz         %[width], 2f                  \n"
    " sll         $t2, %[src_stride], 0x1       \n"  // src_stride x 2
    "sll          $t4, %[src_stride], 0x2       \n"  // src_stride x 4
    "sll          $t9, %[src_stride], 0x3       \n"  // src_stride x 8
    "addu         $t3, $t2, %[src_stride]       \n"
    "addu         $t5, $t4, %[src_stride]       \n"
    "addu         $t6, $t2, $t4                 \n"

    "srl          $AT, %[width], 0x2            \n"
    "andi         $t0, %[dst], 0x3              \n"
    "andi         $t1, %[dst_stride], 0x3       \n"
    "or           $t0, $t0, $t1                 \n"
    "bnez         $t0, 11f                      \n"
    " subu        $t7, $t9, %[src_stride]       \n"
    // dst + dst_stride word aligned
    "1:                                         \n"
    "lw           $t0, 0(%[src])                \n"
    "lwx          $t1, %[src_stride](%[src])    \n"
    "lwx          $t8, $t2(%[src])              \n"
    "lwx          $t9, $t3(%[src])              \n"

    // t0 = | 30 | 20 | 10 | 00 |
    // t1 = | 31 | 21 | 11 | 01 |
    // t8 = | 32 | 22 | 12 | 02 |
    // t9 = | 33 | 23 | 13 | 03 |

    "precr.qb.ph  $s0, $t1, $t0                 \n"
    "precr.qb.ph  $s1, $t9, $t8                 \n"
    "precrq.qb.ph $s2, $t1, $t0                 \n"
    "precrq.qb.ph $s3, $t9, $t8                 \n"

    // s0 = | 21 | 01 | 20 | 00 |
    // s1 = | 23 | 03 | 22 | 02 |
    // s2 = | 31 | 11 | 30 | 10 |
    // s3 = | 33 | 13 | 32 | 12 |

    "precr.qb.ph  $s4, $s1, $s0                 \n"
    "precrq.qb.ph $s5, $s1, $s0                 \n"
    "precr.qb.ph  $s6, $s3, $s2                 \n"
    "precrq.qb.ph $s7, $s3, $s2                 \n"

    // s4 = | 03 | 02 | 01 | 00 |
    // s5 = | 23 | 22 | 21 | 20 |
    // s6 = | 13 | 12 | 11 | 10 |
    // s7 = | 33 | 32 | 31 | 30 |

    "lwx          $t0, $t4(%[src])              \n"
    "lwx          $t1, $t5(%[src])              \n"
    "lwx          $t8, $t6(%[src])              \n"
    "lwx          $t9, $t7(%[src])              \n"

    // t0 = | 34 | 24 | 14 | 04 |
    // t1 = | 35 | 25 | 15 | 05 |
    // t8 = | 36 | 26 | 16 | 06 |
    // t9 = | 37 | 27 | 17 | 07 |

    "precr.qb.ph  $s0, $t1, $t0                 \n"
    "precr.qb.ph  $s1, $t9, $t8                 \n"
    "precrq.qb.ph $s2, $t1, $t0                 \n"
    "precrq.qb.ph $s3, $t9, $t8                 \n"

    // s0 = | 25 | 05 | 24 | 04 |
    // s1 = | 27 | 07 | 26 | 06 |
    // s2 = | 35 | 15 | 34 | 14 |
    // s3 = | 37 | 17 | 36 | 16 |

    "precr.qb.ph  $t0, $s1, $s0                 \n"
    "precrq.qb.ph $t1, $s1, $s0                 \n"
    "precr.qb.ph  $t8, $s3, $s2                 \n"
    "precrq.qb.ph $t9, $s3, $s2                 \n"

    // t0 = | 07 | 06 | 05 | 04 |
    // t1 = | 27 | 26 | 25 | 24 |
    // t8 = | 17 | 16 | 15 | 14 |
    // t9 = | 37 | 36 | 35 | 34 |

    "addu         $s0, %[dst], %[dst_stride]    \n"
    "addu         $s1, $s0, %[dst_stride]       \n"
    "addu         $s2, $s1, %[dst_stride]       \n"

    "sw           $s4, 0(%[dst])                \n"
    "sw           $t0, 4(%[dst])                \n"
    "sw           $s6, 0($s0)                   \n"
    "sw           $t8, 4($s0)                   \n"
    "sw           $s5, 0($s1)                   \n"
    "sw           $t1, 4($s1)                   \n"
    "sw           $s7, 0($s2)                   \n"
    "sw           $t9, 4($s2)                   \n"

    "addiu        $AT, -1                       \n"
    "addiu        %[src], 4                     \n"

    "bnez         $AT, 1b                       \n"
    " addu        %[dst], $s2, %[dst_stride]    \n"
    "b            2f                            \n"
    // dst + dst_stride unaligned
    "11:                                        \n"
    "lw           $t0, 0(%[src])                \n"
    "lwx          $t1, %[src_stride](%[src])    \n"
    "lwx          $t8, $t2(%[src])              \n"
    "lwx          $t9, $t3(%[src])              \n"

    // t0 = | 30 | 20 | 10 | 00 |
    // t1 = | 31 | 21 | 11 | 01 |
    // t8 = | 32 | 22 | 12 | 02 |
    // t9 = | 33 | 23 | 13 | 03 |

    "precr.qb.ph  $s0, $t1, $t0                 \n"
    "precr.qb.ph  $s1, $t9, $t8                 \n"
    "precrq.qb.ph $s2, $t1, $t0                 \n"
    "precrq.qb.ph $s3, $t9, $t8                 \n"

    // s0 = | 21 | 01 | 20 | 00 |
    // s1 = | 23 | 03 | 22 | 02 |
    // s2 = | 31 | 11 | 30 | 10 |
    // s3 = | 33 | 13 | 32 | 12 |

    "precr.qb.ph  $s4, $s1, $s0                 \n"
    "precrq.qb.ph $s5, $s1, $s0                 \n"
    "precr.qb.ph  $s6, $s3, $s2                 \n"
    "precrq.qb.ph $s7, $s3, $s2                 \n"

    // s4 = | 03 | 02 | 01 | 00 |
    // s5 = | 23 | 22 | 21 | 20 |
    // s6 = | 13 | 12 | 11 | 10 |
    // s7 = | 33 | 32 | 31 | 30 |

    "lwx          $t0, $t4(%[src])              \n"
    "lwx          $t1, $t5(%[src])              \n"
    "lwx          $t8, $t6(%[src])              \n"
    "lwx          $t9, $t7(%[src])              \n"

    // t0 = | 34 | 24 | 14 | 04 |
    // t1 = | 35 | 25 | 15 | 05 |
    // t8 = | 36 | 26 | 16 | 06 |
    // t9 = | 37 | 27 | 17 | 07 |

    "precr.qb.ph  $s0, $t1, $t0                 \n"
    "precr.qb.ph  $s1, $t9, $t8                 \n"
    "precrq.qb.ph $s2, $t1, $t0                 \n"
    "precrq.qb.ph $s3, $t9, $t8                 \n"

    // s0 = | 25 | 05 | 24 | 04 |
    // s1 = | 27 | 07 | 26 | 06 |
    // s2 = | 35 | 15 | 34 | 14 |
    // s3 = | 37 | 17 | 36 | 16 |

    "precr.qb.ph  $t0, $s1, $s0                 \n"
    "precrq.qb.ph $t1, $s1, $s0                 \n"
    "precr.qb.ph  $t8, $s3, $s2                 \n"
    "precrq.qb.ph $t9, $s3, $s2                 \n"

    // t0 = | 07 | 06 | 05 | 04 |
    // t1 = | 27 | 26 | 25 | 24 |
    // t8 = | 17 | 16 | 15 | 14 |
    // t9 = | 37 | 36 | 35 | 34 |

    "addu         $s0, %[dst], %[dst_stride]    \n"
    "addu         $s1, $s0, %[dst_stride]       \n"
    "addu         $s2, $s1, %[dst_stride]       \n"

    "swr          $s4, 0(%[dst])                \n"
    "swl          $s4, 3(%[dst])                \n"
    "swr          $t0, 4(%[dst])                \n"
    "swl          $t0, 7(%[dst])                \n"
    "swr          $s6, 0($s0)                   \n"
    "swl          $s6, 3($s0)                   \n"
    "swr          $t8, 4($s0)                   \n"
    "swl          $t8, 7($s0)                   \n"
    "swr          $s5, 0($s1)                   \n"
    "swl          $s5, 3($s1)                   \n"
    "swr          $t1, 4($s1)                   \n"
    "swl          $t1, 7($s1)                   \n"
    "swr          $s7, 0($s2)                   \n"
    "swl          $s7, 3($s2)                   \n"
    "swr          $t9, 4($s2)                   \n"
    "swl          $t9, 7($s2)                   \n"

    "addiu        $AT, -1                       \n"
    "addiu        %[src], 4                     \n"

    "bnez         $AT, 11b                      \n"
    " addu        %[dst], $s2, %[dst_stride]    \n"
    "2:                                         \n"
    ".set pop                                   \n"
    ".set at                                    \n"
    : [src] "+r" (src),
      [dst] "+r" (dst),
      [width] "+r" (width)
    : [src_stride] "r" (src_stride),
      [dst_stride] "r" (dst_stride)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3", "s4",
      "s5", "s6", "s7"
  );
}
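// The precr.qb.ph / precrq.qb.ph pairs above realize a 4x4 byte transpose
// entirely in registers. As a hedged reference (helper names are illustrative
// only), the two pack instructions can be simulated in plain C as below:
// precr.qb.ph keeps the low byte of each halfword of its operands, while
// precrq.qb.ph keeps the high byte.
static inline uint32 SimPrecrQbPh(uint32 rs, uint32 rt) {
  // result = rs.byte2 | rs.byte0 | rt.byte2 | rt.byte0  (high to low)
  return ((rs & 0x00ff0000) << 8) | ((rs & 0x000000ff) << 16) |
         ((rt & 0x00ff0000) >> 8) |  (rt & 0x000000ff);
}
static inline uint32 SimPrecrqQbPh(uint32 rs, uint32 rt) {
  // result = rs.byte3 | rs.byte1 | rt.byte3 | rt.byte1  (high to low)
  return  (rs & 0xff000000)        | ((rs & 0x0000ff00) << 8) |
         ((rt & 0xff000000) >> 16) | ((rt & 0x0000ff00) >> 8);
}
// With t0 = |30|20|10|00| and t1 = |31|21|11|01| as annotated in the loop:
//   SimPrecrQbPh(t1, t0)  -> |21|01|20|00|  (s0 above)
//   SimPrecrqQbPh(t1, t0) -> |31|11|30|10|  (s2 above)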
michael@0 312
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
                               uint8* dst_a, int dst_stride_a,
                               uint8* dst_b, int dst_stride_b,
                               int width) {
  __asm__ __volatile__ (
    ".set push                                     \n"
    ".set noreorder                                \n"
    "beqz         %[width], 2f                     \n"
    " sll         $t2, %[src_stride], 0x1          \n"  // src_stride x 2
    "sll          $t4, %[src_stride], 0x2          \n"  // src_stride x 4
    "sll          $t9, %[src_stride], 0x3          \n"  // src_stride x 8
    "addu         $t3, $t2, %[src_stride]          \n"
    "addu         $t5, $t4, %[src_stride]          \n"
    "addu         $t6, $t2, $t4                    \n"
    "subu         $t7, $t9, %[src_stride]          \n"
    "srl          $t1, %[width], 1                 \n"

    // check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
    "andi         $t0, %[dst_a], 0x3               \n"
    "andi         $t8, %[dst_b], 0x3               \n"
    "or           $t0, $t0, $t8                    \n"
    "andi         $t8, %[dst_stride_a], 0x3        \n"
    "andi         $s5, %[dst_stride_b], 0x3        \n"
    "or           $t8, $t8, $s5                    \n"
    "or           $t0, $t0, $t8                    \n"
    "bnez         $t0, 11f                         \n"
    " nop                                          \n"
    // dst + dst_stride word aligned (both a & b dst addresses)
    "1:                                            \n"
    "lw           $t0, 0(%[src])                   \n"  // |B0|A0|b0|a0|
    "lwx          $t8, %[src_stride](%[src])       \n"  // |B1|A1|b1|a1|
    "addu         $s5, %[dst_a], %[dst_stride_a]   \n"
    "lwx          $t9, $t2(%[src])                 \n"  // |B2|A2|b2|a2|
    "lwx          $s0, $t3(%[src])                 \n"  // |B3|A3|b3|a3|
    "addu         $s6, %[dst_b], %[dst_stride_b]   \n"

    "precrq.ph.w  $s1, $t8, $t0                    \n"  // |B1|A1|B0|A0|
    "precrq.ph.w  $s2, $s0, $t9                    \n"  // |B3|A3|B2|A2|
    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |A3|A2|A1|A0|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |B3|B2|B1|B0|

    "sll          $t0, $t0, 16                     \n"
    "packrl.ph    $s1, $t8, $t0                    \n"  // |b1|a1|b0|a0|
    "sll          $t9, $t9, 16                     \n"
    "packrl.ph    $s2, $s0, $t9                    \n"  // |b3|a3|b2|a2|

    "sw           $s3, 0($s5)                      \n"
    "sw           $s4, 0($s6)                      \n"

    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |a3|a2|a1|a0|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |b3|b2|b1|b0|

    "lwx          $t0, $t4(%[src])                 \n"  // |B4|A4|b4|a4|
    "lwx          $t8, $t5(%[src])                 \n"  // |B5|A5|b5|a5|
    "lwx          $t9, $t6(%[src])                 \n"  // |B6|A6|b6|a6|
    "lwx          $s0, $t7(%[src])                 \n"  // |B7|A7|b7|a7|
    "sw           $s3, 0(%[dst_a])                 \n"
    "sw           $s4, 0(%[dst_b])                 \n"

    "precrq.ph.w  $s1, $t8, $t0                    \n"  // |B5|A5|B4|A4|
    "precrq.ph.w  $s2, $s0, $t9                    \n"  // |B7|A7|B6|A6|
    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |A7|A6|A5|A4|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |B7|B6|B5|B4|

    "sll          $t0, $t0, 16                     \n"
    "packrl.ph    $s1, $t8, $t0                    \n"  // |b5|a5|b4|a4|
    "sll          $t9, $t9, 16                     \n"
    "packrl.ph    $s2, $s0, $t9                    \n"  // |b7|a7|b6|a6|
    "sw           $s3, 4($s5)                      \n"
    "sw           $s4, 4($s6)                      \n"

    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |a7|a6|a5|a4|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |b7|b6|b5|b4|

    "addiu        %[src], 4                        \n"
    "addiu        $t1, -1                          \n"
    "sll          $t0, %[dst_stride_a], 1          \n"
    "sll          $t8, %[dst_stride_b], 1          \n"
    "sw           $s3, 4(%[dst_a])                 \n"
    "sw           $s4, 4(%[dst_b])                 \n"
    "addu         %[dst_a], %[dst_a], $t0          \n"
    "bnez         $t1, 1b                          \n"
    " addu        %[dst_b], %[dst_b], $t8          \n"
    "b            2f                               \n"
    " nop                                          \n"

    // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
    "11:                                           \n"
    "lw           $t0, 0(%[src])                   \n"  // |B0|A0|b0|a0|
    "lwx          $t8, %[src_stride](%[src])       \n"  // |B1|A1|b1|a1|
    "addu         $s5, %[dst_a], %[dst_stride_a]   \n"
    "lwx          $t9, $t2(%[src])                 \n"  // |B2|A2|b2|a2|
    "lwx          $s0, $t3(%[src])                 \n"  // |B3|A3|b3|a3|
    "addu         $s6, %[dst_b], %[dst_stride_b]   \n"

    "precrq.ph.w  $s1, $t8, $t0                    \n"  // |B1|A1|B0|A0|
    "precrq.ph.w  $s2, $s0, $t9                    \n"  // |B3|A3|B2|A2|
    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |A3|A2|A1|A0|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |B3|B2|B1|B0|

    "sll          $t0, $t0, 16                     \n"
    "packrl.ph    $s1, $t8, $t0                    \n"  // |b1|a1|b0|a0|
    "sll          $t9, $t9, 16                     \n"
    "packrl.ph    $s2, $s0, $t9                    \n"  // |b3|a3|b2|a2|

    "swr          $s3, 0($s5)                      \n"
    "swl          $s3, 3($s5)                      \n"
    "swr          $s4, 0($s6)                      \n"
    "swl          $s4, 3($s6)                      \n"

    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |a3|a2|a1|a0|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |b3|b2|b1|b0|

    "lwx          $t0, $t4(%[src])                 \n"  // |B4|A4|b4|a4|
    "lwx          $t8, $t5(%[src])                 \n"  // |B5|A5|b5|a5|
    "lwx          $t9, $t6(%[src])                 \n"  // |B6|A6|b6|a6|
    "lwx          $s0, $t7(%[src])                 \n"  // |B7|A7|b7|a7|
    "swr          $s3, 0(%[dst_a])                 \n"
    "swl          $s3, 3(%[dst_a])                 \n"
    "swr          $s4, 0(%[dst_b])                 \n"
    "swl          $s4, 3(%[dst_b])                 \n"

    "precrq.ph.w  $s1, $t8, $t0                    \n"  // |B5|A5|B4|A4|
    "precrq.ph.w  $s2, $s0, $t9                    \n"  // |B7|A7|B6|A6|
    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |A7|A6|A5|A4|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |B7|B6|B5|B4|

    "sll          $t0, $t0, 16                     \n"
    "packrl.ph    $s1, $t8, $t0                    \n"  // |b5|a5|b4|a4|
    "sll          $t9, $t9, 16                     \n"
    "packrl.ph    $s2, $s0, $t9                    \n"  // |b7|a7|b6|a6|

    "swr          $s3, 4($s5)                      \n"
    "swl          $s3, 7($s5)                      \n"
    "swr          $s4, 4($s6)                      \n"
    "swl          $s4, 7($s6)                      \n"

    "precr.qb.ph  $s3, $s2, $s1                    \n"  // |a7|a6|a5|a4|
    "precrq.qb.ph $s4, $s2, $s1                    \n"  // |b7|b6|b5|b4|

    "addiu        %[src], 4                        \n"
    "addiu        $t1, -1                          \n"
    "sll          $t0, %[dst_stride_a], 1          \n"
    "sll          $t8, %[dst_stride_b], 1          \n"
    "swr          $s3, 4(%[dst_a])                 \n"
    "swl          $s3, 7(%[dst_a])                 \n"
    "swr          $s4, 4(%[dst_b])                 \n"
    "swl          $s4, 7(%[dst_b])                 \n"
    "addu         %[dst_a], %[dst_a], $t0          \n"
    "bnez         $t1, 11b                         \n"
    " addu        %[dst_b], %[dst_b], $t8          \n"

    "2:                                            \n"
    ".set pop                                      \n"
    : [src] "+r" (src),
      [dst_a] "+r" (dst_a),
      [dst_b] "+r" (dst_b),
      [width] "+r" (width),
      [src_stride] "+r" (src_stride)
    : [dst_stride_a] "r" (dst_stride_a),
      [dst_stride_b] "r" (dst_stride_b)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9",
      "s0", "s1", "s2", "s3",
      "s4", "s5", "s6"
  );
}
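// A hedged plain-C sketch of the interleaved variant (the helper name is
// hypothetical, not part of libyuv's API): the source rows hold interleaved
// A/B byte pairs, and each pass gathers one pair from 8 consecutive rows,
// writing the A bytes as a row of the first transposed plane and the B bytes
// as a row of the second.
static inline void TransposeUVWx8_C_sketch(const uint8* src, int src_stride,
                                           uint8* dst_a, int dst_stride_a,
                                           uint8* dst_b, int dst_stride_b,
                                           int width) {
  int i, j;
  for (i = 0; i < width; ++i) {    // width counts A/B pairs per source row
    for (j = 0; j < 8; ++j) {
      dst_a[i * dst_stride_a + j] = src[j * src_stride + i * 2 + 0];
      dst_b[i * dst_stride_b + j] = src[j * src_stride + i * 2 + 1];
    }
  }
}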
michael@0 480
#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
