media/libyuv/source/scale_mips.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include "libyuv/basic_types.h"
michael@0 12 #include "libyuv/row.h"
michael@0 13
michael@0 14 #ifdef __cplusplus
michael@0 15 namespace libyuv {
michael@0 16 extern "C" {
michael@0 17 #endif
michael@0 18
michael@0 19 // This module is for GCC MIPS DSPR2
michael@0 20 #if !defined(LIBYUV_DISABLE_MIPS) && \
michael@0 21 defined(__mips_dsp) && (__mips_dsp_rev >= 2)
michael@0 22
// Point-samples one row at 1/2 width: every second source byte (bytes 0, 2,
// 4, ... per the lane comments below) is copied to dst.
//   src_ptr    - source row; advanced 2 bytes per output byte.
//   src_stride - not referenced by this function.
//   dst        - destination row.
//   dst_width  - number of output bytes to produce.
// The main loop emits 16 output bytes per pass from 32 source bytes using
// word loads/stores; the tail loop emits the remaining (dst_width & 0xf)
// bytes one at a time.
// NOTE(review): the lw/sw accesses presumably require 4-byte aligned src_ptr
// and dst - confirm that callers guarantee this.
michael@0 23 void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 24 uint8* dst, int dst_width) {
michael@0 25 __asm__ __volatile__(
michael@0 26 ".set push \n"
michael@0 27 ".set noreorder \n"
michael@0 28
michael@0 29 "srl $t9, %[dst_width], 4 \n" // iterations -> by 16
michael@0 30 "beqz $t9, 2f \n" // fewer than 16 outputs: tail only
michael@0 31 " nop \n" // branch delay slot
michael@0 32
michael@0 33 ".p2align 2 \n"
michael@0 34 "1: \n"
michael@0 35 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 36 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
michael@0 37 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
michael@0 38 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
michael@0 39 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
michael@0 40 "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
michael@0 41 "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
michael@0 42 "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
michael@0 43 // TODO(fbarchard): Use odd pixels instead of even.
michael@0 44 "precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
michael@0 45 "precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
michael@0 46 "precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
michael@0 47 "precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
michael@0 48 "addiu %[src_ptr], %[src_ptr], 32 \n"
michael@0 49 "addiu $t9, $t9, -1 \n"
michael@0 50 "sw $t8, 0(%[dst]) \n"
michael@0 51 "sw $t0, 4(%[dst]) \n"
michael@0 52 "sw $t1, 8(%[dst]) \n"
michael@0 53 "sw $t2, 12(%[dst]) \n"
michael@0 54 "bgtz $t9, 1b \n"
michael@0 55 " addiu %[dst], %[dst], 16 \n" // delay slot: runs on every pass
michael@0 56
michael@0 57 "2: \n"
michael@0 58 "andi $t9, %[dst_width], 0xf \n" // residue
michael@0 59 "beqz $t9, 3f \n"
michael@0 60 " nop \n" // branch delay slot
michael@0 61
michael@0 62 "21: \n"
michael@0 63 "lbu $t0, 0(%[src_ptr]) \n" // keep the first byte of each pair
michael@0 64 "addiu %[src_ptr], %[src_ptr], 2 \n"
michael@0 65 "addiu $t9, $t9, -1 \n"
michael@0 66 "sb $t0, 0(%[dst]) \n"
michael@0 67 "bgtz $t9, 21b \n"
michael@0 68 " addiu %[dst], %[dst], 1 \n" // delay slot: runs on every pass
michael@0 69
michael@0 70 "3: \n"
michael@0 71 ".set pop \n"
michael@0 72 : [src_ptr] "+r" (src_ptr),
michael@0 73 [dst] "+r" (dst)
michael@0 74 : [dst_width] "r" (dst_width)
michael@0 75 : "t0", "t1", "t2", "t3", "t4", "t5",
michael@0 76 "t6", "t7", "t8", "t9"
michael@0 77 );
michael@0 78 }
michael@0 79
michael@0 80 void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 81 uint8* dst, int dst_width) {
michael@0 82 const uint8* t = src_ptr + src_stride;
michael@0 83
michael@0 84 __asm__ __volatile__ (
michael@0 85 ".set push \n"
michael@0 86 ".set noreorder \n"
michael@0 87
michael@0 88 "srl $t9, %[dst_width], 3 \n" // iterations -> step 8
michael@0 89 "bltz $t9, 2f \n"
michael@0 90 " nop \n"
michael@0 91
michael@0 92 ".p2align 2 \n"
michael@0 93 "1: \n"
michael@0 94 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 95 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
michael@0 96 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
michael@0 97 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
michael@0 98 "lw $t4, 0(%[t]) \n" // |19|18|17|16|
michael@0 99 "lw $t5, 4(%[t]) \n" // |23|22|21|20|
michael@0 100 "lw $t6, 8(%[t]) \n" // |27|26|25|24|
michael@0 101 "lw $t7, 12(%[t]) \n" // |31|30|29|28|
michael@0 102 "addiu $t9, $t9, -1 \n"
michael@0 103 "srl $t8, $t0, 16 \n" // |X|X|3|2|
michael@0 104 "ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
michael@0 105 "ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
michael@0 106 "raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
michael@0 107 "raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
michael@0 108 "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
michael@0 109 "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
michael@0 110 "srl $t8, $t1, 16 \n" // |X|X|7|6|
michael@0 111 "ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
michael@0 112 "ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
michael@0 113 "raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
michael@0 114 "raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
michael@0 115 "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
michael@0 116 "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
michael@0 117 "srl $t8, $t2, 16 \n" // |X|X|11|10|
michael@0 118 "ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
michael@0 119 "ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
michael@0 120 "raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
michael@0 121 "raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
michael@0 122 "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
michael@0 123 "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
michael@0 124 "srl $t8, $t3, 16 \n" // |X|X|15|14|
michael@0 125 "ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
michael@0 126 "ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
michael@0 127 "raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
michael@0 128 "raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
michael@0 129 "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
michael@0 130 "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
michael@0 131 "addiu %[src_ptr], %[src_ptr], 16 \n"
michael@0 132 "addiu %[t], %[t], 16 \n"
michael@0 133 "sb $t0, 0(%[dst]) \n"
michael@0 134 "sb $t4, 1(%[dst]) \n"
michael@0 135 "sb $t1, 2(%[dst]) \n"
michael@0 136 "sb $t5, 3(%[dst]) \n"
michael@0 137 "sb $t2, 4(%[dst]) \n"
michael@0 138 "sb $t6, 5(%[dst]) \n"
michael@0 139 "sb $t3, 6(%[dst]) \n"
michael@0 140 "sb $t7, 7(%[dst]) \n"
michael@0 141 "bgtz $t9, 1b \n"
michael@0 142 " addiu %[dst], %[dst], 8 \n"
michael@0 143
michael@0 144 "2: \n"
michael@0 145 "andi $t9, %[dst_width], 0x7 \n" // x = residue
michael@0 146 "beqz $t9, 3f \n"
michael@0 147 " nop \n"
michael@0 148
michael@0 149 "21: \n"
michael@0 150 "lwr $t1, 0(%[src_ptr]) \n"
michael@0 151 "lwl $t1, 3(%[src_ptr]) \n"
michael@0 152 "lwr $t2, 0(%[t]) \n"
michael@0 153 "lwl $t2, 3(%[t]) \n"
michael@0 154 "srl $t8, $t1, 16 \n"
michael@0 155 "ins $t1, $t2, 16, 16 \n"
michael@0 156 "ins $t2, $t8, 0, 16 \n"
michael@0 157 "raddu.w.qb $t1, $t1 \n"
michael@0 158 "raddu.w.qb $t2, $t2 \n"
michael@0 159 "shra_r.w $t1, $t1, 2 \n"
michael@0 160 "shra_r.w $t2, $t2, 2 \n"
michael@0 161 "sb $t1, 0(%[dst]) \n"
michael@0 162 "sb $t2, 1(%[dst]) \n"
michael@0 163 "addiu %[src_ptr], %[src_ptr], 4 \n"
michael@0 164 "addiu $t9, $t9, -2 \n"
michael@0 165 "addiu %[t], %[t], 4 \n"
michael@0 166 "bgtz $t9, 21b \n"
michael@0 167 " addiu %[dst], %[dst], 2 \n"
michael@0 168
michael@0 169 "3: \n"
michael@0 170 ".set pop \n"
michael@0 171
michael@0 172 : [src_ptr] "+r" (src_ptr),
michael@0 173 [dst] "+r" (dst), [t] "+r" (t)
michael@0 174 : [dst_width] "r" (dst_width)
michael@0 175 : "t0", "t1", "t2", "t3", "t4", "t5",
michael@0 176 "t6", "t7", "t8", "t9"
michael@0 177 );
michael@0 178 }
michael@0 179
// Point-samples one row at 1/4 width: every fourth source byte (bytes 0, 4,
// 8, ... per the lane comments below) is copied to dst.
//   src_ptr    - source row; advanced 4 bytes per output byte.
//   src_stride - not referenced by this function.
//   dst        - destination row.
//   dst_width  - number of output bytes to produce.
// The main loop emits 8 output bytes per pass from 32 source bytes by
// applying precr.qb.ph twice; the tail loop emits the remaining
// (dst_width & 7) bytes one at a time.
// NOTE(review): the lw/sw accesses presumably require 4-byte aligned src_ptr
// and dst - confirm that callers guarantee this.
michael@0 180 void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 181 uint8* dst, int dst_width) {
michael@0 182 __asm__ __volatile__ (
michael@0 183 ".set push \n"
michael@0 184 ".set noreorder \n"
michael@0 185
michael@0 186 "srl $t9, %[dst_width], 3 \n" // iterations -> by 8
michael@0 187 "beqz $t9, 2f \n" // fewer than 8 outputs: tail only
michael@0 188 " nop \n" // branch delay slot
michael@0 189
michael@0 190 ".p2align 2 \n"
michael@0 191 "1: \n"
michael@0 192 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 193 "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
michael@0 194 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
michael@0 195 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
michael@0 196 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
michael@0 197 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
michael@0 198 "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
michael@0 199 "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
michael@0 200 "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
michael@0 201 "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
michael@0 202 "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
michael@0 203 "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
michael@0 204 "precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
michael@0 205 "precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
michael@0 206 "addiu %[src_ptr], %[src_ptr], 32 \n"
michael@0 207 "addiu $t9, $t9, -1 \n"
michael@0 208 "sw $t1, 0(%[dst]) \n"
michael@0 209 "sw $t5, 4(%[dst]) \n"
michael@0 210 "bgtz $t9, 1b \n"
michael@0 211 " addiu %[dst], %[dst], 8 \n" // delay slot: runs on every pass
michael@0 212
michael@0 213 "2: \n"
michael@0 214 "andi $t9, %[dst_width], 7 \n" // residue
michael@0 215 "beqz $t9, 3f \n"
michael@0 216 " nop \n" // branch delay slot
michael@0 217
michael@0 218 "21: \n"
michael@0 219 "lbu $t1, 0(%[src_ptr]) \n" // keep the first byte of each group of 4
michael@0 220 "addiu %[src_ptr], %[src_ptr], 4 \n"
michael@0 221 "addiu $t9, $t9, -1 \n"
michael@0 222 "sb $t1, 0(%[dst]) \n"
michael@0 223 "bgtz $t9, 21b \n"
michael@0 224 " addiu %[dst], %[dst], 1 \n" // delay slot: runs on every pass
michael@0 225
michael@0 226 "3: \n"
michael@0 227 ".set pop \n"
michael@0 228 : [src_ptr] "+r" (src_ptr),
michael@0 229 [dst] "+r" (dst)
michael@0 230 : [dst_width] "r" (dst_width)
michael@0 231 : "t1", "t2", "t3", "t4", "t5",
michael@0 232 "t6", "t7", "t8", "t9"
michael@0 233 );
michael@0 234 }
michael@0 235
// Box-filters four adjacent rows down to one row at 1/4 width: each output
// byte is the rounded average (sum + 8) >> 4 of a 4x4 block, i.e. four bytes
// from each of the rows at src_ptr, +1, +2 and +3 times src_stride.
//   src_ptr    - first source row.
//   src_stride - byte offset between consecutive source rows.
//   dst        - destination row.
//   dst_width  - number of output bytes to produce.
// The main loop emits two output bytes per pass for dst_width >> 1 passes;
// one extra output is produced afterwards when dst_width is odd.
// NOTE(review): the main loop is bottom-tested with no guard in front of it,
// so it executes once (storing two bytes) even when dst_width < 2 -
// presumably callers always pass dst_width >= 2; confirm.
// NOTE(review): the lw accesses presumably require 4-byte aligned rows -
// confirm that callers guarantee this.
michael@0 236 void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 237 uint8* dst, int dst_width) {
michael@0 238 intptr_t stride = src_stride;
michael@0 239 const uint8* s1 = src_ptr + stride;
michael@0 240 const uint8* s2 = s1 + stride;
michael@0 241 const uint8* s3 = s2 + stride;
michael@0 242
michael@0 243 __asm__ __volatile__ (
michael@0 244 ".set push \n"
michael@0 245 ".set noreorder \n"
michael@0 246
michael@0 247 "srl $t9, %[dst_width], 1 \n" // number of two-output passes
michael@0 248 "andi $t8, %[dst_width], 1 \n" // 1 if a single trailing output remains
michael@0 249
michael@0 250 ".p2align 2 \n"
michael@0 251 "1: \n"
michael@0 252 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 253 "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
michael@0 254 "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
michael@0 255 "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
michael@0 256 "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
michael@0 257 "lw $t5, 4(%[s1]) \n" // |23|22|21|20|
michael@0 258 "lw $t6, 4(%[s2]) \n" // |27|26|25|24|
michael@0 259 "lw $t7, 4(%[s3]) \n" // |31|30|29|28|
michael@0 260 "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
michael@0 261 "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
michael@0 262 "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
michael@0 263 "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
michael@0 264 "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
michael@0 265 "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
michael@0 266 "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
michael@0 267 "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
michael@0 268 "add $t0, $t0, $t1 \n"
michael@0 269 "add $t1, $t2, $t3 \n"
michael@0 270 "add $t0, $t0, $t1 \n" // sum of the first 4x4 block
michael@0 271 "add $t4, $t4, $t5 \n"
michael@0 272 "add $t6, $t6, $t7 \n"
michael@0 273 "add $t4, $t4, $t6 \n" // sum of the second 4x4 block
michael@0 274 "shra_r.w $t0, $t0, 4 \n" // (sum + 8) >> 4
michael@0 275 "shra_r.w $t4, $t4, 4 \n" // (sum + 8) >> 4
michael@0 276 "sb $t0, 0(%[dst]) \n"
michael@0 277 "sb $t4, 1(%[dst]) \n"
michael@0 278 "addiu %[src_ptr], %[src_ptr], 8 \n"
michael@0 279 "addiu %[s1], %[s1], 8 \n"
michael@0 280 "addiu %[s2], %[s2], 8 \n"
michael@0 281 "addiu %[s3], %[s3], 8 \n"
michael@0 282 "addiu $t9, $t9, -1 \n"
michael@0 283 "bgtz $t9, 1b \n"
michael@0 284 " addiu %[dst], %[dst], 2 \n" // delay slot: runs on every pass
michael@0 285 "beqz $t8, 2f \n" // even dst_width: done
michael@0 286 " nop \n" // branch delay slot
michael@0 287
michael@0 288 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 289 "lw $t1, 0(%[s1]) \n" // |7|6|5|4|
michael@0 290 "lw $t2, 0(%[s2]) \n" // |11|10|9|8|
michael@0 291 "lw $t3, 0(%[s3]) \n" // |15|14|13|12|
michael@0 292 "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
michael@0 293 "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
michael@0 294 "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
michael@0 295 "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
michael@0 296 "add $t0, $t0, $t1 \n"
michael@0 297 "add $t1, $t2, $t3 \n"
michael@0 298 "add $t0, $t0, $t1 \n" // sum of the trailing 4x4 block
michael@0 299 "shra_r.w $t0, $t0, 4 \n" // (sum + 8) >> 4
michael@0 300 "sb $t0, 0(%[dst]) \n"
michael@0 301
michael@0 302 "2: \n"
michael@0 303 ".set pop \n"
michael@0 304
michael@0 305 : [src_ptr] "+r" (src_ptr),
michael@0 306 [dst] "+r" (dst),
michael@0 307 [s1] "+r" (s1),
michael@0 308 [s2] "+r" (s2),
michael@0 309 [s3] "+r" (s3)
michael@0 310 : [dst_width] "r" (dst_width)
michael@0 311 : "t0", "t1", "t2", "t3", "t4", "t5",
michael@0 312 "t6","t7", "t8", "t9"
michael@0 313 );
michael@0 314 }
michael@0 315
// Point-samples one row at 3/4 width: of every four source bytes, bytes 0, 1
// and 3 are kept (per the lane comments below).
//   src_ptr    - source row; 32 bytes are consumed per pass.
//   src_stride - not referenced by this function.
//   dst        - destination row; 24 bytes are stored per pass.
//   dst_width  - number of output bytes to produce.
// The loop is bottom-tested and exits only when dst_width, decremented by 24
// per pass, reaches exactly zero.
// NOTE(review): this means dst_width must be a positive multiple of 24;
// presumably the caller guarantees that - confirm.
// NOTE(review): the lw/sw accesses presumably require 4-byte aligned src_ptr
// and dst - confirm that callers guarantee this.
michael@0 316 void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 317 uint8* dst, int dst_width) {
michael@0 318 __asm__ __volatile__ (
michael@0 319 ".set push \n"
michael@0 320 ".set noreorder \n"
michael@0 321 ".p2align 2 \n"
michael@0 322 "1: \n"
michael@0 323 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 324 "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
michael@0 325 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
michael@0 326 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
michael@0 327 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
michael@0 328 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
michael@0 329 "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
michael@0 330 "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
michael@0 331 "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
michael@0 332 "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|29|
michael@0 333 "addiu %[dst_width], %[dst_width], -24 \n"
michael@0 334 "ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
michael@0 335 "ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
michael@0 336 "ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
michael@0 337 "ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
michael@0 338 "addiu %[src_ptr], %[src_ptr], 32 \n"
michael@0 339 "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
michael@0 340 "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
michael@0 341 "prepend $t1, $t2, 8 \n" // |4|3|1|0|
michael@0 342 "prepend $t3, $t4, 24 \n" // |15|13|12|11|
michael@0 343 "prepend $t5, $t6, 8 \n" // |20|19|17|16|
michael@0 344 "prepend $t7, $t8, 24 \n" // |31|29|28|27|
michael@0 345 "sw $t1, 0(%[dst]) \n"
michael@0 346 "sw $t0, 4(%[dst]) \n"
michael@0 347 "sw $t3, 8(%[dst]) \n"
michael@0 348 "sw $t5, 12(%[dst]) \n"
michael@0 349 "sw $t9, 16(%[dst]) \n"
michael@0 350 "sw $t7, 20(%[dst]) \n"
michael@0 351 "bnez %[dst_width], 1b \n" // loop until the count is exactly zero
michael@0 352 " addiu %[dst], %[dst], 24 \n" // delay slot: runs on every pass
michael@0 353 ".set pop \n"
michael@0 354 : [src_ptr] "+r" (src_ptr),
michael@0 355 [dst] "+r" (dst),
michael@0 356 [dst_width] "+r" (dst_width)
michael@0 357 :
michael@0 358 : "t0", "t1", "t2", "t3", "t4", "t5",
michael@0 359 "t6","t7", "t8", "t9"
michael@0 360 );
michael@0 361 }
michael@0 362
// Filters two adjacent rows down to one row at 3/4 width, weighting the row
// at src_ptr (S) three times as heavily as the row at src_ptr + src_stride
// (T). Each pass reads 4 bytes from each row and stores 3 output bytes.
// Horizontal filter applied to each row (p0..p3 are the 4 bytes read):
//   h0 = (3*p0 + p1 + 2) >> 2
//   h1 = (p1 + p2 + 1) >> 1
//   h2 = (p2 + 3*p3 + 2) >> 2
// Vertical blend of the two filtered rows: out = (3*hS + hT + 2) >> 2.
//   src_ptr    - first source row; advanced 4 bytes per pass.
//   src_stride - byte offset to the second row; used as the lwx index and
//                left unchanged by the asm although bound as "+r".
//   d          - destination row; advanced 3 bytes per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop is bottom-tested, so it always executes at least once. Label "3:"
// is not referenced by any branch.
// NOTE(review): lw/lwx presumably require 4-byte aligned rows - confirm that
// callers guarantee this.
michael@0 363 void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 364 uint8* d, int dst_width) {
michael@0 365 __asm__ __volatile__ (
michael@0 366 ".set push \n"
michael@0 367 ".set noreorder \n"
michael@0 368 "repl.ph $t3, 3 \n" // 0x00030003
michael@0 369
michael@0 370 ".p2align 2 \n"
michael@0 371 "1: \n"
michael@0 372 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
michael@0 373 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
michael@0 374 "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
michael@0 375 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
michael@0 376 "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
michael@0 377 "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
michael@0 378 "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
michael@0 379 "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
michael@0 380 "raddu.w.qb $t0, $t0 \n" // S1+S2
michael@0 381 "raddu.w.qb $t1, $t1 \n" // T1+T2
michael@0 382 "shra_r.w $t0, $t0, 1 \n" // h1 of S: (S1+S2+1)>>1
michael@0 383 "shra_r.w $t1, $t1, 1 \n" // h1 of T: (T1+T2+1)>>1
michael@0 384 "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
michael@0 385 "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
michael@0 386 "rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
michael@0 387 "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
michael@0 388 "addu.ph $t2, $t2, $t4 \n" // |S1+3*S0|S2+3*S3|
michael@0 389 "addu.ph $t6, $t6, $t5 \n" // |T1+3*T0|T2+3*T3|
michael@0 390 "sll $t5, $t0, 1 \n" // 2 * (h1 of S)
michael@0 391 "add $t0, $t5, $t0 \n" // 3 * (h1 of S)
michael@0 392 "shra_r.ph $t2, $t2, 2 \n" // |h0 of S|h2 of S|
michael@0 393 "shra_r.ph $t6, $t6, 2 \n" // |h0 of T|h2 of T|
michael@0 394 "shll.ph $t4, $t2, 1 \n" // 2 * S halfwords
michael@0 395 "addq.ph $t4, $t4, $t2 \n" // 3 * S halfwords
michael@0 396 "addu $t0, $t0, $t1 \n" // 3*(h1 of S) + (h1 of T)
michael@0 397 "addiu %[src_ptr], %[src_ptr], 4 \n"
michael@0 398 "shra_r.w $t0, $t0, 2 \n" // middle output, rounded >>2
michael@0 399 "addu.ph $t6, $t6, $t4 \n" // 3*S + T per halfword
michael@0 400 "shra_r.ph $t6, $t6, 2 \n" // outer outputs, rounded >>2
michael@0 401 "srl $t1, $t6, 16 \n" // upper halfword -> first output
michael@0 402 "addiu %[dst_width], %[dst_width], -3 \n"
michael@0 403 "sb $t1, 0(%[d]) \n"
michael@0 404 "sb $t0, 1(%[d]) \n"
michael@0 405 "sb $t6, 2(%[d]) \n" // low byte of lower halfword -> third output
michael@0 406 "bgtz %[dst_width], 1b \n"
michael@0 407 " addiu %[d], %[d], 3 \n" // delay slot: runs on every pass
michael@0 408 "3: \n"
michael@0 409 ".set pop \n"
michael@0 410 : [src_ptr] "+r" (src_ptr),
michael@0 411 [src_stride] "+r" (src_stride),
michael@0 412 [d] "+r" (d),
michael@0 413 [dst_width] "+r" (dst_width)
michael@0 414 :
michael@0 415 : "t0", "t1", "t2", "t3",
michael@0 416 "t4", "t5", "t6"
michael@0 417 );
michael@0 418 }
michael@0 419
// Filters two adjacent rows down to one row at 3/4 width, weighting the row
// at src_ptr (S) and the row at src_ptr + src_stride (T) equally. Each pass
// reads 4 bytes from each row and stores 3 output bytes.
// Horizontal filter applied to each row (p0..p3 are the 4 bytes read):
//   h0 = (3*p0 + p1 + 2) >> 2
//   h1 = (p1 + p2 + 1) >> 1
//   h2 = (p2 + 3*p3 + 2) >> 2
// Vertical blend of the two filtered rows: out = (hS + hT + 1) >> 1.
//   src_ptr    - first source row; advanced 4 bytes per pass.
//   src_stride - byte offset to the second row; used as the lwx index and
//                left unchanged by the asm although bound as "+r".
//   d          - destination row; advanced 3 bytes per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop is bottom-tested, so it always executes at least once. Label "3:"
// is not referenced by any branch.
// NOTE(review): lw/lwx presumably require 4-byte aligned rows - confirm that
// callers guarantee this.
michael@0 420 void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 421 uint8* d, int dst_width) {
michael@0 422 __asm__ __volatile__ (
michael@0 423 ".set push \n"
michael@0 424 ".set noreorder \n"
michael@0 425 "repl.ph $t2, 3 \n" // 0x00030003
michael@0 426
michael@0 427 ".p2align 2 \n"
michael@0 428 "1: \n"
michael@0 429 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
michael@0 430 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
michael@0 431 "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
michael@0 432 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
michael@0 433 "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
michael@0 434 "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
michael@0 435 "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
michael@0 436 "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
michael@0 437 "raddu.w.qb $t0, $t0 \n" // S1+S2
michael@0 438 "raddu.w.qb $t1, $t1 \n" // T1+T2
michael@0 439 "shra_r.w $t0, $t0, 1 \n" // h1 of S: (S1+S2+1)>>1
michael@0 440 "shra_r.w $t1, $t1, 1 \n" // h1 of T: (T1+T2+1)>>1
michael@0 441 "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
michael@0 442 "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
michael@0 443 "rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
michael@0 444 "rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
michael@0 445 "addu.ph $t4, $t4, $t3 \n" // |S1+3*S0|S2+3*S3|
michael@0 446 "addu.ph $t6, $t6, $t5 \n" // |T1+3*T0|T2+3*T3|
michael@0 447 "shra_r.ph $t6, $t6, 2 \n" // |h0 of T|h2 of T|
michael@0 448 "shra_r.ph $t4, $t4, 2 \n" // |h0 of S|h2 of S|
michael@0 449 "addu.ph $t6, $t6, $t4 \n" // S + T per halfword
michael@0 450 "addiu %[src_ptr], %[src_ptr], 4 \n"
michael@0 451 "shra_r.ph $t6, $t6, 1 \n" // outer outputs, rounded >>1
michael@0 452 "addu $t0, $t0, $t1 \n" // (h1 of S) + (h1 of T)
michael@0 453 "addiu %[dst_width], %[dst_width], -3 \n"
michael@0 454 "shra_r.w $t0, $t0, 1 \n" // middle output, rounded >>1
michael@0 455 "srl $t1, $t6, 16 \n" // upper halfword -> first output
michael@0 456 "sb $t1, 0(%[d]) \n"
michael@0 457 "sb $t0, 1(%[d]) \n"
michael@0 458 "sb $t6, 2(%[d]) \n" // low byte of lower halfword -> third output
michael@0 459 "bgtz %[dst_width], 1b \n"
michael@0 460 " addiu %[d], %[d], 3 \n" // delay slot: runs on every pass
michael@0 461 "3: \n"
michael@0 462 ".set pop \n"
michael@0 463 : [src_ptr] "+r" (src_ptr),
michael@0 464 [src_stride] "+r" (src_stride),
michael@0 465 [d] "+r" (d),
michael@0 466 [dst_width] "+r" (dst_width)
michael@0 467 :
michael@0 468 : "t0", "t1", "t2", "t3",
michael@0 469 "t4", "t5", "t6"
michael@0 470 );
michael@0 471 }
michael@0 472
// Point-samples one row at 3/8 width: each pass reads 32 source bytes and
// stores 12 output bytes, namely source bytes 0, 3, 6, 8, 11, 14, 16, 19, 22,
// 24, 27 and 30 (per the lane comments below).
//   src_ptr    - source row; advanced 32 bytes per pass.
//   src_stride - not referenced by this function.
//   dst        - destination row; advanced 12 bytes per pass.
//   dst_width  - number of output bytes; decremented by 12 per pass.
// The loop is bottom-tested and repeats while at least 12 outputs remain
// after the current pass, so it always executes at least once and any
// remainder smaller than 12 is not produced here.
// NOTE(review): the lw/sw accesses presumably require 4-byte aligned src_ptr
// and dst - confirm that callers guarantee this.
michael@0 473 void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 474 uint8* dst, int dst_width) {
michael@0 475 __asm__ __volatile__ (
michael@0 476 ".set push \n"
michael@0 477 ".set noreorder \n"
michael@0 478
michael@0 479 ".p2align 2 \n"
michael@0 480 "1: \n"
michael@0 481 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
michael@0 482 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
michael@0 483 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
michael@0 484 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
michael@0 485 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
michael@0 486 "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
michael@0 487 "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
michael@0 488 "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
michael@0 489 "wsbh $t0, $t0 \n" // |2|3|0|1|
michael@0 490 "wsbh $t6, $t6 \n" // |26|27|24|25|
michael@0 491 "srl $t0, $t0, 8 \n" // |X|2|3|0|
michael@0 492 "srl $t3, $t3, 16 \n" // |X|X|15|14|
michael@0 493 "srl $t5, $t5, 16 \n" // |X|X|23|22|
michael@0 494 "srl $t7, $t7, 16 \n" // |X|X|31|30|
michael@0 495 "ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
michael@0 496 "ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
michael@0 497 "ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
michael@0 498 "ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
michael@0 499 "prepend $t2, $t3, 24 \n" // |X|15|14|11|
michael@0 500 "ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
michael@0 501 "ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
michael@0 502 "addiu %[src_ptr], %[src_ptr], 32 \n"
michael@0 503 "addiu %[dst_width], %[dst_width], -12 \n"
michael@0 504 "addiu $t8,%[dst_width], -12 \n" // outputs left after one more pass
michael@0 505 "sw $t1, 0(%[dst]) \n"
michael@0 506 "sw $t4, 4(%[dst]) \n"
michael@0 507 "sw $t6, 8(%[dst]) \n"
michael@0 508 "bgez $t8, 1b \n" // continue while >= 12 outputs remain
michael@0 509 " addiu %[dst], %[dst], 12 \n" // delay slot: runs on every pass
michael@0 510 ".set pop \n"
michael@0 511 : [src_ptr] "+r" (src_ptr),
michael@0 512 [dst] "+r" (dst),
michael@0 513 [dst_width] "+r" (dst_width)
michael@0 514 :
michael@0 515 : "t0", "t1", "t2", "t3", "t4",
michael@0 516 "t5", "t6", "t7", "t8"
michael@0 517 );
michael@0 518 }
michael@0 519
// Box-filters two adjacent rows down to one row at 3/8 width. Each pass
// reads 8 bytes from the row at src_ptr (S) and 8 from the row at
// src_ptr + src_stride (T), and stores 3 output bytes:
//   out[0] = ((S0+S1+S2 + T0+T1+T2) * 0x2AAA) >> 16
//   out[1] = ((S3+S4+S5 + T3+T4+T5) * 0x2AAA) >> 16
//   out[2] = (S6+S7 + T6+T7) >> 2            (plain srl, no rounding)
// Multiplying by c = 0x2AAA and shifting right by 16 approximates a division
// by 6 (0x2AAA is about 65536 / 6).
//   src_ptr    - first source row; advanced 8 bytes per pass.
//   src_stride - byte offset from the first source row to the second.
//   dst_ptr    - destination row; advanced 3 bytes per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop is bottom-tested, so it always executes at least once.
// NOTE(review): the lw accesses presumably require 4-byte aligned rows -
// confirm that callers guarantee this.
michael@0 520 void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
michael@0 521 uint8* dst_ptr, int dst_width) {
michael@0 522 intptr_t stride = src_stride;
michael@0 523 const uint8* t = src_ptr + stride;
michael@0 524 const int c = 0x2AAA;
michael@0 525
michael@0 526 __asm__ __volatile__ (
michael@0 527 ".set push \n"
michael@0 528 ".set noreorder \n"
michael@0 529
michael@0 530 ".p2align 2 \n"
michael@0 531 "1: \n"
michael@0 532 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
michael@0 533 "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
michael@0 534 "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
michael@0 535 "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
michael@0 536 "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
michael@0 537 "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
michael@0 538 "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
michael@0 539 "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
michael@0 540 "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
michael@0 541 "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
michael@0 542 "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
michael@0 543 "srl $t4, $t4, 2 \n" // t4 / 4
michael@0 544 "srl $t6, $t6, 16 \n" // |0|0|S3|T3|
michael@0 545 "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
michael@0 546 "addu $t6, $t5, $t6 \n" // S3+S4+S5+T3+T4+T5
michael@0 547 "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
michael@0 548 "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
michael@0 549 "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
michael@0 550 "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
michael@0 551 "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
michael@0 552 "addu $t0, $t0, $t2 \n" // S0+S1+S2+T0+T1+T2
michael@0 553 "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
michael@0 554 "addiu %[src_ptr], %[src_ptr], 8 \n"
michael@0 555 "addiu %[t], %[t], 8 \n"
michael@0 556 "addiu %[dst_width], %[dst_width], -3 \n"
michael@0 557 "addiu %[dst_ptr], %[dst_ptr], 3 \n" // advanced early; stores use negative offsets
michael@0 558 "srl $t6, $t6, 16 \n" // approx. sum / 6
michael@0 559 "srl $t0, $t0, 16 \n" // approx. sum / 6
michael@0 560 "sb $t4, -1(%[dst_ptr]) \n" // out[2]
michael@0 561 "sb $t6, -2(%[dst_ptr]) \n" // out[1]
michael@0 562 "bgtz %[dst_width], 1b \n"
michael@0 563 " sb $t0, -3(%[dst_ptr]) \n" // out[0]; delay slot: runs on every pass
michael@0 564 ".set pop \n"
michael@0 565 : [src_ptr] "+r" (src_ptr),
michael@0 566 [dst_ptr] "+r" (dst_ptr),
michael@0 567 [t] "+r" (t),
michael@0 568 [dst_width] "+r" (dst_width)
michael@0 569 : [c] "r" (c)
michael@0 570 : "t0", "t1", "t2", "t3", "t4", "t5", "t6"
michael@0 571 );
michael@0 572 }
michael@0 573
// Box-filters three adjacent rows down to one row at 3/8 width. Each pass
// reads 8 bytes from each of the rows at src_ptr (S), src_ptr + src_stride
// (T) and src_ptr + 2 * src_stride (R), and stores 3 output bytes:
//   out[0] = ((S0+S1+S2 + T0+T1+T2 + R0+R1+R2) * 0x1C71) >> 16
//   out[1] = ((S3+S4+S5 + T3+T4+T5 + R3+R4+R5) * 0x1C71) >> 16
//   out[2] = ((S6+S7 + T6+T7 + R6+R7) * 0x2AAA) >> 16
// Multiplying by c1 = 0x1C71 and shifting right by 16 approximates a division
// by 9; c2 = 0x2AAA likewise approximates a division by 6.
//   src_ptr    - first source row; advanced 8 bytes per pass.
//   src_stride - byte offset between consecutive source rows.
//   dst_ptr    - destination row; advanced 3 bytes per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop is bottom-tested, so it always executes at least once.
// NOTE(review): the lw accesses presumably require 4-byte aligned rows -
// confirm that callers guarantee this.
michael@0 574 void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
michael@0 575 ptrdiff_t src_stride,
michael@0 576 uint8* dst_ptr, int dst_width) {
michael@0 577 intptr_t stride = src_stride;
michael@0 578 const uint8* s1 = src_ptr + stride;
michael@0 579 stride += stride; // now 2 * src_stride, the offset of the third row
michael@0 580 const uint8* s2 = src_ptr + stride;
michael@0 581 const int c1 = 0x1C71;
michael@0 582 const int c2 = 0x2AAA;
michael@0 583
michael@0 584 __asm__ __volatile__ (
michael@0 585 ".set push \n"
michael@0 586 ".set noreorder \n"
michael@0 587
michael@0 588 ".p2align 2 \n"
michael@0 589 "1: \n"
michael@0 590 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
michael@0 591 "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
michael@0 592 "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
michael@0 593 "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
michael@0 594 "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
michael@0 595 "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
michael@0 596 "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
michael@0 597 "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
michael@0 598 "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
michael@0 599 "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
michael@0 600 "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
michael@0 601 "sll $t8, $t5, 16 \n" // |R5|R4|0|0|
michael@0 602 "raddu.w.qb $t8, $t8 \n" // R5+R4
michael@0 603 "addu $t7, $t7, $t8 \n" // S4+S5+T4+T5+R4+R5
michael@0 604 "srl $t8, $t5, 16 \n" // |0|0|R7|R6|
michael@0 605 "raddu.w.qb $t8, $t8 \n" // R7 + R6
michael@0 606 "addu $t6, $t6, $t8 \n" // S6+S7+T6+T7+R6+R7
michael@0 607 "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
michael@0 608 "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
michael@0 609 "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
michael@0 610 "srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
michael@0 611 "raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
michael@0 612 "addu $t7, $t7, $t8 \n" // sum of the middle 3x3 block
michael@0 613 "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
michael@0 614 "sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
michael@0 615 "sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
michael@0 616 "sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
michael@0 617 "raddu.w.qb $t0, $t0 \n" // S2+S1+S0
michael@0 618 "raddu.w.qb $t2, $t2 \n" // T2+T1+T0
michael@0 619 "raddu.w.qb $t4, $t4 \n" // R2+R1+R0
michael@0 620 "addu $t0, $t0, $t2 \n"
michael@0 621 "addu $t0, $t0, $t4 \n" // sum of the first 3x3 block
michael@0 622 "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
michael@0 623 "addiu %[src_ptr], %[src_ptr], 8 \n"
michael@0 624 "addiu %[s1], %[s1], 8 \n"
michael@0 625 "addiu %[s2], %[s2], 8 \n"
michael@0 626 "addiu %[dst_width], %[dst_width], -3 \n"
michael@0 627 "addiu %[dst_ptr], %[dst_ptr], 3 \n" // advanced early; stores use negative offsets
michael@0 628 "srl $t6, $t6, 16 \n" // approx. sum / 6
michael@0 629 "srl $t7, $t7, 16 \n" // approx. sum / 9
michael@0 630 "srl $t0, $t0, 16 \n" // approx. sum / 9
michael@0 631 "sb $t6, -1(%[dst_ptr]) \n" // out[2]
michael@0 632 "sb $t7, -2(%[dst_ptr]) \n" // out[1]
michael@0 633 "bgtz %[dst_width], 1b \n"
michael@0 634 " sb $t0, -3(%[dst_ptr]) \n" // out[0]; delay slot: runs on every pass
michael@0 635 ".set pop \n"
michael@0 636 : [src_ptr] "+r" (src_ptr),
michael@0 637 [dst_ptr] "+r" (dst_ptr),
michael@0 638 [s1] "+r" (s1),
michael@0 639 [s2] "+r" (s2),
michael@0 640 [dst_width] "+r" (dst_width)
michael@0 641 : [c1] "r" (c1), [c2] "r" (c2)
michael@0 642 : "t0", "t1", "t2", "t3", "t4",
michael@0 643 "t5", "t6", "t7", "t8"
michael@0 644 );
michael@0 645 }
michael@0 646
michael@0 647 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
michael@0 648
michael@0 649 #ifdef __cplusplus
michael@0 650 } // extern "C"
michael@0 651 } // namespace libyuv
michael@0 652 #endif
michael@0 653

mercurial