media/libyuv/source/rotate_mips.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS. All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "libyuv/row.h"
    13 #include "libyuv/basic_types.h"
    15 #ifdef __cplusplus
    16 namespace libyuv {
    17 extern "C" {
    18 #endif
    20 #if !defined(LIBYUV_DISABLE_MIPS) && \
    21     defined(__mips_dsp) && (__mips_dsp_rev >= 2)
    23 void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
    24                              uint8* dst, int dst_stride,
    25                              int width) {
          // Transposes a (width x 8) block of bytes using MIPS DSP R2 instructions.
          //
          // For every source column, one byte is read from each of the 8 rows
          // src + k * src_stride (k = 0..7) and the 8 bytes are written
          // contiguously at dst. Then src advances by 1 and dst by dst_stride.
          //
          //   src, src_stride - first source row / distance between source rows.
          //   dst, dst_stride - first destination row / distance between
          //                     destination rows (one 8-byte row per column).
          //   width           - number of source columns; 0 is a no-op.
          //
          // Plain word stores are used when dst and dst_stride are both multiples
          // of 4, swr/swl pairs otherwise. The swr-at-+0 / swl-at-+3 pairing is the
          // little-endian form of an unaligned store, so this presumably targets
          // little-endian MIPS only - NOTE(review): confirm.
    26    __asm__ __volatile__ (
    27       ".set push                                         \n"
    28       ".set noreorder                                    \n"
             // Empty block: skip everything. Without this guard the loop counter
             // would be decremented from 0 and wrap around.
             "beqz             %[width], 2f                     \n"
    29       " sll             $t2, %[src_stride], 0x1          \n" // delay slot: src_stride x 2
    30       "sll              $t4, %[src_stride], 0x2          \n" // src_stride x 4
    31       "sll              $t9, %[src_stride], 0x3          \n" // src_stride x 8
    32       "addu             $t3, $t2, %[src_stride]          \n" // src_stride x 3
    33       "addu             $t5, $t4, %[src_stride]          \n" // src_stride x 5
    34       "addu             $t6, $t2, $t4                    \n" // src_stride x 6
    35       "andi             $t0, %[dst], 0x3                 \n"
    36       "andi             $t1, %[dst_stride], 0x3          \n"
    37       "or               $t0, $t0, $t1                    \n" // non-zero if dst or dst_stride is unaligned
    38       "bnez             $t0, 11f                         \n"
    39       " subu            $t7, $t9, %[src_stride]          \n" // delay slot (runs on both paths): src_stride x 7
    40 // dst and dst_stride are both word aligned: store with sw
    41     "1:                                                  \n"
    42       "lbu              $t0, 0(%[src])                   \n" // row 0
    43       "lbux             $t1, %[src_stride](%[src])       \n" // row 1
    44       "lbux             $t8, $t2(%[src])                 \n" // row 2
    45       "lbux             $t9, $t3(%[src])                 \n" // row 3
    46       "sll              $t1, $t1, 16                     \n"
    47       "sll              $t9, $t9, 16                     \n"
    48       "or               $t0, $t0, $t1                    \n" // t0 = row1 << 16 | row0
    49       "or               $t8, $t8, $t9                    \n" // t8 = row3 << 16 | row2
    50       "precr.qb.ph      $s0, $t8, $t0                    \n" // s0 = |row3|row2|row1|row0|
    51       "lbux             $t0, $t4(%[src])                 \n" // row 4
    52       "lbux             $t1, $t5(%[src])                 \n" // row 5
    53       "lbux             $t8, $t6(%[src])                 \n" // row 6
    54       "lbux             $t9, $t7(%[src])                 \n" // row 7
    55       "sll              $t1, $t1, 16                     \n"
    56       "sll              $t9, $t9, 16                     \n"
    57       "or               $t0, $t0, $t1                    \n" // t0 = row5 << 16 | row4
    58       "or               $t8, $t8, $t9                    \n" // t8 = row7 << 16 | row6
    59       "precr.qb.ph      $s1, $t8, $t0                    \n" // s1 = |row7|row6|row5|row4|
    60       "sw               $s0, 0(%[dst])                   \n"
    61       "addiu            %[width], -1                     \n"
    62       "addiu            %[src], 1                        \n" // next source column
    63       "sw               $s1, 4(%[dst])                   \n"
    64       "bnez             %[width], 1b                     \n"
    65       " addu            %[dst], %[dst], %[dst_stride]    \n" // delay slot: next destination row
    66       "b                2f                               \n"
             // Explicit branch delay slot. Under .set noreorder the instruction
             // following the branch always executes; without this nop that would
             // be the first load of the unaligned loop below, reading one byte
             // past the columns that were just processed.
             " nop                                              \n"
    67 // dst or dst_stride is not word aligned: store with swr/swl pairs
    68    "11:                                                  \n"
    69       "lbu              $t0, 0(%[src])                   \n" // row 0
    70       "lbux             $t1, %[src_stride](%[src])       \n" // row 1
    71       "lbux             $t8, $t2(%[src])                 \n" // row 2
    72       "lbux             $t9, $t3(%[src])                 \n" // row 3
    73       "sll              $t1, $t1, 16                     \n"
    74       "sll              $t9, $t9, 16                     \n"
    75       "or               $t0, $t0, $t1                    \n" // t0 = row1 << 16 | row0
    76       "or               $t8, $t8, $t9                    \n" // t8 = row3 << 16 | row2
    77       "precr.qb.ph      $s0, $t8, $t0                    \n" // s0 = |row3|row2|row1|row0|
    78       "lbux             $t0, $t4(%[src])                 \n" // row 4
    79       "lbux             $t1, $t5(%[src])                 \n" // row 5
    80       "lbux             $t8, $t6(%[src])                 \n" // row 6
    81       "lbux             $t9, $t7(%[src])                 \n" // row 7
    82       "sll              $t1, $t1, 16                     \n"
    83       "sll              $t9, $t9, 16                     \n"
    84       "or               $t0, $t0, $t1                    \n" // t0 = row5 << 16 | row4
    85       "or               $t8, $t8, $t9                    \n" // t8 = row7 << 16 | row6
    86       "precr.qb.ph      $s1, $t8, $t0                    \n" // s1 = |row7|row6|row5|row4|
    87       "swr              $s0, 0(%[dst])                   \n"
    88       "swl              $s0, 3(%[dst])                   \n"
    89       "addiu            %[width], -1                     \n"
    90       "addiu            %[src], 1                        \n" // next source column
    91       "swr              $s1, 4(%[dst])                   \n"
    92       "swl              $s1, 7(%[dst])                   \n"
    93       "bnez             %[width], 11b                    \n"
    94       " addu            %[dst], %[dst], %[dst_stride]    \n" // delay slot: next destination row
    95     "2:                                                  \n"
    96       ".set pop                                          \n"
    97       :[src] "+r" (src),
    98        [dst] "+r" (dst),
    99        [width] "+r" (width)
   100       :[src_stride] "r" (src_stride),
   101        [dst_stride] "r" (dst_stride)
   102       : "t0", "t1",  "t2", "t3", "t4", "t5",
   103         "t6", "t7", "t8", "t9",
   104         "s0", "s1"
   105   );
   106 }
   108 void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
   109                                   uint8* dst, int dst_stride,
   110                                   int width) {
         // Transposes a (width x 8) block of bytes, four source columns at a time,
         // using MIPS DSP R2 instructions.
         //
         // Each iteration loads one 32-bit word (4 columns) from each of the 8
         // rows src + k * src_stride (k = 0..7), transposes the bytes in
         // registers and writes four 8-byte destination rows at dst,
         // dst + dst_stride, dst + 2 * dst_stride and dst + 3 * dst_stride.
         // Then src advances by 4 and dst by 4 * dst_stride.
         //
         //   src, src_stride - first source row / distance between source rows.
         //                     Rows are read with lw/lwx, so both are presumably
         //                     multiples of 4 - NOTE(review): confirm in caller.
         //   dst, dst_stride - first destination row / distance between rows.
         //   width           - number of source columns. width >> 2 groups of
         //                     four columns are processed; the remaining
         //                     (width & 3) columns are not touched here.
         //
         // Plain word stores are used when dst and dst_stride are both multiples
         // of 4, swr/swl pairs otherwise (little-endian form - NOTE(review):
         // confirm the target is little-endian only).
         //
         // NOTE(review): $AT holds the loop counter (hence .set noat) but is not
         // in the clobber list; this relies on the compiler never keeping a live
         // value in $at - confirm.
   111   __asm__ __volatile__ (
   112       ".set noat                                         \n"
   113       ".set push                                         \n"
   114       ".set noreorder                                    \n"
   115       "beqz             %[width], 2f                     \n"
   116       " sll             $t2, %[src_stride], 0x1          \n"  // delay slot: src_stride x 2
   117       "sll              $t4, %[src_stride], 0x2          \n"  // src_stride x 4
   118       "sll              $t9, %[src_stride], 0x3          \n"  // src_stride x 8
   119       "addu             $t3, $t2, %[src_stride]          \n"  // src_stride x 3
   120       "addu             $t5, $t4, %[src_stride]          \n"  // src_stride x 5
   121       "addu             $t6, $t2, $t4                    \n"  // src_stride x 6
   123       "srl              $AT, %[width], 0x2               \n"  // loop count = width / 4
             // Fewer than four columns: nothing to do. Without this guard a zero
             // count would be decremented to -1 and the loop would not terminate
             // until the counter wrapped around.
             "beqz             $AT, 2f                          \n"
   124       " andi            $t0, %[dst], 0x3                 \n"  // delay slot
   125       "andi             $t1, %[dst_stride], 0x3          \n"
   126       "or               $t0, $t0, $t1                    \n"  // non-zero if dst or dst_stride is unaligned
   127       "bnez             $t0, 11f                         \n"
   128       " subu            $t7, $t9, %[src_stride]          \n"  // delay slot (runs on both paths): src_stride x 7
   129 // dst and dst_stride are both word aligned: store with sw
   130       "1:                                                \n"
   131       "lw               $t0, 0(%[src])                   \n"
   132       "lwx              $t1, %[src_stride](%[src])       \n"
   133       "lwx              $t8, $t2(%[src])                 \n"
   134       "lwx              $t9, $t3(%[src])                 \n"
   136 // Byte labels below are <column><row>.
   136 // t0 = | 30 | 20 | 10 | 00 |
   137 // t1 = | 31 | 21 | 11 | 01 |
   138 // t8 = | 32 | 22 | 12 | 02 |
   139 // t9 = | 33 | 23 | 13 | 03 |
   141       "precr.qb.ph     $s0, $t1, $t0                     \n"
   142       "precr.qb.ph     $s1, $t9, $t8                     \n"
   143       "precrq.qb.ph    $s2, $t1, $t0                     \n"
   144       "precrq.qb.ph    $s3, $t9, $t8                     \n"
   146   // s0 = | 21 | 01 | 20 | 00 |
   147   // s1 = | 23 | 03 | 22 | 02 |
   148   // s2 = | 31 | 11 | 30 | 10 |
   149   // s3 = | 33 | 13 | 32 | 12 |
   151       "precr.qb.ph     $s4, $s1, $s0                     \n"
   152       "precrq.qb.ph    $s5, $s1, $s0                     \n"
   153       "precr.qb.ph     $s6, $s3, $s2                     \n"
   154       "precrq.qb.ph    $s7, $s3, $s2                     \n"
   156   // s4 = | 03 | 02 | 01 | 00 |
   157   // s5 = | 23 | 22 | 21 | 20 |
   158   // s6 = | 13 | 12 | 11 | 10 |
   159   // s7 = | 33 | 32 | 31 | 30 |
   161       "lwx              $t0, $t4(%[src])                 \n"
   162       "lwx              $t1, $t5(%[src])                 \n"
   163       "lwx              $t8, $t6(%[src])                 \n"
   164       "lwx              $t9, $t7(%[src])                 \n"
   166 // t0 = | 34 | 24 | 14 | 04 |
   167 // t1 = | 35 | 25 | 15 | 05 |
   168 // t8 = | 36 | 26 | 16 | 06 |
   169 // t9 = | 37 | 27 | 17 | 07 |
   171       "precr.qb.ph     $s0, $t1, $t0                     \n"
   172       "precr.qb.ph     $s1, $t9, $t8                     \n"
   173       "precrq.qb.ph    $s2, $t1, $t0                     \n"
   174       "precrq.qb.ph    $s3, $t9, $t8                     \n"
   176   // s0 = | 25 | 05 | 24 | 04 |
   177   // s1 = | 27 | 07 | 26 | 06 |
   178   // s2 = | 35 | 15 | 34 | 14 |
   179   // s3 = | 37 | 17 | 36 | 16 |
   181       "precr.qb.ph     $t0, $s1, $s0                     \n"
   182       "precrq.qb.ph    $t1, $s1, $s0                     \n"
   183       "precr.qb.ph     $t8, $s3, $s2                     \n"
   184       "precrq.qb.ph    $t9, $s3, $s2                     \n"
   186   // t0 = | 07 | 06 | 05 | 04 |
   187   // t1 = | 27 | 26 | 25 | 24 |
   188   // t8 = | 17 | 16 | 15 | 14 |
   189   // t9 = | 37 | 36 | 35 | 34 |
   191       "addu            $s0, %[dst], %[dst_stride]        \n"  // destination row 1
   192       "addu            $s1, $s0, %[dst_stride]           \n"  // destination row 2
   193       "addu            $s2, $s1, %[dst_stride]           \n"  // destination row 3
   195       "sw              $s4, 0(%[dst])                    \n"
   196       "sw              $t0, 4(%[dst])                    \n"
   197       "sw              $s6, 0($s0)                       \n"
   198       "sw              $t8, 4($s0)                       \n"
   199       "sw              $s5, 0($s1)                       \n"
   200       "sw              $t1, 4($s1)                       \n"
   201       "sw              $s7, 0($s2)                       \n"
   202       "sw              $t9, 4($s2)                       \n"
   204       "addiu            $AT, -1                          \n"
   205       "addiu            %[src], 4                        \n"  // next four source columns
   207       "bnez             $AT, 1b                          \n"
   208       " addu            %[dst], $s2, %[dst_stride]       \n"  // delay slot: dst += 4 * dst_stride
   209       "b                2f                               \n"
             // Explicit branch delay slot. Under .set noreorder the instruction
             // following the branch always executes; without this nop that would
             // be the first lw of the unaligned loop below, reading four bytes
             // past the columns that were just processed.
             " nop                                              \n"
   210 // dst or dst_stride is not word aligned: store with swr/swl pairs
   211       "11:                                               \n"
   212       "lw               $t0, 0(%[src])                   \n"
   213       "lwx              $t1, %[src_stride](%[src])       \n"
   214       "lwx              $t8, $t2(%[src])                 \n"
   215       "lwx              $t9, $t3(%[src])                 \n"
   217 // t0 = | 30 | 20 | 10 | 00 |
   218 // t1 = | 31 | 21 | 11 | 01 |
   219 // t8 = | 32 | 22 | 12 | 02 |
   220 // t9 = | 33 | 23 | 13 | 03 |
   222       "precr.qb.ph     $s0, $t1, $t0                     \n"
   223       "precr.qb.ph     $s1, $t9, $t8                     \n"
   224       "precrq.qb.ph    $s2, $t1, $t0                     \n"
   225       "precrq.qb.ph    $s3, $t9, $t8                     \n"
   227   // s0 = | 21 | 01 | 20 | 00 |
   228   // s1 = | 23 | 03 | 22 | 02 |
   229   // s2 = | 31 | 11 | 30 | 10 |
   230   // s3 = | 33 | 13 | 32 | 12 |
   232       "precr.qb.ph     $s4, $s1, $s0                     \n"
   233       "precrq.qb.ph    $s5, $s1, $s0                     \n"
   234       "precr.qb.ph     $s6, $s3, $s2                     \n"
   235       "precrq.qb.ph    $s7, $s3, $s2                     \n"
   237   // s4 = | 03 | 02 | 01 | 00 |
   238   // s5 = | 23 | 22 | 21 | 20 |
   239   // s6 = | 13 | 12 | 11 | 10 |
   240   // s7 = | 33 | 32 | 31 | 30 |
   242       "lwx              $t0, $t4(%[src])                 \n"
   243       "lwx              $t1, $t5(%[src])                 \n"
   244       "lwx              $t8, $t6(%[src])                 \n"
   245       "lwx              $t9, $t7(%[src])                 \n"
   247 // t0 = | 34 | 24 | 14 | 04 |
   248 // t1 = | 35 | 25 | 15 | 05 |
   249 // t8 = | 36 | 26 | 16 | 06 |
   250 // t9 = | 37 | 27 | 17 | 07 |
   252       "precr.qb.ph     $s0, $t1, $t0                     \n"
   253       "precr.qb.ph     $s1, $t9, $t8                     \n"
   254       "precrq.qb.ph    $s2, $t1, $t0                     \n"
   255       "precrq.qb.ph    $s3, $t9, $t8                     \n"
   257   // s0 = | 25 | 05 | 24 | 04 |
   258   // s1 = | 27 | 07 | 26 | 06 |
   259   // s2 = | 35 | 15 | 34 | 14 |
   260   // s3 = | 37 | 17 | 36 | 16 |
   262       "precr.qb.ph     $t0, $s1, $s0                     \n"
   263       "precrq.qb.ph    $t1, $s1, $s0                     \n"
   264       "precr.qb.ph     $t8, $s3, $s2                     \n"
   265       "precrq.qb.ph    $t9, $s3, $s2                     \n"
   267   // t0 = | 07 | 06 | 05 | 04 |
   268   // t1 = | 27 | 26 | 25 | 24 |
   269   // t8 = | 17 | 16 | 15 | 14 |
   270   // t9 = | 37 | 36 | 35 | 34 |
   272       "addu            $s0, %[dst], %[dst_stride]        \n"  // destination row 1
   273       "addu            $s1, $s0, %[dst_stride]           \n"  // destination row 2
   274       "addu            $s2, $s1, %[dst_stride]           \n"  // destination row 3
   276       "swr              $s4, 0(%[dst])                   \n"
   277       "swl              $s4, 3(%[dst])                   \n"
   278       "swr              $t0, 4(%[dst])                   \n"
   279       "swl              $t0, 7(%[dst])                   \n"
   280       "swr              $s6, 0($s0)                      \n"
   281       "swl              $s6, 3($s0)                      \n"
   282       "swr              $t8, 4($s0)                      \n"
   283       "swl              $t8, 7($s0)                      \n"
   284       "swr              $s5, 0($s1)                      \n"
   285       "swl              $s5, 3($s1)                      \n"
   286       "swr              $t1, 4($s1)                      \n"
   287       "swl              $t1, 7($s1)                      \n"
   288       "swr              $s7, 0($s2)                      \n"
   289       "swl              $s7, 3($s2)                      \n"
   290       "swr              $t9, 4($s2)                      \n"
   291       "swl              $t9, 7($s2)                      \n"
   293       "addiu            $AT, -1                          \n"
   294       "addiu            %[src], 4                        \n"  // next four source columns
   296       "bnez             $AT, 11b                         \n"
   297       " addu            %[dst], $s2, %[dst_stride]       \n"  // delay slot: dst += 4 * dst_stride
   298       "2:                                                \n"
   299       ".set pop                                          \n"
   300       ".set at                                           \n"
   301       :[src] "+r" (src),
   302        [dst] "+r" (dst),
   303        [width] "+r" (width)
   304       :[src_stride] "r" (src_stride),
   305        [dst_stride] "r" (dst_stride)
   306       : "t0", "t1",  "t2", "t3",  "t4", "t5",
   307         "t6", "t7", "t8", "t9",
   308         "s0", "s1", "s2", "s3", "s4",
   309         "s5", "s6", "s7"
   310   );
   311 }
   313 void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
   314                                uint8* dst_a, int dst_stride_a,
   315                                uint8* dst_b, int dst_stride_b,
   316                                int width) {
         // Transposes a block of 8 rows of interleaved two-channel byte pairs
         // (a, b, a, b, ...) and de-interleaves it into two planes, using MIPS
         // DSP R2 instructions.
         //
         // Each iteration loads one 32-bit word (two a/b pairs) from each of the
         // 8 rows src + k * src_stride (k = 0..7). The 8 "a" bytes of the first
         // pair go to dst_a and those of the second pair to
         // dst_a + dst_stride_a; the "b" bytes go to dst_b in the same way.
         // Then src advances by 4, dst_a by 2 * dst_stride_a and dst_b by
         // 2 * dst_stride_b.
         //
         //   src, src_stride         - first source row / distance between rows.
         //                             Rows are read with lw/lwx, so both are
         //                             presumably multiples of 4 -
         //                             NOTE(review): confirm in caller.
         //   dst_a, dst_stride_a     - destination plane for the first channel.
         //   dst_b, dst_stride_b     - destination plane for the second channel.
         //   width                   - number of a/b pairs per source row.
         //                             width >> 1 groups of two pairs are
         //                             processed; an odd trailing pair is not
         //                             touched here.
         //
         // Plain word stores are used only when dst_a, dst_b, dst_stride_a and
         // dst_stride_b are all multiples of 4; swr/swl pairs otherwise
         // (little-endian form - NOTE(review): confirm the target is
         // little-endian only).
   317   __asm__ __volatile__ (
   318       ".set push                                         \n"
   319       ".set noreorder                                    \n"
   320       "beqz            %[width], 2f                      \n"
   321       " sll            $t2, %[src_stride], 0x1           \n" // delay slot: src_stride x 2
   322       "sll             $t4, %[src_stride], 0x2           \n" // src_stride x 4
   323       "sll             $t9, %[src_stride], 0x3           \n" // src_stride x 8
   324       "addu            $t3, $t2, %[src_stride]           \n" // src_stride x 3
   325       "addu            $t5, $t4, %[src_stride]           \n" // src_stride x 5
   326       "addu            $t6, $t2, $t4                     \n" // src_stride x 6
   327       "subu            $t7, $t9, %[src_stride]           \n" // src_stride x 7
   328       "srl             $t1, %[width], 1                  \n" // loop count = width / 2
             // A single pair gives a zero loop count: nothing to do. Without this
             // guard the count would be decremented to -1 and the loop would not
             // terminate until the counter wrapped around.
             "beqz            $t1, 2f                           \n"
   330 // check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
   331       " andi           $t0, %[dst_a], 0x3                \n" // delay slot
   332       "andi            $t8, %[dst_b], 0x3                \n"
   333       "or              $t0, $t0, $t8                     \n"
   334       "andi            $t8, %[dst_stride_a], 0x3         \n"
   335       "andi            $s5, %[dst_stride_b], 0x3         \n"
   336       "or              $t8, $t8, $s5                     \n"
   337       "or              $t0, $t0, $t8                     \n" // non-zero if anything is unaligned
   338       "bnez            $t0, 11f                          \n"
   339       " nop                                              \n"
   340 // dst + dst_stride word aligned (both, a & b dst addresses)
   341     "1:                                                  \n"
   342       "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
   343       "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
   344       "addu            $s5, %[dst_a], %[dst_stride_a]    \n" // second dst_a row
   345       "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
   346       "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
   347       "addu            $s6, %[dst_b], %[dst_stride_b]    \n" // second dst_b row
   349       "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
   350       "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
   351       "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
   352       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
   354       "sll             $t0, $t0, 16                      \n"
   355       "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
   356       "sll             $t9, $t9, 16                      \n"
   357       "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
   359       "sw              $s3, 0($s5)                       \n"
   360       "sw              $s4, 0($s6)                       \n"
   362       "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
   363       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
   365       "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
   366       "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
   367       "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
   368       "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
   369       "sw              $s3, 0(%[dst_a])                  \n"
   370       "sw              $s4, 0(%[dst_b])                  \n"
   372       "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
   373       "precrq.ph.w     $s2, $s0, $t9                     \n" // |B7|A7|B6|A6|
   374       "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
   375       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
   377       "sll             $t0, $t0, 16                      \n"
   378       "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
   379       "sll             $t9, $t9, 16                      \n"
   380       "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
   381       "sw              $s3, 4($s5)                       \n"
   382       "sw              $s4, 4($s6)                       \n"
   384       "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
   385       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
   387       "addiu           %[src], 4                         \n" // next two source pairs
   388       "addiu           $t1, -1                           \n"
   389       "sll             $t0, %[dst_stride_a], 1           \n" // 2 x dst_stride_a
   390       "sll             $t8, %[dst_stride_b], 1           \n" // 2 x dst_stride_b
   391       "sw              $s3, 4(%[dst_a])                  \n"
   392       "sw              $s4, 4(%[dst_b])                  \n"
   393       "addu            %[dst_a], %[dst_a], $t0           \n"
   394       "bnez            $t1, 1b                           \n"
   395       " addu           %[dst_b], %[dst_b], $t8           \n" // delay slot
   396       "b               2f                                \n"
   397       " nop                                              \n"
   399 // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
   400    "11:                                                  \n"
   401       "lw              $t0, 0(%[src])                    \n" // |B0|A0|b0|a0|
   402       "lwx             $t8, %[src_stride](%[src])        \n" // |B1|A1|b1|a1|
   403       "addu            $s5, %[dst_a], %[dst_stride_a]    \n" // second dst_a row
   404       "lwx             $t9, $t2(%[src])                  \n" // |B2|A2|b2|a2|
   405       "lwx             $s0, $t3(%[src])                  \n" // |B3|A3|b3|a3|
   406       "addu            $s6, %[dst_b], %[dst_stride_b]    \n" // second dst_b row
   408       "precrq.ph.w     $s1, $t8, $t0                     \n" // |B1|A1|B0|A0|
   409       "precrq.ph.w     $s2, $s0, $t9                     \n" // |B3|A3|B2|A2|
   410       "precr.qb.ph     $s3, $s2, $s1                     \n" // |A3|A2|A1|A0|
   411       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B3|B2|B1|B0|
   413       "sll             $t0, $t0, 16                      \n"
   414       "packrl.ph       $s1, $t8, $t0                     \n" // |b1|a1|b0|a0|
   415       "sll             $t9, $t9, 16                      \n"
   416       "packrl.ph       $s2, $s0, $t9                     \n" // |b3|a3|b2|a2|
   418       "swr             $s3, 0($s5)                       \n"
   419       "swl             $s3, 3($s5)                       \n"
   420       "swr             $s4, 0($s6)                       \n"
   421       "swl             $s4, 3($s6)                       \n"
   423       "precr.qb.ph     $s3, $s2, $s1                     \n" // |a3|a2|a1|a0|
   424       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b3|b2|b1|b0|
   426       "lwx             $t0, $t4(%[src])                  \n" // |B4|A4|b4|a4|
   427       "lwx             $t8, $t5(%[src])                  \n" // |B5|A5|b5|a5|
   428       "lwx             $t9, $t6(%[src])                  \n" // |B6|A6|b6|a6|
   429       "lwx             $s0, $t7(%[src])                  \n" // |B7|A7|b7|a7|
   430       "swr             $s3, 0(%[dst_a])                  \n"
   431       "swl             $s3, 3(%[dst_a])                  \n"
   432       "swr             $s4, 0(%[dst_b])                  \n"
   433       "swl             $s4, 3(%[dst_b])                  \n"
   435       "precrq.ph.w     $s1, $t8, $t0                     \n" // |B5|A5|B4|A4|
   436       "precrq.ph.w     $s2, $s0, $t9                     \n" // |B7|A7|B6|A6|
   437       "precr.qb.ph     $s3, $s2, $s1                     \n" // |A7|A6|A5|A4|
   438       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |B7|B6|B5|B4|
   440       "sll             $t0, $t0, 16                      \n"
   441       "packrl.ph       $s1, $t8, $t0                     \n" // |b5|a5|b4|a4|
   442       "sll             $t9, $t9, 16                      \n"
   443       "packrl.ph       $s2, $s0, $t9                     \n" // |b7|a7|b6|a6|
   445       "swr             $s3, 4($s5)                       \n"
   446       "swl             $s3, 7($s5)                       \n"
   447       "swr             $s4, 4($s6)                       \n"
   448       "swl             $s4, 7($s6)                       \n"
   450       "precr.qb.ph     $s3, $s2, $s1                     \n" // |a7|a6|a5|a4|
   451       "precrq.qb.ph    $s4, $s2, $s1                     \n" // |b7|b6|b5|b4|
   453       "addiu           %[src], 4                         \n" // next two source pairs
   454       "addiu           $t1, -1                           \n"
   455       "sll             $t0, %[dst_stride_a], 1           \n" // 2 x dst_stride_a
   456       "sll             $t8, %[dst_stride_b], 1           \n" // 2 x dst_stride_b
   457       "swr             $s3, 4(%[dst_a])                  \n"
   458       "swl             $s3, 7(%[dst_a])                  \n"
   459       "swr             $s4, 4(%[dst_b])                  \n"
   460       "swl             $s4, 7(%[dst_b])                  \n"
   461       "addu            %[dst_a], %[dst_a], $t0           \n"
   462       "bnez            $t1, 11b                          \n"
   463       " addu           %[dst_b], %[dst_b], $t8           \n" // delay slot
   465       "2:                                                \n"
   466       ".set pop                                          \n"
             // src_stride is only ever read by the code above, so it is an input
             // operand, matching the other transpose routines in this file.
   467       : [src] "+r" (src),
   468         [dst_a] "+r" (dst_a),
   469         [dst_b] "+r" (dst_b),
   470         [width] "+r" (width)
   471       : [src_stride] "r" (src_stride),
   472         [dst_stride_a] "r" (dst_stride_a),
   473         [dst_stride_b] "r" (dst_stride_b)
   474       : "t0", "t1",  "t2", "t3",  "t4", "t5",
   475         "t6", "t7", "t8", "t9",
   476         "s0", "s1", "s2", "s3",
   477         "s4", "s5", "s6"
   478   );
   479 }
   481 #endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
   483 #ifdef __cplusplus
   484 }  // extern "C"
   485 }  // namespace libyuv
   486 #endif

mercurial