gfx/cairo/libpixman/src/pixman-mips-dspr2-asm.S

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  * Copyright (c) 2012
     3  *      MIPS Technologies, Inc., California.
     4  *
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions
     7  * are met:
     8  * 1. Redistributions of source code must retain the above copyright
     9  *    notice, this list of conditions and the following disclaimer.
    10  * 2. Redistributions in binary form must reproduce the above copyright
    11  *    notice, this list of conditions and the following disclaimer in the
    12  *    documentation and/or other materials provided with the distribution.
    13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
    14  *    contributors may be used to endorse or promote products derived from
    15  *    this software without specific prior written permission.
    16  *
    17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
    18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
    21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    27  * SUCH DAMAGE.
    28  *
    29  * Author:  Nemanja Lukic (nlukic@mips.com)
    30  */
    32 #include "pixman-private.h"
    33 #include "pixman-mips-dspr2-asm.h"
    35 LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
    36 /*
        * Fill a buffer with a 16-bit value.
        *
    37  * a0 - *dest
    38  * a1 - count (bytes)
    39  * a2 - value to fill buffer with
        *
        * If bit 1 of dest is set, one halfword is stored first so that the
        * word stores that follow are 4-byte aligned. The bulk of the buffer
        * is written 32 bytes per iteration with eight sw of the replicated
        * value; whatever is left is written one halfword at a time.
        * Registers written: a0, a1, a2, t1.
        * NOTE(review): assumes dest is 2-byte aligned and count is even -
        * confirm against the callers.
    40  */
    42     beqz     a1, 3f          /* count == 0: nothing to do */
    43      andi    t1, a0, 0x0002  /* (delay slot) t1 = dest & 2 */
    44     beqz     t1, 0f          /* check if address is 4-byte aligned */
    45      nop
    46     sh       a2, 0(a0)       /* one halfword to reach 4-byte alignment */
    47     addiu    a0, a0, 2
    48     addiu    a1, a1, -2
    49 0:
    50     srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
    51     replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
    52     beqz     t1, 2f          /* fewer than 32 bytes: tail loop only */
    53      nop
    54 1:
    55     addiu    t1, t1, -1
    56     beqz     t1, 11f         /* last 32-byte block is stored without pref */
    57      addiu   a1, a1, -32     /* (delay slot) executed on both paths */
    58     pref     30, 32(a0)      /* prefetch hint 30 on the following 32 bytes */
    59     sw       a2, 0(a0)
    60     sw       a2, 4(a0)
    61     sw       a2, 8(a0)
    62     sw       a2, 12(a0)
    63     sw       a2, 16(a0)
    64     sw       a2, 20(a0)
    65     sw       a2, 24(a0)
    66     sw       a2, 28(a0)
    67     b        1b
    68      addiu   a0, a0, 32      /* (delay slot) advance dest */
    69 11:
    70     sw       a2, 0(a0)       /* final 32-byte block */
    71     sw       a2, 4(a0)
    72     sw       a2, 8(a0)
    73     sw       a2, 12(a0)
    74     sw       a2, 16(a0)
    75     sw       a2, 20(a0)
    76     sw       a2, 24(a0)
    77     sw       a2, 28(a0)
    78     addiu    a0, a0, 32
    79 2:
    80     blez     a1, 3f          /* tail: stop when no bytes remain */
    81      addiu   a1, a1, -2      /* (delay slot) also runs on exit; a1 is dead then */
    82     sh       a2, 0(a0)
    83     b        2b
    84      addiu   a0, a0, 2
    85 3:
    86     jr       ra
    87      nop
    89 END(pixman_fill_buff16_mips)
    91 LEAF_MIPS32R2(pixman_fill_buff32_mips)
    92 /*
        * Fill a buffer with a 32-bit value.
        *
    93  * a0 - *dest
    94  * a1 - count (bytes)
    95  * a2 - value to fill buffer with
        *
        * The buffer is written 32 bytes per iteration with eight sw; the
        * remainder is written one word at a time.
        * Registers written: a0, a1, t1.
        * NOTE(review): assumes dest is 4-byte aligned and count is a
        * multiple of 4 - confirm against the callers.
    96  */
    98     beqz     a1, 3f    /* count == 0: nothing to do */
    99      nop
   100     srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
   101     beqz     t1, 2f    /* fewer than 32 bytes: tail loop only */
   102      nop
   103 1:
   104     addiu    t1, t1, -1
   105     beqz     t1, 11f         /* last 32-byte block is stored without pref */
   106      addiu   a1, a1, -32     /* (delay slot) executed on both paths */
   107     pref     30, 32(a0)      /* prefetch hint 30 on the following 32 bytes */
   108     sw       a2, 0(a0)
   109     sw       a2, 4(a0)
   110     sw       a2, 8(a0)
   111     sw       a2, 12(a0)
   112     sw       a2, 16(a0)
   113     sw       a2, 20(a0)
   114     sw       a2, 24(a0)
   115     sw       a2, 28(a0)
   116     b        1b
   117      addiu   a0, a0, 32      /* (delay slot) advance dest */
   118 11:
   119     sw       a2, 0(a0)       /* final 32-byte block */
   120     sw       a2, 4(a0)
   121     sw       a2, 8(a0)
   122     sw       a2, 12(a0)
   123     sw       a2, 16(a0)
   124     sw       a2, 20(a0)
   125     sw       a2, 24(a0)
   126     sw       a2, 28(a0)
   127     addiu    a0, a0, 32
   128 2:
   129     blez     a1, 3f          /* tail: stop when no bytes remain */
   130      addiu   a1, a1, -4      /* (delay slot) also runs on exit; a1 is dead then */
   131     sw       a2, 0(a0)
   132     b        2b
   133      addiu   a0, a0, 4
   134 3:
   135     jr       ra
   136      nop
   138 END(pixman_fill_buff32_mips)
   140 LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
   141 /*
        * Convert w pixels from a8r8g8b8 to r5g6b5.
        *
   142  * a0 - dst (r5g6b5)
   143  * a1 - src (a8r8g8b8)
   144  * a2 - w
        *
        * Pixels are converted two per iteration; an odd trailing pixel is
        * handled at label 2. The conversions are done by the CONVERT_*
        * macros, which are not defined in this file (presumably in
        * pixman-mips-dspr2-asm.h).
   145  */
   147     beqz     a2, 3f          /* w == 0: nothing to do */
   148      nop
   149     addiu    t1, a2, -1
   150     beqz     t1, 2f          /* w == 1: single-pixel path only */
   151      nop
   152     li       t4, 0xf800f800  /* t4, t5, t6: constants passed to the 2x macro */
   153     li       t5, 0x07e007e0
   154     li       t6, 0x001f001f
   155 1:
   156     lw       t0, 0(a1)       /* t0, t1 = two source pixels */
   157     lw       t1, 4(a1)
   158     addiu    a1, a1, 8
   159     addiu    a2, a2, -2
   161     CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8
   163     sh       t2, 0(a0)       /* store the two results */
   164     sh       t3, 2(a0)
   166     addiu    t2, a2, -1
   167     bgtz     t2, 1b          /* loop while at least two pixels remain */
   168      addiu   a0, a0, 4
   169 2:
   170     beqz     a2, 3f          /* no odd pixel left */
   171      nop
   172     lw       t0, 0(a1)
   174     CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
   176     sh       t1, 0(a0)
   177 3:
   178     j        ra
   179      nop
   181 END(pixman_composite_src_8888_0565_asm_mips)
   183 LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
   184 /*
        * Convert w pixels from r5g6b5 to a8r8g8b8.
        *
   185  * a0 - dst (a8r8g8b8)
   186  * a1 - src (r5g6b5)
   187  * a2 - w
        *
        * Pixels are converted two per iteration; an odd trailing pixel is
        * handled at label 2. The conversions are done by the CONVERT_*
        * macros, which are not defined in this file (presumably in
        * pixman-mips-dspr2-asm.h).
   188  */
   190     beqz     a2, 3f          /* w == 0: nothing to do */
   191      nop
   192     addiu    t1, a2, -1
   193     beqz     t1, 2f          /* w == 1: single-pixel path only */
   194      nop
   195     li       t4, 0x07e007e0  /* t4, t5: constants passed to the 2x macro */
   196     li       t5, 0x001F001F
   197 1:
   198     lhu      t0, 0(a1)       /* t0, t1 = two source pixels */
   199     lhu      t1, 2(a1)
   200     addiu    a1, a1, 4
   201     addiu    a2, a2, -2
   203     CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
   205     sw       t2, 0(a0)       /* store the two results */
   206     sw       t3, 4(a0)
   208     addiu    t2, a2, -1
   209     bgtz     t2, 1b          /* loop while at least two pixels remain */
   210      addiu   a0, a0, 8
   211 2:
   212     beqz     a2, 3f          /* no odd pixel left */
   213      nop
   214     lhu      t0, 0(a1)
   216     CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
   218     sw       t1, 0(a0)
   219 3:
   220     j        ra
   221      nop
   223 END(pixman_composite_src_0565_8888_asm_mips)
   225 LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
   226 /*
        * Copy w pixels from src to dst, setting the top byte of every pixel
        * to 0xff (each word is ORed with 0xff000000).
        *
   227  * a0 - dst (a8r8g8b8)
   228  * a1 - src (x8r8g8b8)
   229  * a2 - w
        *
        * Eight pixels are processed per iteration; the last group of eight
        * is done at label 2 without the pref instructions, and any remaining
        * pixels are copied one at a time at label 3.
   230  */
   232     beqz     a2, 4f       /* w == 0: nothing to do */
   233      nop
   234     li       t9, 0xff000000
   235     srl      t8, a2, 3    /* t8 = how many multiples of 8 src pixels */
   236     beqz     t8, 3f       /* branch if less than 8 src pixels */
   237      nop
   238 1:
   239     addiu    t8, t8, -1
   240     beqz     t8, 2f       /* last group of 8: no prefetch */
   241      addiu   a2, a2, -8   /* (delay slot) executed on both paths */
   242     pref     0, 32(a1)    /* prefetch hint 0 on the next 32 source bytes */
   243     lw       t0, 0(a1)
   244     lw       t1, 4(a1)
   245     lw       t2, 8(a1)
   246     lw       t3, 12(a1)
   247     lw       t4, 16(a1)
   248     lw       t5, 20(a1)
   249     lw       t6, 24(a1)
   250     lw       t7, 28(a1)
   251     addiu    a1, a1, 32
   252     or       t0, t0, t9
   253     or       t1, t1, t9
   254     or       t2, t2, t9
   255     or       t3, t3, t9
   256     or       t4, t4, t9
   257     or       t5, t5, t9
   258     or       t6, t6, t9
   259     or       t7, t7, t9
   260     pref     30, 32(a0)   /* prefetch hint 30 on the next 32 destination bytes */
   261     sw       t0, 0(a0)
   262     sw       t1, 4(a0)
   263     sw       t2, 8(a0)
   264     sw       t3, 12(a0)
   265     sw       t4, 16(a0)
   266     sw       t5, 20(a0)
   267     sw       t6, 24(a0)
   268     sw       t7, 28(a0)
   269     b        1b
   270      addiu   a0, a0, 32
   271 2:
   272     lw       t0, 0(a1)    /* final group of 8 pixels */
   273     lw       t1, 4(a1)
   274     lw       t2, 8(a1)
   275     lw       t3, 12(a1)
   276     lw       t4, 16(a1)
   277     lw       t5, 20(a1)
   278     lw       t6, 24(a1)
   279     lw       t7, 28(a1)
   280     addiu    a1, a1, 32
   281     or       t0, t0, t9
   282     or       t1, t1, t9
   283     or       t2, t2, t9
   284     or       t3, t3, t9
   285     or       t4, t4, t9
   286     or       t5, t5, t9
   287     or       t6, t6, t9
   288     or       t7, t7, t9
   289     sw       t0, 0(a0)
   290     sw       t1, 4(a0)
   291     sw       t2, 8(a0)
   292     sw       t3, 12(a0)
   293     sw       t4, 16(a0)
   294     sw       t5, 20(a0)
   295     sw       t6, 24(a0)
   296     sw       t7, 28(a0)
   297     beqz     a2, 4f       /* w was a multiple of 8: done */
   298      addiu   a0, a0, 32
   299 3:
   300     lw       t0, 0(a1)    /* tail: one pixel per iteration */
   301     addiu    a1, a1, 4
   302     addiu    a2, a2, -1
   303     or       t1, t0, t9
   304     sw       t1, 0(a0)
   305     bnez     a2, 3b
   306      addiu   a0, a0, 4
   307 4:
   308     jr       ra
   309      nop
   311 END(pixman_composite_src_x888_8888_asm_mips)
   313 LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
   314 /*
        * For each of w pixels, combine the constant source with one a8 mask
        * byte through the MIPS_*UN8x4_MUL_*UN8 macros and store the result
        * to dst. The macros are not defined in this file; by their names
        * they scale the four 8-bit channels of the source by the mask byte.
        *
   315  * a0 - dst  (a8r8g8b8)
   316  * a1 - src  (32bit constant)
   317  * a2 - mask (a8)
   318  * a3 - w
        *
        * Two pixels are produced per iteration; an odd trailing pixel is
        * handled at label 2. v0 is saved on entry and restored on exit
        * because it is used to hold the constant 0x00ff00ff.
   319  */
   322     SAVE_REGS_ON_STACK 0, v0
   323     li       v0, 0x00ff00ff
   325     beqz     a3, 3f    /* w == 0: nothing to do */
   326      nop
   327     addiu    t1, a3, -1
   328     beqz     t1, 2f    /* w == 1: single-pixel path only */
   329      nop
   331 1:
   332                        /* a1 = source      (32bit constant) */
   333     lbu      t0, 0(a2) /* t0 = mask        (a8) */
   334     lbu      t1, 1(a2) /* t1 = mask        (a8) */
   335     addiu    a2, a2, 2
   337     MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
   339     sw       t2, 0(a0) /* store the two results */
   340     sw       t3, 4(a0)
   341     addiu    a3, a3, -2
   342     addiu    t2, a3, -1
   343     bgtz     t2, 1b    /* loop while at least two pixels remain */
   344      addiu   a0, a0, 8
   346     beqz     a3, 3f    /* no odd pixel left */
   347      nop
   349 2:
   350     lbu      t0, 0(a2)
   351     addiu    a2, a2, 1
   353     MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
   355     sw       t1, 0(a0)
   356     addiu    a3, a3, -1
   357     addiu    a0, a0, 4
   359 3:
   360     RESTORE_REGS_FROM_STACK 0, v0
   361     j        ra
   362      nop
   364 END(pixman_composite_src_n_8_8888_asm_mips)
   366 LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
   367 /*
        * For each of w pixels, multiply the mask byte by the top byte of the
        * constant source (src >> 24) and store the scaled 8-bit result:
        *   t   = mask * (src >> 24)
        *   dst = (t + ((t >>r 8) & 0xff)) >>r 8     (>>r = shra_r.ph shift)
        *
   368  * a0 - dst  (a8)
   369  * a1 - src  (32bit constant)
   370  * a2 - mask (a8)
   371  * a3 - w
        *
        * Four pixels are computed per iteration of loop 0 using paired
        * halfword DSP instructions; the remaining 0-3 pixels are done one
        * at a time in loop 2.
   372  */
   374     li                t9, 0x00ff00ff
   375     beqz              a3, 3f      /* w == 0: nothing to do */
   376      nop
   377     srl               t7, a3, 2   /* t7 = how many multiples of 4 dst pixels */
   378     beqz              t7, 1f      /* branch if less than 4 dst pixels */
   379      nop
   381     srl               t8, a1, 24  /* t8 = src >> 24 */
   382     replv.ph          t8, t8      /* ... replicated into both halfwords */
   384 0:
   385     beqz              t7, 1f      /* all groups of 4 done */
   386      addiu            t7, t7, -1
   387     lbu               t0, 0(a2)   /* t0..t3 = four mask bytes */
   388     lbu               t1, 1(a2)
   389     lbu               t2, 2(a2)
   390     lbu               t3, 3(a2)
   392     addiu             a2, a2, 4
   394     precr_sra.ph.w    t1, t0, 0   /* pack the four mask bytes into t0 */
   395     precr_sra.ph.w    t3, t2, 0
   396     precr.qb.ph       t0, t3, t1
   398     muleu_s.ph.qbl    t2, t0, t8  /* t2 = products of the two left bytes */
   399     muleu_s.ph.qbr    t3, t0, t8  /* t3 = products of the two right bytes */
   400     shra_r.ph         t4, t2, 8
   401     shra_r.ph         t5, t3, 8
   402     and               t4, t4, t9
   403     and               t5, t5, t9
   404     addq.ph           t2, t2, t4
   405     addq.ph           t3, t3, t5
   406     shra_r.ph         t2, t2, 8
   407     shra_r.ph         t3, t3, 8
   408     precr.qb.ph       t2, t2, t3  /* gather the four result bytes in t2 */
   410     sb                t2, 0(a0)   /* store them lowest byte first */
   411     srl               t2, t2, 8
   412     sb                t2, 1(a0)
   413     srl               t2, t2, 8
   414     sb                t2, 2(a0)
   415     srl               t2, t2, 8
   416     sb                t2, 3(a0)
   417     addiu             a3, a3, -4
   418     b                 0b
   419      addiu            a0, a0, 4
   421 1:
   422     beqz              a3, 3f      /* no pixels left */
   423      nop
   424     srl               t8, a1, 24  /* t8 = src >> 24 (not replicated) */
   425 2:
   426     lbu               t0, 0(a2)
   427     addiu             a2, a2, 1
   429     mul               t2, t0, t8  /* same arithmetic as loop 0, one pixel */
   430     shra_r.ph         t3, t2, 8
   431     andi              t3, t3, 0x00ff
   432     addq.ph           t2, t2, t3
   433     shra_r.ph         t2, t2, 8
   435     sb                t2, 0(a0)
   436     addiu             a3, a3, -1
   437     bnez              a3, 2b
   438      addiu            a0, a0, 1
   440 3:
   441     j                 ra
   442      nop
   444 END(pixman_composite_src_n_8_8_asm_mips)
   446 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
   447 /*
        * Component-alpha OVER of a constant source through a per-channel
        * mask. For every pixel whose mask is non-zero the general path does
        *   s   = src * mask           (MIPS_*UN8x4_MUL_*UN8x4)
        *   m   = ~(mask * srca)       (MIPS_*UN8x4_MUL_*UN8, then not)
        *   dst = s + dst * m          (saturating addu_s.qb)
        * with srca = src >> 24. Pixels whose mask is zero are left untouched.
        * The multiply macros are not defined in this file.
        *
   448  * a0 - dst  (a8r8g8b8)
   449  * a1 - src  (32bit constant)
   450  * a2 - mask (a8r8g8b8)
   451  * a3 - w
        *
        * Pixels are handled in pairs (loop 1 when srca != 0xff, loop 2 when
        * srca == 0xff), followed by an optional last pixel at label 3.
        *
        * The "mask == 0xffffffff -> dst = src" shortcut is only valid when
        * srca == 0xff, so it is taken in loop 2 only. With srca != 0xff an
        * all-ones mask still needs m = ~srca in every channel; skipping the
        * mask * srca step would make m zero and overwrite dst with src
        * instead of blending. Loop 1 and the last-pixel path (which can be
        * reached with either srca) therefore always use the general path,
        * which also yields dst = src when srca == 0xff and the mask is all
        * ones.
   452  */
   454     SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
   455     beqz         a3, 4f       /* w == 0: nothing to do */
   456      nop
   457     li           t6, 0xff
   458     addiu        t7, zero, -1 /* t7 = 0xffffffff */
   459     srl          t8, a1, 24   /* t8 = srca */
   460     li           t9, 0x00ff00ff
   461     addiu        t1, a3, -1
   462     beqz         t1, 3f       /* last pixel */
   463      nop
   464     beq          t8, t6, 2f   /* if (srca == 0xff) */
   465      nop
   466 1:                            /* srca != 0xff: general path for every pair */
   467                               /* a1 = src */
   468     lw           t0, 0(a2)    /* t0 = mask */
   469     lw           t1, 4(a2)    /* t1 = mask */
   470     or           t2, t0, t1
   471     beqz         t2, 12f      /* if (t0 == 0) && (t1 == 0) */
   472      addiu       a2, a2, 8
   476     lw           t2, 0(a0)    /* t2 = dst */
   478     lw           t3, 4(a0)    /* t3 = dst */
   479     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
   480     MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
   482     not          t0, t0       /* t0, t1 = ~(mask * srca) */
   483     not          t1, t1
   484     MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
   485     addu_s.qb    t2, t4, t2
   486     addu_s.qb    t3, t5, t3
   487     sw           t2, 0(a0)
   488     sw           t3, 4(a0)
   489 12:
   490     addiu        a3, a3, -2
   491     addiu        t1, a3, -1
   492     bgtz         t1, 1b       /* loop while at least two pixels remain */
   493      addiu       a0, a0, 8
   494     b            3f
   495      nop
   496 2:                            /* srca == 0xff */
   497                               /* a1 = src */
   498     lw           t0, 0(a2)    /* t0 = mask */
   499     lw           t1, 4(a2)    /* t1 = mask */
   500     or           t2, t0, t1
   501     beqz         t2, 22f      /* if (t0 == 0) && (t1 == 0) */
   502      addiu       a2, a2, 8
   503     and          t2, t0, t1
   504     move         t4, a1
   505     beq          t2, t7, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
   506      move        t5, a1       /* (delay slot) t4 = t5 = src for the shortcut */
   507     lw           t2, 0(a0)    /* t2 = dst */
   508     lw           t3, 4(a0)    /* t3 = dst */
   509     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
   510     not          t0, t0       /* srca == 0xff, so ~(mask * srca) == ~mask */
   511     not          t1, t1
   512     MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
   513     addu_s.qb    t4, t4, t2
   514     addu_s.qb    t5, t5, t3
   515 21:
   516     sw           t4, 0(a0)
   517     sw           t5, 4(a0)
   518 22:
   519     addiu        a3, a3, -2
   520     addiu        t1, a3, -1
   521     bgtz         t1, 2b       /* loop while at least two pixels remain */
   522      addiu       a0, a0, 8
   523 3:
   524     blez         a3, 4f       /* no odd pixel left */
   525      nop
   526                               /* a1 = src */
   527     lw           t1, 0(a2)    /* t1 = mask */
   528     beqz         t1, 4f       /* mask == 0: dst unchanged */
   529      nop
   532     lw           t0, 0(a0)    /* t0 = dst */
   534     MIPS_UN8x4_MUL_UN8x4  a1, t1, t2, t9, t3, t4, t5, t6
   535     MIPS_UN8x4_MUL_UN8    t1, t8, t1, t9, t3, t4, t5
   537     not          t1, t1       /* t1 = ~(mask * srca) */
   538     MIPS_UN8x4_MUL_UN8x4  t0, t1, t0, t9, t3, t4, t5, t6
   539     addu_s.qb    t0, t2, t0
   540     sw           t0, 0(a0)
   541 4:
   542     RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
   543     j            ra
   544      nop
   546 END(pixman_composite_over_n_8888_8888_ca_asm_mips)
   548 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
   549 /*
        * Component-alpha OVER of a constant source through a per-channel
        * mask onto an r5g6b5 destination. For every pixel whose mask is
        * non-zero the general path does
        *   d   = dst converted to 8888   (CONVERT_*0565_TO_*8888)
        *   s   = src * mask              (MIPS_*UN8x4_MUL_*UN8x4)
        *   m   = ~(mask * srca)          (MIPS_*UN8x4_MUL_*UN8, then not)
        *   dst = (s + d * m) converted back to 0565
        * with srca = src >> 24. Pixels whose mask is zero are left untouched.
        * The multiply and convert macros are not defined in this file.
        *
   550  * a0 - dst  (r5g6b5)
   551  * a1 - src  (32bit constant)
   552  * a2 - mask (a8r8g8b8)
   553  * a3 - w
        *
        * Pixels are handled in pairs (loop 1 when srca != 0xff, loop 2 when
        * srca == 0xff), followed by an optional last pixel at label 3.
        *
        * The "mask == 0xffffffff -> dst = src" shortcut is only valid when
        * srca == 0xff, so it is taken in loop 2 only. With srca != 0xff an
        * all-ones mask still needs m = ~srca in every channel, so loop 1
        * always uses the general path. The last-pixel path can be reached
        * with either srca and has no register holding 0xffffffff (s1 is not
        * set when w == 1, and t7 holds 0x001F001F), so it always uses the
        * general path too.
   554  */
   556     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
   557     beqz         a3, 4f       /* w == 0: nothing to do */
   558      nop
   559     li           t5, 0xf800f800
   560     li           t6, 0x07e007e0
   561     li           t7, 0x001F001F
   562     li           t9, 0x00ff00ff
   564     srl          t8, a1, 24   /* t8 = srca */
   565     addiu        t1, a3, -1
   566     beqz         t1, 3f       /* last pixel */
   567      nop
   568     li           s0, 0xff     /* s0 = 0xff */
   569     addiu        s1, zero, -1 /* s1 = 0xffffffff */
   571     beq          t8, s0, 2f   /* if (srca == 0xff) */
   572      nop
   573 1:                            /* srca != 0xff: general path for every pair */
   574                               /* a1 = src */
   575     lw           t0, 0(a2)    /* t0 = mask */
   576     lw           t1, 4(a2)    /* t1 = mask */
   577     or           t2, t0, t1
   578     beqz         t2, 12f      /* if (t0 == 0) && (t1 == 0) */
   579      addiu       a2, a2, 8
   583     lhu          t2, 0(a0)    /* t2 = dst */
   585     lhu          t3, 2(a0)    /* t3 = dst */
   586     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
   587     MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
   589     not          t0, t0       /* t0, t1 = ~(mask * srca) */
   590     not          t1, t1
   591     CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
   592     MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
   593     addu_s.qb    s2, s2, s4
   594     addu_s.qb    s3, s3, s5
   595     CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
   596     sh           t2, 0(a0)
   597     sh           t3, 2(a0)
   598 12:
   599     addiu        a3, a3, -2
   600     addiu        t1, a3, -1
   601     bgtz         t1, 1b       /* loop while at least two pixels remain */
   602      addiu       a0, a0, 4
   603     b            3f
   604      nop
   605 2:                            /* srca == 0xff */
   606                               /* a1 = src */
   607     lw           t0, 0(a2)    /* t0 = mask */
   608     lw           t1, 4(a2)    /* t1 = mask */
   609     or           t2, t0, t1
   610     beqz         t2, 22f      /* if (t0 == 0) && (t1 == 0) */
   611      addiu       a2, a2, 8
   612     and          t3, t0, t1
   613     move         t2, a1
   614     beq          t3, s1, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
   615      move        t3, a1       /* (delay slot) t2 = t3 = src for the shortcut */
   616     lhu          t2, 0(a0)    /* t2 = dst */
   617     lhu          t3, 2(a0)    /* t3 = dst */
   618     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
   619     not          t0, t0       /* srca == 0xff, so ~(mask * srca) == ~mask */
   620     not          t1, t1
   621     CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
   622     MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
   623     addu_s.qb    t2, s2, s4
   624     addu_s.qb    t3, s3, s5
   625 21:
   626     CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
   627     sh           t0, 0(a0)
   628     sh           t1, 2(a0)
   629 22:
   630     addiu        a3, a3, -2
   631     addiu        t1, a3, -1
   632     bgtz         t1, 2b       /* loop while at least two pixels remain */
   633      addiu       a0, a0, 4
   634 3:
   635     blez         a3, 4f       /* no odd pixel left */
   636      nop
   637                               /* a1 = src */
   638     lw           t1, 0(a2)    /* t1 = mask */
   639     beqz         t1, 4f       /* mask == 0: dst unchanged */
   640      nop
   643     lhu          t0, 0(a0)    /* t0 = dst */
   645     MIPS_UN8x4_MUL_UN8x4     a1, t1, t2, t9, t3, t4, t5, t6
   646     MIPS_UN8x4_MUL_UN8       t1, t8, t1, t9, t3, t4, t5
   648     not          t1, t1       /* t1 = ~(mask * srca) */
   649     CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
   650     MIPS_UN8x4_MUL_UN8x4     s1, t1, t3, t9, t4, t5, t6, t7
   651     addu_s.qb    t0, t2, t3
   652     CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
   653     sh           s1, 0(a0)
   654 4:
   655     RESTORE_REGS_FROM_STACK  20, s0, s1, s2, s3, s4, s5, s6, s7, s8
   656     j            ra
   657      nop
   659 END(pixman_composite_over_n_8888_0565_ca_asm_mips)
   661 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
   662 /*
        * OVER of a constant source through an a8 mask onto a8r8g8b8. For a
        * pixel with a non-zero mask the general path computes
        *   s   = src * mask                  (MIPS_*UN8x4_MUL_*UN8)
        *   dst = s + dst * (~s >> 24)        (saturating addu_s.qb)
        * Pixels whose mask is zero are left untouched. The multiply and
        * OVER macros are not defined in this file.
        *
   663  * a0 - dst  (a8r8g8b8)
   664  * a1 - src  (32bit constant)
   665  * a2 - mask (a8)
   666  * a3 - w
        *
        * Pixels are handled in pairs (loop 1 when srca != 0xff, loop 2 when
        * srca == 0xff), followed by an optional last pixel at label 3.
        * When both masks of a pair are 0xff, loop 1 blends with the
        * precomputed s4 = ~src >> 24 and loop 2 stores src directly.
   667  */
   669     SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
   670     beqz      a3, 4f         /* w == 0: nothing to do */
   671      nop
   672     li        t4, 0x00ff00ff
   673     li        t5, 0xff
   674     addiu     t0, a3, -1
   675     beqz      t0, 3f         /* last pixel */
   676      srl      t6, a1, 24     /* t6 = srca */
   677     not       s4, a1
   678     beq       t5, t6, 2f     /* if (srca == 0xff) */
   679      srl      s4, s4, 24     /* (delay slot) s4 = ~src >> 24 */
   680 1:
   681                              /* a1 = src */
   682     lbu       t0, 0(a2)      /* t0 = mask */
   683     lbu       t1, 1(a2)      /* t1 = mask */
   684     or        t2, t0, t1
   685     beqz      t2, 111f       /* if (t0 == 0) && (t1 == 0) */
   686      addiu    a2, a2, 2
   687     and       t3, t0, t1
   689     lw        t2, 0(a0)      /* t2 = dst */
   690     beq       t3, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
   691      lw       t3, 4(a0)      /* t3 = dst */
   693     MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
   694     not       s2, s0
   695     not       s3, s1
   696     srl       s2, s2, 24     /* s2, s3 = ~(src * mask) >> 24 */
   697     srl       s3, s3, 24
   698     MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
   699     addu_s.qb s2, t2, s0
   700     addu_s.qb s3, t3, s1
   701     sw        s2, 0(a0)
   702     b         111f
   703      sw       s3, 4(a0)
   704 11:
   705     MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
   706     addu_s.qb s2, t2, a1
   707     addu_s.qb s3, t3, a1
   708     sw        s2, 0(a0)
   709     sw        s3, 4(a0)
   711 111:
   712     addiu     a3, a3, -2
   713     addiu     t0, a3, -1
   714     bgtz      t0, 1b         /* loop while at least two pixels remain */
   715      addiu    a0, a0, 8
   716     b         3f
   717      nop
   718 2:
   719                              /* a1 = src */
   720     lbu       t0, 0(a2)      /* t0 = mask */
   721     lbu       t1, 1(a2)      /* t1 = mask */
   722     or        t2, t0, t1
   723     beqz      t2, 222f       /* if (t0 == 0) && (t1 == 0) */
   724      addiu    a2, a2, 2
   725     and       t3, t0, t1
   726     beq       t3, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
   727      nop
   728     lw        t2, 0(a0)      /* t2 = dst */
   729     lw        t3, 4(a0)      /* t3 = dst */
   731     OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
   732                            t6, t7, t4, t8, t9, s0, s1, s2, s3
   733     sw        t6, 0(a0)
   734     b         222f
   735      sw        t7, 4(a0)
   736 22:
   737     sw        a1, 0(a0)      /* opaque source, full mask: dst = src */
   738     sw        a1, 4(a0)
   739 222:
   740     addiu     a3, a3, -2
   741     addiu     t0, a3, -1
   742     bgtz      t0, 2b         /* loop while at least two pixels remain */
   743      addiu    a0, a0, 8
   744 3:
   745     blez      a3, 4f         /* no odd pixel left */
   746      nop
   747                              /* a1 = src */
   748     lbu       t0, 0(a2)      /* t0 = mask */
   749     beqz      t0, 4f         /* if (t0 == 0) */
   750      addiu    a2, a2, 1
   751     move      t3, a1         /* t3 = src, used as is when mask == 0xff */
   752     beq       t0, t5, 31f    /* if (t0 == 0xff) */
   753      lw       t1, 0(a0)      /* t1 = dst */
   755     MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
   756 31:
   757     not       t2, t3
   758     srl       t2, t2, 24     /* t2 = ~t3 >> 24 */
   759     MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
   760     addu_s.qb t2, t1, t3
   761     sw        t2, 0(a0)
   762 4:
   763     RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
   764     j         ra
   765      nop
   767 END(pixman_composite_over_n_8_8888_asm_mips)
   769 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
   770 /*
        * OVER of a constant source through an a8 mask onto r5g6b5. For a
        * pixel with a non-zero mask the general path converts dst to 8888,
        * computes
        *   s   = src * mask                  (MIPS_*UN8x4_MUL_*UN8)
        *   d   = s + d * (~s >> 24)          (saturating addu_s.qb)
        * and converts the result back to 0565. Pixels whose mask is zero
        * are left untouched. The multiply, OVER and CONVERT macros are not
        * defined in this file.
        *
   771  * a0 - dst  (r5g6b5)
   772  * a1 - src  (32bit constant)
   773  * a2 - mask (a8)
   774  * a3 - w
        *
        * Pixels are handled in pairs (loop 1 when srca != 0xff, loop 21 when
        * srca == 0xff), followed by an optional last pixel at label 3.
        * When both masks of a pair are 0xff, loop 1 blends with the
        * precomputed v0 = ~src >> 24 and loop 21 stores the source color
        * that was converted to 0565 once at label 2.
   775  */
   776     SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
   777     beqz     a3, 4f         /* w == 0: nothing to do */
   778      nop
   779     li       t4, 0x00ff00ff
   780     li       t5, 0xff
   781     li       t6, 0xf800f800
   782     li       t7, 0x07e007e0
   783     li       t8, 0x001F001F
   784     addiu    t1, a3, -1
   785     beqz     t1, 3f         /* last pixel */
   786      srl     t0, a1, 24     /* t0 = srca */
   787     not      v0, a1
   788     beq      t0, t5, 2f     /* if (srca == 0xff) */
   789      srl     v0, v0, 24     /* (delay slot) v0 = ~src >> 24 */
   790 1:
   791                             /* a1 = src */
   792     lbu      t0, 0(a2)      /* t0 = mask */
   793     lbu      t1, 1(a2)      /* t1 = mask */
   794     or       t2, t0, t1
   795     beqz     t2, 111f       /* if (t0 == 0) && (t1 == 0) */
   796      addiu   a2, a2, 2
   797     lhu      t2, 0(a0)      /* t2 = dst */
   798     lhu      t3, 2(a0)      /* t3 = dst */
   799     CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
   800     and      t9, t0, t1
   801     beq      t9, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
   802      nop
   804     MIPS_2xUN8x4_MUL_2xUN8   a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
   805     not      s4, s2
   806     not      s5, s3
   807     srl      s4, s4, 24     /* s4, s5 = ~(src * mask) >> 24 */
   808     srl      s5, s5, 24
   809     MIPS_2xUN8x4_MUL_2xUN8   s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
   810     addu_s.qb                s4, s2, s0
   811     addu_s.qb                s5, s3, s1
   812     CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
   813     sh       t2, 0(a0)
   814     b        111f
   815      sh      t3, 2(a0)
   816 11:
   817     MIPS_2xUN8x4_MUL_2xUN8   s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
   818     addu_s.qb                s4, a1, s0
   819     addu_s.qb                s5, a1, s1
   820     CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
   821     sh       t2, 0(a0)
   822     sh       t3, 2(a0)
   823 111:
   824     addiu    a3, a3, -2
   825     addiu    t0, a3, -1
   826     bgtz     t0, 1b         /* loop while at least two pixels remain */
   827      addiu   a0, a0, 4
   828     b        3f
   829      nop
   830 2:
   831     CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
   832 21:
   833                             /* a1 = src */
   834     lbu      t0, 0(a2)      /* t0 = mask */
   835     lbu      t1, 1(a2)      /* t1 = mask */
   836     or       t2, t0, t1
   837     beqz     t2, 222f       /* if (t0 == 0) && (t1 == 0) */
   838      addiu   a2, a2, 2
   839     and      t9, t0, t1
   840     move     s2, s0
   841     beq      t9, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
   842      move    s3, s0         /* (delay slot) s2 = s3 = s0 for the shortcut */
   843     lhu      t2, 0(a0)      /* t2 = dst */
   844     lhu      t3, 2(a0)      /* t3 = dst */
   846     CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
   847     OVER_2x8888_2x8_2x8888   a1, a1, t0, t1, s2, s3, \
   848                              t2, t3, t4, t9, s4, s5, s6, s7, s8
   849     CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
   850 22:
   851     sh       s2, 0(a0)
   852     sh       s3, 2(a0)
   853 222:
   854     addiu    a3, a3, -2
   855     addiu    t0, a3, -1
   856     bgtz     t0, 21b        /* loop while at least two pixels remain */
   857      addiu   a0, a0, 4
   858 3:
   859     blez      a3, 4f        /* no odd pixel left */
   860      nop
   861                             /* a1 = src */
   862     lbu      t0, 0(a2)      /* t0 = mask */
   863     beqz     t0, 4f         /* if (t0 == 0) */
   864      nop
   865     lhu      t1, 0(a0)      /* t1 = dst */
   866     CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
   867     beq      t0, t5, 31f    /* if (t0 == 0xff) */
   868      move    t3, a1         /* (delay slot) t3 = src, used as is when mask == 0xff */
   870     MIPS_UN8x4_MUL_UN8       a1, t0, t3, t4, t7, t8, t9
   871 31:
   872     not      t6, t3
   873     srl      t6, t6, 24     /* t6 = ~t3 >> 24 */
   874     MIPS_UN8x4_MUL_UN8       t2, t6, t2, t4, t7, t8, t9
   875     addu_s.qb                t1, t2, t3
   876     CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
   877     sh       t2, 0(a0)
   878 4:
   879     RESTORE_REGS_FROM_STACK  24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
   880     j        ra
   881      nop
   883 END(pixman_composite_over_n_8_0565_asm_mips)
   885 LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
   886 /*
        * Composite w source pixels onto dst through a constant mask using the
        * OVER_*8888_*8_*8888 macros (not defined in this file). Only the top
        * byte of the mask constant is used: a2 is replaced by a2 >> 24
        * before the loop.
        *
   887  * a0 - dst  (a8r8g8b8)
   888  * a1 - src  (a8r8g8b8)
   889  * a2 - mask (32bit constant)
   890  * a3 - w
        *
        * Two pixels are processed per iteration; an odd trailing pixel is
        * handled at label 2.
   891  */
   893     SAVE_REGS_ON_STACK 0, s0
   894     li       t4, 0x00ff00ff
   895     beqz     a3, 3f    /* w == 0: nothing to do */
   896      nop
   897     addiu    t1, a3, -1
   898     srl      a2, a2, 24 /* a2 = top byte of the mask constant */
   899     beqz     t1, 2f    /* w == 1: single-pixel path only */
   900      nop
   902 1:
   903     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   904     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   905                        /* a2 = mask        (32bit constant) */
   906     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   907     lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   908     addiu    a1, a1, 8
   910     OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
   911                            t5, t6, t4, t7, t8, t9, t0, t1, s0
   913     sw       t5, 0(a0) /* store the two results */
   914     sw       t6, 4(a0)
   915     addiu    a3, a3, -2
   916     addiu    t1, a3, -1
   917     bgtz     t1, 1b    /* loop while at least two pixels remain */
   918      addiu   a0, a0, 8
   919 2:
   920     beqz     a3, 3f    /* no odd pixel left */
   921      nop
   922     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   923                        /* a2 = mask        (32bit constant) */
   924     lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   926     OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8
   928     sw       t3, 0(a0)
   929 3:
   930     RESTORE_REGS_FROM_STACK 0, s0
   931     j        ra
   932      nop
   934 END(pixman_composite_over_8888_n_8888_asm_mips)
   936 LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
   937 /*
        * Composite w a8r8g8b8 source pixels onto an r5g6b5 destination
        * through a constant mask. Each destination pixel is converted to
        * 8888, combined with the source by the OVER_*8888_*8_*8888 macros
        * and converted back to 0565. The macros are not defined in this
        * file. Only the top byte of the mask constant is used: a2 is
        * replaced by a2 >> 24 before the loop.
        *
   938  * a0 - dst  (r5g6b5)
   939  * a1 - src  (a8r8g8b8)
   940  * a2 - mask (32bit constant)
   941  * a3 - w
        *
        * Two pixels are processed per iteration; an odd trailing pixel is
        * handled at label 2.
   942  */
   944     SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
   945     li       t6, 0x00ff00ff
   946     li       t7, 0xf800f800
   947     li       t8, 0x07e007e0
   948     li       t9, 0x001F001F
   949     beqz     a3, 3f    /* w == 0: nothing to do */
   950      nop
   951     srl      a2, a2, 24 /* a2 = top byte of the mask constant */
   952     addiu    t1, a3, -1
   953     beqz     t1, 2f    /* w == 1: single-pixel path only */
   954      nop
   955 1:
   956     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   957     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   958                        /* a2 = mask        (32bit constant) */
   959     lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
   960     lhu      t3, 2(a0) /* t3 = destination (r5g6b5) */
   961     addiu    a1, a1, 8
   963     CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3
   964     OVER_2x8888_2x8_2x8888   t0, t1, a2, a2, t4, t5, \
   965                              t2, t3, t6, t0, t1, s0, s1, s2, s3
   966     CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1
   968     sh       t4, 0(a0) /* store the two results */
   969     sh       t5, 2(a0)
   970     addiu    a3, a3, -2
   971     addiu    t1, a3, -1
   972     bgtz     t1, 1b    /* loop while at least two pixels remain */
   973      addiu   a0, a0, 4
   974 2:
   975     beqz     a3, 3f    /* no odd pixel left */
   976      nop
   977     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   978                        /* a2 = mask        (32bit constant) */
   979     lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
   981     CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5
   982     OVER_8888_8_8888         t0, a2, t2, t1, t6, t3, t4, t5, t7
   983     CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5
   985     sh       t3, 0(a0)
   986 3:
   987     RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
   988     j                 ra
   989      nop
   991 END(pixman_composite_over_8888_n_0565_asm_mips)
   993 LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
   994 /*
   995  * a0 - dst  (r5g6b5)
   996  * a1 - src  (r5g6b5)
   997  * a2 - mask (32bit constant)
   998  * a3 - w
        *
        * Composites (OVER, per the routine name) w r5g6b5 source pixels,
        * weighted by the top byte of the constant mask, onto an r5g6b5
        * destination.  Loop 1 handles two pixels per pass, block 2 handles
        * a trailing odd pixel, and label 3 is the shared exit.
        *
        * The CONVERT_*, OVER_* and *_REGS_* macros are defined outside this
        * chunk.  Going by their names, source and destination are both
        * widened from 0565 to 8888, combined, and the result narrowed back.
        * TODO confirm operand order, clobbers and the meaning of the leading
        * 20 passed to the save/restore macros against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
   999  */
  1001     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
  1002     li       t6, 0x00ff00ff /* low byte of each halfword */
  1003     li       t7, 0xf800f800 /* top 5 bits of each halfword */
  1004     li       t8, 0x07e007e0 /* middle 6 bits of each halfword */
  1005     li       t9, 0x001F001F /* low 5 bits of each halfword */
  1006     beqz     a3, 3f         /* w == 0: nothing to composite */
  1007      nop
  1008     srl      a2, a2, 24     /* keep only bits 31..24 of the mask */
  1009     addiu    t1, a3, -1
  1010     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1011      nop
  1012 1:
  1013     lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
  1014     lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
  1015                        /* a2 = mask        (32bit constant) */
  1016     lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
  1017     lhu      t3, 2(a0) /* t3 = destination (r5g6b5) */
  1018     addiu    a1, a1, 4 /* src += 2 pixels */
  1020     CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3
  1021     CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5
  1022     OVER_2x8888_2x8_2x8888   t4, t5, a2, a2, s0, s1, \
  1023                              t0, t1, t6, s2, s3, s4, s5, t4, t5
  1024     CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3
  1026     sh       s0, 0(a0) /* store first result pixel */
  1027     sh       s1, 2(a0) /* store second result pixel */
  1028     addiu    a3, a3, -2 /* w -= 2 */
  1029     addiu    t1, a3, -1
  1030     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1031      addiu   a0, a0, 4 /* delay slot: dst += 2 pixels */
  1032 2:
  1033     beqz     a3, 3f    /* no odd pixel left */
  1034      nop
  1035     lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
  1036                        /* a2 = mask        (32bit constant) */
  1037     lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
  1039     CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5
  1040     CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5
  1041     OVER_8888_8_8888         t2, a2, t3, t0, t6, t1, t4, t5, t7
  1042     CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
  1044     sh       t3, 0(a0) /* store the last result pixel */
  1045 3:
  1046     RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
  1047     j        ra
  1048      nop
  1050 END(pixman_composite_over_0565_n_0565_asm_mips)
  1052 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
  1053 /*
  1054  * a0 - dst  (a8r8g8b8)
  1055  * a1 - src  (a8r8g8b8)
  1056  * a2 - mask (a8)
  1057  * a3 - w
        *
        * Composites (OVER, per the routine name) w a8r8g8b8 source pixels,
        * each weighted by its own 8-bit mask value, onto an a8r8g8b8
        * destination.  Loop 1 handles two pixels per pass, block 2 handles
        * a trailing odd pixel, and label 3 is the shared exit.
        *
        * The OVER_* and *_REGS_* macros are defined outside this chunk.
        * TODO confirm operand order and clobbers against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1058  */
  1060     SAVE_REGS_ON_STACK 0, s0, s1
  1061     li       t4, 0x00ff00ff /* low byte of each halfword */
  1062     beqz     a3, 3f         /* w == 0: nothing to composite */
  1063      nop
  1064     addiu    t1, a3, -1
  1065     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1066      nop
  1067 1:
  1068     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1069     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1070     lbu      t2, 0(a2) /* t2 = mask        (a8) */
  1071     lbu      t3, 1(a2) /* t3 = mask        (a8) */
  1072     lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
  1073     lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
  1074     addiu    a1, a1, 8 /* src  += 2 pixels */
  1075     addiu    a2, a2, 2 /* mask += 2 pixels */
  1077     OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
  1078                            t7, t8, t4, t9, s0, s1, t0, t1, t2
  1080     sw       t7, 0(a0) /* store first result pixel */
  1081     sw       t8, 4(a0) /* store second result pixel */
  1082     addiu    a3, a3, -2 /* w -= 2 */
  1083     addiu    t1, a3, -1
  1084     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1085      addiu   a0, a0, 8 /* delay slot: dst += 2 pixels */
  1086 2:
  1087     beqz     a3, 3f    /* no odd pixel left */
  1088      nop
  1089     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1090     lbu      t1, 0(a2) /* t1 = mask        (a8) */
  1091     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1093     OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
  1095     sw       t3, 0(a0) /* store the last result pixel */
  1096 3:
  1097     RESTORE_REGS_FROM_STACK 0, s0, s1
  1098     j        ra
  1099      nop
  1101 END(pixman_composite_over_8888_8_8888_asm_mips)
  1103 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
  1104 /*
  1105  * a0 - dst  (r5g6b5)
  1106  * a1 - src  (a8r8g8b8)
  1107  * a2 - mask (a8)
  1108  * a3 - w
        *
        * Composites (OVER, per the routine name) w a8r8g8b8 source pixels,
        * each weighted by its own 8-bit mask value, onto an r5g6b5
        * destination.  Loop 1 handles two pixels per pass, block 2 handles
        * a trailing odd pixel, and label 3 is the shared exit.
        *
        * The CONVERT_*, OVER_* and *_REGS_* macros are defined outside this
        * chunk.  Going by their names the destination is widened from 0565
        * to 8888, combined with the masked source, and narrowed back.
        * TODO confirm operand order, clobbers and the meaning of the leading
        * 20 passed to the save/restore macros against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1109  */
  1111     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
  1112     li       t6, 0x00ff00ff /* low byte of each halfword */
  1113     li       t7, 0xf800f800 /* top 5 bits of each halfword */
  1114     li       t8, 0x07e007e0 /* middle 6 bits of each halfword */
  1115     li       t9, 0x001F001F /* low 5 bits of each halfword */
  1116     beqz     a3, 3f         /* w == 0: nothing to composite */
  1117      nop
  1118     addiu    t1, a3, -1
  1119     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1120      nop
  1121 1:
  1122     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1123     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1124     lbu      t2, 0(a2) /* t2 = mask        (a8) */
  1125     lbu      t3, 1(a2) /* t3 = mask        (a8) */
  1126     lhu      t4, 0(a0) /* t4 = destination (r5g6b5) */
  1127     lhu      t5, 2(a0) /* t5 = destination (r5g6b5) */
  1128     addiu    a1, a1, 8 /* src  += 2 pixels */
  1129     addiu    a2, a2, 2 /* mask += 2 pixels */
  1131     CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
  1132     OVER_2x8888_2x8_2x8888   t0, t1, t2, t3, s0, s1, \
  1133                              t4, t5, t6, s2, s3, s4, s5, t0, t1
  1134     CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
  1136     sh       s0, 0(a0) /* store first result pixel */
  1137     sh       s1, 2(a0) /* store second result pixel */
  1138     addiu    a3, a3, -2 /* w -= 2 */
  1139     addiu    t1, a3, -1
  1140     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1141      addiu   a0, a0, 4 /* delay slot: dst += 2 pixels */
  1142 2:
  1143     beqz     a3, 3f    /* no odd pixel left */
  1144      nop
  1145     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1146     lbu      t1, 0(a2) /* t1 = mask        (a8) */
  1147     lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
  1149     CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
  1150     OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
  1151     CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
  1153     sh       t3, 0(a0) /* store the last result pixel */
  1154 3:
  1155     RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
  1156     j        ra
  1157      nop
  1159 END(pixman_composite_over_8888_8_0565_asm_mips)
  1161 LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
  1162 /*
  1163  * a0 - dst  (r5g6b5)
  1164  * a1 - src  (r5g6b5)
  1165  * a2 - mask (a8)
  1166  * a3 - w
        *
        * Composites (OVER, per the routine name) w r5g6b5 source pixels,
        * each weighted by its own 8-bit mask value, onto an r5g6b5
        * destination.  Loop 1 handles two pixels per pass, block 2 handles
        * a trailing odd pixel, and label 3 is the shared exit.
        *
        * Note that t4..t6 hold the 0565 field masks only for loop 1; the
        * single-pixel tail reuses them as plain temporaries.
        *
        * The CONVERT_*, OVER_* and *_REGS_* macros are defined outside this
        * chunk.  Going by their names, source and destination are both
        * widened from 0565 to 8888, combined, and the result narrowed back.
        * TODO confirm operand order, clobbers and the meaning of the leading
        * 20 passed to the save/restore macros against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1167  */
  1169     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
  1170     li       t4, 0xf800f800 /* top 5 bits of each halfword */
  1171     li       t5, 0x07e007e0 /* middle 6 bits of each halfword */
  1172     li       t6, 0x001F001F /* low 5 bits of each halfword */
  1173     li       t7, 0x00ff00ff /* low byte of each halfword */
  1174     beqz     a3, 3f         /* w == 0: nothing to composite */
  1175      nop
  1176     addiu    t1, a3, -1
  1177     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1178      nop
  1179 1:
  1180     lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
  1181     lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
  1182     lbu      t2, 0(a2) /* t2 = mask        (a8) */
  1183     lbu      t3, 1(a2) /* t3 = mask        (a8) */
  1184     lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
  1185     lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
  1186     addiu    a1, a1, 4 /* src  += 2 pixels */
  1187     addiu    a2, a2, 2 /* mask += 2 pixels */
  1189     CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
  1190     CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
  1191     OVER_2x8888_2x8_2x8888   s0, s1, t2, t3, s2, s3, \
  1192                              t0, t1, t7, s4, s5, t8, t9, s0, s1
  1193     CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
  1195     sh       s0, 0(a0) /* store first result pixel */
  1196     sh       s1, 2(a0) /* store second result pixel */
  1197     addiu    a3, a3, -2 /* w -= 2 */
  1198     addiu    t1, a3, -1
  1199     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1200      addiu   a0, a0, 4 /* delay slot: dst += 2 pixels */
  1201 2:
  1202     beqz     a3, 3f    /* no odd pixel left */
  1203      nop
  1204     lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
  1205     lbu      t1, 0(a2) /* t1 = mask        (a8) */
  1206     lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
  1208     CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
  1209     CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
  1210     OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
  1211     CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
  1213     sh       t3, 0(a0) /* store the last result pixel */
  1214 3:
  1215     RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
  1216     j        ra
  1217      nop
  1219 END(pixman_composite_over_0565_8_0565_asm_mips)
  1221 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
  1222 /*
  1223  * a0 - dst  (a8r8g8b8)
  1224  * a1 - src  (a8r8g8b8)
  1225  * a2 - mask (a8r8g8b8)
  1226  * a3 - w
        *
        * Composites (OVER, per the routine name) w a8r8g8b8 source pixels
        * onto an a8r8g8b8 destination.  Only bits 31..24 of each mask pixel
        * are used as the per-pixel weight.  Loop 1 handles two pixels per
        * pass, block 2 handles a trailing odd pixel, and label 3 is the
        * shared exit.
        *
        * The OVER_* and *_REGS_* macros are defined outside this chunk.
        * TODO confirm operand order and clobbers against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1227  */
  1229     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1230     li       t4, 0x00ff00ff /* low byte of each halfword */
  1231     beqz     a3, 3f         /* w == 0: nothing to composite */
  1232      nop
  1233     addiu    t1, a3, -1
  1234     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1235      nop
  1236 1:
  1237     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1238     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1239     lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
  1240     lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
  1241     lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
  1242     lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
  1243     addiu    a1, a1, 8 /* src  += 2 pixels */
  1244     addiu    a2, a2, 8 /* mask += 2 pixels */
  1245     srl      t2, t2, 24 /* reduce mask 0 to its top byte */
  1246     srl      t3, t3, 24 /* reduce mask 1 to its top byte */
  1248     OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1
  1250     sw       t7, 0(a0) /* store first result pixel */
  1251     sw       t8, 4(a0) /* store second result pixel */
  1252     addiu    a3, a3, -2 /* w -= 2 */
  1253     addiu    t1, a3, -1
  1254     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1255      addiu   a0, a0, 8 /* delay slot: dst += 2 pixels */
  1256 2:
  1257     beqz     a3, 3f    /* no odd pixel left */
  1258      nop
  1259     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1260     lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
  1261     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1262     srl      t1, t1, 24 /* reduce the mask to its top byte */
  1264     OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
  1266     sw       t3, 0(a0) /* store the last result pixel */
  1267 3:
  1268     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1269     j        ra
  1270      nop
  1272 END(pixman_composite_over_8888_8888_8888_asm_mips)
  1274 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
  1275 /*
  1276  * a0 - dst  (a8r8g8b8)
  1277  * a1 - src  (a8r8g8b8)
  1278  * a2 - w
        *
        * Unmasked OVER of w a8r8g8b8 source pixels onto an a8r8g8b8
        * destination: the destination is scaled by the complement of the
        * source top byte (via the MIPS_*UN8x4_MUL_*UN8 macros) and the
        * source is then added with per-byte saturation.  Loop 1 handles two
        * pixels per pass, block 2 handles a trailing odd pixel, and label 3
        * is the shared exit.
        *
        * Shortcuts in loop 1:
        *   - both source top bytes are 0xff: the sources are stored as-is
        *     (label 11);
        *   - both source words are zero: the stores are skipped (label 12).
        * The tail has the same two shortcuts for its single pixel.
        *
        * The MIPS_*UN8x4_MUL_*UN8 and *_REGS_* macros are defined outside
        * this chunk.  TODO confirm operand order and clobbers against their
        * definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
        *
        * NOTE(review): the "beqz t8, 12f" and "beqz t0, 3f" branches have
        * no such indented instruction.  Presumably the first instruction
        * expanded from the macro that follows lands in the delay slot and
        * so also runs when the branch is taken - confirm against the macro
        * definitions that this is harmless on the taken path.
  1279  */
  1281     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1282     li           t4, 0x00ff00ff /* low byte of each halfword */
  1283     beqz         a2, 3f         /* w == 0: nothing to composite */
  1284      nop
  1285     addiu        t1, a2, -1
  1286     beqz         t1, 2f         /* w == 1: skip the two-pixel loop */
  1287      nop
  1288 1:
  1289     lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1290     lw           t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1291     lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1292     lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
  1293     addiu        a1, a1, 8 /* src += 2 pixels */
  1295     not          t5, t0
  1296     srl          t5, t5, 24 /* t5 = 255 - top byte of source 0 */
  1297     not          t6, t1
  1298     srl          t6, t6, 24 /* t6 = 255 - top byte of source 1 */
  1300     or           t7, t5, t6
  1301     beqz         t7, 11f    /* both top bytes 0xff: store sources */
  1302      or          t8, t0, t1 /* delay slot: t8 == 0 iff both sources 0 */
  1303     beqz         t8, 12f    /* both sources 0: skip the stores */
  1305     MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3
  1307     addu_s.qb    t0, t7, t0 /* per-byte saturating add of source 0 */
  1308     addu_s.qb    t1, t8, t1 /* per-byte saturating add of source 1 */
  1309 11:
  1310     sw           t0, 0(a0) /* store first result pixel */
  1311     sw           t1, 4(a0) /* store second result pixel */
  1312 12:
  1313     addiu        a2, a2, -2 /* w -= 2 */
  1314     addiu        t1, a2, -1
  1315     bgtz         t1, 1b     /* loop while at least 2 pixels remain */
  1316      addiu       a0, a0, 8  /* delay slot: dst += 2 pixels */
  1317 2:
  1318     beqz         a2, 3f     /* no odd pixel left */
  1319      nop
  1321     lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1322     lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
  1323     addiu        a1, a1, 4
  1325     not          t2, t0
  1326     srl          t2, t2, 24 /* t2 = 255 - top byte of the source */
  1328     beqz         t2, 21f    /* top byte 0xff: store source as-is */
  1329      nop
  1330     beqz         t0, 3f     /* source is 0: leave dst untouched */
  1332     MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7
  1334     addu_s.qb    t0, t3, t0 /* per-byte saturating add of the source */
  1335 21:
  1336     sw           t0, 0(a0) /* store the last result pixel */
  1338 3:
  1339     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1340     j            ra
  1341      nop
  1343 END(pixman_composite_over_8888_8888_asm_mips)
  1345 LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
  1346 /*
  1347  * a0 - dst  (r5g6b5)
  1348  * a1 - src  (32bit constant)
  1349  * a2 - w
        *
        * OVER of a constant 32-bit source onto w r5g6b5 destination pixels.
        *
        * Fast path (source top byte == 0xff): the source is converted to
        * 0565 once and loop 0 fills the destination with it.  This path
        * returns before any register is saved.
        *
        * General path (label 1): each destination pixel is widened to 8888,
        * scaled by t0 = 255 - source top byte, the source is added with
        * per-byte saturation, and the result is narrowed back to 0565.
        * Loop 2 handles two pixels per pass, block 3 a trailing odd pixel.
        * Label 4 restores the saved registers; label 5 is the plain return
        * used when w == 0.
        *
        * The CONVERT_*, MIPS_*UN8x4_MUL_*UN8 and *_REGS_* macros are defined
        * outside this chunk.  TODO confirm operand order and clobbers (in
        * particular that passing t0 as a mask operand keeps it usable for
        * the next pass) against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1350  */
  1352     beqz         a2, 5f     /* w == 0: return, nothing was saved */
  1353      nop
  1355     not          t0, a1
  1356     srl          t0, t0, 24 /* t0 = 255 - top byte of the source */
  1357     bgtz         t0, 1f     /* top byte != 0xff: general path */
  1358      nop
  1359     CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3
  1360 0:
  1361     sh           t1, 0(a0)  /* fill with the converted source */
  1362     addiu        a2, a2, -1 /* w -= 1 */
  1363     bgtz         a2, 0b
  1364      addiu       a0, a0, 2  /* delay slot: dst += 1 pixel */
  1365     j            ra
  1366      nop
  1368 1:
  1369     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1370     li           t4, 0x00ff00ff /* low byte of each halfword */
  1371     li           t5, 0xf800f800 /* top 5 bits of each halfword */
  1372     li           t6, 0x07e007e0 /* middle 6 bits of each halfword */
  1373     li           t7, 0x001F001F /* low 5 bits of each halfword */
  1374     addiu        t1, a2, -1
  1375     beqz         t1, 3f         /* w == 1: skip the two-pixel loop */
  1376      nop
  1377 2:
  1378     lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
  1379     lhu          t2, 2(a0) /* t2 = destination (r5g6b5) */
  1381     CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2
  1382     MIPS_2xUN8x4_MUL_2xUN8   t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8
  1383     addu_s.qb                t1, t1, a1 /* per-byte saturating add of src */
  1384     addu_s.qb                t2, t2, a1 /* per-byte saturating add of src */
  1385     CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1
  1387     sh           t3, 0(a0) /* store first result pixel */
  1388     sh           t8, 2(a0) /* store second result pixel */
  1390     addiu        a2, a2, -2 /* w -= 2 */
  1391     addiu        t1, a2, -1
  1392     bgtz         t1, 2b     /* loop while at least 2 pixels remain */
  1393      addiu       a0, a0, 4  /* delay slot: dst += 2 pixels */
  1394 3:
  1395     beqz         a2, 4f     /* no odd pixel left */
  1396      nop
  1398     lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
  1400     CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1
  1401     MIPS_UN8x4_MUL_UN8       t2, t0, t1, t4, s0, s1, s2
  1402     addu_s.qb                t1, t1, a1 /* per-byte saturating add of src */
  1403     CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1
  1405     sh           t2, 0(a0) /* store the last result pixel */
  1407 4:
  1408     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1409 5:
  1410     j            ra
  1411      nop
  1413 END(pixman_composite_over_n_0565_asm_mips)
  1415 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
  1416 /*
  1417  * a0 - dst  (a8r8g8b8)
  1418  * a1 - src  (32bit constant)
  1419  * a2 - w
        *
        * OVER of a constant 32-bit source onto w a8r8g8b8 destination
        * pixels.
        *
        * Fast path (source top byte == 0xff): loop 0 fills the destination
        * with the source word.  This path returns before any register is
        * saved.
        *
        * General path (label 1): each destination pixel is scaled by
        * t0 = 255 - source top byte and the source is then added with
        * per-byte saturation.  Loop 2 handles two pixels per pass, block 3
        * a trailing odd pixel.  Label 4 restores the saved registers;
        * label 5 is the plain return used when w == 0.
        *
        * The MIPS_*UN8x4_MUL_*UN8 and *_REGS_* macros are defined outside
        * this chunk.  TODO confirm operand order and clobbers (in
        * particular that passing t0 as a mask operand keeps it usable for
        * the next pass) against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1420  */
  1422     beqz         a2, 5f     /* w == 0: return, nothing was saved */
  1423      nop
  1425     not          t0, a1
  1426     srl          t0, t0, 24 /* t0 = 255 - top byte of the source */
  1427     bgtz         t0, 1f     /* top byte != 0xff: general path */
  1428      nop
  1429 0:
  1430     sw           a1, 0(a0)  /* fill with the source word */
  1431     addiu        a2, a2, -1 /* w -= 1 */
  1432     bgtz         a2, 0b
  1433      addiu       a0, a0, 4  /* delay slot: dst += 1 pixel */
  1434     j            ra
  1435      nop
  1437 1:
  1438     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1439     li           t4, 0x00ff00ff /* low byte of each halfword */
  1440     addiu        t1, a2, -1
  1441     beqz         t1, 3f         /* w == 1: skip the two-pixel loop */
  1442      nop
  1443 2:
  1444     lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1445     lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
  1447     MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3
  1449     addu_s.qb    t7, t7, a1 /* per-byte saturating add of src */
  1450     addu_s.qb    t8, t8, a1 /* per-byte saturating add of src */
  1452     sw           t7, 0(a0) /* store first result pixel */
  1453     sw           t8, 4(a0) /* store second result pixel */
  1455     addiu        a2, a2, -2 /* w -= 2 */
  1456     addiu        t1, a2, -1
  1457     bgtz         t1, 2b     /* loop while at least 2 pixels remain */
  1458      addiu       a0, a0, 8  /* delay slot: dst += 2 pixels */
  1459 3:
  1460     beqz         a2, 4f     /* no odd pixel left */
  1461      nop
  1463     lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
  1465     MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7
  1467     addu_s.qb    t3, t3, a1 /* per-byte saturating add of src */
  1469     sw           t3, 0(a0) /* store the last result pixel */
  1471 4:
  1472     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1473 5:
  1474     j            ra
  1475      nop
  1477 END(pixman_composite_over_n_8888_asm_mips)
  1479 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
  1480 /*
  1481  * a0 - dst  (a8)
  1482  * a1 - src  (a8)
  1483  * a2 - mask (a8)
  1484  * a3 - w
        *
        * For each of the w pixels: multiplies the source byte by the mask
        * byte, scales the product back to 8 bits (the add/round/shift-by-8
        * sequence below, i.e. a rounded divide by 255), and adds it to the
        * destination byte with saturation.
        *
        * Loop 0 handles four pixels per pass by packing four bytes into one
        * register; loop 2 handles the remaining 0..3 pixels one at a time;
        * label 3 is the shared exit.
        *
        * The *_REGS_* macros are defined outside this chunk.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1485  */
  1487     SAVE_REGS_ON_STACK 0, v0, v1
  1488     li                t9, 0x00ff00ff /* low byte of each halfword */
  1489     beqz              a3, 3f      /* w == 0: nothing to do */
  1490      nop
  1492     srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
  1493     beqz              v0, 1f      /* branch if less than 4 src pixels */
  1494      nop
  1496 0:
  1497     beqz              v0, 1f      /* all groups of four are done */
  1498      addiu            v0, v0, -1  /* delay slot: one group fewer */
  1499     lbu               t0, 0(a2)   /* t0..t3 = four mask bytes */
  1500     lbu               t1, 1(a2)
  1501     lbu               t2, 2(a2)
  1502     lbu               t3, 3(a2)
  1503     lbu               t4, 0(a0)   /* t4..t7 = four destination bytes */
  1504     lbu               t5, 1(a0)
  1505     lbu               t6, 2(a0)
  1506     lbu               t7, 3(a0)
  1508     addiu             a2, a2, 4   /* mask += 4 pixels */
  1510     precr_sra.ph.w    t1, t0, 0   /* t1 = mask1 : mask0 (halfwords) */
  1511     precr_sra.ph.w    t3, t2, 0   /* t3 = mask3 : mask2 */
  1512     precr_sra.ph.w    t5, t4, 0   /* t5 = dst1 : dst0 */
  1513     precr_sra.ph.w    t7, t6, 0   /* t7 = dst3 : dst2 */
  1515     precr.qb.ph       t0, t3, t1  /* t0 = four mask bytes, pixel 0 lowest */
  1516     precr.qb.ph       t1, t7, t5  /* t1 = four dst bytes, pixel 0 lowest */
  1518     lbu               t4, 0(a1)   /* t4, v1, t7, t8 = four source bytes */
  1519     lbu               v1, 1(a1)
  1520     lbu               t7, 2(a1)
  1521     lbu               t8, 3(a1)
  1523     addiu             a1, a1, 4   /* src += 4 pixels */
  1525     precr_sra.ph.w    v1, t4, 0   /* v1 = src1 : src0 (halfwords) */
  1526     precr_sra.ph.w    t8, t7, 0   /* t8 = src3 : src2 */
  1528     muleu_s.ph.qbl    t2, t0, t8  /* t2 = mask3*src3 : mask2*src2 */
  1529     muleu_s.ph.qbr    t3, t0, v1  /* t3 = mask1*src1 : mask0*src0 */
  1530     shra_r.ph         t4, t2, 8   /* rounded product >> 8 ... */
  1531     shra_r.ph         t5, t3, 8
  1532     and               t4, t4, t9  /* ... kept to 8 bits per halfword */
  1533     and               t5, t5, t9
  1534     addq.ph           t2, t2, t4  /* product += that correction */
  1535     addq.ph           t3, t3, t5
  1536     shra_r.ph         t2, t2, 8   /* final rounded >> 8 */
  1537     shra_r.ph         t3, t3, 8
  1538     precr.qb.ph       t0, t2, t3  /* repack the low bytes, pixel 0 lowest */
  1540     addu_s.qb         t2, t0, t1  /* per-byte saturating add with dst */
  1542     sb                t2, 0(a0)   /* write the four bytes, lowest first */
  1543     srl               t2, t2, 8
  1544     sb                t2, 1(a0)
  1545     srl               t2, t2, 8
  1546     sb                t2, 2(a0)
  1547     srl               t2, t2, 8
  1548     sb                t2, 3(a0)
  1549     addiu             a3, a3, -4  /* w -= 4 */
  1550     b                 0b
  1551      addiu            a0, a0, 4   /* delay slot: dst += 4 pixels */
  1553 1:
  1554     beqz              a3, 3f      /* no leftover pixels */
  1555      nop
  1556 2:
  1557     lbu               t8, 0(a1)   /* t8 = source byte */
  1558     lbu               t0, 0(a2)   /* t0 = mask byte */
  1559     lbu               t1, 0(a0)   /* t1 = destination byte */
  1560     addiu             a1, a1, 1
  1561     addiu             a2, a2, 1
  1563     mul               t2, t0, t8  /* t2 = mask * src */
  1564     shra_r.ph         t3, t2, 8   /* same scale-back as in loop 0 */
  1565     andi              t3, t3, 0xff
  1566     addq.ph           t2, t2, t3
  1567     shra_r.ph         t2, t2, 8
  1568     andi              t2, t2, 0xff
  1570     addu_s.qb         t2, t2, t1  /* saturating add with dst */
  1571     sb                t2, 0(a0)
  1572     addiu             a3, a3, -1  /* w -= 1 */
  1573     bnez              a3, 2b
  1574      addiu            a0, a0, 1   /* delay slot: dst += 1 pixel */
  1576 3:
  1577     RESTORE_REGS_FROM_STACK 0, v0, v1
  1578     j                 ra
  1579      nop
  1581 END(pixman_composite_add_8_8_8_asm_mips)
  1583 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
  1584 /*
  1585  * a0 - dst  (a8)
  1586  * a1 - src  (32bit constant)
  1587  * a2 - mask (a8)
  1588  * a3 - w
        *
        * For each of the w pixels: multiplies the top byte of the constant
        * source by the mask byte, scales the product back to 8 bits (the
        * add/round/shift-by-8 sequence below, i.e. a rounded divide by
        * 255), and adds it to the destination byte with saturation.
        *
        * Loop 0 handles four pixels per pass by packing four bytes into one
        * register; loop 2 handles the remaining 0..3 pixels one at a time;
        * label 3 is the shared exit.
        *
        * t8 holds the source byte replicated into both halfwords for loop 0
        * and is reloaded as a plain byte value before loop 2, which uses a
        * scalar mul.
        *
        * The *_REGS_* macros are defined outside this chunk.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1589  */
  1591     SAVE_REGS_ON_STACK 0, v0
  1592     li                t9, 0x00ff00ff /* low byte of each halfword */
  1593     beqz              a3, 3f      /* w == 0: nothing to do */
  1594      nop
  1596     srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
  1597     beqz              v0, 1f      /* branch if less than 4 src pixels */
  1598      nop
  1600     srl               t8, a1, 24  /* t8 = top byte of the source */
  1601     replv.ph          t8, t8      /* ... replicated into both halfwords */
  1603 0:
  1604     beqz              v0, 1f      /* all groups of four are done */
  1605      addiu            v0, v0, -1  /* delay slot: one group fewer */
  1606     lbu               t0, 0(a2)   /* t0..t3 = four mask bytes */
  1607     lbu               t1, 1(a2)
  1608     lbu               t2, 2(a2)
  1609     lbu               t3, 3(a2)
  1610     lbu               t4, 0(a0)   /* t4..t7 = four destination bytes */
  1611     lbu               t5, 1(a0)
  1612     lbu               t6, 2(a0)
  1613     lbu               t7, 3(a0)
  1615     addiu             a2, a2, 4   /* mask += 4 pixels */
  1617     precr_sra.ph.w    t1, t0, 0   /* t1 = mask1 : mask0 (halfwords) */
  1618     precr_sra.ph.w    t3, t2, 0   /* t3 = mask3 : mask2 */
  1619     precr_sra.ph.w    t5, t4, 0   /* t5 = dst1 : dst0 */
  1620     precr_sra.ph.w    t7, t6, 0   /* t7 = dst3 : dst2 */
  1622     precr.qb.ph       t0, t3, t1  /* t0 = four mask bytes, pixel 0 lowest */
  1623     precr.qb.ph       t1, t7, t5  /* t1 = four dst bytes, pixel 0 lowest */
  1625     muleu_s.ph.qbl    t2, t0, t8  /* t2 = mask3*src : mask2*src */
  1626     muleu_s.ph.qbr    t3, t0, t8  /* t3 = mask1*src : mask0*src */
  1627     shra_r.ph         t4, t2, 8   /* rounded product >> 8 ... */
  1628     shra_r.ph         t5, t3, 8
  1629     and               t4, t4, t9  /* ... kept to 8 bits per halfword */
  1630     and               t5, t5, t9
  1631     addq.ph           t2, t2, t4  /* product += that correction */
  1632     addq.ph           t3, t3, t5
  1633     shra_r.ph         t2, t2, 8   /* final rounded >> 8 */
  1634     shra_r.ph         t3, t3, 8
  1635     precr.qb.ph       t0, t2, t3  /* repack the low bytes, pixel 0 lowest */
  1637     addu_s.qb         t2, t0, t1  /* per-byte saturating add with dst */
  1639     sb                t2, 0(a0)   /* write the four bytes, lowest first */
  1640     srl               t2, t2, 8
  1641     sb                t2, 1(a0)
  1642     srl               t2, t2, 8
  1643     sb                t2, 2(a0)
  1644     srl               t2, t2, 8
  1645     sb                t2, 3(a0)
  1646     addiu             a3, a3, -4  /* w -= 4 */
  1647     b                 0b
  1648      addiu            a0, a0, 4   /* delay slot: dst += 4 pixels */
  1650 1:
  1651     beqz              a3, 3f      /* no leftover pixels */
  1652      nop
  1653     srl               t8, a1, 24  /* t8 = plain source byte for mul */
  1654 2:
  1655     lbu               t0, 0(a2)   /* t0 = mask byte */
  1656     lbu               t1, 0(a0)   /* t1 = destination byte */
  1657     addiu             a2, a2, 1
  1659     mul               t2, t0, t8  /* t2 = mask * src */
  1660     shra_r.ph         t3, t2, 8   /* same scale-back as in loop 0 */
  1661     andi              t3, t3, 0xff
  1662     addq.ph           t2, t2, t3
  1663     shra_r.ph         t2, t2, 8
  1664     andi              t2, t2, 0xff
  1666     addu_s.qb         t2, t2, t1  /* saturating add with dst */
  1667     sb                t2, 0(a0)
  1668     addiu             a3, a3, -1  /* w -= 1 */
  1669     bnez              a3, 2b
  1670      addiu            a0, a0, 1   /* delay slot: dst += 1 pixel */
  1672 3:
  1673     RESTORE_REGS_FROM_STACK 0, v0
  1674     j                 ra
  1675      nop
  1677 END(pixman_composite_add_n_8_8_asm_mips)
  1679 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
  1680 /*
  1681  * a0 - dst  (a8r8g8b8)
  1682  * a1 - src  (32bit constant)
  1683  * a2 - mask (a8)
  1684  * a3 - w
        *
        * ADD of a constant 32-bit source, weighted per pixel by an 8-bit
        * mask, onto w a8r8g8b8 destination pixels.  Loop 1 handles two
        * pixels per pass, block 2 handles a trailing odd pixel, and label 3
        * is the shared exit.
        *
        * The MIPS_*_MUL_*_ADD_* and *_REGS_* macros are defined outside
        * this chunk.  Going by their names they multiply the source by the
        * mask and add the destination.  TODO confirm operand order and
        * clobbers against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1685  */
  1687     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1688     li       t4, 0x00ff00ff /* low byte of each halfword */
  1689     beqz     a3, 3f         /* w == 0: nothing to composite */
  1690      nop
  1691     addiu    t1, a3, -1
  1692     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1693      nop
  1694 1:
  1695                        /* a1 = source      (32bit constant) */
  1696     lbu      t0, 0(a2) /* t0 = mask        (a8) */
  1697     lbu      t1, 1(a2) /* t1 = mask        (a8) */
  1698     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1699     lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
  1700     addiu    a2, a2, 2 /* mask += 2 pixels */
  1702     MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
  1703                                        t0, t1, \
  1704                                        t2, t3, \
  1705                                        t5, t6, \
  1706                                        t4, t7, t8, t9, s0, s1, s2
  1708     sw       t5, 0(a0) /* store first result pixel */
  1709     sw       t6, 4(a0) /* store second result pixel */
  1710     addiu    a3, a3, -2 /* w -= 2 */
  1711     addiu    t1, a3, -1
  1712     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1713      addiu   a0, a0, 8 /* delay slot: dst += 2 pixels */
  1714 2:
  1715     beqz     a3, 3f    /* no odd pixel left */
  1716      nop
  1717                        /* a1 = source      (32bit constant) */
  1718     lbu      t0, 0(a2) /* t0 = mask        (a8) */
  1719     lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
  1721     MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6
  1723     sw       t2, 0(a0) /* store the last result pixel */
  1724 3:
  1725     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1726     j        ra
  1727      nop
  1729 END(pixman_composite_add_n_8_8888_asm_mips)
  1731 LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
  1732 /*
  1733  * a0 - dst  (r5g6b5)
  1734  * a1 - src  (r5g6b5)
  1735  * a2 - mask (a8)
  1736  * a3 - w
        *
        * ADD of w r5g6b5 source pixels, each weighted by its own 8-bit mask
        * value, onto an r5g6b5 destination.  Loop 1 handles two pixels per
        * pass, block 2 handles a trailing odd pixel, and label 3 is the
        * shared exit.
        *
        * Note that t4..t6 hold the 0565 field masks only for loop 1; the
        * single-pixel tail reuses them as plain temporaries.
        *
        * The CONVERT_*, MIPS_*_MUL_*_ADD_* and *_REGS_* macros are defined
        * outside this chunk.  Going by their names, source and destination
        * are widened from 0565 to 8888, the source is multiplied by the
        * mask and added to the destination, and the result narrowed back.
        * TODO confirm operand order, clobbers and the meaning of the leading
        * 20 passed to the save/restore macros against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1737  */
  1739     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
  1740     li       t4, 0xf800f800 /* top 5 bits of each halfword */
  1741     li       t5, 0x07e007e0 /* middle 6 bits of each halfword */
  1742     li       t6, 0x001F001F /* low 5 bits of each halfword */
  1743     li       t7, 0x00ff00ff /* low byte of each halfword */
  1744     beqz     a3, 3f         /* w == 0: nothing to composite */
  1745      nop
  1746     addiu    t1, a3, -1
  1747     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1748      nop
  1749 1:
  1750     lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
  1751     lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
  1752     lbu      t2, 0(a2) /* t2 = mask        (a8) */
  1753     lbu      t3, 1(a2) /* t3 = mask        (a8) */
  1754     lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
  1755     lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
  1756     addiu    a1, a1, 4 /* src  += 2 pixels */
  1757     addiu    a2, a2, 2 /* mask += 2 pixels */
  1759     CONVERT_2x0565_TO_2x8888  t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
  1760     CONVERT_2x0565_TO_2x8888  t8, t9, s2, s3, t5, t6, s4, s5, s6, s7
  1761     MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4  s0, s1, \
  1762                                         t2, t3, \
  1763                                         s2, s3, \
  1764                                         t0, t1, \
  1765                                         t7, s4, s5, s6, s7, t8, t9
  1766     CONVERT_2x8888_TO_2x0565  t0, t1, s0, s1, t4, t5, t6, s2, s3
  1768     sh       s0, 0(a0) /* store first result pixel */
  1769     sh       s1, 2(a0) /* store second result pixel */
  1770     addiu    a3, a3, -2 /* w -= 2 */
  1771     addiu    t1, a3, -1
  1772     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1773      addiu   a0, a0, 4 /* delay slot: dst += 2 pixels */
  1774 2:
  1775     beqz     a3, 3f    /* no odd pixel left */
  1776      nop
  1777     lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
  1778     lbu      t1, 0(a2) /* t1 = mask        (a8) */
  1779     lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
  1781     CONVERT_1x0565_TO_1x8888  t0, t3, t4, t5
  1782     CONVERT_1x0565_TO_1x8888  t2, t4, t5, t6
  1783     MIPS_UN8x4_MUL_UN8_ADD_UN8x4  t3, t1, t4, t0, t7, t2, t5, t6
  1784     CONVERT_1x8888_TO_1x0565  t0, t3, t4, t5
  1786     sh       t3, 0(a0) /* store the last result pixel */
  1787 3:
  1788     RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
  1789     j        ra
  1790      nop
  1792 END(pixman_composite_add_0565_8_0565_asm_mips)
  1794 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
  1795 /*
  1796  * a0 - dst  (a8r8g8b8)
  1797  * a1 - src  (a8r8g8b8)
  1798  * a2 - mask (a8)
  1799  * a3 - w
        *
        * ADD of w a8r8g8b8 source pixels, each weighted by its own 8-bit
        * mask value, onto an a8r8g8b8 destination.  Loop 1 handles two
        * pixels per pass, block 2 handles a trailing odd pixel, and label 3
        * is the shared exit.
        *
        * The MIPS_*_MUL_*_ADD_* and *_REGS_* macros are defined outside
        * this chunk.  Going by their names they multiply the source by the
        * mask and add the destination.  TODO confirm operand order and
        * clobbers against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1800  */
  1802     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1803     li       t4, 0x00ff00ff /* low byte of each halfword */
  1804     beqz     a3, 3f         /* w == 0: nothing to composite */
  1805      nop
  1806     addiu    t1, a3, -1
  1807     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1808      nop
  1809 1:
  1810     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1811     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1812     lbu      t2, 0(a2) /* t2 = mask        (a8) */
  1813     lbu      t3, 1(a2) /* t3 = mask        (a8) */
  1814     lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
  1815     lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
  1816     addiu    a1, a1, 8 /* src  += 2 pixels */
  1817     addiu    a2, a2, 2 /* mask += 2 pixels */
  1819     MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
  1820                                        t2, t3, \
  1821                                        t5, t6, \
  1822                                        t7, t8, \
  1823                                        t4, t9, s0, s1, s2, t0, t1
  1825     sw       t7, 0(a0) /* store first result pixel */
  1826     sw       t8, 4(a0) /* store second result pixel */
  1827     addiu    a3, a3, -2 /* w -= 2 */
  1828     addiu    t1, a3, -1
  1829     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1830      addiu   a0, a0, 8 /* delay slot: dst += 2 pixels */
  1831 2:
  1832     beqz     a3, 3f    /* no odd pixel left */
  1833      nop
  1834     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1835     lbu      t1, 0(a2) /* t1 = mask        (a8) */
  1836     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1838     MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
  1840     sw       t3, 0(a0) /* store the last result pixel */
  1841 3:
  1842     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1843     j        ra
  1844      nop
  1846 END(pixman_composite_add_8888_8_8888_asm_mips)
  1848 LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
  1849 /*
  1850  * a0 - dst  (a8r8g8b8)
  1851  * a1 - src  (a8r8g8b8)
  1852  * a2 - mask (32bit constant)
  1853  * a3 - w
        *
        * ADD of w a8r8g8b8 source pixels, weighted by the top byte of the
        * constant mask, onto an a8r8g8b8 destination.  Loop 1 handles two
        * pixels per pass, block 2 handles a trailing odd pixel, and label 3
        * is the shared exit.
        *
        * The MIPS_*_MUL_*_ADD_* and *_REGS_* macros are defined outside
        * this chunk.  Going by their names they multiply the source by the
        * mask and add the destination.  TODO confirm operand order and
        * clobbers (in particular that passing a2 as a mask operand keeps it
        * usable for the next pass) against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1854  */
  1856     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1857     li       t4, 0x00ff00ff /* low byte of each halfword */
  1858     beqz     a3, 3f         /* w == 0: nothing to composite */
  1859      nop
  1860     srl      a2, a2, 24     /* keep only bits 31..24 of the mask */
  1861     addiu    t1, a3, -1
  1862     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1863      nop
  1864 1:
  1865     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1866     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1867                        /* a2 = mask        (32bit constant) */
  1868     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1869     lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
  1870     addiu    a1, a1, 8 /* src += 2 pixels */
  1872     MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
  1873                                        a2, a2, \
  1874                                        t2, t3, \
  1875                                        t5, t6, \
  1876                                        t4, t7, t8, t9, s0, s1, s2
  1878     sw       t5, 0(a0) /* store first result pixel */
  1879     sw       t6, 4(a0) /* store second result pixel */
  1880     addiu    a3, a3, -2 /* w -= 2 */
  1881     addiu    t1, a3, -1
  1882     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1883      addiu   a0, a0, 8 /* delay slot: dst += 2 pixels */
  1884 2:
  1885     beqz     a3, 3f    /* no odd pixel left */
  1886      nop
  1887     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1888                        /* a2 = mask        (32bit constant) */
  1889     lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
  1891     MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7
  1893     sw       t3, 0(a0) /* store the last result pixel */
  1894 3:
  1895     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1896     j        ra
  1897      nop
  1899 END(pixman_composite_add_8888_n_8888_asm_mips)
  1901 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
  1902 /*
  1903  * a0 - dst  (a8r8g8b8)
  1904  * a1 - src  (a8r8g8b8)
  1905  * a2 - mask (a8r8g8b8)
  1906  * a3 - w
        *
        * ADD of w a8r8g8b8 source pixels onto an a8r8g8b8 destination.
        * Only bits 31..24 of each mask pixel are used as the per-pixel
        * weight.  Loop 1 handles two pixels per pass, block 2 handles a
        * trailing odd pixel, and label 3 is the shared exit.
        *
        * The MIPS_*_MUL_*_ADD_* and *_REGS_* macros are defined outside
        * this chunk.  Going by their names they multiply the source by the
        * mask and add the destination.  TODO confirm operand order and
        * clobbers against their definitions.
        *
        * An instruction indented by one extra space fills the delay slot
        * of the branch or jump directly above it.
  1907  */
  1909     SAVE_REGS_ON_STACK 0, s0, s1, s2
  1910     li       t4, 0x00ff00ff /* low byte of each halfword */
  1911     beqz     a3, 3f         /* w == 0: nothing to composite */
  1912      nop
  1913     addiu    t1, a3, -1
  1914     beqz     t1, 2f         /* w == 1: skip the two-pixel loop */
  1915      nop
  1916 1:
  1917     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1918     lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
  1919     lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
  1920     lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
  1921     lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
  1922     lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
  1923     addiu    a1, a1, 8 /* src  += 2 pixels */
  1924     addiu    a2, a2, 8 /* mask += 2 pixels */
  1925     srl      t2, t2, 24 /* reduce mask 0 to its top byte */
  1926     srl      t3, t3, 24 /* reduce mask 1 to its top byte */
  1928     MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
  1929                                        t2, t3, \
  1930                                        t5, t6, \
  1931                                        t7, t8, \
  1932                                        t4, t9, s0, s1, s2, t0, t1
  1934     sw       t7, 0(a0) /* store first result pixel */
  1935     sw       t8, 4(a0) /* store second result pixel */
  1936     addiu    a3, a3, -2 /* w -= 2 */
  1937     addiu    t1, a3, -1
  1938     bgtz     t1, 1b    /* loop while at least 2 pixels remain */
  1939      addiu   a0, a0, 8 /* delay slot: dst += 2 pixels */
  1940 2:
  1941     beqz     a3, 3f    /* no odd pixel left */
  1942      nop
  1943     lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
  1944     lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
  1945     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  1946     srl      t1, t1, 24 /* reduce the mask to its top byte */
  1948     MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
  1950     sw       t3, 0(a0) /* store the last result pixel */
  1951 3:
  1952     RESTORE_REGS_FROM_STACK 0, s0, s1, s2
  1953     j        ra
  1954      nop
  1956 END(pixman_composite_add_8888_8888_8888_asm_mips)
  1958 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
  1959 /*
        * Saturating add of w a8 pixels: dst[i] = min(255, dst[i] + src[i]).
        *
        * Label 0 handles four pixels per iteration by packing four bytes into
        * one word and using a single addu_s.qb (unsigned saturating add on
        * four byte lanes).  Label 2 handles the remaining 0..3 pixels one at
        * a time.  Only byte loads/stores are used, so no alignment of src or
        * dst is required by this routine.
        *
  1960  * a0 - dst  (a8)
  1961  * a1 - src  (a8)
  1962  * a2 - w
  1963  */
  1965     beqz              a2, 3f
  1966      nop
  1967     srl               t9, a2, 2   /* t9 = how many multiples of 4 dst pixels */
  1968     beqz              t9, 1f      /* branch if less than 4 src pixels */
  1969      nop
           /* NOTE(review): the test above repeats the one at the loop head. */
  1971 0:
  1972     beqz              t9, 1f
  1973      addiu            t9, t9, -1  /* delay slot: one group of four consumed */
  1974     lbu               t0, 0(a1)
  1975     lbu               t1, 1(a1)
  1976     lbu               t2, 2(a1)
  1977     lbu               t3, 3(a1)
  1978     lbu               t4, 0(a0)
  1979     lbu               t5, 1(a0)
  1980     lbu               t6, 2(a0)
  1981     lbu               t7, 3(a0)
  1983     addiu             a1, a1, 4
           /* Pair the bytes into halfwords: rt = (rt.low16 << 16) | rs.low16 */
  1985     precr_sra.ph.w    t1, t0, 0
  1986     precr_sra.ph.w    t3, t2, 0
  1987     precr_sra.ph.w    t5, t4, 0
  1988     precr_sra.ph.w    t7, t6, 0
           /* Pack the low byte of each halfword; pixel 0 ends up in bits 7..0 */
  1990     precr.qb.ph       t0, t3, t1  /* t0 = four src bytes */
  1991     precr.qb.ph       t1, t7, t5  /* t1 = four dst bytes */
  1993     addu_s.qb         t2, t0, t1  /* per-byte saturating add */
           /* Write the four result bytes back, lowest lane first. */
  1995     sb                t2, 0(a0)
  1996     srl               t2, t2, 8
  1997     sb                t2, 1(a0)
  1998     srl               t2, t2, 8
  1999     sb                t2, 2(a0)
  2000     srl               t2, t2, 8
  2001     sb                t2, 3(a0)
  2002     addiu             a2, a2, -4
  2003     b                 0b
  2004      addiu            a0, a0, 4   /* delay slot: advance dst */
  2006 1:
  2007     beqz              a2, 3f      /* no leftover pixels */
  2008      nop
  2009 2:
           /* Tail: one pixel per iteration; upper byte lanes are 0 + 0. */
  2010     lbu               t0, 0(a1)
  2011     lbu               t1, 0(a0)
  2012     addiu             a1, a1, 1
  2014     addu_s.qb         t2, t0, t1
  2015     sb                t2, 0(a0)
  2016     addiu             a2, a2, -1
  2017     bnez              a2, 2b
  2018      addiu            a0, a0, 1   /* delay slot: advance dst */
  2020 3:
  2021     j                 ra
  2022      nop
  2024 END(pixman_composite_add_8_8_asm_mips)
  2026 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
  2027 /*
        * Saturating per-channel add of w a8r8g8b8 pixels:
        * dst[i] = addu_s.qb(dst[i], src[i]), i.e. each of the four bytes is
        * added with unsigned saturation.
        *
        * Label 1 handles all groups of four pixels except the last one,
        * label 2 is the peeled last group of four, and label 3 handles the
        * remaining 0..3 pixels (or all of them when w < 4).
        *
  2028  * a0 - dst (a8r8g8b8)
  2029  * a1 - src (a8r8g8b8)
  2030  * a2 - w
  2031  */
  2033     beqz         a2, 4f
  2034      nop
  2036     srl          t9, a2, 2      /* t9 = how many multiples of 4 src pixels */
  2037     beqz         t9, 3f         /* branch if less than 4 src pixels */
  2038      nop
  2039 1:
  2040     addiu        t9, t9, -1
  2041     beqz         t9, 2f         /* last group of four: use the peeled copy */
  2042      addiu       a2, a2, -4     /* delay slot: runs on both paths */
  2044     lw           t0, 0(a1)
  2045     lw           t1, 4(a1)
  2046     lw           t2, 8(a1)
  2047     lw           t3, 12(a1)
  2048     lw           t4, 0(a0)
  2049     lw           t5, 4(a0)
  2050     lw           t6, 8(a0)
  2051     lw           t7, 12(a0)
  2052     addiu        a1, a1, 16
  2054     addu_s.qb    t4, t4, t0
  2055     addu_s.qb    t5, t5, t1
  2056     addu_s.qb    t6, t6, t2
  2057     addu_s.qb    t7, t7, t3
  2059     sw           t4, 0(a0)
  2060     sw           t5, 4(a0)
  2061     sw           t6, 8(a0)
  2062     sw           t7, 12(a0)
  2063     b            1b
  2064      addiu       a0, a0, 16     /* delay slot: advance dst */
  2065 2:
           /* Final group of four; a2 already excludes these pixels. */
  2066     lw           t0, 0(a1)
  2067     lw           t1, 4(a1)
  2068     lw           t2, 8(a1)
  2069     lw           t3, 12(a1)
  2070     lw           t4, 0(a0)
  2071     lw           t5, 4(a0)
  2072     lw           t6, 8(a0)
  2073     lw           t7, 12(a0)
  2074     addiu        a1, a1, 16
  2076     addu_s.qb    t4, t4, t0
  2077     addu_s.qb    t5, t5, t1
  2078     addu_s.qb    t6, t6, t2
  2079     addu_s.qb    t7, t7, t3
  2081     sw           t4, 0(a0)
  2082     sw           t5, 4(a0)
  2083     sw           t6, 8(a0)
  2084     sw           t7, 12(a0)
  2086     beqz         a2, 4f         /* w was a multiple of four: done */
  2087      addiu       a0, a0, 16     /* delay slot: advance dst */
  2088 3:
           /* Tail: one pixel per iteration until a2 reaches zero. */
  2089     lw           t0, 0(a1)
  2090     lw           t1, 0(a0)
  2091     addiu        a1, a1, 4
  2092     addiu        a2, a2, -1
  2093     addu_s.qb    t1, t1, t0
  2094     sw           t1, 0(a0)
  2095     bnez         a2, 3b
  2096      addiu       a0, a0, 4      /* delay slot: advance dst */
  2097 4:
  2098     jr           ra
  2099      nop
  2101 END(pixman_composite_add_8888_8888_asm_mips)
  2103 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
  2104 /*
        * For each of w pixels: invert the a8 source byte (255 - src), expand
        * the r5g6b5 destination to 8888 with a CONVERT_*0565_TO_*8888 macro,
        * pass both to a MIPS_*UN8x4_MUL_*UN8 macro and convert the result
        * back to r5g6b5.  The macros are defined outside this chunk; by
        * their names this presumably yields dst = dst * (255 - src) / 255 -
        * TODO confirm.
        *
        * Two pixels are processed per iteration (label 1); label 2 handles
        * the single leftover pixel when w is odd.
        *
  2105  * a0 - dst  (r5g6b5)
  2106  * a1 - src  (a8)
  2107  * a2 - w
  2108  */
  2110     beqz     a2, 4f    /* w == 0: return before touching the stack */
  2111      nop
  2113     SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
           /* Constants handed to the conversion/multiply macros below;
            * presumably the paired r5g6b5 channel masks and a byte mask. */
  2114     li       t2, 0xf800f800
  2115     li       t3, 0x07e007e0
  2116     li       t4, 0x001F001F
  2117     li       t5, 0x00ff00ff
  2119     addiu    t1, a2, -1
  2120     beqz     t1, 2f    /* w == 1: only the single-pixel tail */
  2121      nop
  2122 1:
  2123     lbu      t0, 0(a1) /* t0 = source      (a8) */
  2124     lbu      t1, 1(a1) /* t1 = source      (a8) */
  2125     lhu      t6, 0(a0) /* t6 = destination (r5g6b5) */
  2126     lhu      t7, 2(a0) /* t7 = destination (r5g6b5) */
  2127     addiu    a1, a1, 2
  2129     not      t0, t0
  2130     not      t1, t1
  2131     andi     t0, 0xff  /* t0 = neg source1 */
  2132     andi     t1, 0xff  /* t1 = neg source2 */
  2133     CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3
  2134     MIPS_2xUN8x4_MUL_2xUN8   t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9
  2135     CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1
  2137     sh       t8, 0(a0)
  2138     sh       t9, 2(a0)
  2139     addiu    a2, a2, -2
  2140     addiu    t1, a2, -1
  2141     bgtz     t1, 1b    /* loop while at least two pixels remain */
  2142      addiu   a0, a0, 4 /* delay slot: advance dst by two pixels */
  2143 2:
           /* Tail: a2 is 0 or 1 here.  t2-t4 are reused as macro operands,
            * which is safe because the two-pixel loop is finished. */
  2144     beqz     a2, 3f
  2145      nop
  2146     lbu      t0, 0(a1) /* t0 = source      (a8) */
  2147     lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
  2149     not      t0, t0
  2150     andi     t0, 0xff  /* t0 = neg source */
  2151     CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4
  2152     MIPS_UN8x4_MUL_UN8        t2, t0, t1, t5, t3, t4, t6
  2153     CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4
  2155     sh       t2, 0(a0)
  2156 3:
  2157     RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
  2158 4:
  2159     j        ra
  2160      nop
  2162 END(pixman_composite_out_reverse_8_0565_asm_mips)
  2164 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
  2165 /*
        * For each of w pixels: invert the a8 source byte (255 - src) and pass
        * it with the a8r8g8b8 destination word to a MIPS_*UN8x4_MUL_*UN8
        * macro; the macro result replaces the destination.  The macros are
        * defined outside this chunk; by their names this presumably yields
        * dst = dst * (255 - src) / 255 per channel - TODO confirm.
        *
        * Only t registers are used, so nothing is saved on the stack.  Two
        * pixels are processed per iteration (label 1); label 2 handles the
        * single leftover pixel when w is odd.
        *
  2166  * a0 - dst  (a8r8g8b8)
  2167  * a1 - src  (a8)
  2168  * a2 - w
  2169  */
  2171     beqz     a2, 3f
  2172      nop
  2173     li       t4, 0x00ff00ff /* constant operand handed to both macros below */
  2174     addiu    t1, a2, -1
  2175     beqz     t1, 2f    /* w == 1: only the single-pixel tail */
  2176      nop
  2177 1:
  2178     lbu      t0, 0(a1) /* t0 = source      (a8) */
  2179     lbu      t1, 1(a1) /* t1 = source      (a8) */
  2180     lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
  2181     lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
  2182     addiu    a1, a1, 2
  2183     not      t0, t0
  2184     not      t1, t1
  2185     andi     t0, 0xff  /* t0 = neg source */
  2186     andi     t1, 0xff  /* t1 = neg source */
  2188     MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0
  2190     sw       t5, 0(a0)
  2191     sw       t6, 4(a0)
  2192     addiu    a2, a2, -2
  2193     addiu    t1, a2, -1
  2194     bgtz     t1, 1b    /* loop while at least two pixels remain */
  2195      addiu   a0, a0, 8 /* delay slot: advance dst by two pixels */
  2196 2:
           /* Tail: a2 is 0 or 1 here. */
  2197     beqz     a2, 3f
  2198      nop
  2199     lbu      t0, 0(a1) /* t0 = source      (a8) */
  2200     lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
  2201     not      t0, t0
  2202     andi     t0, 0xff  /* t0 = neg source */
  2204     MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6
  2206     sw       t2, 0(a0)
  2207 3:
  2208     j        ra
  2209      nop
  2211 END(pixman_composite_out_reverse_8_8888_asm_mips)
  2213 LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
  2214 /*
        * For each of w destination pixels:
        *     ia  = (~dst) >> 24             (255 - destination alpha)
        *     dst = sat(dst + src * ia / 255)   per byte channel
        * where src is the constant colour passed by value in a1.
        *
        * The division by 255 uses x = (x + ((x >>r 8) & 0xff)) >>r 8, with
        * >>r being the rounding shift shra_r.ph.
        *
        * Label 1 handles four pixels per iteration, label 3 handles the
        * remaining 0..3 pixels one at a time.
        *
  2215  * a0 - dst  (a8r8g8b8)
  2216  * a1 - src  (32bit constant)
  2217  * a2 - w
  2218  */
  2220     beqz              a2, 5f      /* w == 0: return before touching the stack */
  2221      nop
  2223     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
  2224     li                t0, 0x00ff00ff /* keeps the low byte of each halfword */
  2225     srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
  2226     beqz              t9, 2f      /* branch if less than 4 src pixels */
  2227      nop
  2228 1:
  2229     beqz              t9, 2f
  2230      addiu            t9, t9, -1  /* delay slot: one group of four consumed */
  2232     lw                t1, 0(a0)
  2233     lw                t2, 4(a0)
  2234     lw                t3, 8(a0)
  2235     lw                t4, 12(a0)
  2237     addiu             a2, a2, -4
           /* t5..t8 = 255 - alpha of dst pixels 0..3, copied to both halfwords */
  2239     not               t5, t1
  2240     not               t6, t2
  2241     not               t7, t3
  2242     not               t8, t4
  2243     srl               t5, t5, 24
  2244     srl               t6, t6, 24
  2245     srl               t7, t7, 24
  2246     srl               t8, t8, 24
  2247     replv.ph          t5, t5
  2248     replv.ph          t6, t6
  2249     replv.ph          t7, t7
  2250     replv.ph          t8, t8
           /* 16-bit products: qbl = upper two src bytes, qbr = lower two.
            * s0/s1 = pixel 0, s2/s3 = pixel 1, s4/s5 = pixel 2, s6/s7 = pixel 3 */
  2251     muleu_s.ph.qbl    s0, a1, t5
  2252     muleu_s.ph.qbr    s1, a1, t5
  2253     muleu_s.ph.qbl    s2, a1, t6
  2254     muleu_s.ph.qbr    s3, a1, t6
  2255     muleu_s.ph.qbl    s4, a1, t7
  2256     muleu_s.ph.qbr    s5, a1, t7
  2257     muleu_s.ph.qbl    s6, a1, t8
  2258     muleu_s.ph.qbr    s7, a1, t8
           /* Divide the products of pixels 0 and 1 by 255; t5..t8 are now
            * plain temporaries. */
  2260     shra_r.ph         t5, s0, 8
  2261     shra_r.ph         t6, s1, 8
  2262     shra_r.ph         t7, s2, 8
  2263     shra_r.ph         t8, s3, 8
  2264     and               t5, t5, t0
  2265     and               t6, t6, t0
  2266     and               t7, t7, t0
  2267     and               t8, t8, t0
  2268     addq.ph           s0, s0, t5
  2269     addq.ph           s1, s1, t6
  2270     addq.ph           s2, s2, t7
  2271     addq.ph           s3, s3, t8
  2272     shra_r.ph         s0, s0, 8
  2273     shra_r.ph         s1, s1, 8
  2274     shra_r.ph         s2, s2, 8
  2275     shra_r.ph         s3, s3, 8
           /* Same division for pixels 2 and 3. */
  2276     shra_r.ph         t5, s4, 8
  2277     shra_r.ph         t6, s5, 8
  2278     shra_r.ph         t7, s6, 8
  2279     shra_r.ph         t8, s7, 8
  2280     and               t5, t5, t0
  2281     and               t6, t6, t0
  2282     and               t7, t7, t0
  2283     and               t8, t8, t0
  2284     addq.ph           s4, s4, t5
  2285     addq.ph           s5, s5, t6
  2286     addq.ph           s6, s6, t7
  2287     addq.ph           s7, s7, t8
  2288     shra_r.ph         s4, s4, 8
  2289     shra_r.ph         s5, s5, 8
  2290     shra_r.ph         s6, s6, 8
  2291     shra_r.ph         s7, s7, 8
           /* Repack the halfword results into one 4-byte pixel each and add
            * the original destination with per-byte saturation. */
  2293     precr.qb.ph       t5, s0, s1
  2294     precr.qb.ph       t6, s2, s3
  2295     precr.qb.ph       t7, s4, s5
  2296     precr.qb.ph       t8, s6, s7
  2297     addu_s.qb         t5, t1, t5
  2298     addu_s.qb         t6, t2, t6
  2299     addu_s.qb         t7, t3, t7
  2300     addu_s.qb         t8, t4, t8
  2302     sw                t5, 0(a0)
  2303     sw                t6, 4(a0)
  2304     sw                t7, 8(a0)
  2305     sw                t8, 12(a0)
  2306     b                 1b
  2307      addiu            a0, a0, 16  /* delay slot: advance dst */
  2309 2:
  2310     beqz              a2, 4f      /* no leftover pixels */
  2311      nop
  2312 3:
           /* Tail: same computation, one pixel per iteration. */
  2313     lw                t1, 0(a0)
  2315     not               t2, t1
  2316     srl               t2, t2, 24
  2317     replv.ph          t2, t2
  2319     muleu_s.ph.qbl    t4, a1, t2
  2320     muleu_s.ph.qbr    t5, a1, t2
  2321     shra_r.ph         t6, t4, 8
  2322     shra_r.ph         t7, t5, 8
  2324     and               t6,t6,t0
  2325     and               t7,t7,t0
  2327     addq.ph           t8, t4, t6
  2328     addq.ph           t9, t5, t7
  2330     shra_r.ph         t8, t8, 8
  2331     shra_r.ph         t9, t9, 8
  2333     precr.qb.ph       t9, t8, t9
  2335     addu_s.qb         t9, t1, t9
  2336     sw                t9, 0(a0)
  2338     addiu             a2, a2, -1
  2339     bnez              a2, 3b
  2340      addiu            a0, a0, 4   /* delay slot: advance dst */
  2341 4:
  2342     RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
  2343 5:
  2344     j                 ra
  2345      nop
  2347 END(pixman_composite_over_reverse_n_8888_asm_mips)
  2349 LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
  2350 /*
        * For each of w a8 destination pixels:
        *     dst = dst * src_alpha / 255
        * where src_alpha is the top byte of the constant colour passed by
        * value in a1.
        *
        * The division by 255 uses x = (x + (x >> 8)) >>r 8, with >>r being
        * the rounding shift shra_r.ph.
        *
        * Label 1 handles four pixels per iteration, label 3 handles the
        * remaining 0..3 pixels one at a time.
        *
  2351  * a0 - dst  (a8)
  2352  * a1 - src  (a8r8g8b8)
  2353  * a2 - w
  2354  */
  2356     beqz              a2, 5f      /* w == 0: return before touching the stack */
  2357      nop
  2359     SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
  2360     move              t7, a1      /* t7 is only a temporary; reused below */
  2361     srl               t5, t7, 24  /* t5 = src alpha */
  2362     replv.ph          t5, t5      /* ... copied into both halfwords */
  2363     srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
  2364     beqz              t9, 2f      /* branch if less than 4 src pixels */
  2365      nop
  2367 1:
  2368     addiu             t9, t9, -1
  2369     addiu             a2, a2, -4
  2370     lbu               t0, 0(a0)
  2371     lbu               t1, 1(a0)
  2372     lbu               t2, 2(a0)
  2373     lbu               t3, 3(a0)
           /* lbu zero-extends, so the pixel sits in the lowest byte only: the
            * qbl products (upper two bytes) are zero and the wanted product
            * is the low halfword of each qbr result (s1, s3, s5, s7). */
  2375     muleu_s.ph.qbl    s0, t0, t5
  2376     muleu_s.ph.qbr    s1, t0, t5
  2377     muleu_s.ph.qbl    s2, t1, t5
  2378     muleu_s.ph.qbr    s3, t1, t5
  2379     muleu_s.ph.qbl    s4, t2, t5
  2380     muleu_s.ph.qbr    s5, t2, t5
  2381     muleu_s.ph.qbl    s6, t3, t5
  2382     muleu_s.ph.qbr    s7, t3, t5
           /* Divide the products of pixels 0 and 1 by 255. */
  2384     shrl.ph           t4, s0, 8
  2385     shrl.ph           t6, s1, 8
  2386     shrl.ph           t7, s2, 8
  2387     shrl.ph           t8, s3, 8
  2388     addq.ph           t0, s0, t4
  2389     addq.ph           t1, s1, t6
  2390     addq.ph           t2, s2, t7
  2391     addq.ph           t3, s3, t8
  2392     shra_r.ph         t0, t0, 8
  2393     shra_r.ph         t1, t1, 8
  2394     shra_r.ph         t2, t2, 8
  2395     shra_r.ph         t3, t3, 8
           /* Same division for pixels 2 and 3 (s0..s3 reused as temporaries). */
  2396     shrl.ph           t4, s4, 8
  2397     shrl.ph           t6, s5, 8
  2398     shrl.ph           t7, s6, 8
  2399     shrl.ph           t8, s7, 8
  2400     addq.ph           s0, s4, t4
  2401     addq.ph           s1, s5, t6
  2402     addq.ph           s2, s6, t7
  2403     addq.ph           s3, s7, t8
  2404     shra_r.ph         t4, s0, 8
  2405     shra_r.ph         t6, s1, 8
  2406     shra_r.ph         t7, s2, 8
  2407     shra_r.ph         t8, s3, 8
           /* Pack low bytes; the lowest byte of s0..s3 is pixel 0..3, and sb
            * stores exactly that byte. */
  2409     precr.qb.ph       s0, t0, t1
  2410     precr.qb.ph       s1, t2, t3
  2411     precr.qb.ph       s2, t4, t6
  2412     precr.qb.ph       s3, t7, t8
  2414     sb                s0, 0(a0)
  2415     sb                s1, 1(a0)
  2416     sb                s2, 2(a0)
  2417     sb                s3, 3(a0)
  2418     bgtz              t9, 1b
  2419      addiu            a0, a0, 4   /* delay slot: advance dst */
  2420 2:
  2421     beqz              a2, 4f      /* no leftover pixels */
  2422      nop
  2423 3:
           /* Tail: one pixel per iteration; t9 is free to use as a temporary
            * because the four-pixel loop is finished. */
  2424     lbu               t1, 0(a0)
  2426     muleu_s.ph.qbl    t4, t1, t5
  2427     muleu_s.ph.qbr    t7, t1, t5
  2428     shrl.ph           t6, t4, 8
  2429     shrl.ph           t0, t7, 8
  2430     addq.ph           t8, t4, t6
  2431     addq.ph           t9, t7, t0
  2432     shra_r.ph         t8, t8, 8
  2433     shra_r.ph         t9, t9, 8
  2434     precr.qb.ph       t2, t8, t9
  2435     sb                t2, 0(a0)
  2436     addiu             a2, a2, -1
  2437     bnez              a2, 3b
  2438      addiu            a0, a0, 1   /* delay slot: advance dst */
  2439 4:
  2440     RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
  2441 5:
  2442     j                 ra
  2443      nop
  2445 END(pixman_composite_in_n_8_asm_mips)
  2447 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
  2448 /*
        * Nearest-neighbour scaled scanline: for each of w destination pixels
        * the source pixel at index (vx >> 16) is fetched, vx is advanced by
        * unit_x, and source, a8 mask and r5g6b5 destination (expanded to
        * 8888) are passed to an OVER_*8888_*8_*8888 macro.  The result is
        * converted back to r5g6b5 and stored.  The macros are defined
        * outside this chunk; by their names they presumably implement
        * (src IN mask) OVER dst - TODO confirm.
        *
        * Two pixels are processed per iteration (label 1); label 2 handles
        * the single leftover pixel when w is odd.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 20, and vx/unit_x are afterwards read 20
        * bytes higher, at 36(sp) and 40(sp).
        *
  2449  * a0     - dst  (r5g6b5)
  2450  * a1     - src  (a8r8g8b8)
  2451  * a2     - mask (a8)
  2452  * a3     - w
  2453  * 16(sp) - vx
  2454  * 20(sp) - unit_x
  2455  */
  2456     beqz     a3, 4f     /* w == 0: return before touching the stack */
  2457      nop
  2459     SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
  2460     lw       v0, 36(sp) /* v0 = vx */
  2461     lw       v1, 40(sp) /* v1 = unit_x */
           /* Constants handed to the conversion/OVER macros below;
            * presumably a byte mask and the paired r5g6b5 channel masks. */
  2462     li       t6, 0x00ff00ff
  2463     li       t7, 0xf800f800
  2464     li       t8, 0x07e007e0
  2465     li       t9, 0x001F001F
  2467     addiu    t1, a3, -1
  2468     beqz     t1, 2f     /* w == 1: only the single-pixel tail */
  2469      nop
  2470 1:
  2471     sra      t0, v0, 16 /* t0 = vx >> 16 */
  2472     sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
  2473     addu     t0, a1, t0
  2474     lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
  2475     addu     v0, v0, v1 /* v0 = vx + unit_x */
  2476     sra      t1, v0, 16 /* t1 = vx >> 16 */
  2477     sll      t1, t1, 2  /* t1 = t1 * 4      (a8r8g8b8) */
  2478     addu     t1, a1, t1
  2479     lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
  2480     addu     v0, v0, v1 /* v0 = vx + unit_x */
  2481     lbu      t2, 0(a2)  /* t2 = mask        (a8) */
  2482     lbu      t3, 1(a2)  /* t3 = mask        (a8) */
  2483     lhu      t4, 0(a0)  /* t4 = destination (r5g6b5) */
  2484     lhu      t5, 2(a0)  /* t5 = destination (r5g6b5) */
  2485     addiu    a2, a2, 2
  2487     CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
  2488     OVER_2x8888_2x8_2x8888   t0, t1, \
  2489                              t2, t3, \
  2490                              s0, s1, \
  2491                              t4, t5, \
  2492                              t6, s2, s3, s4, s5, t2, t3
  2493     CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
  2495     sh       s0, 0(a0)
  2496     sh       s1, 2(a0)
  2497     addiu    a3, a3, -2
  2498     addiu    t1, a3, -1
  2499     bgtz     t1, 1b     /* loop while at least two pixels remain */
  2500      addiu   a0, a0, 4  /* delay slot: advance dst by two pixels */
  2501 2:
           /* Tail: a3 is 0 or 1 here.  t7/t8 are passed to the OVER macro,
            * which is safe because the two-pixel loop is finished. */
  2502     beqz     a3, 3f
  2503      nop
  2504     sra      t0, v0, 16 /* t0 = vx >> 16 */
  2505     sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
  2506     addu     t0, a1, t0
  2507     lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
  2508     lbu      t1, 0(a2)  /* t1 = mask        (a8) */
  2509     lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
  2511     CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
  2512     OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
  2513     CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
  2515     sh       t3, 0(a0)
  2516 3:
  2517     RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
  2518 4:
  2519     j        ra
  2520      nop
  2522 END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
  2524 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
  2525 /*
        * Nearest-neighbour scaled scanline with an r5g6b5 source: for each of
        * w destination pixels the source pixel at index (vx >> 16) is
        * fetched and vx is advanced by unit_x.  Source and destination are
        * expanded to 8888, passed with the a8 mask to an OVER_*8888_*8_*8888
        * macro, and the result is converted back to r5g6b5 and stored.  The
        * macros are defined outside this chunk; by their names they
        * presumably implement (src IN mask) OVER dst - TODO confirm.
        *
        * Two pixels are processed per iteration (label 1); label 2 handles
        * the single leftover pixel when w is odd.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 20, and vx/unit_x are afterwards read 20
        * bytes higher, at 36(sp) and 40(sp).
        *
  2526  * a0     - dst  (r5g6b5)
  2527  * a1     - src  (r5g6b5)
  2528  * a2     - mask (a8)
  2529  * a3     - w
  2530  * 16(sp) - vx
  2531  * 20(sp) - unit_x
  2532  */
  2534     beqz     a3, 4f     /* w == 0: return before touching the stack */
  2535      nop
  2536     SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
  2537     lw       v0, 36(sp) /* v0 = vx */
  2538     lw       v1, 40(sp) /* v1 = unit_x */
           /* Constants handed to the conversion/OVER macros below;
            * presumably the paired r5g6b5 channel masks and a byte mask. */
  2539     li       t4, 0xf800f800
  2540     li       t5, 0x07e007e0
  2541     li       t6, 0x001F001F
  2542     li       t7, 0x00ff00ff
  2544     addiu    t1, a3, -1
  2545     beqz     t1, 2f     /* w == 1: only the single-pixel tail */
  2546      nop
  2547 1:
  2548     sra      t0, v0, 16 /* t0 = vx >> 16 */
  2549     sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
  2550     addu     t0, a1, t0
  2551     lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
  2552     addu     v0, v0, v1 /* v0 = vx + unit_x */
  2553     sra      t1, v0, 16 /* t1 = vx >> 16 */
  2554     sll      t1, t1, 1  /* t1 = t1 * 2      (r5g6b5) */
  2555     addu     t1, a1, t1
  2556     lhu      t1, 0(t1)  /* t1 = source      (r5g6b5) */
  2557     addu     v0, v0, v1 /* v0 = vx + unit_x */
  2558     lbu      t2, 0(a2)  /* t2 = mask        (a8) */
  2559     lbu      t3, 1(a2)  /* t3 = mask        (a8) */
  2560     lhu      t8, 0(a0)  /* t8 = destination (r5g6b5) */
  2561     lhu      t9, 2(a0)  /* t9 = destination (r5g6b5) */
  2562     addiu    a2, a2, 2
           /* Expand source (to s0/s1) and destination (to s2/s3) to 8888. */
  2564     CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
  2565     CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
  2566     OVER_2x8888_2x8_2x8888   s0, s1, \
  2567                              t2, t3, \
  2568                              s2, s3, \
  2569                              t0, t1, \
  2570                              t7, t8, t9, s4, s5, s0, s1
  2571     CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
  2573     sh       s0, 0(a0)
  2574     sh       s1, 2(a0)
  2575     addiu    a3, a3, -2
  2576     addiu    t1, a3, -1
  2577     bgtz     t1, 1b     /* loop while at least two pixels remain */
  2578      addiu   a0, a0, 4  /* delay slot: advance dst by two pixels */
  2579 2:
           /* Tail: a3 is 0 or 1 here.  t4-t6 are reused as macro operands,
            * which is safe because the two-pixel loop is finished. */
  2580     beqz     a3, 3f
  2581      nop
  2582     sra      t0, v0, 16 /* t0 = vx >> 16 */
  2583     sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
  2584     addu     t0, a1, t0
  2586     lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
  2587     lbu      t1, 0(a2)  /* t1 = mask        (a8) */
  2588     lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
  2590     CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
  2591     CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
  2592     OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
  2593     CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
  2595     sh       t3, 0(a0)
  2596 3:
  2597     RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
  2598 4:
  2599     j        ra
  2600      nop
  2602 END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
  2604 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
  2605 /*
        * Bilinear scaled scanline, a8r8g8b8 -> a8r8g8b8, SRC operator.  For
        * each of w destination pixels the four neighbours (top/bottom row,
        * index vx >> 16 and the one after it) are loaded, four weights are
        * built from wt, wb and the fractional part of vx, and everything is
        * passed to BILINEAR_INTERPOLATE_SINGLE_PIXEL (defined outside this
        * chunk), whose result in t0 is stored to dst.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 20, and the stack arguments are
        * afterwards read 20 bytes higher (36(sp)..48(sp)).
        *
  2606  * a0     - *dst
  2607  * a1     - *src_top
  2608  * a2     - *src_bottom
  2609  * a3     - w
  2610  * 16(sp) - wt
  2611  * 20(sp) - wb
  2612  * 24(sp) - vx
  2613  * 28(sp) - unit_x
  2614  */
  2616     beqz     a3, 1f         /* w == 0: return before touching the stack */
  2617      nop
  2619     SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
  2621     lw       s0, 36(sp)     /* s0 = wt */
  2622     lw       s1, 40(sp)     /* s1 = wb */
  2623     lw       s2, 44(sp)     /* s2 = vx */
  2624     lw       s3, 48(sp)     /* s3 = unit_x */
  2625     li       v0, BILINEAR_INTERPOLATION_RANGE
           /* Pre-scale the vertical weights; the shift is zero when
            * BILINEAR_INTERPOLATION_BITS is 8. */
  2627     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2628     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2629 0:
  2630     andi     t4, s2, 0xffff /* t4 = fractional part of vx (low 16 bits) */
  2631     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BITS bits of the fraction */
  2632     subu     t5, v0, t4     /* t5 = RANGE - t4 */
  2634     mul      s4, s0, t5     /* s4 = wt * (RANGE - t4) */
  2635     mul      s5, s0, t4     /* s5 = wt * t4 */
  2636     mul      s6, s1, t5     /* s6 = wb * (RANGE - t4) */
  2637     mul      s7, s1, t4     /* s7 = wb * t4 */
  2639     sra      t9, s2, 16     /* t9 = integer part of vx */
  2640     sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
  2641     addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
  2642     lwx      t0, t9(a1)     /* t0 = tl */
  2643     lwx      t1, t8(a1)     /* t1 = tr */
  2644     addiu    a3, a3, -1
  2645     lwx      t2, t9(a2)     /* t2 = bl */
  2646     lwx      t3, t8(a2)     /* t3 = br */
  2648     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  2650     addu     s2, s2, s3     /* vx += unit_x; */
  2651     sw       t0, 0(a0)
  2652     bnez     a3, 0b
  2653      addiu   a0, a0, 4      /* delay slot: advance dst */
  2655     RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
  2656 1:
  2657     j        ra
  2658      nop
  2660 END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
  2662 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
  2663 /*
        * Bilinear scaled scanline, a8r8g8b8 source -> 16-bit destination, SRC
        * operator.  For each of w destination pixels the four neighbours
        * (top/bottom row, index vx >> 16 and the one after it) are loaded,
        * four weights are built from wt, wb and the fractional part of vx,
        * and everything is passed to BILINEAR_INTERPOLATE_SINGLE_PIXEL.  Its
        * result (t0) goes through CONVERT_1x8888_TO_1x0565 and the converted
        * halfword (t1) is stored to dst.  Both macros are defined outside
        * this chunk.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 20, and the stack arguments are
        * afterwards read 20 bytes higher (36(sp)..48(sp)).
        *
  2664  * a0     - *dst
  2665  * a1     - *src_top
  2666  * a2     - *src_bottom
  2667  * a3     - w
  2668  * 16(sp) - wt
  2669  * 20(sp) - wb
  2670  * 24(sp) - vx
  2671  * 28(sp) - unit_x
  2672  */
  2674     beqz     a3, 1f         /* w == 0: return before touching the stack */
  2675      nop
  2677     SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
  2679     lw       s0, 36(sp)     /* s0 = wt */
  2680     lw       s1, 40(sp)     /* s1 = wb */
  2681     lw       s2, 44(sp)     /* s2 = vx */
  2682     lw       s3, 48(sp)     /* s3 = unit_x */
  2683     li       v0, BILINEAR_INTERPOLATION_RANGE
           /* Pre-scale the vertical weights; the shift is zero when
            * BILINEAR_INTERPOLATION_BITS is 8. */
  2685     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2686     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2687 0:
  2688     andi     t4, s2, 0xffff /* t4 = fractional part of vx (low 16 bits) */
  2689     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BITS bits of the fraction */
  2690     subu     t5, v0, t4     /* t5 = RANGE - t4 */
  2692     mul      s4, s0, t5     /* s4 = wt * (RANGE - t4) */
  2693     mul      s5, s0, t4     /* s5 = wt * t4 */
  2694     mul      s6, s1, t5     /* s6 = wb * (RANGE - t4) */
  2695     mul      s7, s1, t4     /* s7 = wb * t4 */
  2697     sra      t9, s2, 16     /* t9 = integer part of vx */
  2698     sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
  2699     addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
  2700     lwx      t0, t9(a1)     /* t0 = tl */
  2701     lwx      t1, t8(a1)     /* t1 = tr */
  2702     addiu    a3, a3, -1
  2703     lwx      t2, t9(a2)     /* t2 = bl */
  2704     lwx      t3, t8(a2)     /* t3 = br */
  2706     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  2707     CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
  2709     addu     s2, s2, s3     /* vx += unit_x; */
  2710     sh       t1, 0(a0)
  2711     bnez     a3, 0b
  2712      addiu   a0, a0, 2      /* delay slot: advance dst by one 16-bit pixel */
  2714     RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
  2715 1:
  2716     j        ra
  2717      nop
  2719 END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
  2721 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
  2722 /*
        * Bilinear scaled scanline, 16-bit source -> 32-bit destination, SRC
        * operator.  For each of w destination pixels the four 16-bit
        * neighbours (top/bottom row, index vx >> 16 and the one after it)
        * are loaded and expanded with CONVERT_2x0565_TO_2x8888, four weights
        * are built from wt, wb and the fractional part of vx, and everything
        * is passed to BILINEAR_INTERPOLATE_SINGLE_PIXEL, whose result in t0
        * is stored to dst.  Both macros are defined outside this chunk.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 28, and the stack arguments are
        * afterwards read 28 bytes higher (44(sp)..56(sp)).
        *
  2723  * a0     - *dst
  2724  * a1     - *src_top
  2725  * a2     - *src_bottom
  2726  * a3     - w
  2727  * 16(sp) - wt
  2728  * 20(sp) - wb
  2729  * 24(sp) - vx
  2730  * 28(sp) - unit_x
  2731  */
  2733     beqz     a3, 1f         /* w == 0: return before touching the stack */
  2734      nop
  2736     SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2738     lw       s0, 44(sp)     /* s0 = wt */
  2739     lw       s1, 48(sp)     /* s1 = wb */
  2740     lw       s2, 52(sp)     /* s2 = vx */
  2741     lw       s3, 56(sp)     /* s3 = unit_x */
  2742     li       v0, BILINEAR_INTERPOLATION_RANGE
           /* Constants handed to CONVERT_2x0565_TO_2x8888 below; presumably
            * the paired green and blue channel masks. */
  2743     li       v1, 0x07e007e0
  2744     li       s8, 0x001f001f
           /* Pre-scale the vertical weights; the shift is zero when
            * BILINEAR_INTERPOLATION_BITS is 8. */
  2746     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2747     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2748 0:
  2749     andi     t4, s2, 0xffff /* t4 = fractional part of vx (low 16 bits) */
  2750     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BITS bits of the fraction */
  2751     subu     t5, v0, t4     /* t5 = RANGE - t4 */
  2753     mul      s4, s0, t5     /* s4 = wt * (RANGE - t4) */
  2754     mul      s5, s0, t4     /* s5 = wt * t4 */
  2755     mul      s6, s1, t5     /* s6 = wb * (RANGE - t4) */
  2756     mul      s7, s1, t4     /* s7 = wb * t4 */
  2758     sra      t9, s2, 16     /* t9 = integer part of vx */
  2759     sll      t9, t9, 1      /* t9 = byte offset of the left pixel */
  2760     addiu    t8, t9, 2      /* t8 = byte offset of the right pixel */
           /* NOTE(review): only tr and br are masked to 16 bits after lhx;
            * presumably the conversion macro needs just its second input
            * zero-extended - confirm against the macro definition. */
  2761     lhx      t0, t9(a1)     /* t0 = tl */
  2762     lhx      t1, t8(a1)     /* t1 = tr */
  2763     andi     t1, t1, 0xffff
  2764     addiu    a3, a3, -1
  2765     lhx      t2, t9(a2)     /* t2 = bl */
  2766     lhx      t3, t8(a2)     /* t3 = br */
  2767     andi     t3, t3, 0xffff
           /* t4/t5 are handed to the conversions while s4..s7 already hold
            * the weights computed from them. */
  2769     CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
  2770     CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
  2771     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  2773     addu     s2, s2, s3     /* vx += unit_x; */
  2774     sw       t0, 0(a0)
  2775     bnez     a3, 0b
  2776      addiu   a0, a0, 4      /* delay slot: advance dst */
  2778     RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2779 1:
  2780     j        ra
  2781      nop
  2783 END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
  2785 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
  2786 /*
        * Bilinear scaled scanline, 16-bit source -> 16-bit destination, SRC
        * operator.  For each of w destination pixels the four 16-bit
        * neighbours (top/bottom row, index vx >> 16 and the one after it)
        * are loaded and expanded with CONVERT_2x0565_TO_2x8888, four weights
        * are built from wt, wb and the fractional part of vx, and everything
        * is passed to BILINEAR_INTERPOLATE_SINGLE_PIXEL.  Its result (t0)
        * goes through CONVERT_1x8888_TO_1x0565 and the converted halfword
        * (t1) is stored to dst.  The macros are defined outside this chunk.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 28, and the stack arguments are
        * afterwards read 28 bytes higher (44(sp)..56(sp)).
        *
  2787  * a0     - *dst
  2788  * a1     - *src_top
  2789  * a2     - *src_bottom
  2790  * a3     - w
  2791  * 16(sp) - wt
  2792  * 20(sp) - wb
  2793  * 24(sp) - vx
  2794  * 28(sp) - unit_x
  2795  */
  2797     beqz     a3, 1f         /* w == 0: return before touching the stack */
  2798      nop
  2800     SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2802     lw       s0, 44(sp)     /* s0 = wt */
  2803     lw       s1, 48(sp)     /* s1 = wb */
  2804     lw       s2, 52(sp)     /* s2 = vx */
  2805     lw       s3, 56(sp)     /* s3 = unit_x */
  2806     li       v0, BILINEAR_INTERPOLATION_RANGE
           /* Constants handed to CONVERT_2x0565_TO_2x8888 below; presumably
            * the paired green and blue channel masks. */
  2807     li       v1, 0x07e007e0
  2808     li       s8, 0x001f001f
           /* Pre-scale the vertical weights; the shift is zero when
            * BILINEAR_INTERPOLATION_BITS is 8. */
  2810     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2811     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2812 0:
  2813     andi     t4, s2, 0xffff /* t4 = fractional part of vx (low 16 bits) */
  2814     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BITS bits of the fraction */
  2815     subu     t5, v0, t4     /* t5 = RANGE - t4 */
  2817     mul      s4, s0, t5     /* s4 = wt * (RANGE - t4) */
  2818     mul      s5, s0, t4     /* s5 = wt * t4 */
  2819     mul      s6, s1, t5     /* s6 = wb * (RANGE - t4) */
  2820     mul      s7, s1, t4     /* s7 = wb * t4 */
  2822     sra      t9, s2, 16     /* t9 = integer part of vx */
  2823     sll      t9, t9, 1      /* t9 = byte offset of the left pixel */
  2824     addiu    t8, t9, 2      /* t8 = byte offset of the right pixel */
           /* NOTE(review): only tr and br are masked to 16 bits after lhx;
            * presumably the conversion macro needs just its second input
            * zero-extended - confirm against the macro definition. */
  2825     lhx      t0, t9(a1)     /* t0 = tl */
  2826     lhx      t1, t8(a1)     /* t1 = tr */
  2827     andi     t1, t1, 0xffff
  2828     addiu    a3, a3, -1
  2829     lhx      t2, t9(a2)     /* t2 = bl */
  2830     lhx      t3, t8(a2)     /* t3 = br */
  2831     andi     t3, t3, 0xffff
           /* t4/t5 are handed to the conversions while s4..s7 already hold
            * the weights computed from them. */
  2833     CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
  2834     CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
  2835     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  2836     CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
  2838     addu     s2, s2, s3     /* vx += unit_x; */
  2839     sh       t1, 0(a0)
  2840     bnez     a3, 0b
  2841      addiu   a0, a0, 2      /* delay slot: advance dst by one 16-bit pixel */
  2843     RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2844 1:
  2845     j        ra
  2846      nop
  2848 END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
  2850 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
  2851 /*
        * Bilinear scaled scanline, a8r8g8b8 -> a8r8g8b8, OVER operator.  For
        * each of w destination pixels the four neighbours (top/bottom row,
        * index vx >> 16 and the one after it) are loaded, four weights are
        * built from wt, wb and the fractional part of vx, and everything is
        * passed to BILINEAR_INTERPOLATE_SINGLE_PIXEL.  Its result (t0) and
        * the current destination pixel are then passed to OVER_8888_8888,
        * and that macro's output (t2) is stored to dst.  Both macros are
        * defined outside this chunk.
        *
        * The stack offsets below are those on entry.  SAVE_REGS_ON_STACK is
        * invoked with an offset of 24, and the stack arguments are
        * afterwards read 24 bytes higher (40(sp)..52(sp)).
        *
  2852  * a0     - *dst
  2853  * a1     - *src_top
  2854  * a2     - *src_bottom
  2855  * a3     - w
  2856  * 16(sp) - wt
  2857  * 20(sp) - wb
  2858  * 24(sp) - vx
  2859  * 28(sp) - unit_x
  2860  */
  2862     beqz     a3, 1f         /* w == 0: return before touching the stack */
  2863      nop
  2865     SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2867     lw       s0, 40(sp)     /* s0 = wt */
  2868     lw       s1, 44(sp)     /* s1 = wb */
  2869     lw       s2, 48(sp)     /* s2 = vx */
  2870     lw       s3, 52(sp)     /* s3 = unit_x */
  2871     li       v0, BILINEAR_INTERPOLATION_RANGE
  2872     li       s8, 0x00ff00ff /* constant operand handed to OVER_8888_8888 */
           /* Pre-scale the vertical weights; the shift is zero when
            * BILINEAR_INTERPOLATION_BITS is 8. */
  2874     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2875     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2876 0:
  2877     andi     t4, s2, 0xffff /* t4 = fractional part of vx (low 16 bits) */
  2878     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BITS bits of the fraction */
  2879     subu     t5, v0, t4     /* t5 = RANGE - t4 */
  2881     mul      s4, s0, t5     /* s4 = wt * (RANGE - t4) */
  2882     mul      s5, s0, t4     /* s5 = wt * t4 */
  2883     mul      s6, s1, t5     /* s6 = wb * (RANGE - t4) */
  2884     mul      s7, s1, t4     /* s7 = wb * t4 */
  2886     sra      t9, s2, 16     /* t9 = integer part of vx */
  2887     sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
  2888     addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
  2889     lwx      t0, t9(a1)     /* t0 = tl */
  2890     lwx      t1, t8(a1)     /* t1 = tr */
  2891     addiu    a3, a3, -1
  2892     lwx      t2, t9(a2)     /* t2 = bl */
  2893     lwx      t3, t8(a2)     /* t3 = br */
  2895     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  2896     lw       t1, 0(a0)      /* t1 = dest */
  2897     OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6
  2899     addu     s2, s2, s3     /* vx += unit_x; */
  2900     sw       t2, 0(a0)
  2901     bnez     a3, 0b
  2902      addiu   a0, a0, 4      /* delay slot: advance dst */
  2904     RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2905 1:
  2906     j        ra
  2907      nop
  2909 END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
  2911 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
  2912 /*
        * Bilinear-interpolates w pixels from two rows of 32-bit source pixels
        * (src_top, src_bottom) and combines each result with the 32-bit
        * destination pixel using a per-byte saturating add (addu_s.qb).
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        * Returns immediately when w == 0.
        *
  2913  * a0     - *dst
  2914  * a1     - *src_top
  2915  * a2     - *src_bottom
  2916  * a3     - w
  2917  * 16(sp) - wt
  2918  * 20(sp) - wb
  2919  * 24(sp) - vx
  2920  * 28(sp) - unit_x
  2921  */
  2923     beqz         a3, 1f         /* w == 0: nothing to do */
  2924      nop                        /* branch delay slot */
           /*
            * From here on the stack arguments are read at entry offset + 20
            * (16(sp) -> 36(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  2926     SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
  2928     lw           s0, 36(sp)     /* s0 = wt */
  2929     lw           s1, 40(sp)     /* s1 = wb */
  2930     lw           s2, 44(sp)     /* s2 = vx */
  2931     lw           s3, 48(sp)     /* s3 = unit_x */
  2932     li           v0, BILINEAR_INTERPOLATION_RANGE
           /* Pre-scale the vertical weights once, outside the loop. */
  2934     sll          s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2935     sll          s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2936 0:
  2937     andi         t4, s2, 0xffff /* t4 = low 16 bits of vx (fraction) */
  2938     srl          t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
  2939     subu         t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  2941     mul          s4, s0, t5     /* s4 = wt * t5 (wt pre-scaled above) */
  2942     mul          s5, s0, t4     /* s5 = wt * t4 */
  2943     mul          s6, s1, t5     /* s6 = wb * t5 (wb pre-scaled above) */
  2944     mul          s7, s1, t4     /* s7 = wb * t4 */
  2946     sra          t9, s2, 16     /* t9 = integer part of vx (source column) */
  2947     sll          t9, t9, 2      /* t9 = byte offset of left pixel (4 bytes each) */
  2948     addiu        t8, t9, 4      /* t8 = byte offset of right pixel */
  2949     lwx          t0, t9(a1)     /* t0 = tl */
  2950     lwx          t1, t8(a1)     /* t1 = tr */
  2951     addiu        a3, a3, -1     /* w--, placed between the loads */
  2952     lwx          t2, t9(a2)     /* t2 = bl */
  2953     lwx          t3, t8(a2)     /* t3 = br */
           /*
            * Macro defined outside this chunk; the code below takes the
            * interpolated pixel from t0.
            */
  2955     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  2956     lw           t1, 0(a0)      /* t1 = dst pixel */
  2957     addu_s.qb    t2, t0, t1     /* t2 = t0 + t1, each byte saturated */
  2959     addu         s2, s2, s3     /* vx += unit_x; */
  2960     sw           t2, 0(a0)      /* store the combined pixel */
  2961     bnez         a3, 0b
  2962      addiu       a0, a0, 4      /* delay slot: dst advances one 32-bit pixel */
  2964     RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
  2965 1:
  2966     j            ra
  2967      nop
  2969 END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
  2971 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
  2972 /*
        * Bilinear-interpolates w pixels from two rows of 32-bit source pixels,
        * applies one mask byte per pixel through MIPS_UN8x4_MUL_UN8 and stores
        * the 32-bit result to dst (the previous dst contents are not read).
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        * Returns immediately when w == 0.
        *
  2973  * a0     - *dst
  2974  * a1     - *mask
  2975  * a2     - *src_top
  2976  * a3     - *src_bottom
  2977  * 16(sp) - wt
  2978  * 20(sp) - wb
  2979  * 24(sp) - vx
  2980  * 28(sp) - unit_x
  2981  * 32(sp) - w
  2982  */
  2984     lw       v1, 32(sp)        /* v1 = w, used as the loop counter */
  2985     beqz     v1, 1f            /* w == 0: nothing to do */
  2986      nop                       /* branch delay slot */
           /*
            * From here on the stack arguments are read at entry offset + 28
            * (16(sp) -> 44(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  2988     SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  2990     lw       s0, 44(sp)        /* s0 = wt */
  2991     lw       s1, 48(sp)        /* s1 = wb */
  2992     lw       s2, 52(sp)        /* s2 = vx */
  2993     lw       s3, 56(sp)        /* s3 = unit_x */
  2994     li       v0, BILINEAR_INTERPOLATION_RANGE
  2995     li       s8, 0x00ff00ff    /* constant passed to MIPS_UN8x4_MUL_UN8 */
           /* Pre-scale the vertical weights once, outside the loop. */
  2997     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2998     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  2999 0:
  3000     andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (fraction) */
  3001     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
  3002     subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  3004     mul      s4, s0, t5        /* s4 = wt * t5 (wt pre-scaled above) */
  3005     mul      s5, s0, t4        /* s5 = wt * t4 */
  3006     mul      s6, s1, t5        /* s6 = wb * t5 (wb pre-scaled above) */
  3007     mul      s7, s1, t4        /* s7 = wb * t4 */
  3009     sra      t9, s2, 16        /* t9 = integer part of vx (source column) */
  3010     sll      t9, t9, 2         /* t9 = byte offset of left pixel (4 bytes each) */
  3011     addiu    t8, t9, 4         /* t8 = byte offset of right pixel */
  3012     lwx      t0, t9(a2)        /* t0 = tl */
  3013     lwx      t1, t8(a2)        /* t1 = tr */
  3014     addiu    v1, v1, -1        /* w--, placed between the loads */
  3015     lwx      t2, t9(a3)        /* t2 = bl */
  3016     lwx      t3, t8(a3)        /* t3 = br */
           /*
            * Both macros are defined outside this chunk; the code below takes
            * the interpolated pixel from t0 and the masked pixel from t0 again.
            */
  3018     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  3019     lbu      t1, 0(a1)         /* t1 = mask */
  3020     addiu    a1, a1, 1         /* mask++ (one byte per pixel) */
  3021     MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
  3023     addu     s2, s2, s3        /* vx += unit_x; */
  3024     sw       t0, 0(a0)         /* store the masked pixel */
  3025     bnez     v1, 0b
  3026      addiu   a0, a0, 4         /* delay slot: dst advances one 32-bit pixel */
  3028     RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3029 1:
  3030     j        ra
  3031      nop
  3033 END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
  3035 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
  3036 /*
        * Bilinear-interpolates w pixels from two rows of 32-bit source pixels,
        * applies one mask byte per pixel through MIPS_UN8x4_MUL_UN8, converts
        * the result with CONVERT_1x8888_TO_1x0565 and stores it to dst as a
        * 16-bit pixel (the previous dst contents are not read).
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        * Returns immediately when w == 0.
        *
  3037  * a0     - *dst
  3038  * a1     - *mask
  3039  * a2     - *src_top
  3040  * a3     - *src_bottom
  3041  * 16(sp) - wt
  3042  * 20(sp) - wb
  3043  * 24(sp) - vx
  3044  * 28(sp) - unit_x
  3045  * 32(sp) - w
  3046  */
  3048     lw       v1, 32(sp)        /* v1 = w, used as the loop counter */
  3049     beqz     v1, 1f            /* w == 0: nothing to do */
  3050      nop                       /* branch delay slot */
           /*
            * From here on the stack arguments are read at entry offset + 28
            * (16(sp) -> 44(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  3052     SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3054     lw       s0, 44(sp)        /* s0 = wt */
  3055     lw       s1, 48(sp)        /* s1 = wb */
  3056     lw       s2, 52(sp)        /* s2 = vx */
  3057     lw       s3, 56(sp)        /* s3 = unit_x */
  3058     li       v0, BILINEAR_INTERPOLATION_RANGE
  3059     li       s8, 0x00ff00ff    /* constant passed to MIPS_UN8x4_MUL_UN8 */
           /* Pre-scale the vertical weights once, outside the loop. */
  3061     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3062     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3063 0:
  3064     andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (fraction) */
  3065     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
  3066     subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  3068     mul      s4, s0, t5        /* s4 = wt * t5 (wt pre-scaled above) */
  3069     mul      s5, s0, t4        /* s5 = wt * t4 */
  3070     mul      s6, s1, t5        /* s6 = wb * t5 (wb pre-scaled above) */
  3071     mul      s7, s1, t4        /* s7 = wb * t4 */
  3073     sra      t9, s2, 16        /* t9 = integer part of vx (source column) */
  3074     sll      t9, t9, 2         /* t9 = byte offset of left pixel (4 bytes each) */
  3075     addiu    t8, t9, 4         /* t8 = byte offset of right pixel */
  3076     lwx      t0, t9(a2)        /* t0 = tl */
  3077     lwx      t1, t8(a2)        /* t1 = tr */
  3078     addiu    v1, v1, -1        /* w--, placed between the loads */
  3079     lwx      t2, t9(a3)        /* t2 = bl */
  3080     lwx      t3, t8(a3)        /* t3 = br */
           /*
            * The three macros are defined outside this chunk.  As used here,
            * the interpolated and masked pixel is carried in t0 and the 16-bit
            * converted pixel is taken from t1.
            */
  3082     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  3083     lbu      t1, 0(a1)         /* t1 = mask */
  3084     addiu    a1, a1, 1         /* mask++ (one byte per pixel) */
  3085     MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
  3086     CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
  3088     addu     s2, s2, s3        /* vx += unit_x; */
  3089     sh       t1, 0(a0)         /* store the 16-bit pixel */
  3090     bnez     v1, 0b
  3091      addiu   a0, a0, 2         /* delay slot: dst advances one 16-bit pixel */
  3093     RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3094 1:
  3095     j        ra
  3096      nop
  3098 END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
  3100 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
  3101 /*
        * Bilinear-interpolates w pixels from two rows of 16-bit source pixels:
        * each pair of source pixels is expanded with CONVERT_2x0565_TO_2x8888,
        * interpolated, multiplied by one mask byte per pixel through
        * MIPS_UN8x4_MUL_UN8 and stored to dst as a 32-bit pixel (the previous
        * dst contents are not read).
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        * Returns immediately when w == 0.
        *
        * ra is used as the loop counter; it is in the save/restore list and
        * is restored before the final "j ra".
        *
  3102  * a0     - *dst
  3103  * a1     - *mask
  3104  * a2     - *src_top
  3105  * a3     - *src_bottom
  3106  * 16(sp) - wt
  3107  * 20(sp) - wb
  3108  * 24(sp) - vx
  3109  * 28(sp) - unit_x
  3110  * 32(sp) - w
  3111  */
  3113     lw       t0, 32(sp)        /* t0 = w, only for the zero check */
  3114     beqz     t0, 1f            /* w == 0: nothing to do */
  3115      nop                       /* branch delay slot */
           /*
            * From here on the stack arguments are read at entry offset + 32
            * (16(sp) -> 48(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  3117     SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
  3119     lw       s0, 48(sp)        /* s0 = wt */
  3120     lw       s1, 52(sp)        /* s1 = wb */
  3121     lw       s2, 56(sp)        /* s2 = vx */
  3122     lw       s3, 60(sp)        /* s3 = unit_x */
  3123     lw       ra, 64(sp)        /* ra = w */
  3124     li       v0, 0x00ff00ff    /* constant passed to MIPS_UN8x4_MUL_UN8 */
  3125     li       v1, 0x07e007e0    /* bits 5-10 of each halfword, passed to CONVERT_2x0565_TO_2x8888 */
  3126     li       s8, 0x001f001f    /* bits 0-4 of each halfword, passed to CONVERT_2x0565_TO_2x8888 */
           /* Pre-scale the vertical weights once, outside the loop. */
  3128     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3129     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3130 0:
  3131     andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (fraction) */
  3132     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
           /*
            * The range constant is reloaded every iteration: v0 holds a mask
            * in this routine and t5 is handed to the macros below as an
            * argument.
            */
  3133     li       t5, BILINEAR_INTERPOLATION_RANGE
  3134     subu     t5, t5, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  3136     mul      s4, s0, t5        /* s4 = wt * t5 (wt pre-scaled above) */
  3137     mul      s5, s0, t4        /* s5 = wt * t4 */
  3138     mul      s6, s1, t5        /* s6 = wb * t5 (wb pre-scaled above) */
  3139     mul      s7, s1, t4        /* s7 = wb * t4 */
  3141     sra      t9, s2, 16        /* t9 = integer part of vx (source column) */
  3142     sll      t9, t9, 1         /* t9 = byte offset of left pixel (2 bytes each) */
  3143     addiu    t8, t9, 2         /* t8 = byte offset of right pixel */
  3144     lhx      t0, t9(a2)        /* t0 = tl */
  3145     lhx      t1, t8(a2)        /* t1 = tr */
           /*
            * Only the second pixel of each pair is masked to 16 bits;
            * presumably CONVERT_2x0565_TO_2x8888 needs its second input free
            * of the sign extension done by lhx - TODO confirm against the macro.
            */
  3146     andi     t1, t1, 0xffff
  3147     addiu    ra, ra, -1        /* w--, placed between the loads */
  3148     lhx      t2, t9(a3)        /* t2 = bl */
  3149     lhx      t3, t8(a3)        /* t3 = br */
  3150     andi     t3, t3, 0xffff    /* same masking as for tr */
           /*
            * All macros below are defined outside this chunk.  As used here,
            * each CONVERT call rewrites its two inputs in place and the
            * interpolated and masked pixel is carried in t0.
            */
  3152     CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
  3153     CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
  3154     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  3155     lbu      t1, 0(a1)         /* t1 = mask */
  3156     addiu    a1, a1, 1         /* mask++ (one byte per pixel) */
  3157     MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
  3159     addu     s2, s2, s3        /* vx += unit_x; */
  3160     sw       t0, 0(a0)         /* store the masked pixel */
  3161     bnez     ra, 0b
  3162      addiu   a0, a0, 4         /* delay slot: dst advances one 32-bit pixel */
  3164     RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
  3165 1:
  3166     j        ra
  3167      nop
  3169 END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
  3171 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
  3172 /*
        * Bilinear-interpolates w pixels from two rows of 16-bit source pixels:
        * each pair of source pixels is expanded with CONVERT_2x0565_TO_2x8888,
        * interpolated, multiplied by one mask byte per pixel through
        * MIPS_UN8x4_MUL_UN8, converted back with CONVERT_1x8888_TO_1x0565 and
        * stored to dst as a 16-bit pixel (the previous dst contents are not
        * read).
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        * Returns immediately when w == 0.
        *
        * ra is used as the loop counter; it is in the save/restore list and
        * is restored before the final "j ra".
        *
  3173  * a0     - *dst
  3174  * a1     - *mask
  3175  * a2     - *src_top
  3176  * a3     - *src_bottom
  3177  * 16(sp) - wt
  3178  * 20(sp) - wb
  3179  * 24(sp) - vx
  3180  * 28(sp) - unit_x
  3181  * 32(sp) - w
  3182  */
  3184     lw       t0, 32(sp)        /* t0 = w, only for the zero check */
  3185     beqz     t0, 1f            /* w == 0: nothing to do */
  3186      nop                       /* branch delay slot */
           /*
            * From here on the stack arguments are read at entry offset + 32
            * (16(sp) -> 48(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  3188     SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
  3190     lw       s0, 48(sp)        /* s0 = wt */
  3191     lw       s1, 52(sp)        /* s1 = wb */
  3192     lw       s2, 56(sp)        /* s2 = vx */
  3193     lw       s3, 60(sp)        /* s3 = unit_x */
  3194     lw       ra, 64(sp)        /* ra = w */
  3195     li       v0, 0x00ff00ff    /* constant passed to MIPS_UN8x4_MUL_UN8 */
  3196     li       v1, 0x07e007e0    /* bits 5-10 of each halfword, passed to CONVERT_2x0565_TO_2x8888 */
  3197     li       s8, 0x001f001f    /* bits 0-4 of each halfword, passed to CONVERT_2x0565_TO_2x8888 */
           /* Pre-scale the vertical weights once, outside the loop. */
  3199     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3200     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3201 0:
  3202     andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (fraction) */
  3203     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
           /*
            * The range constant is reloaded every iteration: v0 holds a mask
            * in this routine and t5 is handed to the macros below as an
            * argument.
            */
  3204     li       t5, BILINEAR_INTERPOLATION_RANGE
  3205     subu     t5, t5, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  3207     mul      s4, s0, t5        /* s4 = wt * t5 (wt pre-scaled above) */
  3208     mul      s5, s0, t4        /* s5 = wt * t4 */
  3209     mul      s6, s1, t5        /* s6 = wb * t5 (wb pre-scaled above) */
  3210     mul      s7, s1, t4        /* s7 = wb * t4 */
  3212     sra      t9, s2, 16        /* t9 = integer part of vx (source column) */
  3213     sll      t9, t9, 1         /* t9 = byte offset of left pixel (2 bytes each) */
  3214     addiu    t8, t9, 2         /* t8 = byte offset of right pixel */
  3215     lhx      t0, t9(a2)        /* t0 = tl */
  3216     lhx      t1, t8(a2)        /* t1 = tr */
           /*
            * Only the second pixel of each pair is masked to 16 bits;
            * presumably CONVERT_2x0565_TO_2x8888 needs its second input free
            * of the sign extension done by lhx - TODO confirm against the macro.
            */
  3217     andi     t1, t1, 0xffff
  3218     addiu    ra, ra, -1        /* w--, placed between the loads */
  3219     lhx      t2, t9(a3)        /* t2 = bl */
  3220     lhx      t3, t8(a3)        /* t3 = br */
  3221     andi     t3, t3, 0xffff    /* same masking as for tr */
           /*
            * All macros below are defined outside this chunk.  As used here,
            * each CONVERT_2x call rewrites its two inputs in place, the
            * interpolated and masked pixel is carried in t0, and the final
            * 16-bit pixel is taken from t1.
            */
  3223     CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
  3224     CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
  3225     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  3226     lbu      t1, 0(a1)         /* t1 = mask */
  3227     addiu    a1, a1, 1         /* mask++ (one byte per pixel) */
  3228     MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
  3229     CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
  3231     addu     s2, s2, s3        /* vx += unit_x; */
  3232     sh       t1, 0(a0)         /* store the 16-bit pixel */
  3233     bnez     ra, 0b
  3234      addiu   a0, a0, 2         /* delay slot: dst advances one 16-bit pixel */
  3236     RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
  3237 1:
  3238     j        ra
  3239      nop
  3241 END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
  3243 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
  3244 /*
        * Bilinear-interpolates w pixels from two rows of a8r8g8b8 source
        * pixels and combines each result with one a8 mask byte and the
        * existing destination pixel through the OVER_8888_8_8888 macro,
        * storing the outcome back to dst.
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        *
        * Unlike the neighbouring routines, the registers are saved before w
        * is checked, so the w == 0 exit (label 1) sits in front of
        * RESTORE_REGS_FROM_STACK and restores them as well.
        *
  3245  * a0     - dst        (a8r8g8b8)
  3246  * a1     - mask       (a8)
  3247  * a2     - src_top    (a8r8g8b8)
  3248  * a3     - src_bottom (a8r8g8b8)
  3249  * 16(sp) - wt
  3250  * 20(sp) - wb
  3251  * 24(sp) - vx
  3252  * 28(sp) - unit_x
  3253  * 32(sp) - w
  3254  */
           /*
            * From here on the stack arguments are read at entry offset + 28
            * (16(sp) -> 44(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  3256     SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3258     lw       v1, 60(sp)        /* v1 = w (entry offset 32 + 28 bytes of save area) */
  3259     beqz     v1, 1f            /* w == 0: skip the loop, still restore */
  3260      nop                       /* branch delay slot */
  3262     lw       s0, 44(sp)        /* s0 = wt */
  3263     lw       s1, 48(sp)        /* s1 = wb */
  3264     lw       s2, 52(sp)        /* s2 = vx */
  3265     lw       s3, 56(sp)        /* s3 = unit_x */
  3266     li       v0, BILINEAR_INTERPOLATION_RANGE
  3267     li       s8, 0x00ff00ff    /* constant passed to OVER_8888_8_8888 */
           /* Pre-scale the vertical weights once, outside the loop. */
  3269     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3270     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3272 0:
  3273     andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (fraction) */
  3274     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
  3275     subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  3277     mul      s4, s0, t5        /* s4 = wt * t5 (wt pre-scaled above) */
  3278     mul      s5, s0, t4        /* s5 = wt * t4 */
  3279     mul      s6, s1, t5        /* s6 = wb * t5 (wb pre-scaled above) */
  3280     mul      s7, s1, t4        /* s7 = wb * t4 */
  3282     sra      t9, s2, 16        /* t9 = integer part of vx (source column) */
  3283     sll      t9, t9, 2         /* t9 = byte offset of left pixel (4 bytes each) */
  3284     addiu    t8, t9, 4         /* t8 = byte offset of right pixel */
  3285     lwx      t0, t9(a2)        /* t0 = tl */
  3286     lwx      t1, t8(a2)        /* t1 = tr */
  3287     addiu    v1, v1, -1        /* w--, placed between the loads */
  3288     lwx      t2, t9(a3)        /* t2 = bl */
  3289     lwx      t3, t8(a3)        /* t3 = br */
           /*
            * Both macros are defined outside this chunk; as used here the
            * interpolated pixel and then the composited pixel are carried
            * in t0.
            */
  3291     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \
  3292                                       t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  3293     lbu      t1, 0(a1)         /* t1 = mask */
  3294     lw       t2, 0(a0)         /* t2 = dst */
  3295     addiu    a1, a1, 1         /* mask++ (one byte per pixel) */
  3296     OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6
  3298     addu     s2, s2, s3        /* vx += unit_x; */
  3299     sw       t0, 0(a0)         /* store the composited pixel */
  3300     bnez     v1, 0b
  3301      addiu   a0, a0, 4         /* delay slot: dst advances one 32-bit pixel */
  3303 1:
  3304     RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3305     j        ra
  3306      nop
  3308 END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
  3310 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
  3311 /*
        * Bilinear-interpolates w pixels from two rows of 32-bit source pixels
        * and combines each result with one mask byte and the existing 32-bit
        * destination pixel through the MIPS_UN8x4_MUL_UN8_ADD_UN8x4 macro,
        * storing the outcome back to dst.
        *
        * vx is advanced by unit_x per pixel; its upper 16 bits select the
        * source column and its lower 16 bits supply the horizontal weight.
        * Returns immediately when w == 0.
        *
  3312  * a0     - *dst
  3313  * a1     - *mask
  3314  * a2     - *src_top
  3315  * a3     - *src_bottom
  3316  * 16(sp) - wt
  3317  * 20(sp) - wb
  3318  * 24(sp) - vx
  3319  * 28(sp) - unit_x
  3320  * 32(sp) - w
  3321  */
  3323     lw       v1, 32(sp)        /* v1 = w, used as the loop counter */
  3324     beqz     v1, 1f            /* w == 0: nothing to do */
  3325      nop                       /* branch delay slot */
           /*
            * From here on the stack arguments are read at entry offset + 28
            * (16(sp) -> 44(sp)).  SAVE_REGS_ON_STACK is defined outside this
            * chunk; presumably it lowers sp by its first argument.
            */
  3327     SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3329     lw       s0, 44(sp)        /* s0 = wt */
  3330     lw       s1, 48(sp)        /* s1 = wb */
  3331     lw       s2, 52(sp)        /* s2 = vx */
  3332     lw       s3, 56(sp)        /* s3 = unit_x */
  3333     li       v0, BILINEAR_INTERPOLATION_RANGE
  3334     li       s8, 0x00ff00ff    /* constant passed to MIPS_UN8x4_MUL_UN8_ADD_UN8x4 */
           /* Pre-scale the vertical weights once, outside the loop. */
  3336     sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3337     sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
  3338 0:
  3339     andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (fraction) */
  3340     srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
  3341     subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
           /* Four combined weights passed to the interpolation macro below. */
  3343     mul      s4, s0, t5        /* s4 = wt * t5 (wt pre-scaled above) */
  3344     mul      s5, s0, t4        /* s5 = wt * t4 */
  3345     mul      s6, s1, t5        /* s6 = wb * t5 (wb pre-scaled above) */
  3346     mul      s7, s1, t4        /* s7 = wb * t4 */
  3348     sra      t9, s2, 16        /* t9 = integer part of vx (source column) */
  3349     sll      t9, t9, 2         /* t9 = byte offset of left pixel (4 bytes each) */
  3350     addiu    t8, t9, 4         /* t8 = byte offset of right pixel */
  3351     lwx      t0, t9(a2)        /* t0 = tl */
  3352     lwx      t1, t8(a2)        /* t1 = tr */
  3353     addiu    v1, v1, -1        /* w--, placed between the loads */
  3354     lwx      t2, t9(a3)        /* t2 = bl */
  3355     lwx      t3, t8(a3)        /* t3 = br */
           /*
            * Both macros are defined outside this chunk; as used here the
            * interpolated pixel and then the combined pixel are carried in t0.
            */
  3357     BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
  3358     lbu      t1, 0(a1)         /* t1 = mask */
  3359     lw       t2, 0(a0)         /* t2 = dst */
  3360     addiu    a1, a1, 1         /* mask++ (one byte per pixel) */
  3361     MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5
  3363     addu     s2, s2, s3        /* vx += unit_x; */
  3364     sw       t0, 0(a0)         /* store the combined pixel */
  3365     bnez     v1, 0b
  3366      addiu   a0, a0, 4         /* delay slot: dst advances one 32-bit pixel */
  3368     RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
  3369 1:
  3370     j        ra
  3371      nop
  3373 END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)

mercurial