gfx/ycbcr/yuv_convert_arm.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
     2 // Use of this source code is governed by a BSD-style license that can be
     3 // found in the LICENSE file.
     5 // contributor Siarhei Siamashka <siarhei.siamashka@gmail.com>
     7 #include "yuv_convert.h"
     8 #include "ycbcr_to_rgb565.h"
    12 #ifdef HAVE_YCBCR_TO_RGB565
    14 namespace mozilla {
    16 namespace gfx {
    18 #  if defined(MOZILLA_MAY_SUPPORT_NEON)
       /*
        * Converts one row of Y/U/V samples (three separate arrays) to RGB565
        * using ARM NEON inline assembly.
        *
        * dst     - output row; one 16-bit pixel is stored per Y sample, with red
        *           in bits 15-11, green in bits 10-5 and blue in bits 4-0.
        * y       - luma samples; one byte is consumed per output pixel.
        * u, v    - chroma samples; in the 2/4/8/16-pixel paths each U/V byte is
        *           shared by two neighbouring pixels, while the 1-pixel path
        *           consumes one U and one V byte for a single pixel.
        * n       - number of pixels to convert (including the optional leading
        *           pixel selected by oddflag).
        * oddflag - when non-zero, a single pixel is converted first (one Y, one U
        *           and one V byte) and n is reduced by one before the main loop.
        *
        * The pointer arguments are advanced by the assembly, but they are passed
        * by value, so the caller's pointers are not modified.
        *
        * NOTE(review): n is never range-checked; the tail dispatch tests the low
        * four bits of (n - 16 * k), so this presumably relies on callers passing
        * n >= 0 (and n >= 1 when oddflag is set) - confirm against the callers.
        */
    19 void __attribute((noinline,optimize("-fomit-frame-pointer")))
    20     yuv42x_to_rgb565_row_neon(uint16 *dst,
    21                               const uint8 *y,
    22                               const uint8 *u,
    23                               const uint8 *v,
    24                               int n,
    25                               int oddflag)
    26 {
           /*
            * Initial values of the per-channel 16-bit accumulators. They are
            * 16-byte aligned because the assembly loads them with a :128 alignment
            * hint. The chroma products are subtracted from these values and the
            * result is then combined with Y * 149 (see convert_macroblock below).
            *
            * NOTE(review): 22840 == 204.5 * 112 - 64, 17312 == (104 + 50) * 112 + 64
            * and 28832 == (104 + 154) * 112 - 64, so the values appear to fold the
            * 128 chroma offset (112 after the 16 removed by clipping) and a 64
            * rounding term into the accumulators - confirm.
            */
    27     static __attribute__((aligned(16))) uint16 acc_r[8] = {
    28         22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840,
    29     };
    30     static __attribute__((aligned(16))) uint16 acc_g[8] = {
    31         17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312,
    32     };
    33     static __attribute__((aligned(16))) uint16 acc_b[8] = {
    34         28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832,
    35     };
    36     /*
    37      * Registers:
    38      * q0, q1 : d0, d1, d2, d3  - are used for initial loading of YUV data
    39      * q2     : d4, d5          - are used for storing converted RGB data
    40      * q3     : d6, d7          - are used for temporary storage
    41      *
    42      * q4, q5 : d8, d9, d10, d11 - clipping constants (#15, #20, #31, #36); q6, q7 are not used
    43      *
    44      * q8, q9 : d16, d17, d18, d19  - are used for expanded Y data
    45      * q10    : d20, d21            - red accumulator
    46      * q11    : d22, d23            - green accumulator
    47      * q12    : d24, d25            - blue accumulator
    48      * q13    : d26, d27            - constants #16 (loaded but not referenced) and #149
    49      * q13, q14, q15            - various constants (#16, #149, #204, #50, #104, #154)
    50      */
           /*
            * Layout of the asm statement: the convert_macroblock assembler macro is
            * defined first, then the constant registers are initialised, then the
            * macro is expanded for the optional leading pixel, the 16-pixel main
            * loop and the 8/4/2/1-pixel tail, and finally the macro is purged.
            */
    51     asm volatile (
    52 ".fpu neon\n"
    53 /* Allow building on targets that do not support NEON, and force the object
    54  * file target so that the final binary target is not bumped */
    55 ".arch armv7-a\n"
    56 ".object_arch armv4t\n"
       /* convert_macroblock size: converts `size` pixels (1, 2, 4, 8 or 16) and
        * advances the y, u, v and dst pointers via post-increment addressing */
    57 ".macro convert_macroblock size\n"
    58 /* load up to 16 source pixels */
    59 	".if \\size == 16\n"
    60 	    "pld [%[y], #64]\n"
    61 	    "pld [%[u], #64]\n"
    62 	    "pld [%[v], #64]\n"
    63 	    "vld1.8 {d1}, [%[y]]!\n"
    64 	    "vld1.8 {d3}, [%[y]]!\n"
    65 	    "vld1.8 {d0}, [%[u]]!\n"
    66 	    "vld1.8 {d2}, [%[v]]!\n"
    67 	".elseif \\size == 8\n"
       	    /* 8 Y bytes in one load; the 4 U and 4 V bytes are loaded lane by lane */
    68 	    "vld1.8 {d1}, [%[y]]!\n"
    69 	    "vld1.8 {d0[0]}, [%[u]]!\n"
    70 	    "vld1.8 {d0[1]}, [%[u]]!\n"
    71 	    "vld1.8 {d0[2]}, [%[u]]!\n"
    72 	    "vld1.8 {d0[3]}, [%[u]]!\n"
    73 	    "vld1.8 {d2[0]}, [%[v]]!\n"
    74 	    "vld1.8 {d2[1]}, [%[v]]!\n"
    75 	    "vld1.8 {d2[2]}, [%[v]]!\n"
    76 	    "vld1.8 {d2[3]}, [%[v]]!\n"
    77 	".elseif \\size == 4\n"
    78 	    "vld1.8 {d1[0]}, [%[y]]!\n"
    79 	    "vld1.8 {d1[1]}, [%[y]]!\n"
    80 	    "vld1.8 {d1[2]}, [%[y]]!\n"
    81 	    "vld1.8 {d1[3]}, [%[y]]!\n"
    82 	    "vld1.8 {d0[0]}, [%[u]]!\n"
    83 	    "vld1.8 {d0[1]}, [%[u]]!\n"
    84 	    "vld1.8 {d2[0]}, [%[v]]!\n"
    85 	    "vld1.8 {d2[1]}, [%[v]]!\n"
    86 	".elseif \\size == 2\n"
    87 	    "vld1.8 {d1[0]}, [%[y]]!\n"
    88 	    "vld1.8 {d1[1]}, [%[y]]!\n"
    89 	    "vld1.8 {d0[0]}, [%[u]]!\n"
    90 	    "vld1.8 {d2[0]}, [%[v]]!\n"
    91 	".elseif \\size == 1\n"
       	    /* single pixel: one Y byte plus one U and one V byte */
    92 	    "vld1.8 {d1[0]}, [%[y]]!\n"
    93 	    "vld1.8 {d0[0]}, [%[u]]!\n"
    94 	    "vld1.8 {d2[0]}, [%[v]]!\n"
    95 	".else\n"
    96 	    ".error \"unsupported macroblock size\"\n"
    97 	".endif\n"
    99         /* d1 - Y data (first 8 bytes) */
   100         /* d3 - Y data (next 8 bytes) */
   101         /* d0 - U data, d2 - V data */
   103 	/* split even and odd Y color components */
   104 	"vuzp.8      d1, d3\n"                       /* d1 - evenY, d3 - oddY */
   105 	/* clip upper and lower boundaries: q0 = {U, evenY} and q1 = {V, oddY} get
       	 * a saturating add of q4 = {#15, #20} followed by a saturating subtract of
       	 * q5 = {#31, #36}, which leaves clamp(U or V, 16, 240) - 16 and
       	 * clamp(Y, 16, 235) - 16 in the registers */
   106 	"vqadd.u8    q0, q0, q4\n"
   107 	"vqadd.u8    q1, q1, q4\n"
   108 	"vqsub.u8    q0, q0, q5\n"
   109 	"vqsub.u8    q1, q1, q5\n"
   111 	"vshr.u8     d4, d2, #1\n"                   /* d4 = V >> 1 */
   113 	"vmull.u8    q8, d1, d27\n"                  /* q8 = evenY * 149 */
   114 	"vmull.u8    q9, d3, d27\n"                  /* q9 = oddY * 149 */
   116 	"vld1.16     {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */
   117 	"vsubw.u8    q10, q10, d4\n"                 /* red acc -= (V >> 1) */
   118 	"vmlsl.u8    q10, d2, d28\n"                 /* red acc -= V * 204 */
   119 	"vld1.16     {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */
   120 	"vmlsl.u8    q11, d2, d30\n"                 /* green acc -= V * 104 */
   121 	"vmlsl.u8    q11, d0, d29\n"                 /* green acc -= U * 50 */
   122 	"vld1.16     {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */
   123 	"vmlsl.u8    q12, d0, d30\n"                 /* blue acc -= U * 104 */
   124 	"vmlsl.u8    q12, d0, d31\n"                 /* blue acc -= U * 154 */
       	/* Each channel below is (Y * 149 -/+ accumulator) halved by vhsub/vhadd and
       	 * then shifted right by 6 with unsigned saturation to 8 bits, so the
       	 * overall divisor is 128. The same accumulator is used for the even and
       	 * the odd pixel, which is how one chroma sample serves two pixels. */
   126 	"vhsub.s16   q3, q8, q10\n"                  /* calculate even red components */
   127 	"vhsub.s16   q10, q9, q10\n"                 /* calculate odd red components */
   128 	"vqshrun.s16 d0, q3, #6\n"                   /* right shift, narrow and saturate even red components */
   129 	"vqshrun.s16 d3, q10, #6\n"                  /* right shift, narrow and saturate odd red components */
   131 	"vhadd.s16   q3, q8, q11\n"                  /* calculate even green components */
   132 	"vhadd.s16   q11, q9, q11\n"                 /* calculate odd green components */
   133 	"vqshrun.s16 d1, q3, #6\n"                   /* right shift, narrow and saturate even green components */
   134 	"vqshrun.s16 d4, q11, #6\n"                  /* right shift, narrow and saturate odd green components */
   136 	"vhsub.s16   q3, q8, q12\n"                  /* calculate even blue components */
   137 	"vhsub.s16   q12, q9, q12\n"                 /* calculate odd blue components */
   138 	"vqshrun.s16 d2, q3, #6\n"                   /* right shift, narrow and saturate even blue components */
   139 	"vqshrun.s16 d5, q12, #6\n"                  /* right shift, narrow and saturate odd blue components */
   141 	"vzip.8      d0, d3\n"                       /* join even and odd red components */
   142 	"vzip.8      d1, d4\n"                       /* join even and odd green components */
   143 	"vzip.8      d2, d5\n"                       /* join even and odd blue components */
       	/* pack the first 8 pixels into RGB565 in q3: each 8-bit channel is moved
       	 * to the top byte of a 16-bit lane, then green is inserted shifted right
       	 * by 5 and blue shifted right by 11, leaving red in bits 15-11, green in
       	 * bits 10-5 and blue in bits 4-0 */
   145 	"vshll.u8    q3, d0, #8\n\t"
   146 	"vshll.u8    q8, d1, #8\n\t"
   147 	"vshll.u8    q9, d2, #8\n\t"
   148 	"vsri.u16    q3, q8, #5\t\n"
   149 	"vsri.u16    q3, q9, #11\t\n"
   150 	/* store pixel data to memory */
   151 	".if \\size == 16\n"
   152 	"    vst1.16 {d6, d7}, [%[dst]]!\n"
       	/* pack and store the second 8 pixels, held in d3 (red), d4 (green) and
       	 * d5 (blue) after the vzip above */
   153 	"    vshll.u8    q3, d3, #8\n\t"
   154 	"    vshll.u8    q8, d4, #8\n\t"
   155 	"    vshll.u8    q9, d5, #8\n\t"
   156 	"    vsri.u16    q3, q8, #5\t\n"
   157 	"    vsri.u16    q3, q9, #11\t\n"
   158 	"    vst1.16 {d6, d7}, [%[dst]]!\n"
   159 	".elseif \\size == 8\n"
   160 	"    vst1.16 {d6, d7}, [%[dst]]!\n"
   161 	".elseif \\size == 4\n"
   162 	"    vst1.16 {d6}, [%[dst]]!\n"
   163 	".elseif \\size == 2\n"
   164 	"    vst1.16 {d6[0]}, [%[dst]]!\n"
   165 	"    vst1.16 {d6[1]}, [%[dst]]!\n"
   166 	".elseif \\size == 1\n"
   167 	"    vst1.16 {d6[0]}, [%[dst]]!\n"
   168 	".endif\n"
   169 	".endm\n"
       	/* clipping constants: q4 = {d8, d9} is added and q5 = {d10, d11} is
       	 * subtracted, both with saturation, in convert_macroblock */
   171 	"vmov.u8     d8, #15\n" /* add this to U/V to saturate upper boundary */
   172 	"vmov.u8     d9, #20\n" /* add this to Y to saturate upper boundary */
   173 	"vmov.u8     d10, #31\n" /* sub this from U/V to saturate lower boundary */
   174 	"vmov.u8     d11, #36\n" /* sub this from Y to saturate lower boundary */
       	/* multiplier constants used by convert_macroblock */
   176 	"vmov.u8     d26, #16\n" /* not referenced by convert_macroblock */
   177 	"vmov.u8     d27, #149\n" /* Y multiplier */
   178 	"vmov.u8     d28, #204\n" /* V multiplier for red */
   179 	"vmov.u8     d29, #50\n" /* U multiplier for green */
   180 	"vmov.u8     d30, #104\n" /* V multiplier for green, and first U multiplier for blue */
   181 	"vmov.u8     d31, #154\n" /* second U multiplier for blue */
       	/* optional leading pixel: when oddflag is non-zero, convert one pixel on
       	 * its own and count it against n */
   183 	"cmp         %[oddflag], #0\n"
   184 	"beq         1f\n"
   185 	"convert_macroblock 1\n"
   186 	"sub         %[n], %[n], #1\n"
   187     "1:\n"
       	/* main loop: n is biased by -16 so that it stays non-negative while at
       	 * least 16 pixels remain; the second "1:" label is the loop head that
       	 * "bge 1b" jumps back to */
   188 	"subs        %[n], %[n], #16\n"
   189 	"blt         2f\n"
   190     "1:\n"
   191 	"convert_macroblock 16\n"
   192 	"subs        %[n], %[n], #16\n"
   193 	"bge         1b\n"
   194     "2:\n"
       	/* tail: bits 3..0 of the biased n select one 8-, 4-, 2- and 1-pixel
       	 * macroblock each for the remaining pixels */
   195 	"tst         %[n], #8\n"
   196 	"beq         3f\n"
   197 	"convert_macroblock 8\n"
   198     "3:\n"
   199 	"tst         %[n], #4\n"
   200 	"beq         4f\n"
   201 	"convert_macroblock 4\n"
   202     "4:\n"
   203 	"tst         %[n], #2\n"
   204 	"beq         5f\n"
   205 	"convert_macroblock 2\n"
   206     "5:\n"
   207 	"tst         %[n], #1\n"
   208 	"beq         6f\n"
   209 	"convert_macroblock 1\n"
   210     "6:\n"
       	/* remove the macro definition again once all expansions are emitted */
   211 	".purgem convert_macroblock\n"
       	/* outputs: the pointers and n are read-write, early-clobber operands
       	 * because the assembly modifies them while the inputs are still in use */
   212 	: [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n)
       	/* inputs: addresses of the accumulator tables and the odd-pixel flag */
   213 	: [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]),
   214 	  [oddflag] "r" (oddflag)
       	/* clobbers: condition flags, memory (the stores through dst are not
       	 * expressed as operands) and every NEON register written above; d12-d15
       	 * are left out because the assembly does not touch them */
   215 	: "cc", "memory",
   216 	  "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
   217 	  "d8",  "d9",  "d10", "d11", /* "d12", "d13", "d14", "d15", */
   218 	  "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
   219 	  "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
   220     );
   221 }
   222 #  endif // MOZILLA_MAY_SUPPORT_NEON
   224 } // namespace gfx
   226 } // namespace mozilla
   228 #endif // HAVE_YCBCR_TO_RGB565

mercurial