media/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk avoidance requirements
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /*
     2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS.  All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "./vp9_rtcd.h"
    12 #include "vp9/common/vp9_common.h"
    /*
     * Helper passes for the 16x16 inverse DCT. They are only declared here; the
     * definitions live outside this file (presumably in NEON assembly -- confirm).
     *
     * As used by the callers in this file:
     *   - pass1 handles the even-indexed elements (0, 2, ..., 14) and stores its
     *     stage 6 result in |output|.
     *   - pass2 handles the odd-indexed elements (1, 3, ..., 15), combines them
     *     with |pass1Output| and produces the stage 7 result.  With
     *     |skip_adding| == 0 the result is stored in |output|; with
     *     |skip_adding| == 1 it is added to |dest|.
     */
    void vp9_idct16x16_256_add_neon_pass1(
        const int16_t *input, int16_t *output, int output_stride);

    void vp9_idct16x16_256_add_neon_pass2(
        const int16_t *src, int16_t *output, int16_t *pass1Output,
        int16_t skip_adding, uint8_t *dest, int dest_stride);

    /* Same two passes, in the variant used for the "10" row transform. */
    void vp9_idct16x16_10_add_neon_pass1(
        const int16_t *input, int16_t *output, int output_stride);

    void vp9_idct16x16_10_add_neon_pass2(
        const int16_t *src, int16_t *output, int16_t *pass1Output,
        int16_t skip_adding, uint8_t *dest, int dest_stride);

    /*
     * On ARM NEON, d8-d15 are callee-saved registers, so they have to be
     * preserved around the passes above.  The callers in this file hand both
     * routines the same int64_t[8] buffer as |store|.
     */
    void vp9_push_neon(int64_t *store);
    void vp9_pop_neon(int64_t *store);
    /*
     * 16x16 inverse DCT, full (256) variant: transforms |input| and adds the
     * result to the pixels at |dest| (row pitch |dest_stride|).
     *
     * The work is done in four half-block steps -- upper rows, lower rows, left
     * columns, right columns.  Each step is a pass1/pass2 pair:
     *   pass1: even elements 0, 2, ..., 14 -> stage 6 result in even_stage
     *   pass2: odd elements 1, 3, ..., 15, combined with even_stage -> stage 7
     * The row steps store their result in row_result (skip_adding == 0); the
     * column steps add their result to |dest| (skip_adding == 1).
     */
    void vp9_idct16x16_256_add_neon(const int16_t *input,
                                    uint8_t *dest, int dest_stride) {
      int64_t saved_d_regs[8];
      int16_t even_stage[16 * 16] = {0};
      int16_t row_result[16 * 16] = {0};

      /* Start of the second half of each 16x16 coefficient buffer. */
      const int16_t *const lower_rows = input + 8 * 16;
      const int16_t *const right_cols = row_result + 8 * 16;

      /* d8-d15 are callee-saved: stash them before running the passes. */
      vp9_push_neon(saved_d_regs);

      /* Rows, upper 8: result goes to the start of row_result. */
      vp9_idct16x16_256_add_neon_pass1(input, even_stage, 8);
      vp9_idct16x16_256_add_neon_pass2(input + 1, row_result, even_stage,
                                       0, dest, dest_stride);

      /* Rows, lower 8: result goes to row_result + 8. */
      vp9_idct16x16_256_add_neon_pass1(lower_rows, even_stage, 8);
      vp9_idct16x16_256_add_neon_pass2(lower_rows + 1, row_result + 8, even_stage,
                                       0, dest, dest_stride);

      /* Columns, left 8: result is added to dest. */
      vp9_idct16x16_256_add_neon_pass1(row_result, even_stage, 8);
      vp9_idct16x16_256_add_neon_pass2(row_result + 1, row_result, even_stage,
                                       1, dest, dest_stride);

      /* Columns, right 8: result is added to dest + 8. */
      vp9_idct16x16_256_add_neon_pass1(right_cols, even_stage, 8);
      vp9_idct16x16_256_add_neon_pass2(right_cols + 1, row_result + 8, even_stage,
                                       1, dest + 8, dest_stride);

      /* Put d8-d15 back the way the caller left them. */
      vp9_pop_neon(saved_d_regs);
    }
   /*
    * 16x16 inverse DCT, "10" variant: transforms |input| and adds the result
    * to the pixels at |dest| (row pitch |dest_stride|).
    *
    * Only the upper 8 rows go through the row transform; the lower 8 rows are
    * all zeros and are skipped, so the matching part of row_result keeps its
    * zero initialisation.  The column transform then runs the regular 256
    * passes over both halves.
    *
    * Each step is a pass1/pass2 pair:
    *   pass1: even elements 0, 2, ..., 14 -> stage 6 result in even_stage
    *   pass2: odd elements 1, 3, ..., 15, combined with even_stage -> stage 7
    */
   void vp9_idct16x16_10_add_neon(const int16_t *input,
                                  uint8_t *dest, int dest_stride) {
     int64_t saved_d_regs[8];
     int16_t even_stage[16 * 16] = {0};
     int16_t row_result[16 * 16] = {0};

     /* Start of the second half of the intermediate buffer. */
     const int16_t *const right_cols = row_result + 8 * 16;

     /* d8-d15 are callee-saved: stash them before running the passes. */
     vp9_push_neon(saved_d_regs);

     /* Rows, upper 8 only: result is stored in row_result (skip_adding == 0). */
     vp9_idct16x16_10_add_neon_pass1(input, even_stage, 8);
     vp9_idct16x16_10_add_neon_pass2(input + 1, row_result, even_stage,
                                     0, dest, dest_stride);

     /* Columns, left 8: result is added to dest (skip_adding == 1). */
     vp9_idct16x16_256_add_neon_pass1(row_result, even_stage, 8);
     vp9_idct16x16_256_add_neon_pass2(row_result + 1, row_result, even_stage,
                                      1, dest, dest_stride);

     /* Columns, right 8: result is added to dest + 8. */
     vp9_idct16x16_256_add_neon_pass1(right_cols, even_stage, 8);
     vp9_idct16x16_256_add_neon_pass2(right_cols + 1, row_result + 8, even_stage,
                                      1, dest + 8, dest_stride);

     /* Put d8-d15 back the way the caller left them. */
     vp9_pop_neon(saved_d_regs);
   }

mercurial