The Tor Browser: media/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /*

     2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.

     3  *

     4  *  Use of this source code is governed by a BSD-style license

     5  *  that can be found in the LICENSE file in the root of the source

     6  *  tree. An additional intellectual property rights grant can be found

     7  *  in the file PATENTS.  All contributing project authors may

     8  *  be found in the AUTHORS file in the root of the source tree.

     9  */

    11 #include "./vp9_rtcd.h"

    12 #include "vp9/common/vp9_common.h"

    14 void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,

    15                                       int16_t *output,

    16                                       int output_stride);

    17 void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,

    18                                       int16_t *output,

    19                                       int16_t *pass1Output,

    20                                       int16_t skip_adding,

    21                                       uint8_t *dest,

    22                                       int dest_stride);

    23 void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,

    24                                      int16_t *output,

    25                                      int output_stride);

    26 void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,

    27                                      int16_t *output,

    28                                      int16_t *pass1Output,

    29                                      int16_t skip_adding,

    30                                      uint8_t *dest,

    31                                      int dest_stride);

    33 /* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */

    34 extern void vp9_push_neon(int64_t *store);

    35 extern void vp9_pop_neon(int64_t *store);

    37 void vp9_idct16x16_256_add_neon(const int16_t *input,

    38                                 uint8_t *dest, int dest_stride) {

    39   int64_t store_reg[8];

    40   int16_t pass1_output[16*16] = {0};

    41   int16_t row_idct_output[16*16] = {0};

    43   // save d8-d15 register values.

    44   vp9_push_neon(store_reg);

    46   /* Parallel idct on the upper 8 rows */

    47   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

    48   // stage 6 result in pass1_output.

    49   vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);

    51   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

    52   // with result in pass1(pass1_output) to calculate final result in stage 7

    53   // which will be saved into row_idct_output.

    54   vp9_idct16x16_256_add_neon_pass2(input+1,

    55                                      row_idct_output,

    56                                      pass1_output,

    57                                      0,

    58                                      dest,

    59                                      dest_stride);

    61   /* Parallel idct on the lower 8 rows */

    62   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

    63   // stage 6 result in pass1_output.

    64   vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);

    66   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

    67   // with result in pass1(pass1_output) to calculate final result in stage 7

    68   // which will be saved into row_idct_output.

    69   vp9_idct16x16_256_add_neon_pass2(input+8*16+1,

    70                                      row_idct_output+8,

    71                                      pass1_output,

    72                                      0,

    73                                      dest,

    74                                      dest_stride);

    76   /* Parallel idct on the left 8 columns */

    77   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

    78   // stage 6 result in pass1_output.

    79   vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

    81   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

    82   // with result in pass1(pass1_output) to calculate final result in stage 7.

    83   // Then add the result to the destination data.

    84   vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

    85                                      row_idct_output,

    86                                      pass1_output,

    87                                      1,

    88                                      dest,

    89                                      dest_stride);

    91   /* Parallel idct on the right 8 columns */

    92   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

    93   // stage 6 result in pass1_output.

    94   vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

    96   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

    97   // with result in pass1(pass1_output) to calculate final result in stage 7.

    98   // Then add the result to the destination data.

    99   vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

   100                                      row_idct_output+8,

   101                                      pass1_output,

   102                                      1,

   103                                      dest+8,

   104                                      dest_stride);

   106   // restore d8-d15 register values.

   107   vp9_pop_neon(store_reg);

   109   return;

   110 }

   112 void vp9_idct16x16_10_add_neon(const int16_t *input,

   113                                uint8_t *dest, int dest_stride) {

   114   int64_t store_reg[8];

   115   int16_t pass1_output[16*16] = {0};

   116   int16_t row_idct_output[16*16] = {0};

   118   // save d8-d15 register values.

   119   vp9_push_neon(store_reg);

   121   /* Parallel idct on the upper 8 rows */

   122   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   123   // stage 6 result in pass1_output.

   124   vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

   126   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   127   // with result in pass1(pass1_output) to calculate final result in stage 7

   128   // which will be saved into row_idct_output.

   129   vp9_idct16x16_10_add_neon_pass2(input+1,

   130                                         row_idct_output,

   131                                         pass1_output,

   132                                         0,

   133                                         dest,

   134                                         dest_stride);

   136   /* Skip Parallel idct on the lower 8 rows as they are all 0s */

   138   /* Parallel idct on the left 8 columns */

   139   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   140   // stage 6 result in pass1_output.

   141   vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

   143   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   144   // with result in pass1(pass1_output) to calculate final result in stage 7.

   145   // Then add the result to the destination data.

   146   vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

   147                                      row_idct_output,

   148                                      pass1_output,

   149                                      1,

   150                                      dest,

   151                                      dest_stride);

   153   /* Parallel idct on the right 8 columns */

   154   // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

   155   // stage 6 result in pass1_output.

   156   vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

   158   // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

   159   // with result in pass1(pass1_output) to calculate final result in stage 7.

   160   // Then add the result to the destination data.

   161   vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

   162                                      row_idct_output+8,

   163                                      pass1_output,

   164                                      1,

   165                                      dest+8,

   166                                      dest_stride);

   168   // restore d8-d15 register values.

   169   vp9_pop_neon(store_reg);

   171   return;

   172 }

The Tor Browser / file revision

media/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c@ac0c01689b40

media/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c