Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 4 | |
michael@0 | 5 | #include "vis_proto.h" |
michael@0 | 6 | |
michael@0 | 7 | /***************************************************************/ |
michael@0 | 8 | |
/* Scalar aliases used by the multiply/accumulate code in this file. */
typedef int          t_s32;
typedef unsigned int t_u32;

/*
 * 64-bit integers: plain `long` when __sparcv9 is defined, `long long`
 * otherwise (presumably because `long` is only 64 bits wide on the
 * former -- confirm against the supported build targets).
 */
#ifdef __sparcv9
typedef long          t_s64;
typedef unsigned long t_u64;
#else
typedef long long          t_s64;
typedef unsigned long long t_u64;
#endif

typedef double t_d64;
michael@0 | 19 | |
michael@0 | 20 | /***************************************************************/ |
michael@0 | 21 | |
michael@0 | 22 | typedef union { |
michael@0 | 23 | t_d64 d64; |
michael@0 | 24 | struct { |
michael@0 | 25 | t_s32 i0; |
michael@0 | 26 | t_s32 i1; |
michael@0 | 27 | } i32s; |
michael@0 | 28 | } d64_2_i32; |
michael@0 | 29 | |
michael@0 | 30 | /***************************************************************/ |
michael@0 | 31 | |
/* Size argument handed to DEF_VARS by the general (n != 8, n != 16) path
 * of mul_add; it fixes how many scratch entries that path declares. */
#define BUFF_SIZE  256

/*
 * mul_add uses a single product per word for multipliers below
 * (1 << A_BITS).  Larger multipliers are split into their low A_BITS
 * bits (selected by A_MASK) and the remaining high bits.
 */
#define A_BITS     19
#define A_MASK     ((1 << A_BITS) - 1)
michael@0 | 36 | |
michael@0 | 37 | /***************************************************************/ |
michael@0 | 38 | |
michael@0 | 39 | static t_u64 mask_cnst[] = { |
michael@0 | 40 | 0x8000000080000000ull |
michael@0 | 41 | }; |
michael@0 | 42 | |
michael@0 | 43 | /***************************************************************/ |
michael@0 | 44 | |
/*
 * DEF_VARS(N): declare the locals that the MUL_* / ADD_* macros operate on.
 * Must be expanded where declarations are allowed, with `y` and `a` in
 * scope.  The expansion intentionally ends without a semicolon.
 *
 *   py   - y viewed as an array of t_d64 (two 32-bit words per element)
 *   mask - mask_cnst[0] reinterpreted as a t_d64
 *   ca   - 2^31 - 1 held as a t_d64
 *   da   - the multiplier a converted to t_d64
 *   buff - scratch array of N t_s64 products
 *   s    - 64-bit accumulator for one output word
 *   dy   - overlay used to split a t_d64 into two t_s32 halves
 */
#define DEF_VARS(N)                     \
  t_d64 *py = (t_d64 *)y;               \
  t_d64 mask = *((t_d64 *)mask_cnst);   \
  t_d64 ca = (1u << 31) - 1;            \
  t_d64 da = (t_d64)a;                  \
  t_s64 buff[N], s;                     \
  d64_2_i32 dy
michael@0 | 52 | |
michael@0 | 53 | /***************************************************************/ |
michael@0 | 54 | |
/*
 * MUL_U32_S64_2(i): multiply the two 32-bit words held in py[i] by da and
 * store the products in buff[2*i] and buff[2*i+1].
 *
 * Uses the locals declared by DEF_VARS.  Assuming vis_fxnor (declared in
 * vis_proto.h) is a 64-bit bitwise XNOR -- TODO confirm -- the XNOR with
 * `mask` maps each unsigned word w to the signed value (2^31 - 1 - w), so
 * `ca - (t_d64)half` recovers w as a non-negative double.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct under an unbraced if/else.
 */
#define MUL_U32_S64_2(i)                                \
  do {                                                  \
    dy.d64 = vis_fxnor(mask, py[i]);                    \
    buff[2*(i)  ] = (ca - (t_d64)dy.i32s.i0) * da;      \
    buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da;      \
  } while (0)

/*
 * MUL_U32_S64_2_D(i): split-multiplier variant.  Each of the two words in
 * py[i] is multiplied by both da and db, giving four products stored in
 * buff[4*i] .. buff[4*i+3] in the order (w0*da, w0*db, w1*da, w1*db).
 * Additionally requires the locals d0, d1 and db to be in scope.
 */
#define MUL_U32_S64_2_D(i)                              \
  do {                                                  \
    dy.d64 = vis_fxnor(mask, py[i]);                    \
    d0 = ca - (t_d64)dy.i32s.i0;                        \
    d1 = ca - (t_d64)dy.i32s.i1;                        \
    buff[4*(i)  ] = (t_s64)(d0 * da);                   \
    buff[4*(i)+1] = (t_s64)(d0 * db);                   \
    buff[4*(i)+2] = (t_s64)(d1 * da);                   \
    buff[4*(i)+3] = (t_s64)(d1 * db);                   \
  } while (0)
michael@0 | 68 | |
michael@0 | 69 | /***************************************************************/ |
michael@0 | 70 | |
/*
 * ADD_S64_U32(i): add product buff[i], input word x[i] and the running
 * carry c; store the low 32 bits in z[i] and keep the bits above bit 31
 * as the new carry.  Uses the locals s and c of the expanding scope.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct under an unbraced if/else.
 */
#define ADD_S64_U32(i)                                  \
  do {                                                  \
    s = buff[i] + x[i] + c;                             \
    z[i] = s;                                           \
    c = (s >> 32);                                      \
  } while (0)

/*
 * ADD_S64_U32_D(i): split-multiplier variant.  buff[2*i] holds the product
 * with the low A_BITS bits of the multiplier and buff[2*i+1] the product
 * with the remaining high bits, so the latter is shifted left by A_BITS
 * before the two are summed with x[i] and the unsigned carry uc.
 */
#define ADD_S64_U32_D(i)                                                \
  do {                                                                  \
    s = buff[2*(i)] + (((t_s64)(buff[2*(i)+1])) << A_BITS) + x[i] + uc; \
    z[i] = s;                                                           \
    uc = ((t_u64)s >> 32);                                              \
  } while (0)
michael@0 | 80 | |
michael@0 | 81 | /***************************************************************/ |
michael@0 | 82 | |
/*
 * MUL_U32_S64_8(i): four consecutive MUL_U32_S64_2 steps starting at pair
 * index i, i.e. eight input words.
 *
 * The argument is parenthesized so that an expression such as `k << 1`
 * is offset correctly, and the body is wrapped in do { } while (0) so the
 * macro expands to exactly one statement.
 */
#define MUL_U32_S64_8(i)                \
  do {                                  \
    MUL_U32_S64_2((i));                 \
    MUL_U32_S64_2((i)+1);               \
    MUL_U32_S64_2((i)+2);               \
    MUL_U32_S64_2((i)+3);               \
  } while (0)

/* MUL_U32_S64_D_8(i): same unrolling for the split-multiplier step
 * MUL_U32_S64_2_D. */
#define MUL_U32_S64_D_8(i)              \
  do {                                  \
    MUL_U32_S64_2_D((i));               \
    MUL_U32_S64_2_D((i)+1);             \
    MUL_U32_S64_2_D((i)+2);             \
    MUL_U32_S64_2_D((i)+3);             \
  } while (0)
michael@0 | 94 | |
michael@0 | 95 | /***************************************************************/ |
michael@0 | 96 | |
/*
 * ADD_S64_U32_8(i): eight consecutive ADD_S64_U32 steps for word indices
 * i .. i+7, in increasing order so the carry propagates upward.
 *
 * The argument is parenthesized so that an expression argument is offset
 * correctly, and the body is wrapped in do { } while (0) so the macro
 * expands to exactly one statement.
 */
#define ADD_S64_U32_8(i)                \
  do {                                  \
    ADD_S64_U32((i));                   \
    ADD_S64_U32((i)+1);                 \
    ADD_S64_U32((i)+2);                 \
    ADD_S64_U32((i)+3);                 \
    ADD_S64_U32((i)+4);                 \
    ADD_S64_U32((i)+5);                 \
    ADD_S64_U32((i)+6);                 \
    ADD_S64_U32((i)+7);                 \
  } while (0)

/* ADD_S64_U32_D_8(i): same unrolling for the split-multiplier step
 * ADD_S64_U32_D. */
#define ADD_S64_U32_D_8(i)              \
  do {                                  \
    ADD_S64_U32_D((i));                 \
    ADD_S64_U32_D((i)+1);               \
    ADD_S64_U32_D((i)+2);               \
    ADD_S64_U32_D((i)+3);               \
    ADD_S64_U32_D((i)+4);               \
    ADD_S64_U32_D((i)+5);               \
    ADD_S64_U32_D((i)+6);               \
    ADD_S64_U32_D((i)+7);               \
  } while (0)
michael@0 | 116 | |
michael@0 | 117 | /***************************************************************/ |
michael@0 | 118 | |
michael@0 | 119 | t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a) |
michael@0 | 120 | { |
michael@0 | 121 | if (a < (1 << A_BITS)) { |
michael@0 | 122 | |
michael@0 | 123 | if (n == 8) { |
michael@0 | 124 | DEF_VARS(8); |
michael@0 | 125 | t_s32 c = 0; |
michael@0 | 126 | |
michael@0 | 127 | MUL_U32_S64_8(0); |
michael@0 | 128 | ADD_S64_U32_8(0); |
michael@0 | 129 | |
michael@0 | 130 | return c; |
michael@0 | 131 | |
michael@0 | 132 | } else if (n == 16) { |
michael@0 | 133 | DEF_VARS(16); |
michael@0 | 134 | t_s32 c = 0; |
michael@0 | 135 | |
michael@0 | 136 | MUL_U32_S64_8(0); |
michael@0 | 137 | MUL_U32_S64_8(4); |
michael@0 | 138 | ADD_S64_U32_8(0); |
michael@0 | 139 | ADD_S64_U32_8(8); |
michael@0 | 140 | |
michael@0 | 141 | return c; |
michael@0 | 142 | |
michael@0 | 143 | } else { |
michael@0 | 144 | DEF_VARS(BUFF_SIZE); |
michael@0 | 145 | t_s32 i, c = 0; |
michael@0 | 146 | |
michael@0 | 147 | #pragma pipeloop(0) |
michael@0 | 148 | for (i = 0; i < (n+1)/2; i ++) { |
michael@0 | 149 | MUL_U32_S64_2(i); |
michael@0 | 150 | } |
michael@0 | 151 | |
michael@0 | 152 | #pragma pipeloop(0) |
michael@0 | 153 | for (i = 0; i < n; i ++) { |
michael@0 | 154 | ADD_S64_U32(i); |
michael@0 | 155 | } |
michael@0 | 156 | |
michael@0 | 157 | return c; |
michael@0 | 158 | |
michael@0 | 159 | } |
michael@0 | 160 | } else { |
michael@0 | 161 | |
michael@0 | 162 | if (n == 8) { |
michael@0 | 163 | DEF_VARS(2*8); |
michael@0 | 164 | t_d64 d0, d1, db; |
michael@0 | 165 | t_u32 uc = 0; |
michael@0 | 166 | |
michael@0 | 167 | da = (t_d64)(a & A_MASK); |
michael@0 | 168 | db = (t_d64)(a >> A_BITS); |
michael@0 | 169 | |
michael@0 | 170 | MUL_U32_S64_D_8(0); |
michael@0 | 171 | ADD_S64_U32_D_8(0); |
michael@0 | 172 | |
michael@0 | 173 | return uc; |
michael@0 | 174 | |
michael@0 | 175 | } else if (n == 16) { |
michael@0 | 176 | DEF_VARS(2*16); |
michael@0 | 177 | t_d64 d0, d1, db; |
michael@0 | 178 | t_u32 uc = 0; |
michael@0 | 179 | |
michael@0 | 180 | da = (t_d64)(a & A_MASK); |
michael@0 | 181 | db = (t_d64)(a >> A_BITS); |
michael@0 | 182 | |
michael@0 | 183 | MUL_U32_S64_D_8(0); |
michael@0 | 184 | MUL_U32_S64_D_8(4); |
michael@0 | 185 | ADD_S64_U32_D_8(0); |
michael@0 | 186 | ADD_S64_U32_D_8(8); |
michael@0 | 187 | |
michael@0 | 188 | return uc; |
michael@0 | 189 | |
michael@0 | 190 | } else { |
michael@0 | 191 | DEF_VARS(2*BUFF_SIZE); |
michael@0 | 192 | t_d64 d0, d1, db; |
michael@0 | 193 | t_u32 i, uc = 0; |
michael@0 | 194 | |
michael@0 | 195 | da = (t_d64)(a & A_MASK); |
michael@0 | 196 | db = (t_d64)(a >> A_BITS); |
michael@0 | 197 | |
michael@0 | 198 | #pragma pipeloop(0) |
michael@0 | 199 | for (i = 0; i < (n+1)/2; i ++) { |
michael@0 | 200 | MUL_U32_S64_2_D(i); |
michael@0 | 201 | } |
michael@0 | 202 | |
michael@0 | 203 | #pragma pipeloop(0) |
michael@0 | 204 | for (i = 0; i < n; i ++) { |
michael@0 | 205 | ADD_S64_U32_D(i); |
michael@0 | 206 | } |
michael@0 | 207 | |
michael@0 | 208 | return uc; |
michael@0 | 209 | } |
michael@0 | 210 | } |
michael@0 | 211 | } |
michael@0 | 212 | |
michael@0 | 213 | /***************************************************************/ |
michael@0 | 214 | |
michael@0 | 215 | t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a) |
michael@0 | 216 | { |
michael@0 | 217 | return mul_add(x, x, y, n, a); |
michael@0 | 218 | } |
michael@0 | 219 | |
michael@0 | 220 | /***************************************************************/ |