security/nss/lib/freebl/mpi/mpv_sparc.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 #include "vis_proto.h"
michael@0 6
michael@0 7 /***************************************************************/
michael@0 8
/* Fixed-width scalar aliases used by the multiply/accumulate code below. */
typedef int t_s32;
typedef unsigned int t_u32;
#if defined(__sparcv9)
/* This branch relies on `long` being 64 bits wide when __sparcv9 is set. */
typedef long t_s64;
typedef unsigned long t_u64;
#else
typedef long long t_s64;
typedef unsigned long long t_u64;
#endif
typedef double t_d64;

/***************************************************************/

/*
 * Overlay of one 64-bit double with two 32-bit signed integers.
 * i0 aliases the first four bytes of d64 and i1 the following four, so a
 * 64-bit value produced in a floating-point register can be read back as
 * two separate 32-bit words.
 */
typedef union {
    t_d64 d64;
    struct {
        t_s32 i0;
        t_s32 i1;
    } i32s;
} d64_2_i32;

/***************************************************************/

/*
 * Number of 64-bit scratch slots reserved on the stack by the generic
 * (n is neither 8 nor 16) paths of mul_add; the wide-multiplier path
 * reserves 2*BUFF_SIZE.
 */
#define BUFF_SIZE 256

/*
 * Multipliers below (1 << A_BITS) are handled with a single product per
 * digit; larger ones are split into their low A_BITS bits (A_MASK) and the
 * remaining high bits.
 */
#define A_BITS 19
#define A_MASK ((1 << A_BITS) - 1)

/***************************************************************/

/*
 * Bit pattern with the top bit of each 32-bit half set.  DEF_VARS reads it
 * back as a t_d64 and the MUL_* macros pass it to vis_fxnor.  The table is
 * never written, so it is const.
 */
static const t_u64 mask_cnst[] = {
    0x8000000080000000ull
};
michael@0 42
michael@0 43 /***************************************************************/
michael@0 44
/*
 * DEF_VARS(N) - declare the locals every mul_add path works with.
 *
 * Expects `y` and `a` (the mul_add parameters) to be in scope.
 *   py    y viewed as an array of 64-bit doubles (two 32-bit digits each)
 *   mask  mask_cnst reinterpreted as a double, for vis_fxnor
 *   ca    2^31 - 1 as a double
 *   da    the multiplier as a double (wide paths overwrite it with the
 *         low A_BITS bits of a)
 *   buff  N 64-bit scratch slots holding the per-digit products
 *   s     scratch for one 64-bit partial sum
 *   dy    union used to split a 64-bit VIS result into two 32-bit words
 *
 * NOTE(review): py and mask are obtained through pointer casts, so y is
 * presumably required to be 8-byte aligned - confirm against callers.
 */
#define DEF_VARS(N)                                             \
    t_d64 *py = (t_d64*)y;                                      \
    t_d64 mask = *((t_d64*)mask_cnst);                          \
    t_d64 ca = (1u << 31) - 1;                                  \
    t_d64 da = (t_d64)a;                                        \
    t_s64 buff[N], s;                                           \
    d64_2_i32 dy

/***************************************************************/

/*
 * MUL_U32_S64_2(i) - multiply the two 32-bit digits held in py[i] by da and
 * store the products in buff[2*i] and buff[2*i+1].
 *
 * Assuming vis_fxnor is the VIS bitwise XNOR, each 32-bit half w of py[i]
 * becomes ~(w ^ 0x80000000), which read as a signed int is (2^31-1) - w.
 * Subtracting that from ca therefore yields w as a non-negative double,
 * i.e. an unsigned-to-double conversion built from signed conversions.
 */
#define MUL_U32_S64_2(i)                                            \
    do {                                                            \
        dy.d64 = vis_fxnor(mask, py[(i)]);                          \
        buff[2*(i)    ] = (t_s64)((ca - (t_d64)dy.i32s.i0) * da);   \
        buff[2*(i) + 1] = (t_s64)((ca - (t_d64)dy.i32s.i1) * da);   \
    } while (0)

/*
 * MUL_U32_S64_2_D(i) - wide-multiplier variant of MUL_U32_S64_2.
 *
 * For each of the two digits in py[i] it stores two products: digit * da
 * (low part of the multiplier) and digit * db (high part).  The four
 * results land in buff[4*i .. 4*i+3].  Needs d0, d1 and db in scope.
 */
#define MUL_U32_S64_2_D(i)                                          \
    do {                                                            \
        dy.d64 = vis_fxnor(mask, py[(i)]);                          \
        d0 = ca - (t_d64)dy.i32s.i0;                                \
        d1 = ca - (t_d64)dy.i32s.i1;                                \
        buff[4*(i)    ] = (t_s64)(d0 * da);                         \
        buff[4*(i) + 1] = (t_s64)(d0 * db);                         \
        buff[4*(i) + 2] = (t_s64)(d1 * da);                         \
        buff[4*(i) + 3] = (t_s64)(d1 * db);                         \
    } while (0)

/***************************************************************/

/*
 * ADD_S64_U32(i) - z[i] = low 32 bits of (buff[i] + x[i] + c); the bits
 * above bit 31 become the new carry c.
 */
#define ADD_S64_U32(i)                                              \
    do {                                                            \
        s = buff[(i)] + x[(i)] + c;                                 \
        z[(i)] = s;                                                 \
        c = (s >> 32);                                              \
    } while (0)

/*
 * ADD_S64_U32_D(i) - wide-multiplier variant of ADD_S64_U32.
 *
 * Recombines the two partial products of digit i as
 *   buff[2*i] + (buff[2*i+1] << A_BITS)
 * and adds x[i] and the carry uc.  The total can reach 2^64 - 1, so the
 * shift and the additions are carried out in t_u64: doing them in the
 * signed t_s64 would overflow, which is undefined behaviour.
 */
#define ADD_S64_U32_D(i)                                            \
    do {                                                            \
        s = (t_s64)((t_u64)buff[2*(i)]                              \
                    + ((t_u64)buff[2*(i) + 1] << A_BITS)            \
                    + x[(i)] + uc);                                 \
        z[(i)] = s;                                                 \
        uc = ((t_u64)s >> 32);                                      \
    } while (0)

/***************************************************************/

/* Four consecutive MUL_U32_S64_2 steps, i.e. eight digits. */
#define MUL_U32_S64_8(i)                                            \
    do {                                                            \
        MUL_U32_S64_2((i));                                         \
        MUL_U32_S64_2((i) + 1);                                     \
        MUL_U32_S64_2((i) + 2);                                     \
        MUL_U32_S64_2((i) + 3);                                     \
    } while (0)

/* Four consecutive MUL_U32_S64_2_D steps, i.e. eight digits. */
#define MUL_U32_S64_D_8(i)                                          \
    do {                                                            \
        MUL_U32_S64_2_D((i));                                       \
        MUL_U32_S64_2_D((i) + 1);                                   \
        MUL_U32_S64_2_D((i) + 2);                                   \
        MUL_U32_S64_2_D((i) + 3);                                   \
    } while (0)

/***************************************************************/

/* Eight consecutive ADD_S64_U32 steps starting at digit i. */
#define ADD_S64_U32_8(i)                                            \
    do {                                                            \
        ADD_S64_U32((i));                                           \
        ADD_S64_U32((i) + 1);                                       \
        ADD_S64_U32((i) + 2);                                       \
        ADD_S64_U32((i) + 3);                                       \
        ADD_S64_U32((i) + 4);                                       \
        ADD_S64_U32((i) + 5);                                       \
        ADD_S64_U32((i) + 6);                                       \
        ADD_S64_U32((i) + 7);                                       \
    } while (0)

/* Eight consecutive ADD_S64_U32_D steps starting at digit i. */
#define ADD_S64_U32_D_8(i)                                          \
    do {                                                            \
        ADD_S64_U32_D((i));                                         \
        ADD_S64_U32_D((i) + 1);                                     \
        ADD_S64_U32_D((i) + 2);                                     \
        ADD_S64_U32_D((i) + 3);                                     \
        ADD_S64_U32_D((i) + 4);                                     \
        ADD_S64_U32_D((i) + 5);                                     \
        ADD_S64_U32_D((i) + 6);                                     \
        ADD_S64_U32_D((i) + 7);                                     \
    } while (0)
michael@0 116
michael@0 117 /***************************************************************/
michael@0 118
michael@0 119 t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
michael@0 120 {
michael@0 121 if (a < (1 << A_BITS)) {
michael@0 122
michael@0 123 if (n == 8) {
michael@0 124 DEF_VARS(8);
michael@0 125 t_s32 c = 0;
michael@0 126
michael@0 127 MUL_U32_S64_8(0);
michael@0 128 ADD_S64_U32_8(0);
michael@0 129
michael@0 130 return c;
michael@0 131
michael@0 132 } else if (n == 16) {
michael@0 133 DEF_VARS(16);
michael@0 134 t_s32 c = 0;
michael@0 135
michael@0 136 MUL_U32_S64_8(0);
michael@0 137 MUL_U32_S64_8(4);
michael@0 138 ADD_S64_U32_8(0);
michael@0 139 ADD_S64_U32_8(8);
michael@0 140
michael@0 141 return c;
michael@0 142
michael@0 143 } else {
michael@0 144 DEF_VARS(BUFF_SIZE);
michael@0 145 t_s32 i, c = 0;
michael@0 146
michael@0 147 #pragma pipeloop(0)
michael@0 148 for (i = 0; i < (n+1)/2; i ++) {
michael@0 149 MUL_U32_S64_2(i);
michael@0 150 }
michael@0 151
michael@0 152 #pragma pipeloop(0)
michael@0 153 for (i = 0; i < n; i ++) {
michael@0 154 ADD_S64_U32(i);
michael@0 155 }
michael@0 156
michael@0 157 return c;
michael@0 158
michael@0 159 }
michael@0 160 } else {
michael@0 161
michael@0 162 if (n == 8) {
michael@0 163 DEF_VARS(2*8);
michael@0 164 t_d64 d0, d1, db;
michael@0 165 t_u32 uc = 0;
michael@0 166
michael@0 167 da = (t_d64)(a & A_MASK);
michael@0 168 db = (t_d64)(a >> A_BITS);
michael@0 169
michael@0 170 MUL_U32_S64_D_8(0);
michael@0 171 ADD_S64_U32_D_8(0);
michael@0 172
michael@0 173 return uc;
michael@0 174
michael@0 175 } else if (n == 16) {
michael@0 176 DEF_VARS(2*16);
michael@0 177 t_d64 d0, d1, db;
michael@0 178 t_u32 uc = 0;
michael@0 179
michael@0 180 da = (t_d64)(a & A_MASK);
michael@0 181 db = (t_d64)(a >> A_BITS);
michael@0 182
michael@0 183 MUL_U32_S64_D_8(0);
michael@0 184 MUL_U32_S64_D_8(4);
michael@0 185 ADD_S64_U32_D_8(0);
michael@0 186 ADD_S64_U32_D_8(8);
michael@0 187
michael@0 188 return uc;
michael@0 189
michael@0 190 } else {
michael@0 191 DEF_VARS(2*BUFF_SIZE);
michael@0 192 t_d64 d0, d1, db;
michael@0 193 t_u32 i, uc = 0;
michael@0 194
michael@0 195 da = (t_d64)(a & A_MASK);
michael@0 196 db = (t_d64)(a >> A_BITS);
michael@0 197
michael@0 198 #pragma pipeloop(0)
michael@0 199 for (i = 0; i < (n+1)/2; i ++) {
michael@0 200 MUL_U32_S64_2_D(i);
michael@0 201 }
michael@0 202
michael@0 203 #pragma pipeloop(0)
michael@0 204 for (i = 0; i < n; i ++) {
michael@0 205 ADD_S64_U32_D(i);
michael@0 206 }
michael@0 207
michael@0 208 return uc;
michael@0 209 }
michael@0 210 }
michael@0 211 }
michael@0 212
michael@0 213 /***************************************************************/
michael@0 214
michael@0 215 t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
michael@0 216 {
michael@0 217 return mul_add(x, x, y, n, a);
michael@0 218 }
michael@0 219
michael@0 220 /***************************************************************/

mercurial