1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/freebl/mpi/mpv_sparc.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,220 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include "vis_proto.h" 1.9 + 1.10 +/***************************************************************/ 1.11 + 1.12 +typedef int t_s32; 1.13 +typedef unsigned int t_u32; 1.14 +#if defined(__sparcv9) 1.15 +typedef long t_s64; 1.16 +typedef unsigned long t_u64; 1.17 +#else 1.18 +typedef long long t_s64; 1.19 +typedef unsigned long long t_u64; 1.20 +#endif 1.21 +typedef double t_d64; 1.22 + 1.23 +/***************************************************************/ 1.24 + 1.25 +typedef union { 1.26 + t_d64 d64; 1.27 + struct { 1.28 + t_s32 i0; 1.29 + t_s32 i1; 1.30 + } i32s; 1.31 +} d64_2_i32; 1.32 + 1.33 +/***************************************************************/ 1.34 + 1.35 +#define BUFF_SIZE 256 1.36 + 1.37 +#define A_BITS 19 1.38 +#define A_MASK ((1 << A_BITS) - 1) 1.39 + 1.40 +/***************************************************************/ 1.41 + 1.42 +static t_u64 mask_cnst[] = { 1.43 + 0x8000000080000000ull 1.44 +}; 1.45 + 1.46 +/***************************************************************/ 1.47 + 1.48 +#define DEF_VARS(N) \ 1.49 + t_d64 *py = (t_d64*)y; \ 1.50 + t_d64 mask = *((t_d64*)mask_cnst); \ 1.51 + t_d64 ca = (1u << 31) - 1; \ 1.52 + t_d64 da = (t_d64)a; \ 1.53 + t_s64 buff[N], s; \ 1.54 + d64_2_i32 dy 1.55 + 1.56 +/***************************************************************/ 1.57 + 1.58 +#define MUL_U32_S64_2(i) \ 1.59 + dy.d64 = vis_fxnor(mask, py[i]); \ 1.60 + buff[2*(i) ] = (ca - (t_d64)dy.i32s.i0) * da; \ 1.61 + buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da 1.62 + 1.63 +#define MUL_U32_S64_2_D(i) \ 1.64 + dy.d64 = vis_fxnor(mask, py[i]); \ 1.65 + d0 = ca - (t_d64)dy.i32s.i0; \ 1.66 + d1 = ca - (t_d64)dy.i32s.i1; \ 1.67 + buff[4*(i) ] = (t_s64)(d0 * da); \ 1.68 + buff[4*(i)+1] = (t_s64)(d0 * db); \ 1.69 + buff[4*(i)+2] = (t_s64)(d1 * da); \ 1.70 + buff[4*(i)+3] = (t_s64)(d1 * db) 1.71 + 1.72 +/***************************************************************/ 1.73 + 1.74 +#define ADD_S64_U32(i) \ 1.75 + s = buff[i] + x[i] + c; \ 1.76 + z[i] = s; \ 1.77 + c = (s >> 32) 1.78 + 1.79 +#define ADD_S64_U32_D(i) \ 1.80 + s = buff[2*(i)] +(((t_s64)(buff[2*(i)+1]))<<A_BITS) + x[i] + uc; \ 1.81 + z[i] = s; \ 1.82 + uc = ((t_u64)s >> 32) 1.83 + 1.84 +/***************************************************************/ 1.85 + 1.86 +#define MUL_U32_S64_8(i) \ 1.87 + MUL_U32_S64_2(i); \ 1.88 + MUL_U32_S64_2(i+1); \ 1.89 + MUL_U32_S64_2(i+2); \ 1.90 + MUL_U32_S64_2(i+3) 1.91 + 1.92 +#define MUL_U32_S64_D_8(i) \ 1.93 + MUL_U32_S64_2_D(i); \ 1.94 + MUL_U32_S64_2_D(i+1); \ 1.95 + MUL_U32_S64_2_D(i+2); \ 1.96 + MUL_U32_S64_2_D(i+3) 1.97 + 1.98 +/***************************************************************/ 1.99 + 1.100 +#define ADD_S64_U32_8(i) \ 1.101 + ADD_S64_U32(i); \ 1.102 + ADD_S64_U32(i+1); \ 1.103 + ADD_S64_U32(i+2); \ 1.104 + ADD_S64_U32(i+3); \ 1.105 + ADD_S64_U32(i+4); \ 1.106 + ADD_S64_U32(i+5); \ 1.107 + ADD_S64_U32(i+6); \ 1.108 + ADD_S64_U32(i+7) 1.109 + 1.110 +#define ADD_S64_U32_D_8(i) \ 1.111 + ADD_S64_U32_D(i); \ 1.112 + ADD_S64_U32_D(i+1); \ 1.113 + ADD_S64_U32_D(i+2); \ 1.114 + ADD_S64_U32_D(i+3); \ 1.115 + ADD_S64_U32_D(i+4); \ 1.116 + ADD_S64_U32_D(i+5); \ 1.117 + ADD_S64_U32_D(i+6); \ 1.118 + ADD_S64_U32_D(i+7) 1.119 + 1.120 +/***************************************************************/ 1.121 + 1.122 +t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a) 1.123 +{ 1.124 + if (a < (1 << A_BITS)) { 1.125 + 1.126 + if (n == 8) { 1.127 + DEF_VARS(8); 1.128 + t_s32 c = 0; 1.129 + 1.130 + MUL_U32_S64_8(0); 1.131 + ADD_S64_U32_8(0); 1.132 + 1.133 + return c; 1.134 + 1.135 + } else if (n == 16) { 1.136 + DEF_VARS(16); 1.137 + t_s32 c = 0; 1.138 + 1.139 + MUL_U32_S64_8(0); 1.140 + MUL_U32_S64_8(4); 1.141 + ADD_S64_U32_8(0); 1.142 + ADD_S64_U32_8(8); 1.143 + 1.144 + return c; 1.145 + 1.146 + } else { 1.147 + DEF_VARS(BUFF_SIZE); 1.148 + t_s32 i, c = 0; 1.149 + 1.150 +#pragma pipeloop(0) 1.151 + for (i = 0; i < (n+1)/2; i ++) { 1.152 + MUL_U32_S64_2(i); 1.153 + } 1.154 + 1.155 +#pragma pipeloop(0) 1.156 + for (i = 0; i < n; i ++) { 1.157 + ADD_S64_U32(i); 1.158 + } 1.159 + 1.160 + return c; 1.161 + 1.162 + } 1.163 + } else { 1.164 + 1.165 + if (n == 8) { 1.166 + DEF_VARS(2*8); 1.167 + t_d64 d0, d1, db; 1.168 + t_u32 uc = 0; 1.169 + 1.170 + da = (t_d64)(a & A_MASK); 1.171 + db = (t_d64)(a >> A_BITS); 1.172 + 1.173 + MUL_U32_S64_D_8(0); 1.174 + ADD_S64_U32_D_8(0); 1.175 + 1.176 + return uc; 1.177 + 1.178 + } else if (n == 16) { 1.179 + DEF_VARS(2*16); 1.180 + t_d64 d0, d1, db; 1.181 + t_u32 uc = 0; 1.182 + 1.183 + da = (t_d64)(a & A_MASK); 1.184 + db = (t_d64)(a >> A_BITS); 1.185 + 1.186 + MUL_U32_S64_D_8(0); 1.187 + MUL_U32_S64_D_8(4); 1.188 + ADD_S64_U32_D_8(0); 1.189 + ADD_S64_U32_D_8(8); 1.190 + 1.191 + return uc; 1.192 + 1.193 + } else { 1.194 + DEF_VARS(2*BUFF_SIZE); 1.195 + t_d64 d0, d1, db; 1.196 + t_u32 i, uc = 0; 1.197 + 1.198 + da = (t_d64)(a & A_MASK); 1.199 + db = (t_d64)(a >> A_BITS); 1.200 + 1.201 +#pragma pipeloop(0) 1.202 + for (i = 0; i < (n+1)/2; i ++) { 1.203 + MUL_U32_S64_2_D(i); 1.204 + } 1.205 + 1.206 +#pragma pipeloop(0) 1.207 + for (i = 0; i < n; i ++) { 1.208 + ADD_S64_U32_D(i); 1.209 + } 1.210 + 1.211 + return uc; 1.212 + } 1.213 + } 1.214 +} 1.215 + 1.216 +/***************************************************************/ 1.217 + 1.218 +t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a) 1.219 +{ 1.220 + return mul_add(x, x, y, n, a); 1.221 +} 1.222 + 1.223 +/***************************************************************/