1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/freebl/mpi/mpvalpha.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,181 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include "mpi-priv.h" 1.9 +#include <c_asm.h> 1.10 + 1.11 + 1.12 +#define MP_MUL_DxD(a, b, Phi, Plo) \ 1.13 + { Plo = asm ("mulq %a0, %a1, %v0", a, b); \ 1.14 + Phi = asm ("umulh %a0, %a1, %v0", a, b); } \ 1.15 + 1.16 +/* This is empty for the loop in s_mpv_mul_d */ 1.17 +#define CARRY_ADD 1.18 + 1.19 +#define ONE_MUL \ 1.20 + a_i = *a++; \ 1.21 + MP_MUL_DxD(a_i, b, a1b1, a0b0); \ 1.22 + a0b0 += carry; \ 1.23 + if (a0b0 < carry) \ 1.24 + ++a1b1; \ 1.25 + CARRY_ADD \ 1.26 + *c++ = a0b0; \ 1.27 + carry = a1b1; \ 1.28 + 1.29 +#define FOUR_MUL \ 1.30 + ONE_MUL \ 1.31 + ONE_MUL \ 1.32 + ONE_MUL \ 1.33 + ONE_MUL \ 1.34 + 1.35 +#define SIXTEEN_MUL \ 1.36 + FOUR_MUL \ 1.37 + FOUR_MUL \ 1.38 + FOUR_MUL \ 1.39 + FOUR_MUL \ 1.40 + 1.41 +#define THIRTYTWO_MUL \ 1.42 + SIXTEEN_MUL \ 1.43 + SIXTEEN_MUL \ 1.44 + 1.45 +#define ONETWENTYEIGHT_MUL \ 1.46 + THIRTYTWO_MUL \ 1.47 + THIRTYTWO_MUL \ 1.48 + THIRTYTWO_MUL \ 1.49 + THIRTYTWO_MUL \ 1.50 + 1.51 + 1.52 +#define EXPAND_256(CALL) \ 1.53 + mp_digit carry = 0; \ 1.54 + mp_digit a_i; \ 1.55 + mp_digit a0b0, a1b1; \ 1.56 + if (a_len &255) { \ 1.57 + if (a_len &1) { \ 1.58 + ONE_MUL \ 1.59 + } \ 1.60 + if (a_len &2) { \ 1.61 + ONE_MUL \ 1.62 + ONE_MUL \ 1.63 + } \ 1.64 + if (a_len &4) { \ 1.65 + FOUR_MUL \ 1.66 + } \ 1.67 + if (a_len &8) { \ 1.68 + FOUR_MUL \ 1.69 + FOUR_MUL \ 1.70 + } \ 1.71 + if (a_len & 16 ) { \ 1.72 + SIXTEEN_MUL \ 1.73 + } \ 1.74 + if (a_len & 32 ) { \ 1.75 + THIRTYTWO_MUL \ 1.76 + } \ 1.77 + if (a_len & 64 ) { \ 1.78 + THIRTYTWO_MUL \ 1.79 + THIRTYTWO_MUL \ 1.80 + } \ 1.81 + if (a_len & 128) { \ 1.82 + ONETWENTYEIGHT_MUL \ 1.83 + } \ 1.84 + a_len = a_len & (-256); \ 1.85 + } \ 1.86 + if (a_len>=256 ) { \ 1.87 + carry = CALL(a, a_len, b, c, carry); \ 1.88 + c += a_len; \ 1.89 + } \ 1.90 + 1.91 +#define FUNC_NAME(NAME) \ 1.92 +mp_digit NAME(const mp_digit *a, \ 1.93 + mp_size a_len, \ 1.94 + mp_digit b, mp_digit *c, \ 1.95 + mp_digit carry) \ 1.96 + 1.97 +#define DECLARE_MUL_256(FNAME) \ 1.98 +FUNC_NAME(FNAME) \ 1.99 +{ \ 1.100 + mp_digit a_i; \ 1.101 + mp_digit a0b0, a1b1; \ 1.102 + while (a_len) { \ 1.103 + ONETWENTYEIGHT_MUL \ 1.104 + ONETWENTYEIGHT_MUL \ 1.105 + a_len-= 256; \ 1.106 + } \ 1.107 + return carry; \ 1.108 +} \ 1.109 + 1.110 +/* Expanding the loop in s_mpv_mul_d appeared to slow down the 1.111 + (admittedly) small number of tests (i.e., timetest) used to 1.112 + measure performance, so this define disables that optimization. */ 1.113 +#define DO_NOT_EXPAND 1 1.114 + 1.115 +/* Need forward declaration so it can be instantiated after 1.116 + the routine that uses it; this helps locality somewhat */ 1.117 +#if !defined(DO_NOT_EXPAND) 1.118 +FUNC_NAME(s_mpv_mul_d_MUL256); 1.119 +#endif 1.120 + 1.121 +/* c = a * b */ 1.122 +void s_mpv_mul_d(const mp_digit *a, mp_size a_len, 1.123 + mp_digit b, mp_digit *c) 1.124 +{ 1.125 +#if defined(DO_NOT_EXPAND) 1.126 + mp_digit carry = 0; 1.127 + while (a_len--) { 1.128 + mp_digit a_i = *a++; 1.129 + mp_digit a0b0, a1b1; 1.130 + 1.131 + MP_MUL_DxD(a_i, b, a1b1, a0b0); 1.132 + 1.133 + a0b0 += carry; 1.134 + if (a0b0 < carry) 1.135 + ++a1b1; 1.136 + *c++ = a0b0; 1.137 + carry = a1b1; 1.138 + } 1.139 +#else 1.140 + EXPAND_256(s_mpv_mul_d_MUL256) 1.141 +#endif 1.142 + *c = carry; 1.143 +} 1.144 + 1.145 +#if !defined(DO_NOT_EXPAND) 1.146 +DECLARE_MUL_256(s_mpv_mul_d_MUL256) 1.147 +#endif 1.148 + 1.149 +#undef CARRY_ADD 1.150 +/* This is redefined for the loop in s_mpv_mul_d_add */ 1.151 +#define CARRY_ADD \ 1.152 + a0b0 += a_i = *c; \ 1.153 + if (a0b0 < a_i) \ 1.154 + ++a1b1; \ 1.155 + 1.156 +/* Need forward declaration so it can be instantiated between the 1.157 + two routines that use it; this helps locality somewhat */ 1.158 +FUNC_NAME(s_mpv_mul_d_add_MUL256); 1.159 + 1.160 +/* c += a * b */ 1.161 +void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, 1.162 + mp_digit b, mp_digit *c) 1.163 +{ 1.164 + EXPAND_256(s_mpv_mul_d_add_MUL256) 1.165 + *c = carry; 1.166 +} 1.167 + 1.168 +/* Instantiate multiply 256 routine here */ 1.169 +DECLARE_MUL_256(s_mpv_mul_d_add_MUL256) 1.170 + 1.171 +/* Presently, this is only used by the Montgomery arithmetic code. */ 1.172 +/* c += a * b */ 1.173 +void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, 1.174 + mp_digit b, mp_digit *c) 1.175 +{ 1.176 + EXPAND_256(s_mpv_mul_d_add_MUL256) 1.177 + while (carry) { 1.178 + mp_digit c_i = *c; 1.179 + carry += c_i; 1.180 + *c++ = carry; 1.181 + carry = carry < c_i; 1.182 + } 1.183 +} 1.184 +