security/nss/lib/freebl/mpi/mpvalpha.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/mpvalpha.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,181 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +#include "mpi-priv.h"
     1.9 +#include <c_asm.h>
    1.10 +
    1.11 +
    1.12 +#define MP_MUL_DxD(a, b, Phi, Plo)		/* digit x digit multiply: the product of a and b is returned as two words, Phi (high) and Plo (low) */ \
    1.13 + { Plo = asm ("mulq %a0, %a1, %v0", a, b);	/* Alpha mulq: low word of the product */ \
    1.14 +   Phi = asm ("umulh %a0, %a1, %v0", a, b); }	/* Alpha umulh: high word of the unsigned product */ \
    1.15 +
    1.16 +/* This is empty for the loop in s_mpv_mul_d: ONE_MUL expands CARRY_ADD just before storing to *c, and a plain multiply has nothing extra to add there	*/
    1.17 +#define CARRY_ADD
    1.18 +
    1.19 +#define ONE_MUL				/* one digit step; uses a, b, c, carry, a_i, a0b0, a1b1 from the enclosing scope */ \
    1.20 +    a_i = *a++;				/* fetch the next digit of a */ \
    1.21 +    MP_MUL_DxD(a_i, b, a1b1, a0b0);	/* a1b1 = high word, a0b0 = low word of a_i * b */ \
    1.22 +    a0b0 += carry;			/* fold in the carry from the previous step */ \
    1.23 +    if (a0b0 < carry)			/* sum smaller than an addend: the add wrapped (mp_digit assumed unsigned) */ \
    1.24 +      ++a1b1;				\
    1.25 +    CARRY_ADD				/* whatever CARRY_ADD is defined as at the point of expansion */ \
    1.26 +    *c++ = a0b0;			/* store the low word and advance c */ \
    1.27 +    carry = a1b1;			/* high word becomes the carry into the next step */ \
    1.28 +
    1.29 +#define FOUR_MUL			/* 4 consecutive ONE_MUL steps */ \
    1.30 +	ONE_MUL				\
    1.31 +	ONE_MUL				\
    1.32 +	ONE_MUL				\
    1.33 +	ONE_MUL				\
    1.34 +
    1.35 +#define SIXTEEN_MUL			/* 4 x FOUR_MUL = 16 ONE_MUL steps */ \
    1.36 +	FOUR_MUL			\
    1.37 +	FOUR_MUL			\
    1.38 +	FOUR_MUL			\
    1.39 +	FOUR_MUL			\
    1.40 +
    1.41 +#define THIRTYTWO_MUL			/* 2 x SIXTEEN_MUL = 32 ONE_MUL steps */ \
    1.42 +	SIXTEEN_MUL			\
    1.43 +	SIXTEEN_MUL			\
    1.44 +
    1.45 +#define ONETWENTYEIGHT_MUL		/* 4 x THIRTYTWO_MUL = 128 ONE_MUL steps */ \
    1.46 +	THIRTYTWO_MUL			\
    1.47 +	THIRTYTWO_MUL			\
    1.48 +	THIRTYTWO_MUL			\
    1.49 +	THIRTYTWO_MUL			\
    1.50 +
    1.51 +
    1.52 +#define EXPAND_256(CALL)		/* function-body fragment: declares carry and scratch digits, runs a_len ONE_MUL steps (a_len mod 256 inline, the rest through CALL), and leaves carry and c for the code that follows */ \
    1.53 + mp_digit carry = 0;			\
    1.54 + mp_digit a_i;				\
    1.55 + mp_digit a0b0, a1b1;			\
    1.56 + if (a_len &255) {			/* low 8 bits of a_len: each set bit selects an unrolled run of that many steps */ \
    1.57 +	if (a_len &1) {			\
    1.58 +	  ONE_MUL			\
    1.59 +	}				\
    1.60 +	if (a_len &2) {			\
    1.61 +	  ONE_MUL			\
    1.62 +	  ONE_MUL			\
    1.63 +	}				\
    1.64 +	if (a_len &4) {			\
    1.65 +	  FOUR_MUL			\
    1.66 +	}				\
    1.67 +	if (a_len &8) {			\
    1.68 +	  FOUR_MUL			\
    1.69 +	  FOUR_MUL			\
    1.70 +	}				\
    1.71 +	if (a_len & 16 ) {		\
    1.72 +	  SIXTEEN_MUL			\
    1.73 +	}				\
    1.74 +	if (a_len & 32 ) {		\
    1.75 +	  THIRTYTWO_MUL			\
    1.76 +	}				\
    1.77 +	if (a_len & 64 ) {		\
    1.78 +	  THIRTYTWO_MUL			\
    1.79 +	  THIRTYTWO_MUL			\
    1.80 +	}				\
    1.81 +	if (a_len & 128) {		\
    1.82 +	  ONETWENTYEIGHT_MUL		\
    1.83 +	}				\
    1.84 +	a_len = a_len & (-256);		/* clear the low 8 bits; the remainder is a multiple of 256 */ \
    1.85 +  }					\
    1.86 +  if (a_len>=256 ) {			\
    1.87 +	carry = CALL(a, a_len, b, c, carry);	/* CALL continues from the current a, c and carry and returns the outgoing carry */ \
    1.88 +	c += a_len;			/* CALL received c by value, so step the local c past the digits it handled */ \
    1.89 +  }					\
    1.90 +
    1.91 +#define FUNC_NAME(NAME)			/* shared signature of the 256-step helpers; used both for the forward declaration and for the definition */ \
    1.92 +mp_digit NAME(const mp_digit *a, 	\
    1.93 +	mp_size a_len,			\
    1.94 +	mp_digit b, mp_digit *c, 	\
    1.95 +	mp_digit carry)			/* incoming carry; the outgoing carry is the return value */ \
    1.96 +
    1.97 +#define DECLARE_MUL_256(FNAME)		/* defines function FNAME, which runs ONE_MUL in batches of 256 using the CARRY_ADD in effect where this macro is expanded */ \
    1.98 +FUNC_NAME(FNAME)			\
    1.99 +{					\
   1.100 +  mp_digit a_i;				\
   1.101 +  mp_digit a0b0, a1b1;			\
   1.102 +  while (a_len) {			/* ends only when a_len reaches exactly 0, so a_len must be a multiple of 256 (EXPAND_256 guarantees this) */ \
   1.103 +	ONETWENTYEIGHT_MUL		\
   1.104 +	ONETWENTYEIGHT_MUL		\
   1.105 +	a_len-= 256;			\
   1.106 +  }					\
   1.107 +  return carry;				/* carry out of the last step */ \
   1.108 +}					\
   1.109 +
   1.110 +/* Expanding the loop in s_mpv_mul_d appeared to slow down the
   1.111 +   (admittedly) small number of tests (i.e., timetest) used to
   1.112 +   measure performance, so this define disables that optimization. */
   1.113 +#define DO_NOT_EXPAND 1	/* affects s_mpv_mul_d only; the two *_add routines always use EXPAND_256 */
   1.114 +
   1.115 +/* Need forward declaration so it can be instantiated after
   1.116 +	the routine that uses it; this helps locality somewhat	*/
   1.117 +#if !defined(DO_NOT_EXPAND)
   1.118 +FUNC_NAME(s_mpv_mul_d_MUL256);
   1.119 +#endif /* !DO_NOT_EXPAND */
   1.120 +
   1.121 +/* c = a * b: multiplies the a_len digits at a by the single digit b; writes a_len result digits to c followed by the final carry digit, so c needs room for a_len + 1 digits */
   1.122 +void s_mpv_mul_d(const mp_digit *a, mp_size a_len, 
   1.123 +			mp_digit b, mp_digit *c)
   1.124 +{
   1.125 +#if defined(DO_NOT_EXPAND)	/* simple one-digit-per-iteration loop */
   1.126 +  mp_digit carry = 0;
   1.127 +  while (a_len--) {
   1.128 +    mp_digit a_i = *a++;
   1.129 +    mp_digit a0b0, a1b1;	/* low and high word of a_i * b */
   1.130 +
   1.131 +    MP_MUL_DxD(a_i, b, a1b1, a0b0);
   1.132 +
   1.133 +    a0b0 += carry;
   1.134 +    if (a0b0 < carry)	/* the add wrapped (mp_digit assumed unsigned): bump the high word */
   1.135 +      ++a1b1;
   1.136 +    *c++ = a0b0;
   1.137 +    carry = a1b1;	/* high word carries into the next digit */
   1.138 +  }
   1.139 +#else	/* unrolled variant; declares carry itself */
   1.140 +  EXPAND_256(s_mpv_mul_d_MUL256)
   1.141 +#endif
   1.142 +  *c = carry;	/* most significant digit of the result */
   1.143 +}
   1.144 +
   1.145 +#if !defined(DO_NOT_EXPAND)	/* helper is only needed by the unrolled s_mpv_mul_d */
   1.146 +DECLARE_MUL_256(s_mpv_mul_d_MUL256)	/* expanded while CARRY_ADD is still empty, i.e. a plain multiply */
   1.147 +#endif /* !DO_NOT_EXPAND */
   1.148 +
   1.149 +#undef CARRY_ADD
   1.150 +/* This is redefined for the loop in s_mpv_mul_d_add: every ONE_MUL expanded from here on also adds the digit already stored at *c */
   1.151 +#define CARRY_ADD			/* a_i is reused as scratch for the old *c; ONE_MUL reloads it on the next step */ \
   1.152 +    a0b0 += a_i = *c;			\
   1.153 +    if (a0b0 < a_i)			/* the add wrapped (mp_digit assumed unsigned): bump the high word */ \
   1.154 +      ++a1b1;				\
   1.155 +
   1.156 +/* Need forward declaration so it can be instantiated between the
   1.157 +	two routines that use it (s_mpv_mul_d_add and s_mpv_mul_d_add_prop); this helps locality somewhat	*/
   1.158 +FUNC_NAME(s_mpv_mul_d_add_MUL256);
   1.159 +
   1.160 +/* c += a * b: adds a * b into the first a_len digits of c, then stores (does not add) the final carry in the digit that follows them */
   1.161 +void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, 
   1.162 +			mp_digit b, mp_digit *c)
   1.163 +{
   1.164 +  EXPAND_256(s_mpv_mul_d_add_MUL256)	/* declares carry and leaves c just past the a_len updated digits */
   1.165 +  *c = carry;	/* overwrites whatever was in c[a_len] */
   1.166 +}
   1.167 +
   1.168 +/* Instantiate multiply 256 routine here; CARRY_ADD is the add form at this point, so the routine accumulates into c */
   1.169 +DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
   1.170 +
   1.171 +/* Presently, this is only used by the Montgomery arithmetic code. */
   1.172 +/* c += a * b, with the final carry added into the following digits of c and propagated until it dies out; there is no bound check, so c must extend far enough to absorb it */
   1.173 +void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, 
   1.174 +			mp_digit b, mp_digit *c)
   1.175 +{
   1.176 +  EXPAND_256(s_mpv_mul_d_add_MUL256)	/* declares carry and leaves c just past the a_len updated digits */
   1.177 +  while (carry) {
   1.178 +    mp_digit c_i = *c;
   1.179 +    carry += c_i;	/* carry now holds the new digit value */
   1.180 +    *c++ = carry;
   1.181 +    carry = carry < c_i;	/* 1 if the add wrapped (mp_digit assumed unsigned), else 0 */
   1.182 +  }
   1.183 +}
   1.184 +

mercurial