The Tor Browser: security/nss/lib/freebl/mpi/mpi

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* This Source Code Form is subject to the terms of the Mozilla Public

     2  * License, v. 2.0. If a copy of the MPL was not distributed with this

     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     5 /* Multiplication performance enhancements for sparc v8+vis CPUs. */

     7 #include "mpi-priv.h"

     8 #include <stddef.h>

     9 #include <sys/systeminfo.h>

    10 #include <strings.h>

    12 /* In the functions below, */

    13 /* vector y must be 8-byte aligned, and n must be even */

    14 /* returns carry out of high order word of result */

    15 /* maximum n is 256 */

    17 /* vector x += vector y * scalar a; where y is of length n words. */

    18 extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);

    20 /* vector z = vector x + vector y * scalar a; where y is of length n words. */

    21 extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,

    22 			int n, mp_digit a);

    24 /* v8 versions of these functions run on any Sparc v8 CPU. */

    /*
     * MP_MUL_DxD(a, b, Phi, Plo): full-width product of two digits.
     *
     * Multiplies digit a by digit b in unsigned long long arithmetic and
     * stores the low digit of the product in Plo and the part above
     * MP_DIGIT_BIT bits in Phi.  Phi and Plo must be assignable lvalues.
     *
     * This trick works on Sparc V8 CPUs with the Workshop compilers.
     *
     * Every argument is parenthesized so that expression arguments bind as
     * the caller expects, and the body is wrapped in do { } while (0) so
     * that "MP_MUL_DxD(...);" behaves as one statement (e.g. as the body
     * of an if that is followed by an else).
     */
    #define MP_MUL_DxD(a, b, Phi, Plo)                                        \
        do {                                                                  \
            unsigned long long mp_mul_dxd_p_ = (unsigned long long)(a) * (b); \
            (Plo) = (mp_digit)mp_mul_dxd_p_;                                  \
            (Phi) = (mp_digit)(mp_mul_dxd_p_ >> MP_DIGIT_BIT);                \
        } while (0)

    32 /* c = a * b */

    33 static void

    34 v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)

    35 {

    36 #if !defined(MP_NO_MP_WORD)

    37   mp_digit   d = 0;

    39   /* Inner product:  Digits of a */

    40   while (a_len--) {

    41     mp_word w = ((mp_word)b * *a++) + d;

    42     *c++ = ACCUM(w);

    43     d = CARRYOUT(w);

    44   }

    45   *c = d;

    46 #else

    47   mp_digit carry = 0;

    48   while (a_len--) {

    49     mp_digit a_i = *a++;

    50     mp_digit a0b0, a1b1;

    52     MP_MUL_DxD(a_i, b, a1b1, a0b0);

    54     a0b0 += carry;

    55     if (a0b0 < carry)

    56       ++a1b1;

    57     *c++ = a0b0;

    58     carry = a1b1;

    59   }

    60   *c = carry;

    61 #endif

    62 }

    64 /* c += a * b */

    65 static void

    66 v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)

    67 {

    68 #if !defined(MP_NO_MP_WORD)

    69   mp_digit   d = 0;

    71   /* Inner product:  Digits of a */

    72   while (a_len--) {

    73     mp_word w = ((mp_word)b * *a++) + *c + d;

    74     *c++ = ACCUM(w);

    75     d = CARRYOUT(w);

    76   }

    77   *c = d;

    78 #else

    79   mp_digit carry = 0;

    80   while (a_len--) {

    81     mp_digit a_i = *a++;

    82     mp_digit a0b0, a1b1;

    84     MP_MUL_DxD(a_i, b, a1b1, a0b0);

    86     a0b0 += carry;

    87     if (a0b0 < carry)

    88       ++a1b1;

    89     a0b0 += a_i = *c;

    90     if (a0b0 < a_i)

    91       ++a1b1;

    92     *c++ = a0b0;

    93     carry = a1b1;

    94   }

    95   *c = carry;

    96 #endif

    97 }

    99 /* Presently, this is only used by the Montgomery arithmetic code. */

   100 /* c += a * b */

   101 static void

   102 v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)

   103 {

   104 #if !defined(MP_NO_MP_WORD)

   105   mp_digit   d = 0;

   107   /* Inner product:  Digits of a */

   108   while (a_len--) {

   109     mp_word w = ((mp_word)b * *a++) + *c + d;

   110     *c++ = ACCUM(w);

   111     d = CARRYOUT(w);

   112   }

   114   while (d) {

   115     mp_word w = (mp_word)*c + d;

   116     *c++ = ACCUM(w);

   117     d = CARRYOUT(w);

   118   }

   119 #else

   120   mp_digit carry = 0;

   121   while (a_len--) {

   122     mp_digit a_i = *a++;

   123     mp_digit a0b0, a1b1;

   125     MP_MUL_DxD(a_i, b, a1b1, a0b0);

   127     a0b0 += carry;

   128     if (a0b0 < carry)

   129       ++a1b1;

   131     a0b0 += a_i = *c;

   132     if (a0b0 < a_i)

   133       ++a1b1;

   135     *c++ = a0b0;

   136     carry = a1b1;

   137   }

   138   while (carry) {

   139     mp_digit c_i = *c;

   140     carry += c_i;

   141     *c++ = carry;

   142     carry = carry < c_i;

   143   }

   144 #endif

   145 }

   147 /* These functions run only on v8plus+vis or v9+vis CPUs. */

   149 /* c = a * b */

   150 void

   151 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)

   152 {

   153     mp_digit d;

   154     mp_digit x[258];

   155     if (a_len <= 256) {

   156 	if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {

   157 	    mp_digit * px;

   158 	    px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;

   159 	    memcpy(px, a, a_len * sizeof(*a));

   160 	    a = px;

   161 	    if (a_len & 1) {

   162 		px[a_len] = 0;

   163 	    }

   164 	}

   165 	s_mp_setz(c, a_len + 1);

   166 	d = mul_add_inp(c, a, a_len, b);

   167 	c[a_len] = d;

   168     } else {

   169 	v8_mpv_mul_d(a, a_len, b, c);

   170     }

   171 }

   173 /* c += a * b, where a is a_len words long. */

   174 void

   175 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)

   176 {

   177     mp_digit d;

   178     mp_digit x[258];

   179     if (a_len <= 256) {

   180 	if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {

   181 	    mp_digit * px;

   182 	    px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;

   183 	    memcpy(px, a, a_len * sizeof(*a));

   184 	    a = px;

   185 	    if (a_len & 1) {

   186 		px[a_len] = 0;

   187 	    }

   188 	}

   189 	d = mul_add_inp(c, a, a_len, b);

   190 	c[a_len] = d;

   191     } else {

   192 	v8_mpv_mul_d_add(a, a_len, b, c);

   193     }

   194 }

   196 /* c += a * b, where a is y words long. */

   197 void

   198 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)

   199 {

   200     mp_digit d;

   201     mp_digit x[258];

   202     if (a_len <= 256) {

   203 	if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {

   204 	    mp_digit * px;

   205 	    px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;

   206 	    memcpy(px, a, a_len * sizeof(*a));

   207 	    a = px;

   208 	    if (a_len & 1) {

   209 		px[a_len] = 0;

   210 	    }

   211 	}

   212 	d = mul_add_inp(c, a, a_len, b);

   213 	if (d) {

   214 	    c += a_len;

   215 	    do {

   216 		mp_digit sum = d + *c;

   217 		*c++ = sum;

   218 		d = sum < d;

   219 	    } while (d);

   220 	}

   221     } else {

   222 	v8_mpv_mul_d_add_prop(a, a_len, b, c);

   223     }

   224 }

The Tor Browser / file revision