The Tor Browser: security/nss/lib/freebl/mpi/mpi_sparc.c@b8a032363ba2 (annotated)

security/nss/lib/freebl/mpi/mpi_sparc.c@b8a032363ba2 (annotated)

security/nss/lib/freebl/mpi/mpi_sparc.c

Thu, 22 Jan 2015 13:21:57 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Thu, 22 Jan 2015 13:21:57 +0100
branch: TOR_BUG_9701
changeset 15: b8a032363ba2
permissions: -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 /* Multiplication performance enhancements for sparc v8+vis CPUs. */
 #include "mpi-priv.h"
 #include <stddef.h>
 #include <sys/systeminfo.h>
 #include <strings.h>
 /*
  * Externally defined multiply-accumulate routines.
  * For both functions declared below:
  *   - vector y must be 8-byte aligned, and n must be even;
  *   - the maximum n is 256;
  *   - the return value is the carry out of the high-order word of the
  *     result.
  */
 /* vector x += vector y * scalar a, where y is n words long. */
 extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
 /* vector z = vector x + vector y * scalar a, where y is n words long. */
 extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
 			int n, mp_digit a);
 /* v8 versions of these functions run on any Sparc v8 CPU. */
 /*
  * MP_MUL_DxD(a, b, Phi, Plo): multiply the digits a and b in an
  * unsigned long long, then store the low digit of the product in Plo and
  * the bits above MP_DIGIT_BIT in Phi.
  * This trick works on Sparc V8 CPUs with the Workshop compilers.
  *
  * The operands are parenthesized so that expression arguments (e.g. x + y)
  * are widened and multiplied as a whole, and the body is wrapped in
  * do { } while (0) so that an invocation followed by ';' is exactly one
  * statement.  Phi and Plo must be assignable lvalues of type mp_digit.
  */
 #define MP_MUL_DxD(a, b, Phi, Plo) \
   do { \
     unsigned long long mul_dxd_product_ = \
         (unsigned long long)(a) * (b); \
     (Plo) = (mp_digit)mul_dxd_product_; \
     (Phi) = (mp_digit)(mul_dxd_product_ >> MP_DIGIT_BIT); \
   } while (0)
 /*
  * v8_mpv_mul_d:  c = a * b
  *
  * Multiplies the a_len digits starting at a by the single digit b.
  * One result digit is written to c for every digit of a, and the final
  * carry digit is written to the position right after them, so c receives
  * a_len + 1 digits in total.
  */
 static void
 v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
   mp_digit carry = 0;

   for (; a_len != 0; --a_len, ++a, ++c) {
 #if !defined(MP_NO_MP_WORD)
     /* Form product + carry in an mp_word, then split it into digits. */
     mp_word acc = ((mp_word)b * *a) + carry;
     *c    = ACCUM(acc);
     carry = CARRYOUT(acc);
 #else
     mp_digit factor = *a;
     mp_digit hi, lo;
     MP_MUL_DxD(factor, b, hi, lo);
     /* Fold the incoming carry into the low digit; bump hi on wrap. */
     lo += carry;
     if (lo < carry)
       ++hi;
     *c    = lo;
     carry = hi;
 #endif
   }
   /* The leftover carry becomes the top digit of the result. */
   *c = carry;
 }
 /*
  * v8_mpv_mul_d_add:  c += a * b
  *
  * Multiplies the a_len digits starting at a by the single digit b and adds
  * the product, digit by digit, into the first a_len digits of c.  The
  * final carry is then STORED (not added) into the digit of c that follows
  * them, overwriting whatever was there.
  */
 static void
 v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
   mp_digit carry = 0;

   for (; a_len != 0; --a_len, ++a, ++c) {
 #if !defined(MP_NO_MP_WORD)
     /* product + existing digit of c + carry, split back into digits. */
     mp_word acc = ((mp_word)b * *a) + *c + carry;
     *c    = ACCUM(acc);
     carry = CARRYOUT(acc);
 #else
     mp_digit factor = *a;
     mp_digit addend;
     mp_digit hi, lo;
     MP_MUL_DxD(factor, b, hi, lo);
     /* Add the incoming carry; a wrapped low digit bumps the high one. */
     lo += carry;
     if (lo < carry)
       ++hi;
     /* Add the digit already in c, with the same wrap detection. */
     addend = *c;
     lo += addend;
     if (lo < addend)
       ++hi;
     *c    = lo;
     carry = hi;
 #endif
   }
   /* Store the final carry just past the digits that were updated. */
   *c = carry;
 }
 /*
  * v8_mpv_mul_d_add_prop:  c += a * b, with carry propagation
  *
  * Presently, this is only used by the Montgomery arithmetic code.
  *
  * Adds the product of the a_len digits at a and the single digit b into
  * c.  Unlike v8_mpv_mul_d_add, the final carry is added into the digits
  * of c that follow, and keeps rippling upward until it becomes zero; the
  * function itself places no bound on how far it walks.
  */
 static void
 v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
   mp_digit carry = 0;

   /* Pass 1: multiply-accumulate over every digit of a. */
   for (; a_len != 0; --a_len, ++a, ++c) {
 #if !defined(MP_NO_MP_WORD)
     mp_word acc = ((mp_word)b * *a) + *c + carry;
     *c    = ACCUM(acc);
     carry = CARRYOUT(acc);
 #else
     mp_digit factor = *a;
     mp_digit addend;
     mp_digit hi, lo;
     MP_MUL_DxD(factor, b, hi, lo);
     /* Add the incoming carry; a wrapped low digit bumps the high one. */
     lo += carry;
     if (lo < carry)
       ++hi;
     /* Add the digit already in c, with the same wrap detection. */
     addend = *c;
     lo += addend;
     if (lo < addend)
       ++hi;
     *c    = lo;
     carry = hi;
 #endif
   }

   /* Pass 2: ripple the remaining carry through the higher digits of c. */
   for (; carry != 0; ++c) {
 #if !defined(MP_NO_MP_WORD)
     mp_word acc = (mp_word)*c + carry;
     *c    = ACCUM(acc);
     carry = CARRYOUT(acc);
 #else
     mp_digit prev = *c;
     mp_digit sum = carry + prev;
     *c    = sum;
     /* The sum wrapped around iff it is smaller than an operand. */
     carry = (sum < prev);
 #endif
   }
 }
 /* These functions run only on v8plus+vis or v9+vis CPUs. */
 /*
  * s_mpv_mul_d:  c = a * b
  *
  * For a_len <= 256 the work is done by mul_add_inp(): c is zeroed and the
  * product is accumulated into it, with the returned carry stored in
  * c[a_len].  Longer inputs are handed to v8_mpv_mul_d().
  *
  * When a aliases c, is not 8-byte aligned, or has an odd number of digits,
  * a is first copied into an 8-byte aligned spot in a local scratch buffer
  * (an odd-length copy gets one zero digit appended) and the copy is used
  * as the multiplicand.
  */
 void
 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
     mp_digit scratch[258];
     mp_digit *aligned;
     mp_digit carry;

     if (a_len > 256) {
         v8_mpv_mul_d(a, a_len, b, c);
         return;
     }

     if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
         /* Start at scratch[1] if scratch[0] is not 8-byte aligned. */
         aligned = scratch;
         if (((ptrdiff_t)scratch & 0x7) != 0)
             ++aligned;
         memcpy(aligned, a, a_len * sizeof(*a));
         if ((a_len & 1) != 0)
             aligned[a_len] = 0;    /* zero digit after an odd-length copy */
         a = aligned;
     }

     /* c must start from zero because mul_add_inp() accumulates into it. */
     s_mp_setz(c, a_len + 1);
     carry = mul_add_inp(c, a, a_len, b);
     c[a_len] = carry;
 }
 /*
  * s_mpv_mul_d_add:  c += a * b, where a is a_len words long.
  *
  * For a_len <= 256 the product is accumulated into c by mul_add_inp() and
  * the returned carry is stored (not added) in c[a_len].  Longer inputs are
  * handed to v8_mpv_mul_d_add().
  *
  * When a is not 8-byte aligned or has an odd number of digits, a is first
  * copied into an 8-byte aligned spot in a local scratch buffer (an
  * odd-length copy gets one zero digit appended) and the copy is used as
  * the multiplicand.
  */
 void
 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
     mp_digit scratch[258];
     mp_digit *aligned;
     mp_digit carry;

     if (a_len > 256) {
         v8_mpv_mul_d_add(a, a_len, b, c);
         return;
     }

     if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
         /* Start at scratch[1] if scratch[0] is not 8-byte aligned. */
         aligned = scratch;
         if (((ptrdiff_t)scratch & 0x7) != 0)
             ++aligned;
         memcpy(aligned, a, a_len * sizeof(*a));
         if ((a_len & 1) != 0)
             aligned[a_len] = 0;    /* zero digit after an odd-length copy */
         a = aligned;
     }

     carry = mul_add_inp(c, a, a_len, b);
     c[a_len] = carry;
 }
 /*
  * s_mpv_mul_d_add_prop:  c += a * b, where a is a_len words long, with
  * the carry propagated into the higher digits of c.
  *
  * For a_len <= 256 the product is accumulated into c by mul_add_inp(); a
  * non-zero returned carry is then added in starting at c[a_len] and keeps
  * rippling upward until it becomes zero.  Longer inputs are handed to
  * v8_mpv_mul_d_add_prop().
  *
  * When a is not 8-byte aligned or has an odd number of digits, a is first
  * copied into an 8-byte aligned spot in a local scratch buffer (an
  * odd-length copy gets one zero digit appended) and the copy is used as
  * the multiplicand.
  */
 void
 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
 {
     mp_digit scratch[258];
     mp_digit *aligned;
     mp_digit *top;
     mp_digit carry;

     if (a_len > 256) {
         v8_mpv_mul_d_add_prop(a, a_len, b, c);
         return;
     }

     if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
         /* Start at scratch[1] if scratch[0] is not 8-byte aligned. */
         aligned = scratch;
         if (((ptrdiff_t)scratch & 0x7) != 0)
             ++aligned;
         memcpy(aligned, a, a_len * sizeof(*a));
         if ((a_len & 1) != 0)
             aligned[a_len] = 0;    /* zero digit after an odd-length copy */
         a = aligned;
     }

     carry = mul_add_inp(c, a, a_len, b);

     /* Ripple the carry upward, starting just past the updated digits. */
     top = c + a_len;
     while (carry != 0) {
         mp_digit sum = carry + *top;
         *top++ = sum;
         /* The addition wrapped iff the sum is smaller than the carry. */
         carry = (sum < carry);
     }
 }

The Tor Browser / annotate

security/nss/lib/freebl/mpi/mpi_sparc.c@b8a032363ba2 (annotated)

security/nss/lib/freebl/mpi/mpi_sparc.c