security/nss/lib/freebl/mpi/mpi_sparc.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 /* Multiplication performance enhancements for sparc v8+vis CPUs. */
michael@0 6
michael@0 7 #include "mpi-priv.h"
michael@0 8 #include <stddef.h>
michael@0 9 #include <sys/systeminfo.h>
michael@0 10 #include <strings.h>
michael@0 11
michael@0 12 /* In the functions below, */
michael@0 13 /* vector y must be 8-byte aligned, and n must be even */
michael@0 14 /* returns carry out of high order word of result */
michael@0 15 /* maximum n is 256 */
michael@0 16
michael@0 17 /* vector x += vector y * scalar a; where y is of length n words. */
michael@0 18 extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
michael@0 19
michael@0 20 /* vector z = vector x + vector y * scalar a; where y is of length n words. */
michael@0 21 extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
michael@0 22 int n, mp_digit a);
michael@0 23
michael@0 24 /* v8 versions of these functions run on any Sparc v8 CPU. */
michael@0 25
/*
 * MP_MUL_DxD(a, b, Phi, Plo): double-width product of two digits.
 *
 * Multiplies a by b in unsigned long long arithmetic, then stores the
 * product truncated to mp_digit in Plo and the product shifted right by
 * MP_DIGIT_BIT in Phi.  Phi and Plo must be assignable lvalues.
 *
 * This trick works on Sparc V8 CPUs with the Workshop compilers.
 * NOTE(review): presumably relies on unsigned long long being at least
 * twice as wide as mp_digit -- confirm for any other target.
 *
 * Every argument is parenthesized so that expression arguments keep their
 * intended grouping, and the body is wrapped in do { } while (0) so the
 * macro behaves as a single statement (safe in an unbraced if/else).
 * The temporary has a macro-specific name so it cannot capture an
 * argument that happens to be called "product".
 */
#define MP_MUL_DxD(a, b, Phi, Plo)                                   \
    do {                                                             \
        unsigned long long mp_mul_dxd_product_ =                     \
            (unsigned long long)(a) * (b);                           \
        (Plo) = (mp_digit)mp_mul_dxd_product_;                       \
        (Phi) = (mp_digit)(mp_mul_dxd_product_ >> MP_DIGIT_BIT);     \
    } while (0)
michael@0 31
michael@0 32 /* c = a * b */
michael@0 33 static void
michael@0 34 v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 35 {
michael@0 36 #if !defined(MP_NO_MP_WORD)
michael@0 37 mp_digit d = 0;
michael@0 38
michael@0 39 /* Inner product: Digits of a */
michael@0 40 while (a_len--) {
michael@0 41 mp_word w = ((mp_word)b * *a++) + d;
michael@0 42 *c++ = ACCUM(w);
michael@0 43 d = CARRYOUT(w);
michael@0 44 }
michael@0 45 *c = d;
michael@0 46 #else
michael@0 47 mp_digit carry = 0;
michael@0 48 while (a_len--) {
michael@0 49 mp_digit a_i = *a++;
michael@0 50 mp_digit a0b0, a1b1;
michael@0 51
michael@0 52 MP_MUL_DxD(a_i, b, a1b1, a0b0);
michael@0 53
michael@0 54 a0b0 += carry;
michael@0 55 if (a0b0 < carry)
michael@0 56 ++a1b1;
michael@0 57 *c++ = a0b0;
michael@0 58 carry = a1b1;
michael@0 59 }
michael@0 60 *c = carry;
michael@0 61 #endif
michael@0 62 }
michael@0 63
michael@0 64 /* c += a * b */
michael@0 65 static void
michael@0 66 v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 67 {
michael@0 68 #if !defined(MP_NO_MP_WORD)
michael@0 69 mp_digit d = 0;
michael@0 70
michael@0 71 /* Inner product: Digits of a */
michael@0 72 while (a_len--) {
michael@0 73 mp_word w = ((mp_word)b * *a++) + *c + d;
michael@0 74 *c++ = ACCUM(w);
michael@0 75 d = CARRYOUT(w);
michael@0 76 }
michael@0 77 *c = d;
michael@0 78 #else
michael@0 79 mp_digit carry = 0;
michael@0 80 while (a_len--) {
michael@0 81 mp_digit a_i = *a++;
michael@0 82 mp_digit a0b0, a1b1;
michael@0 83
michael@0 84 MP_MUL_DxD(a_i, b, a1b1, a0b0);
michael@0 85
michael@0 86 a0b0 += carry;
michael@0 87 if (a0b0 < carry)
michael@0 88 ++a1b1;
michael@0 89 a0b0 += a_i = *c;
michael@0 90 if (a0b0 < a_i)
michael@0 91 ++a1b1;
michael@0 92 *c++ = a0b0;
michael@0 93 carry = a1b1;
michael@0 94 }
michael@0 95 *c = carry;
michael@0 96 #endif
michael@0 97 }
michael@0 98
michael@0 99 /* Presently, this is only used by the Montgomery arithmetic code. */
michael@0 100 /* c += a * b */
michael@0 101 static void
michael@0 102 v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 103 {
michael@0 104 #if !defined(MP_NO_MP_WORD)
michael@0 105 mp_digit d = 0;
michael@0 106
michael@0 107 /* Inner product: Digits of a */
michael@0 108 while (a_len--) {
michael@0 109 mp_word w = ((mp_word)b * *a++) + *c + d;
michael@0 110 *c++ = ACCUM(w);
michael@0 111 d = CARRYOUT(w);
michael@0 112 }
michael@0 113
michael@0 114 while (d) {
michael@0 115 mp_word w = (mp_word)*c + d;
michael@0 116 *c++ = ACCUM(w);
michael@0 117 d = CARRYOUT(w);
michael@0 118 }
michael@0 119 #else
michael@0 120 mp_digit carry = 0;
michael@0 121 while (a_len--) {
michael@0 122 mp_digit a_i = *a++;
michael@0 123 mp_digit a0b0, a1b1;
michael@0 124
michael@0 125 MP_MUL_DxD(a_i, b, a1b1, a0b0);
michael@0 126
michael@0 127 a0b0 += carry;
michael@0 128 if (a0b0 < carry)
michael@0 129 ++a1b1;
michael@0 130
michael@0 131 a0b0 += a_i = *c;
michael@0 132 if (a0b0 < a_i)
michael@0 133 ++a1b1;
michael@0 134
michael@0 135 *c++ = a0b0;
michael@0 136 carry = a1b1;
michael@0 137 }
michael@0 138 while (carry) {
michael@0 139 mp_digit c_i = *c;
michael@0 140 carry += c_i;
michael@0 141 *c++ = carry;
michael@0 142 carry = carry < c_i;
michael@0 143 }
michael@0 144 #endif
michael@0 145 }
michael@0 146
michael@0 147 /* These functions run only on v8plus+vis or v9+vis CPUs. */
michael@0 148
michael@0 149 /* c = a * b */
michael@0 150 void
michael@0 151 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 152 {
michael@0 153 mp_digit d;
michael@0 154 mp_digit x[258];
michael@0 155 if (a_len <= 256) {
michael@0 156 if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
michael@0 157 mp_digit * px;
michael@0 158 px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
michael@0 159 memcpy(px, a, a_len * sizeof(*a));
michael@0 160 a = px;
michael@0 161 if (a_len & 1) {
michael@0 162 px[a_len] = 0;
michael@0 163 }
michael@0 164 }
michael@0 165 s_mp_setz(c, a_len + 1);
michael@0 166 d = mul_add_inp(c, a, a_len, b);
michael@0 167 c[a_len] = d;
michael@0 168 } else {
michael@0 169 v8_mpv_mul_d(a, a_len, b, c);
michael@0 170 }
michael@0 171 }
michael@0 172
michael@0 173 /* c += a * b, where a is a_len words long. */
michael@0 174 void
michael@0 175 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 176 {
michael@0 177 mp_digit d;
michael@0 178 mp_digit x[258];
michael@0 179 if (a_len <= 256) {
michael@0 180 if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
michael@0 181 mp_digit * px;
michael@0 182 px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
michael@0 183 memcpy(px, a, a_len * sizeof(*a));
michael@0 184 a = px;
michael@0 185 if (a_len & 1) {
michael@0 186 px[a_len] = 0;
michael@0 187 }
michael@0 188 }
michael@0 189 d = mul_add_inp(c, a, a_len, b);
michael@0 190 c[a_len] = d;
michael@0 191 } else {
michael@0 192 v8_mpv_mul_d_add(a, a_len, b, c);
michael@0 193 }
michael@0 194 }
michael@0 195
michael@0 196 /* c += a * b, where a is y words long. */
michael@0 197 void
michael@0 198 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
michael@0 199 {
michael@0 200 mp_digit d;
michael@0 201 mp_digit x[258];
michael@0 202 if (a_len <= 256) {
michael@0 203 if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
michael@0 204 mp_digit * px;
michael@0 205 px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
michael@0 206 memcpy(px, a, a_len * sizeof(*a));
michael@0 207 a = px;
michael@0 208 if (a_len & 1) {
michael@0 209 px[a_len] = 0;
michael@0 210 }
michael@0 211 }
michael@0 212 d = mul_add_inp(c, a, a_len, b);
michael@0 213 if (d) {
michael@0 214 c += a_len;
michael@0 215 do {
michael@0 216 mp_digit sum = d + *c;
michael@0 217 *c++ = sum;
michael@0 218 d = sum < d;
michael@0 219 } while (d);
michael@0 220 }
michael@0 221 } else {
michael@0 222 v8_mpv_mul_d_add_prop(a, a_len, b, c);
michael@0 223 }
michael@0 224 }

mercurial