Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "mpi-priv.h"
6 #include <c_asm.h>
/*
 * MP_MUL_DxD(a, b, Phi, Plo)
 *
 * Multiply the two digits a and b.  The "mulq" result (low half of the
 * double-width product) is stored in Plo and the "umulh" result (high half)
 * is stored in Phi.  asm() here is the intrinsic made available by
 * <c_asm.h>, not GNU-style inline assembly.
 *
 * Notes:
 *  - a and b are each expanded twice, so they must be free of side effects.
 *  - Plo is written before Phi; do not pass an lvalue that aliases a or b.
 *  - The body is wrapped in do { } while (0) so that the macro followed by
 *    a semicolon is exactly one statement (safe inside if/else), and the
 *    definition does not end with a line continuation.
 */
#define MP_MUL_DxD(a, b, Phi, Plo)                          \
    do {                                                    \
        (Plo) = asm("mulq %a0, %a1, %v0", (a), (b));        \
        (Phi) = asm("umulh %a0, %a1, %v0", (a), (b));       \
    } while (0)
/* This is empty for the loop in s_mpv_mul_d */
#define CARRY_ADD

/*
 * ONE_MUL: process a single digit of the multiplicand.
 *
 * Expects these names in the expansion scope: a (digit source pointer),
 * b (multiplier digit), c (destination pointer), carry, and the scratch
 * variables a_i, a0b0 and a1b1.
 *
 * Steps: fetch *a (advancing a), form the double-width product a_i * b,
 * add the incoming carry to the low half and propagate any overflow into
 * the high half, run the CARRY_ADD hook (whatever CARRY_ADD is defined as
 * at the point of expansion), store the low half through c (advancing c)
 * and keep the high half as the next carry.
 *
 * The definition deliberately ends without a trailing line continuation,
 * so whatever follows it in the file is never absorbed into the macro.
 */
#define ONE_MUL                          \
    a_i = *a++;                          \
    MP_MUL_DxD(a_i, b, a1b1, a0b0);      \
    a0b0 += carry;                       \
    if (a0b0 < carry)                    \
        ++a1b1;                          \
    CARRY_ADD                            \
    *c++ = a0b0;                         \
    carry = a1b1;
/*
 * Fixed-length unrolled runs of ONE_MUL.  Each macro simply repeats the
 * next smaller one, giving 4, 16, 32 and 128 consecutive ONE_MUL steps.
 * None of the definitions ends with a line continuation, so each #define
 * is a separate directive regardless of what line follows it.
 */
#define FOUR_MUL \
    ONE_MUL      \
    ONE_MUL      \
    ONE_MUL      \
    ONE_MUL

#define SIXTEEN_MUL \
    FOUR_MUL        \
    FOUR_MUL        \
    FOUR_MUL        \
    FOUR_MUL

#define THIRTYTWO_MUL \
    SIXTEEN_MUL       \
    SIXTEEN_MUL

#define ONETWENTYEIGHT_MUL \
    THIRTYTWO_MUL          \
    THIRTYTWO_MUL          \
    THIRTYTWO_MUL          \
    THIRTYTWO_MUL
/*
 * EXPAND_256(CALL): body of a "multiply vector by digit" routine.
 *
 * Must be expanded at the top of a function that has a, a_len, b and c in
 * scope.  It declares carry (initialised to 0) plus the scratch variables
 * a_i, a0b0 and a1b1 used by ONE_MUL.
 *
 * The low eight bits of a_len select unrolled runs of 1, 2, 4, 8, 16, 32,
 * 64 and 128 ONE_MUL steps, which together consume a_len % 256 digits.
 * a_len is then reduced to the remaining multiple of 256 and, if any
 * digits are left, they are handed to CALL(a, a_len, b, c, carry), whose
 * return value becomes the new carry; c is advanced past the digits CALL
 * processed.  On exit the final carry is left in carry for the caller.
 *
 * The definition does not end with a line continuation, so the directive
 * that follows it in the file is not absorbed into the macro body.
 */
#define EXPAND_256(CALL)                          \
    mp_digit carry = 0;                           \
    mp_digit a_i;                                 \
    mp_digit a0b0, a1b1;                          \
    if (a_len & 255) {                            \
        if (a_len & 1) {                          \
            ONE_MUL                               \
        }                                         \
        if (a_len & 2) {                          \
            ONE_MUL                               \
            ONE_MUL                               \
        }                                         \
        if (a_len & 4) {                          \
            FOUR_MUL                              \
        }                                         \
        if (a_len & 8) {                          \
            FOUR_MUL                              \
            FOUR_MUL                              \
        }                                         \
        if (a_len & 16) {                         \
            SIXTEEN_MUL                           \
        }                                         \
        if (a_len & 32) {                         \
            THIRTYTWO_MUL                         \
        }                                         \
        if (a_len & 64) {                         \
            THIRTYTWO_MUL                         \
            THIRTYTWO_MUL                         \
        }                                         \
        if (a_len & 128) {                        \
            ONETWENTYEIGHT_MUL                    \
        }                                         \
        /* drop the digits handled above */       \
        a_len -= a_len & 255;                     \
    }                                             \
    if (a_len >= 256) {                           \
        carry = CALL(a, a_len, b, c, carry);      \
        c += a_len;                               \
    }
/*
 * FUNC_NAME(NAME): declarator shared by the 256-digit helper routines.
 *
 * Expands to "mp_digit NAME(a, a_len, b, c, carry)" with no terminator, so
 * it can be followed by ';' to form a prototype or by a '{ ... }' body to
 * form a definition.  The parameter names are fixed because the ONE_MUL
 * family of macros refers to them directly.
 *
 * The definition does not end with a line continuation, so the directive
 * that follows it in the file is not absorbed into the macro body.
 */
#define FUNC_NAME(NAME)                                \
    mp_digit NAME(const mp_digit *a, mp_size a_len,    \
                  mp_digit b, mp_digit *c,             \
                  mp_digit carry)
/*
 * DECLARE_MUL_256(FNAME): define the routine FNAME with the FUNC_NAME
 * signature.
 *
 * The generated function consumes the input in chunks of 256 digits (two
 * ONETWENTYEIGHT_MUL runs per pass), starting from the carry passed in,
 * and returns the carry left after the last chunk.  What each step does
 * depends on how CARRY_ADD is defined where this macro is expanded.
 *
 * Precondition: a_len must be a multiple of 256.  The loop only stops when
 * a_len reaches exactly zero; EXPAND_256 guarantees this for its callee.
 *
 * The definition does not end with a line continuation, so the text that
 * follows it in the file is not absorbed into the macro body.
 */
#define DECLARE_MUL_256(FNAME)        \
    FUNC_NAME(FNAME)                  \
    {                                 \
        mp_digit a_i;                 \
        mp_digit a0b0, a1b1;          \
        while (a_len) {               \
            ONETWENTYEIGHT_MUL        \
            ONETWENTYEIGHT_MUL        \
            a_len -= 256;             \
        }                             \
        return carry;                 \
    }
/*
 * Unrolling the loop in s_mpv_mul_d appeared to slow down the (admittedly
 * small) set of tests, i.e. timetest, used to measure performance, so this
 * define switches that optimization off.
 */
#define DO_NOT_EXPAND 1

/*
 * The 256-digit routine is declared ahead of time so that it can be
 * instantiated after the routine that uses it; this helps locality
 * somewhat.
 */
#ifndef DO_NOT_EXPAND
FUNC_NAME(s_mpv_mul_d_MUL256);
#endif
118 /* c = a * b */
119 void s_mpv_mul_d(const mp_digit *a, mp_size a_len,
120 mp_digit b, mp_digit *c)
121 {
122 #if defined(DO_NOT_EXPAND)
123 mp_digit carry = 0;
124 while (a_len--) {
125 mp_digit a_i = *a++;
126 mp_digit a0b0, a1b1;
128 MP_MUL_DxD(a_i, b, a1b1, a0b0);
130 a0b0 += carry;
131 if (a0b0 < carry)
132 ++a1b1;
133 *c++ = a0b0;
134 carry = a1b1;
135 }
136 #else
137 EXPAND_256(s_mpv_mul_d_MUL256)
138 #endif
139 *c = carry;
140 }
142 #if !defined(DO_NOT_EXPAND)
143 DECLARE_MUL_256(s_mpv_mul_d_MUL256)
144 #endif
#undef CARRY_ADD
/*
 * This is redefined for the loop in s_mpv_mul_d_add.
 *
 * Adds the digit currently stored at *c to the low half of the product
 * (a0b0) and bumps the high half (a1b1) if that addition overflowed.
 * a_i is reused as scratch for the loaded digit; c itself is not advanced.
 *
 * The load and the addition are separate statements, and the definition
 * does not end with a line continuation.
 */
#define CARRY_ADD          \
    a_i = *c;              \
    a0b0 += a_i;           \
    if (a0b0 < a_i)        \
        ++a1b1;
153 /* Need forward declaration so it can be instantiated between the
154 two routines that use it; this helps locality somewhat */
155 FUNC_NAME(s_mpv_mul_d_add_MUL256);
157 /* c += a * b */
158 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
159 mp_digit b, mp_digit *c)
160 {
161 EXPAND_256(s_mpv_mul_d_add_MUL256)
162 *c = carry;
163 }
165 /* Instantiate multiply 256 routine here */
166 DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
168 /* Presently, this is only used by the Montgomery arithmetic code. */
169 /* c += a * b */
170 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
171 mp_digit b, mp_digit *c)
172 {
173 EXPAND_256(s_mpv_mul_d_add_MUL256)
174 while (carry) {
175 mp_digit c_i = *c;
176 carry += c_i;
177 *c++ = carry;
178 carry = carry < c_i;
179 }
180 }