security/nss/lib/freebl/mpi/mpv_sparc.c

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:5157ef07327a
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "vis_proto.h"
6
7 /***************************************************************/
8
/* Fixed-width aliases used throughout this file. */
typedef signed int t_s32;
typedef unsigned int t_u32;

/*
 * 64-bit integers: plain `long` when __sparcv9 is defined, otherwise
 * `long long`.
 */
#ifdef __sparcv9
typedef signed long t_s64;
typedef unsigned long t_u64;
#else
typedef signed long long t_s64;
typedef unsigned long long t_u64;
#endif

/* Double-precision value; also used to carry two packed 32-bit words. */
typedef double t_d64;
19
20 /***************************************************************/
21
22 typedef union {
23 t_d64 d64;
24 struct {
25 t_s32 i0;
26 t_s32 i1;
27 } i32s;
28 } d64_2_i32;
29
30 /***************************************************************/
31
/* Element count of the scratch buffer used by the generic (looped) paths. */
#define BUFF_SIZE 256

/*
 * Multipliers are split at bit A_BITS: the low A_BITS bits are selected
 * with A_MASK, the remaining high bits with a right shift by A_BITS.
 */
#define A_BITS 19
#define A_MASK ((1 << (A_BITS)) - 1)
36
37 /***************************************************************/
38
39 static t_u64 mask_cnst[] = {
40 0x8000000080000000ull
41 };
42
43 /***************************************************************/
44
/*
 * DEF_VARS(N): declare the locals shared by the multiply/accumulate
 * macros below.  Expects `y` and `a` to be in scope at the expansion site.
 *
 *   py    - y viewed as an array of doubles (two 32-bit words per element)
 *   mask  - the mask_cnst bit pattern loaded as a double
 *   ca    - 2^31 - 1 as a double
 *   da    - the multiplier a as a double
 *   buff  - N-element 64-bit scratch array for the partial products
 *   s     - 64-bit accumulator used by the ADD_* macros
 *   dy    - scratch union for splitting a double into two 32-bit words
 *
 * The expansion deliberately ends without a semicolon; the caller adds it.
 */
#define DEF_VARS(N)                        \
    t_d64 *py = (t_d64 *)y;                \
    t_d64 mask = *((t_d64 *)mask_cnst);    \
    t_d64 ca = (1u << 31) - 1;             \
    t_d64 da = (t_d64)a;                   \
    t_s64 buff[N], s;                      \
    d64_2_i32 dy
52
53 /***************************************************************/
54
/*
 * MUL_U32_S64_2(i): take the i-th pair of 32-bit words of y (read as one
 * double through py), convert each word to a double and multiply it by
 * da, storing the two products in buff[2*i] and buff[2*i+1].
 *
 * NOTE(review): vis_fxnor comes from vis_proto.h, which is not visible
 * here.  Assuming it is a bitwise XNOR of its two 64-bit operands, XNOR
 * with 0x80000000 maps an unsigned word u to the signed value
 * 2^31 - 1 - u, so `ca - i0` yields u exactly as a double -- confirm
 * against the VIS documentation.
 */
#define MUL_U32_S64_2(i)                                         \
    dy.d64 = vis_fxnor(mask, py[(i)]);                           \
    buff[2 * (i)]     = (t_s64)((ca - (t_d64)dy.i32s.i0) * da);  \
    buff[2 * (i) + 1] = (t_s64)((ca - (t_d64)dy.i32s.i1) * da)

/*
 * MUL_U32_S64_2_D(i): same word-pair conversion, but each word is
 * multiplied by both da and db (the caller sets these to the low and high
 * parts of the multiplier).  Four products are stored per pair:
 *   buff[4*i]   = word0 * da      buff[4*i+1] = word0 * db
 *   buff[4*i+2] = word1 * da      buff[4*i+3] = word1 * db
 * Requires d0, d1 and db to be declared at the expansion site.
 */
#define MUL_U32_S64_2_D(i)                   \
    dy.d64 = vis_fxnor(mask, py[(i)]);       \
    d0 = ca - (t_d64)dy.i32s.i0;             \
    d1 = ca - (t_d64)dy.i32s.i1;             \
    buff[4 * (i)]     = (t_s64)(d0 * da);    \
    buff[4 * (i) + 1] = (t_s64)(d0 * db);    \
    buff[4 * (i) + 2] = (t_s64)(d1 * da);    \
    buff[4 * (i) + 3] = (t_s64)(d1 * db)
68
69 /***************************************************************/
70
/*
 * ADD_S64_U32(i): add the product buff[i], the addend x[i] and the running
 * carry c; store the low 32 bits in z[i] and keep the bits above bit 31 as
 * the new carry.  x[i] is read before z[i] is written, so z may alias x.
 * Requires s (64-bit) and c to be declared at the expansion site.
 */
#define ADD_S64_U32(i)             \
    s = buff[(i)] + x[(i)] + c;    \
    z[(i)] = s;                    \
    c = (s >> 32)

/*
 * ADD_S64_U32_D(i): as above, for a product held as two parts:
 * buff[2*i] is the low part and buff[2*i+1] the high part, which is
 * weighted by 2^A_BITS.  uc is the running unsigned carry.
 *
 * The sum is formed in t_u64: the shifted high part can exceed 2^63 - 1,
 * and shifting that far in a signed type is undefined behaviour.  The
 * result is stored back into the signed accumulator s so that existing
 * expansion sites keep working unchanged.
 */
#define ADD_S64_U32_D(i)                                         \
    s = (t_s64)((t_u64)buff[2 * (i)] +                           \
                ((t_u64)buff[2 * (i) + 1] << A_BITS) +           \
                x[(i)] + uc);                                    \
    z[(i)] = s;                                                  \
    uc = ((t_u64)s >> 32)
80
81 /***************************************************************/
82
/*
 * Unrolled multiply steps: four consecutive word pairs (eight 32-bit
 * words of y) starting at pair index i.
 */
#define MUL_U32_S64_8(i)        \
    MUL_U32_S64_2((i));         \
    MUL_U32_S64_2((i) + 1);     \
    MUL_U32_S64_2((i) + 2);     \
    MUL_U32_S64_2((i) + 3)

/* Same unrolling for the two-part (da/db) multiply step. */
#define MUL_U32_S64_D_8(i)      \
    MUL_U32_S64_2_D((i));       \
    MUL_U32_S64_2_D((i) + 1);   \
    MUL_U32_S64_2_D((i) + 2);   \
    MUL_U32_S64_2_D((i) + 3)
94
95 /***************************************************************/
96
/*
 * Unrolled accumulate steps: eight consecutive output words starting at
 * word index i, in increasing order so the carry propagates upward.
 */
#define ADD_S64_U32_8(i)        \
    ADD_S64_U32((i));           \
    ADD_S64_U32((i) + 1);       \
    ADD_S64_U32((i) + 2);       \
    ADD_S64_U32((i) + 3);       \
    ADD_S64_U32((i) + 4);       \
    ADD_S64_U32((i) + 5);       \
    ADD_S64_U32((i) + 6);       \
    ADD_S64_U32((i) + 7)

/* Same unrolling for the two-part accumulate step. */
#define ADD_S64_U32_D_8(i)      \
    ADD_S64_U32_D((i));         \
    ADD_S64_U32_D((i) + 1);     \
    ADD_S64_U32_D((i) + 2);     \
    ADD_S64_U32_D((i) + 3);     \
    ADD_S64_U32_D((i) + 4);     \
    ADD_S64_U32_D((i) + 5);     \
    ADD_S64_U32_D((i) + 6);     \
    ADD_S64_U32_D((i) + 7)
116
117 /***************************************************************/
118
119 t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
120 {
121 if (a < (1 << A_BITS)) {
122
123 if (n == 8) {
124 DEF_VARS(8);
125 t_s32 c = 0;
126
127 MUL_U32_S64_8(0);
128 ADD_S64_U32_8(0);
129
130 return c;
131
132 } else if (n == 16) {
133 DEF_VARS(16);
134 t_s32 c = 0;
135
136 MUL_U32_S64_8(0);
137 MUL_U32_S64_8(4);
138 ADD_S64_U32_8(0);
139 ADD_S64_U32_8(8);
140
141 return c;
142
143 } else {
144 DEF_VARS(BUFF_SIZE);
145 t_s32 i, c = 0;
146
147 #pragma pipeloop(0)
148 for (i = 0; i < (n+1)/2; i ++) {
149 MUL_U32_S64_2(i);
150 }
151
152 #pragma pipeloop(0)
153 for (i = 0; i < n; i ++) {
154 ADD_S64_U32(i);
155 }
156
157 return c;
158
159 }
160 } else {
161
162 if (n == 8) {
163 DEF_VARS(2*8);
164 t_d64 d0, d1, db;
165 t_u32 uc = 0;
166
167 da = (t_d64)(a & A_MASK);
168 db = (t_d64)(a >> A_BITS);
169
170 MUL_U32_S64_D_8(0);
171 ADD_S64_U32_D_8(0);
172
173 return uc;
174
175 } else if (n == 16) {
176 DEF_VARS(2*16);
177 t_d64 d0, d1, db;
178 t_u32 uc = 0;
179
180 da = (t_d64)(a & A_MASK);
181 db = (t_d64)(a >> A_BITS);
182
183 MUL_U32_S64_D_8(0);
184 MUL_U32_S64_D_8(4);
185 ADD_S64_U32_D_8(0);
186 ADD_S64_U32_D_8(8);
187
188 return uc;
189
190 } else {
191 DEF_VARS(2*BUFF_SIZE);
192 t_d64 d0, d1, db;
193 t_u32 i, uc = 0;
194
195 da = (t_d64)(a & A_MASK);
196 db = (t_d64)(a >> A_BITS);
197
198 #pragma pipeloop(0)
199 for (i = 0; i < (n+1)/2; i ++) {
200 MUL_U32_S64_2_D(i);
201 }
202
203 #pragma pipeloop(0)
204 for (i = 0; i < n; i ++) {
205 ADD_S64_U32_D(i);
206 }
207
208 return uc;
209 }
210 }
211 }
212
213 /***************************************************************/
214
215 t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
216 {
217 return mul_add(x, x, y, n, a);
218 }
219
220 /***************************************************************/

mercurial