Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 3 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 4 | |
michael@0 | 5 | #ifdef SOLARIS |
michael@0 | 6 | #define RF_INLINE_MACROS 1 |
michael@0 | 7 | #endif |
michael@0 | 8 | |
michael@0 | 9 | static const double TwoTo16=65536.0; |
michael@0 | 10 | static const double TwoToMinus16=1.0/65536.0; |
michael@0 | 11 | static const double Zero=0.0; |
michael@0 | 12 | static const double TwoTo32=65536.0*65536.0; |
michael@0 | 13 | static const double TwoToMinus32=1.0/(65536.0*65536.0); |
michael@0 | 14 | |
michael@0 | 15 | #ifdef RF_INLINE_MACROS |
michael@0 | 16 | |
michael@0 | 17 | double upper32(double); |
michael@0 | 18 | double lower32(double, double); |
michael@0 | 19 | double mod(double, double, double); |
michael@0 | 20 | |
michael@0 | 21 | void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, |
michael@0 | 22 | const double * /* 2^16*/, |
michael@0 | 23 | const double * /* 0 */, |
michael@0 | 24 | double * /*result16*/, |
michael@0 | 25 | double * /* result32 */, |
michael@0 | 26 | float * /*source - should be unsigned int* |
michael@0 | 27 | converted to float* */); |
michael@0 | 28 | |
michael@0 | 29 | #else |
michael@0 | 30 | #ifdef MP_USE_FLOOR |
michael@0 | 31 | #include <math.h> |
michael@0 | 32 | #else |
michael@0 | 33 | #define floor(d) ((double)((unsigned long long)(d))) |
michael@0 | 34 | #endif |
michael@0 | 35 | |
michael@0 | 36 | static double upper32(double x) |
michael@0 | 37 | { |
michael@0 | 38 | return floor(x*TwoToMinus32); |
michael@0 | 39 | } |
michael@0 | 40 | |
michael@0 | 41 | static double lower32(double x, double y) |
michael@0 | 42 | { |
michael@0 | 43 | return x-TwoTo32*floor(x*TwoToMinus32); |
michael@0 | 44 | } |
michael@0 | 45 | |
michael@0 | 46 | static double mod(double x, double oneoverm, double m) |
michael@0 | 47 | { |
michael@0 | 48 | return x-m*floor(x*oneoverm); |
michael@0 | 49 | } |
michael@0 | 50 | |
michael@0 | 51 | #endif |
michael@0 | 52 | |
michael@0 | 53 | |
michael@0 | 54 | static void cleanup(double *dt, int from, int tlen) |
michael@0 | 55 | { |
michael@0 | 56 | int i; |
michael@0 | 57 | double tmp,tmp1,x,x1; |
michael@0 | 58 | |
michael@0 | 59 | tmp=tmp1=Zero; |
michael@0 | 60 | /* original code ** |
michael@0 | 61 | for(i=2*from;i<2*tlen-2;i++) |
michael@0 | 62 | { |
michael@0 | 63 | x=dt[i]; |
michael@0 | 64 | dt[i]=lower32(x,Zero)+tmp1; |
michael@0 | 65 | tmp1=tmp; |
michael@0 | 66 | tmp=upper32(x); |
michael@0 | 67 | } |
michael@0 | 68 | dt[tlen-2]+=tmp1; |
michael@0 | 69 | dt[tlen-1]+=tmp; |
michael@0 | 70 | **end original code ***/ |
michael@0 | 71 | /* new code ***/ |
michael@0 | 72 | for(i=2*from;i<2*tlen;i+=2) |
michael@0 | 73 | { |
michael@0 | 74 | x=dt[i]; |
michael@0 | 75 | x1=dt[i+1]; |
michael@0 | 76 | dt[i]=lower32(x,Zero)+tmp; |
michael@0 | 77 | dt[i+1]=lower32(x1,Zero)+tmp1; |
michael@0 | 78 | tmp=upper32(x); |
michael@0 | 79 | tmp1=upper32(x1); |
michael@0 | 80 | } |
michael@0 | 81 | /** end new code **/ |
michael@0 | 82 | } |
michael@0 | 83 | |
michael@0 | 84 | |
michael@0 | 85 | void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) |
michael@0 | 86 | { |
michael@0 | 87 | int i; |
michael@0 | 88 | long long t, t1, a, b, c, d; |
michael@0 | 89 | |
michael@0 | 90 | t1=0; |
michael@0 | 91 | a=(long long)d16[0]; |
michael@0 | 92 | b=(long long)d16[1]; |
michael@0 | 93 | for(i=0; i<ilen-1; i++) |
michael@0 | 94 | { |
michael@0 | 95 | c=(long long)d16[2*i+2]; |
michael@0 | 96 | t1+=(unsigned int)a; |
michael@0 | 97 | t=(a>>32); |
michael@0 | 98 | d=(long long)d16[2*i+3]; |
michael@0 | 99 | t1+=(b&0xffff)<<16; |
michael@0 | 100 | t+=(b>>16)+(t1>>32); |
michael@0 | 101 | i32[i]=(unsigned int)t1; |
michael@0 | 102 | t1=t; |
michael@0 | 103 | a=c; |
michael@0 | 104 | b=d; |
michael@0 | 105 | } |
michael@0 | 106 | t1+=(unsigned int)a; |
michael@0 | 107 | t=(a>>32); |
michael@0 | 108 | t1+=(b&0xffff)<<16; |
michael@0 | 109 | i32[i]=(unsigned int)t1; |
michael@0 | 110 | } |
michael@0 | 111 | |
michael@0 | 112 | void conv_i32_to_d32(double *d32, unsigned int *i32, int len) |
michael@0 | 113 | { |
michael@0 | 114 | int i; |
michael@0 | 115 | |
michael@0 | 116 | #pragma pipeloop(0) |
michael@0 | 117 | for(i=0;i<len;i++) d32[i]=(double)(i32[i]); |
michael@0 | 118 | } |
michael@0 | 119 | |
michael@0 | 120 | |
michael@0 | 121 | void conv_i32_to_d16(double *d16, unsigned int *i32, int len) |
michael@0 | 122 | { |
michael@0 | 123 | int i; |
michael@0 | 124 | unsigned int a; |
michael@0 | 125 | |
michael@0 | 126 | #pragma pipeloop(0) |
michael@0 | 127 | for(i=0;i<len;i++) |
michael@0 | 128 | { |
michael@0 | 129 | a=i32[i]; |
michael@0 | 130 | d16[2*i]=(double)(a&0xffff); |
michael@0 | 131 | d16[2*i+1]=(double)(a>>16); |
michael@0 | 132 | } |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | |
michael@0 | 136 | void conv_i32_to_d32_and_d16(double *d32, double *d16, |
michael@0 | 137 | unsigned int *i32, int len) |
michael@0 | 138 | { |
michael@0 | 139 | int i = 0; |
michael@0 | 140 | unsigned int a; |
michael@0 | 141 | |
michael@0 | 142 | #pragma pipeloop(0) |
michael@0 | 143 | #ifdef RF_INLINE_MACROS |
michael@0 | 144 | for(;i<len-3;i+=4) |
michael@0 | 145 | { |
michael@0 | 146 | i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, |
michael@0 | 147 | &(d16[2*i]), &(d32[i]), (float *)(&(i32[i]))); |
michael@0 | 148 | } |
michael@0 | 149 | #endif |
michael@0 | 150 | for(;i<len;i++) |
michael@0 | 151 | { |
michael@0 | 152 | a=i32[i]; |
michael@0 | 153 | d32[i]=(double)(i32[i]); |
michael@0 | 154 | d16[2*i]=(double)(a&0xffff); |
michael@0 | 155 | d16[2*i+1]=(double)(a>>16); |
michael@0 | 156 | } |
michael@0 | 157 | } |
michael@0 | 158 | |
michael@0 | 159 | |
michael@0 | 160 | void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) |
michael@0 | 161 | { |
michael@0 | 162 | long long acc; |
michael@0 | 163 | int i; |
michael@0 | 164 | |
michael@0 | 165 | if(i32[len]>0) i=-1; |
michael@0 | 166 | else |
michael@0 | 167 | { |
michael@0 | 168 | for(i=len-1; i>=0; i--) |
michael@0 | 169 | { |
michael@0 | 170 | if(i32[i]!=nint[i]) break; |
michael@0 | 171 | } |
michael@0 | 172 | } |
michael@0 | 173 | if((i<0)||(i32[i]>nint[i])) |
michael@0 | 174 | { |
michael@0 | 175 | acc=0; |
michael@0 | 176 | for(i=0;i<len;i++) |
michael@0 | 177 | { |
michael@0 | 178 | acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]); |
michael@0 | 179 | i32[i]=(unsigned int)acc; |
michael@0 | 180 | acc=acc>>32; |
michael@0 | 181 | } |
michael@0 | 182 | } |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | |
michael@0 | 186 | |
michael@0 | 187 | |
michael@0 | 188 | /* |
michael@0 | 189 | ** the lengths of the input arrays should be at least the following: |
michael@0 | 190 | ** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] |
michael@0 | 191 | ** all of them should be different from one another |
michael@0 | 192 | ** |
michael@0 | 193 | */ |
michael@0 | 194 | void mont_mulf_noconv(unsigned int *result, |
michael@0 | 195 | double *dm1, double *dm2, double *dt, |
michael@0 | 196 | double *dn, unsigned int *nint, |
michael@0 | 197 | int nlen, double dn0) |
michael@0 | 198 | { |
michael@0 | 199 | int i, j, jj; |
michael@0 | 200 | int tmp; |
michael@0 | 201 | double digit, m2j, nextm2j, a, b; |
michael@0 | 202 | double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; |
michael@0 | 203 | |
michael@0 | 204 | pdm1=&(dm1[0]); |
michael@0 | 205 | pdm2=&(dm2[0]); |
michael@0 | 206 | pdn=&(dn[0]); |
michael@0 | 207 | pdm2[2*nlen]=Zero; |
michael@0 | 208 | |
michael@0 | 209 | if (nlen!=16) |
michael@0 | 210 | { |
michael@0 | 211 | for(i=0;i<4*nlen+2;i++) dt[i]=Zero; |
michael@0 | 212 | |
michael@0 | 213 | a=dt[0]=pdm1[0]*pdm2[0]; |
michael@0 | 214 | digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); |
michael@0 | 215 | |
michael@0 | 216 | pdtj=&(dt[0]); |
michael@0 | 217 | for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) |
michael@0 | 218 | { |
michael@0 | 219 | m2j=pdm2[j]; |
michael@0 | 220 | a=pdtj[0]+pdn[0]*digit; |
michael@0 | 221 | b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; |
michael@0 | 222 | pdtj[1]=b; |
michael@0 | 223 | |
michael@0 | 224 | #pragma pipeloop(0) |
michael@0 | 225 | for(i=1;i<nlen;i++) |
michael@0 | 226 | { |
michael@0 | 227 | pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit; |
michael@0 | 228 | } |
michael@0 | 229 | if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;} |
michael@0 | 230 | |
michael@0 | 231 | digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16); |
michael@0 | 232 | } |
michael@0 | 233 | } |
michael@0 | 234 | else |
michael@0 | 235 | { |
michael@0 | 236 | a=dt[0]=pdm1[0]*pdm2[0]; |
michael@0 | 237 | |
michael@0 | 238 | dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]= |
michael@0 | 239 | dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]= |
michael@0 | 240 | dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]= |
michael@0 | 241 | dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]= |
michael@0 | 242 | dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]= |
michael@0 | 243 | dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]= |
michael@0 | 244 | dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]= |
michael@0 | 245 | dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]= |
michael@0 | 246 | dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]= |
michael@0 | 247 | dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]= |
michael@0 | 248 | dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero; |
michael@0 | 249 | |
michael@0 | 250 | pdn_0=pdn[0]; |
michael@0 | 251 | pdm1_0=pdm1[0]; |
michael@0 | 252 | |
michael@0 | 253 | digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); |
michael@0 | 254 | pdtj=&(dt[0]); |
michael@0 | 255 | |
michael@0 | 256 | for(j=0;j<32;j++,pdtj++) |
michael@0 | 257 | { |
michael@0 | 258 | |
michael@0 | 259 | m2j=pdm2[j]; |
michael@0 | 260 | a=pdtj[0]+pdn_0*digit; |
michael@0 | 261 | b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16; |
michael@0 | 262 | pdtj[1]=b; |
michael@0 | 263 | |
michael@0 | 264 | /**** this loop will be fully unrolled: |
michael@0 | 265 | for(i=1;i<16;i++) |
michael@0 | 266 | { |
michael@0 | 267 | pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit; |
michael@0 | 268 | } |
michael@0 | 269 | *************************************/ |
michael@0 | 270 | pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit; |
michael@0 | 271 | pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit; |
michael@0 | 272 | pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit; |
michael@0 | 273 | pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit; |
michael@0 | 274 | pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit; |
michael@0 | 275 | pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit; |
michael@0 | 276 | pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit; |
michael@0 | 277 | pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit; |
michael@0 | 278 | pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit; |
michael@0 | 279 | pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit; |
michael@0 | 280 | pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit; |
michael@0 | 281 | pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit; |
michael@0 | 282 | pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit; |
michael@0 | 283 | pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit; |
michael@0 | 284 | pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit; |
michael@0 | 285 | /* no need for cleenup, cannot overflow */ |
michael@0 | 286 | digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16); |
michael@0 | 287 | } |
michael@0 | 288 | } |
michael@0 | 289 | |
michael@0 | 290 | conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1); |
michael@0 | 291 | |
michael@0 | 292 | adjust_montf_result(result,nint,nlen); |
michael@0 | 293 | |
michael@0 | 294 | } |
michael@0 | 295 |