michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifdef SOLARIS michael@0: #define RF_INLINE_MACROS 1 michael@0: #endif michael@0: michael@0: static const double TwoTo16=65536.0; michael@0: static const double TwoToMinus16=1.0/65536.0; michael@0: static const double Zero=0.0; michael@0: static const double TwoTo32=65536.0*65536.0; michael@0: static const double TwoToMinus32=1.0/(65536.0*65536.0); michael@0: michael@0: #ifdef RF_INLINE_MACROS michael@0: michael@0: double upper32(double); michael@0: double lower32(double, double); michael@0: double mod(double, double, double); michael@0: michael@0: void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, michael@0: const double * /* 2^16*/, michael@0: const double * /* 0 */, michael@0: double * /*result16*/, michael@0: double * /* result32 */, michael@0: float * /*source - should be unsigned int* michael@0: converted to float* */); michael@0: michael@0: #else michael@0: #ifdef MP_USE_FLOOR michael@0: #include michael@0: #else michael@0: #define floor(d) ((double)((unsigned long long)(d))) michael@0: #endif michael@0: michael@0: static double upper32(double x) michael@0: { michael@0: return floor(x*TwoToMinus32); michael@0: } michael@0: michael@0: static double lower32(double x, double y) michael@0: { michael@0: return x-TwoTo32*floor(x*TwoToMinus32); michael@0: } michael@0: michael@0: static double mod(double x, double oneoverm, double m) michael@0: { michael@0: return x-m*floor(x*oneoverm); michael@0: } michael@0: michael@0: #endif michael@0: michael@0: michael@0: static void cleanup(double *dt, int from, int tlen) michael@0: { michael@0: int i; michael@0: double tmp,tmp1,x,x1; michael@0: michael@0: tmp=tmp1=Zero; michael@0: /* original code ** michael@0: for(i=2*from;i<2*tlen-2;i++) michael@0: { michael@0: x=dt[i]; michael@0: dt[i]=lower32(x,Zero)+tmp1; michael@0: tmp1=tmp; michael@0: tmp=upper32(x); michael@0: } michael@0: dt[tlen-2]+=tmp1; michael@0: dt[tlen-1]+=tmp; michael@0: **end original code ***/ michael@0: /* new code ***/ michael@0: for(i=2*from;i<2*tlen;i+=2) michael@0: { michael@0: x=dt[i]; michael@0: x1=dt[i+1]; michael@0: dt[i]=lower32(x,Zero)+tmp; michael@0: dt[i+1]=lower32(x1,Zero)+tmp1; michael@0: tmp=upper32(x); michael@0: tmp1=upper32(x1); michael@0: } michael@0: /** end new code **/ michael@0: } michael@0: michael@0: michael@0: void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) michael@0: { michael@0: int i; michael@0: long long t, t1, a, b, c, d; michael@0: michael@0: t1=0; michael@0: a=(long long)d16[0]; michael@0: b=(long long)d16[1]; michael@0: for(i=0; i>32); michael@0: d=(long long)d16[2*i+3]; michael@0: t1+=(b&0xffff)<<16; michael@0: t+=(b>>16)+(t1>>32); michael@0: i32[i]=(unsigned int)t1; michael@0: t1=t; michael@0: a=c; michael@0: b=d; michael@0: } michael@0: t1+=(unsigned int)a; michael@0: t=(a>>32); michael@0: t1+=(b&0xffff)<<16; michael@0: i32[i]=(unsigned int)t1; michael@0: } michael@0: michael@0: void conv_i32_to_d32(double *d32, unsigned int *i32, int len) michael@0: { michael@0: int i; michael@0: michael@0: #pragma pipeloop(0) michael@0: for(i=0;i>16); michael@0: } michael@0: } michael@0: michael@0: michael@0: void conv_i32_to_d32_and_d16(double *d32, double *d16, michael@0: unsigned int *i32, int len) michael@0: { michael@0: int i = 0; michael@0: unsigned int a; michael@0: michael@0: #pragma pipeloop(0) michael@0: #ifdef RF_INLINE_MACROS michael@0: for(;i>16); michael@0: } michael@0: } michael@0: michael@0: michael@0: void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) michael@0: { michael@0: long long acc; michael@0: int i; michael@0: michael@0: if(i32[len]>0) i=-1; michael@0: else michael@0: { michael@0: for(i=len-1; i>=0; i--) michael@0: { michael@0: if(i32[i]!=nint[i]) break; michael@0: } michael@0: } michael@0: if((i<0)||(i32[i]>nint[i])) michael@0: { michael@0: acc=0; michael@0: for(i=0;i>32; michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: michael@0: michael@0: /* michael@0: ** the lengths of the input arrays should be at least the following: michael@0: ** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] michael@0: ** all of them should be different from one another michael@0: ** michael@0: */ michael@0: void mont_mulf_noconv(unsigned int *result, michael@0: double *dm1, double *dm2, double *dt, michael@0: double *dn, unsigned int *nint, michael@0: int nlen, double dn0) michael@0: { michael@0: int i, j, jj; michael@0: int tmp; michael@0: double digit, m2j, nextm2j, a, b; michael@0: double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; michael@0: michael@0: pdm1=&(dm1[0]); michael@0: pdm2=&(dm2[0]); michael@0: pdn=&(dn[0]); michael@0: pdm2[2*nlen]=Zero; michael@0: michael@0: if (nlen!=16) michael@0: { michael@0: for(i=0;i<4*nlen+2;i++) dt[i]=Zero; michael@0: michael@0: a=dt[0]=pdm1[0]*pdm2[0]; michael@0: digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); michael@0: michael@0: pdtj=&(dt[0]); michael@0: for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) michael@0: { michael@0: m2j=pdm2[j]; michael@0: a=pdtj[0]+pdn[0]*digit; michael@0: b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; michael@0: pdtj[1]=b; michael@0: michael@0: #pragma pipeloop(0) michael@0: for(i=1;i