security/nss/lib/freebl/mpi/montmulfv8.s

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/montmulfv8.s	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1818 @@
     1.4 +!  
     1.5 +! This Source Code Form is subject to the terms of the Mozilla Public
     1.6 +! License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 +! file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.8 +
     1.9 +	.section	".text",#alloc,#execinstr
    1.10 +	.file	"montmulf.c"
    1.11 +
    1.12 +	.section	".rodata",#alloc
    1.13 +	.global	TwoTo16
    1.14 +	.align	8
    1.15 +! Read-only IEEE-754 double constants; each is two 32-bit words, high word first.
    1.16 +! CONSTANT POOL: TwoTo16 = 65536.0 (2^16)
    1.17 +!
    1.18 +	.global TwoTo16
    1.19 +TwoTo16:
    1.20 +	.word	1089470464	! 0x40F00000: high word of 65536.0
    1.21 +	.word	0	! low word
    1.22 +	.type	TwoTo16,#object
    1.23 +	.size	TwoTo16,8
    1.24 +	.global	TwoToMinus16
    1.25 +!
    1.26 +! CONSTANT POOL: TwoToMinus16 = 1.0/65536.0 (2^-16)
    1.27 +!
    1.28 +	.global TwoToMinus16
    1.29 +TwoToMinus16:
    1.30 +	.word	1055916032	! 0x3EF00000: high word of 1.0/65536.0
    1.31 +	.word	0	! low word
    1.32 +	.type	TwoToMinus16,#object
    1.33 +	.size	TwoToMinus16,8
    1.34 +	.global	Zero
    1.35 +!
    1.36 +! CONSTANT POOL: Zero = 0.0
    1.37 +!
    1.38 +	.global Zero
    1.39 +Zero:
    1.40 +	.word	0	! high word of 0.0
    1.41 +	.word	0	! low word
    1.42 +	.type	Zero,#object
    1.43 +	.size	Zero,8
    1.44 +	.global	TwoTo32
    1.45 +!
    1.46 +! CONSTANT POOL: TwoTo32 = 65536.0*65536.0 (2^32)
    1.47 +!
    1.48 +	.global TwoTo32
    1.49 +TwoTo32:
    1.50 +	.word	1106247680	! 0x41F00000: high word of 2^32
    1.51 +	.word	0	! low word
    1.52 +	.type	TwoTo32,#object
    1.53 +	.size	TwoTo32,8
    1.54 +	.global	TwoToMinus32
    1.55 +!
    1.56 +! CONSTANT POOL: TwoToMinus32 = 1.0/(65536.0*65536.0) (2^-32)
    1.57 +!
    1.58 +	.global TwoToMinus32
    1.59 +TwoToMinus32:
    1.60 +	.word	1039138816	! 0x3DF00000: high word of 2^-32
    1.61 +	.word	0	! low word
    1.62 +	.type	TwoToMinus32,#object
    1.63 +	.size	TwoToMinus32,8
    1.64 +
    1.65 +	.section	".text",#alloc,#execinstr
    1.66 +/* 000000	   0 */		.align	4
    1.67 +! void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
    1.68 +! SUBROUTINE conv_d16_to_i32
    1.69 +! In: %i0 = i32 (output), %i1 = d16 (input), %i3 = ilen; %i2 (tmp) is never referenced here.
    1.70 +! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
    1.71 +
    1.72 +                       	.global conv_d16_to_i32
    1.73 +                       conv_d16_to_i32:
    1.74 +/* 000000	     */		save	%sp,-128,%sp	! new register window with a 128-byte frame
    1.75 +! FILE montmulf.c
    1.76 +
    1.77 +!   36		      !#define RF_INLINE_MACROS
    1.78 +!   38		      !static const double TwoTo16=65536.0;
    1.79 +!   39		      !static const double TwoToMinus16=1.0/65536.0;
    1.80 +!   40		      !static const double Zero=0.0;
    1.81 +!   41		      !static const double TwoTo32=65536.0*65536.0;
    1.82 +!   42		      !static const double TwoToMinus32=1.0/(65536.0*65536.0);
    1.83 +!   44		      !#ifdef RF_INLINE_MACROS
    1.84 +!   46		      !double upper32(double);
    1.85 +!   47		      !double lower32(double, double);
    1.86 +!   48		      !double mod(double, double, double);
    1.87 +!   50		      !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, 
    1.88 +!   51		      !			  const double * /* 2^16*/,
    1.89 +!   52		      !			  const double * /* 0 */,
    1.90 +!   53		      !			  double *       /*result16*/, 
    1.91 +!   54		      !			  double *       /* result32 */,
    1.92 +!   55		      !			  float *  /*source - should be unsigned int*
    1.93 +!   56		      !		          	       converted to float* */);
    1.94 +!   58		      !#else
    1.95 +!   60		      !static double upper32(double x)
    1.96 +!   61		      !{
    1.97 +!   62		      !  return floor(x*TwoToMinus32);
    1.98 +!   63		      !}
    1.99 +!   65		      !static double lower32(double x, double y)
   1.100 +!   66		      !{
   1.101 +!   67		      !  return x-TwoTo32*floor(x*TwoToMinus32);
   1.102 +!   68		      !}
   1.103 +!   70		      !static double mod(double x, double oneoverm, double m)
   1.104 +!   71		      !{
   1.105 +!   72		      !  return x-m*floor(x*oneoverm);
   1.106 +!   73		      !}
   1.107 +!   75		      !#endif
   1.108 +!   78		      !static void cleanup(double *dt, int from, int tlen)
   1.109 +!   79		      !{
   1.110 +!   80		      ! int i;
   1.111 +!   81		      ! double tmp,tmp1,x,x1;
   1.112 +!   83		      ! tmp=tmp1=Zero;
   1.113 +!   84		      ! /* original code **
   1.114 +!   85		      ! for(i=2*from;i<2*tlen-2;i++)
   1.115 +!   86		      !   {
   1.116 +!   87		      !     x=dt[i];
   1.117 +!   88		      !     dt[i]=lower32(x,Zero)+tmp1;
   1.118 +!   89		      !     tmp1=tmp;
   1.119 +!   90		      !     tmp=upper32(x);
   1.120 +!   91		      !   }
   1.121 +!   92		      ! dt[tlen-2]+=tmp1;
   1.122 +!   93		      ! dt[tlen-1]+=tmp;
   1.123 +!   94		      ! **end original code ***/
   1.124 +!   95		      ! /* new code ***/
   1.125 +!   96		      ! for(i=2*from;i<2*tlen;i+=2)
   1.126 +!   97		      !   {
   1.127 +!   98		      !     x=dt[i];
   1.128 +!   99		      !     x1=dt[i+1];
   1.129 +!  100		      !     dt[i]=lower32(x,Zero)+tmp;
   1.130 +!  101		      !     dt[i+1]=lower32(x1,Zero)+tmp1;
   1.131 +!  102		      !     tmp=upper32(x);
   1.132 +!  103		      !     tmp1=upper32(x1);
   1.133 +!  104		      !   }
   1.134 +!  105		      !  /** end new code **/
   1.135 +!  106		      !}
   1.136 +!  109		      !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
   1.137 +!  110		      !{
   1.138 +!  111		      !int i;
   1.139 +!  112		      !long long t, t1, a, b, c, d;
   1.140 +!  114		      ! t1=0;
   1.141 +!  115		      ! a=(long long)d16[0];
   1.142 +
   1.143 +/* 0x0004	 115 */		ldd	[%i1],%f0	! %f0 = d16[0]
   1.144 +/* 0x0008	 110 */		or	%g0,%i1,%o0	! %o0 = d16
   1.145 +
   1.146 +!  116		      ! b=(long long)d16[1];
   1.147 +!  117		      ! for(i=0; i<ilen-1; i++)
   1.148 +
   1.149 +/* 0x000c	 117 */		sub	%i3,1,%g2	! %g2 = ilen-1
   1.150 +/* 0x0010	     */		cmp	%g2,0	! flags for the loop-entry test; consumed by the ble below
   1.151 +/* 0x0014	 114 */		or	%g0,0,%o4	! t1 = 0
   1.152 +/* 0x0018	 115 */		fdtox	%f0,%f0	! a = (long long)d16[0]
   1.153 +/* 0x001c	     */		std	%f0,[%sp+120]	! spill a; it is reloaded into an integer register below
   1.154 +/* 0x0020	 117 */		or	%g0,0,%o7	! i = 0
   1.155 +/* 0x0024	 110 */		or	%g0,%i3,%o1	! %o1 = ilen (overwritten by the sethi below before any use)
   1.156 +/* 0x0028	     */		sub	%i3,2,%o2	! %o2 = ilen-2, the last loop index
   1.157 +/* 0x002c	 116 */		ldd	[%o0+8],%f0	! %f0 = d16[1]
   1.158 +/* 0x0030	 110 */		sethi	%hi(0xfc00),%o1	! %o1 = 0xfc00
   1.159 +/* 0x0034	     */		add	%o2,1,%g3	! %g3 = ilen-1, the loop trip count
   1.160 +/* 0x0038	     */		add	%o1,1023,%o1	! %o1 = 0xffff mask
   1.161 +/* 0x003c	     */		or	%g0,%i0,%o5	! %o5 = output cursor into i32
   1.162 +/* 0x0040	 116 */		fdtox	%f0,%f0	! b = (long long)d16[1]
   1.163 +/* 0x0044	     */		std	%f0,[%sp+112]
   1.164 +/* 0x0048	     */		ldx	[%sp+112],%g1	! %g1 = b
   1.165 +/* 0x004c	 115 */		ldx	[%sp+120],%g4	! %g4 = a
   1.166 +/* 0x0050	 117 */		ble,pt	%icc,.L900000117	! ilen-1 <= 0: no loop iterations, go to the final store
   1.167 +/* 0x0054	     */		sethi	%hi(0xfc00),%g2	! (delay slot) %g2 = 0xfc00 for the epilogue mask
   1.168 +/* 0x0058	 110 */		or	%g0,-1,%g2
   1.169 +/* 0x005c	 117 */		cmp	%g3,3	! fewer than 3 iterations?
   1.170 +/* 0x0060	 110 */		srl	%g2,0,%o3	! %o3 = 0xffffffff mask
   1.171 +/* 0x0064	 117 */		bl,pn	%icc,.L77000134	! yes: take the simple one-iteration-at-a-time loop
   1.172 +/* 0x0068	     */		or	%g0,%o0,%g2	! (delay slot) %g2 = d16 cursor
   1.173 +
   1.174 +!  118		      !   {
   1.175 +!  119		      !     c=(long long)d16[2*i+2];
   1.176 +
   1.177 +/* 0x006c	 119 */		ldd	[%o0+16],%f0	! peeled first iteration: %f0 = d16[2]
   1.178 +
   1.179 +!  120		      !     t1+=a&0xffffffff;
   1.180 +!  121		      !     t=(a>>32);
   1.181 +!  122		      !     d=(long long)d16[2*i+3];
   1.182 +!  123		      !     t1+=(b&0xffff)<<16;
   1.183 +!  124		      !     t+=(b>>16)+(t1>>32);
   1.184 +!  125		      !     i32[i]=t1&0xffffffff;
   1.185 +!  126		      !     t1=t;
   1.186 +!  127		      !     a=c;
   1.187 +!  128		      !     b=d;
   1.188 +
   1.189 +/* 0x0070	 128 */		add	%o0,16,%g2	! advance the d16 cursor by one (a,b) pair
   1.190 +/* 0x0074	 123 */		and	%g1,%o1,%o0	! b & 0xffff
   1.191 +/* 0x0078	     */		sllx	%o0,16,%g3	! (b & 0xffff) << 16
   1.192 +/* 0x007c	 120 */		and	%g4,%o3,%o0	! a & 0xffffffff
   1.193 +/* 0x0080	 117 */		add	%o0,%g3,%o4	! t1 = sum of the two terms (initial t1 is 0)
   1.194 +/* 0x0084	 119 */		fdtox	%f0,%f0	! c = (long long)d16[2]
   1.195 +/* 0x0088	     */		std	%f0,[%sp+104]
   1.196 +/* 0x008c	 125 */		and	%o4,%o3,%g5	! t1 & 0xffffffff, the word to store
   1.197 +/* 0x0090	 122 */		ldd	[%g2+8],%f2	! %f2 = d16[3]
   1.198 +/* 0x0094	 128 */		add	%o5,4,%o5	! advance the i32 cursor
   1.199 +/* 0x0098	 124 */		srax	%o4,32,%o4	! t1 >> 32
   1.200 +/* 0x009c	     */		stx	%o4,[%sp+112]
   1.201 +/* 0x00a0	 122 */		fdtox	%f2,%f0	! d = (long long)d16[3]
   1.202 +/* 0x00a4	     */		std	%f0,[%sp+96]
   1.203 +/* 0x00a8	 124 */		srax	%g1,16,%o0	! b >> 16
   1.204 +/* 0x00ac	     */		ldx	[%sp+112],%o7
   1.205 +/* 0x00b0	 121 */		srax	%g4,32,%o4	! a >> 32
   1.206 +/* 0x00b4	 124 */		add	%o0,%o7,%g4
   1.207 +/* 0x00b8	 128 */		or	%g0,1,%o7	! i = 1
   1.208 +/* 0x00bc	 119 */		ldx	[%sp+104],%g3	! %g3 = c
   1.209 +/* 0x00c0	 124 */		add	%o4,%g4,%o4	! t1 = (a>>32) + (b>>16) + (t1>>32)
   1.210 +/* 0x00c4	 122 */		ldx	[%sp+96],%g1	! b = d
   1.211 +/* 0x00c8	 125 */		st	%g5,[%o5-4]	! i32[0] = t1 & 0xffffffff
   1.212 +/* 0x00cc	 127 */		or	%g0,%g3,%g4	! a = c
   1.213 +                       .L900000112:	! steady-state loop body
   1.214 +/* 0x00d0	 119 */		ldd	[%g2+16],%f0	! next c operand
   1.215 +/* 0x00d4	 128 */		add	%o7,1,%o7	! i++
   1.216 +/* 0x00d8	     */		add	%o5,4,%o5	! advance the i32 cursor
   1.217 +/* 0x00dc	     */		cmp	%o7,%o2	! compare i with ilen-2
   1.218 +/* 0x00e0	     */		add	%g2,16,%g2	! advance the d16 cursor by one pair
   1.219 +/* 0x00e4	 119 */		fdtox	%f0,%f0
   1.220 +/* 0x00e8	     */		std	%f0,[%sp+104]
   1.221 +/* 0x00ec	 122 */		ldd	[%g2+8],%f0	! next d operand
   1.222 +/* 0x00f0	     */		fdtox	%f0,%f0
   1.223 +/* 0x00f4	     */		std	%f0,[%sp+96]
   1.224 +/* 0x00f8	 123 */		and	%g1,%o1,%g3	! b & 0xffff
   1.225 +/* 0x00fc	     */		sllx	%g3,16,%g5	! (b & 0xffff) << 16
   1.226 +/* 0x0100	 120 */		and	%g4,%o3,%g3	! a & 0xffffffff
   1.227 +/* 0x0104	 117 */		add	%g3,%g5,%g3
   1.228 +/* 0x0108	 124 */		srax	%g1,16,%g1	! b >> 16
   1.229 +/* 0x010c	 117 */		add	%g3,%o4,%g3	! add t1 carried over from the previous iteration
   1.230 +/* 0x0110	 124 */		srax	%g3,32,%o4	! t1 >> 32
   1.231 +/* 0x0114	     */		stx	%o4,[%sp+112]
   1.232 +/* 0x0118	 119 */		ldx	[%sp+104],%g5	! %g5 = c
   1.233 +/* 0x011c	 121 */		srax	%g4,32,%o4	! a >> 32
   1.234 +/* 0x0120	 124 */		ldx	[%sp+112],%g4
   1.235 +/* 0x0124	     */		add	%g1,%g4,%g4
   1.236 +/* 0x0128	 122 */		ldx	[%sp+96],%g1	! b = d
   1.237 +/* 0x012c	 124 */		add	%o4,%g4,%o4	! t1 = (a>>32) + (b>>16) + (t1>>32)
   1.238 +/* 0x0130	 125 */		and	%g3,%o3,%g3	! t1 & 0xffffffff
   1.239 +/* 0x0134	 127 */		or	%g0,%g5,%g4	! a = c
   1.240 +/* 0x0138	 128 */		ble,pt	%icc,.L900000112	! loop while i <= ilen-2
   1.241 +/* 0x013c	     */		st	%g3,[%o5-4]	! (delay slot) store the low 32 bits at the previous i32 slot
   1.242 +                       .L900000115:
   1.243 +/* 0x0140	 128 */		ba	.L900000117	! go to the final store
   1.244 +/* 0x0144	     */		sethi	%hi(0xfc00),%g2	! (delay slot) %g2 = 0xfc00 for the epilogue mask
   1.245 +                       .L77000134:	! simple loop, used when the trip count is below 3
   1.246 +/* 0x0148	 119 */		ldd	[%g2+16],%f0	! %f0 = d16[2*i+2]
   1.247 +                       .L900000116:
   1.248 +/* 0x014c	 120 */		and	%g4,%o3,%o0	! a & 0xffffffff
   1.249 +/* 0x0150	 123 */		and	%g1,%o1,%g3	! b & 0xffff
   1.250 +/* 0x0154	 119 */		fdtox	%f0,%f0	! c = (long long)d16[2*i+2]
   1.251 +/* 0x0158	 120 */		add	%o4,%o0,%o0	! t1 += a & 0xffffffff
   1.252 +/* 0x015c	 119 */		std	%f0,[%sp+104]
   1.253 +/* 0x0160	 128 */		add	%o7,1,%o7	! i++
   1.254 +/* 0x0164	 123 */		sllx	%g3,16,%o4	! (b & 0xffff) << 16
   1.255 +/* 0x0168	 122 */		ldd	[%g2+24],%f2	! %f2 = the d operand following c
   1.256 +/* 0x016c	 128 */		add	%g2,16,%g2	! advance the d16 cursor by one pair
   1.257 +/* 0x0170	 123 */		add	%o0,%o4,%o0	! t1 += (b & 0xffff) << 16
   1.258 +/* 0x0174	 128 */		cmp	%o7,%o2	! compare i with ilen-2
   1.259 +/* 0x0178	 125 */		and	%o0,%o3,%g3	! t1 & 0xffffffff
   1.260 +/* 0x017c	 122 */		fdtox	%f2,%f0	! d = (long long) of that operand
   1.261 +/* 0x0180	     */		std	%f0,[%sp+96]
   1.262 +/* 0x0184	 124 */		srax	%o0,32,%o0	! t1 >> 32
   1.263 +/* 0x0188	     */		stx	%o0,[%sp+112]
   1.264 +/* 0x018c	 121 */		srax	%g4,32,%o4	! a >> 32
   1.265 +/* 0x0190	 122 */		ldx	[%sp+96],%o0	! %o0 = d
   1.266 +/* 0x0194	 124 */		srax	%g1,16,%g5	! b >> 16
   1.267 +/* 0x0198	     */		ldx	[%sp+112],%g4
   1.268 +/* 0x019c	 119 */		ldx	[%sp+104],%g1	! %g1 = c
   1.269 +/* 0x01a0	 125 */		st	%g3,[%o5]	! store t1 & 0xffffffff at the i32 cursor
   1.270 +/* 0x01a4	 124 */		add	%g5,%g4,%g4
   1.271 +/* 0x01a8	 128 */		add	%o5,4,%o5	! advance the i32 cursor
   1.272 +/* 0x01ac	 124 */		add	%o4,%g4,%o4	! t1 = (a>>32) + (b>>16) + (t1>>32)
   1.273 +/* 0x01b0	 127 */		or	%g0,%g1,%g4	! a = c
   1.274 +/* 0x01b4	 128 */		or	%g0,%o0,%g1	! b = d
   1.275 +/* 0x01b8	     */		ble,a,pt	%icc,.L900000116	! loop while i <= ilen-2
   1.276 +/* 0x01bc	     */		ldd	[%g2+16],%f0	! (delay slot, annulled on loop exit) next c operand
   1.277 +                       .L77000127:
   1.278 +
   1.279 +!  129		      !   }
   1.280 +!  130		      !     t1+=a&0xffffffff;
   1.281 +!  131		      !     t=(a>>32);
   1.282 +!  132		      !     t1+=(b&0xffff)<<16;
   1.283 +!  133		      !     i32[i]=t1&0xffffffff;
   1.284 +
   1.285 +/* 0x01c0	 133 */		sethi	%hi(0xfc00),%g2	! %g2 = 0xfc00 on the fall-through path from the simple loop
   1.286 +                       .L900000117:	! final store; every path arrives with %g2 = 0xfc00
   1.287 +/* 0x01c4	 133 */		or	%g0,-1,%g3
   1.288 +/* 0x01c8	     */		add	%g2,1023,%g2	! %g2 = 0xffff mask
   1.289 +/* 0x01cc	     */		srl	%g3,0,%g3	! %g3 = 0xffffffff mask
   1.290 +/* 0x01d0	     */		and	%g1,%g2,%g2	! b & 0xffff
   1.291 +/* 0x01d4	     */		and	%g4,%g3,%g4	! a & 0xffffffff
   1.292 +/* 0x01d8	     */		sllx	%g2,16,%g2	! (b & 0xffff) << 16
   1.293 +/* 0x01dc	     */		add	%o4,%g4,%g4	! t1 += a & 0xffffffff
   1.294 +/* 0x01e0	     */		add	%g4,%g2,%g2	! t1 += (b & 0xffff) << 16
   1.295 +/* 0x01e4	     */		sll	%o7,2,%g4	! byte offset i*4
   1.296 +/* 0x01e8	     */		and	%g2,%g3,%g2	! t1 & 0xffffffff
   1.297 +/* 0x01ec	     */		st	%g2,[%i0+%g4]	! i32[i] = t1 & 0xffffffff
   1.298 +/* 0x01f0	     */		ret	! Result = 
   1.299 +/* 0x01f4	     */		restore	%g0,%g0,%g0	! (delay slot) pop the register window
   1.300 +/* 0x01f8	   0 */		.type	conv_d16_to_i32,2
   1.301 +/* 0x01f8	     */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
   1.302 +
   1.303 +	.section	".text",#alloc,#execinstr
   1.304 +/* 000000	   0 */		.align	8
   1.305 +!
   1.306 +! CONSTANT POOL
   1.307 +!
   1.308 +                       .L_const_seg_900000201:
   1.309 +/* 000000	   0 */		.word	1127219200,0	! 0x43300000:00000000 = 2^52 as an IEEE-754 double
   1.310 +/* 0x0008	   0 */		.align	4
   1.311 +/* 0x0008	     */		.skip	16
   1.312 +! void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
   1.313 +! SUBROUTINE conv_i32_to_d32
   1.314 +! Leaf routine (no save/restore): %o0 = d32, %o1 = i32, %o2 = len.
   1.315 +! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
   1.316 +
   1.317 +                       	.global conv_i32_to_d32
   1.318 +                       conv_i32_to_d32:
   1.319 +/* 000000	     */		or	%g0,%o7,%g2	! save the return address; the call below overwrites %o7
   1.320 +
   1.321 +!  135		      !}
   1.322 +!  137		      !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
   1.323 +!  138		      !{
   1.324 +!  139		      !int i;
   1.325 +!  141		      !#pragma pipeloop(0)
   1.326 +!  142		      ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
   1.327 +
   1.328 +/* 0x0004	 142 */		cmp	%o2,0	! flags for the len <= 0 test; consumed by the ble below
   1.329 +                       .L900000210:
   1.330 +/* 0x0008	     */		call	.+8	! puts the address of this call in %o7 (PIC base)
   1.331 +/* 0x000c	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
   1.332 +/* 0x0010	 142 */		or	%g0,0,%o5	! i = 0
   1.333 +/* 0x0014	 138 */		add	%g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
   1.334 +/* 0x0018	     */		or	%g0,%o0,%g5	! %g5 = d32 output cursor
   1.335 +/* 0x001c	     */		add	%g4,%o7,%g1	! %g1 = address of _GLOBAL_OFFSET_TABLE_
   1.336 +/* 0x0020	 142 */		ble,pt	%icc,.L77000140	! nothing to do when len <= 0
   1.337 +/* 0x0024	     */		or	%g0,%g2,%o7	! (delay slot) restore the return address
   1.338 +/* 0x0028	     */		sethi	%hi(.L_const_seg_900000201),%g2
   1.339 +/* 0x002c	 138 */		or	%g0,%o1,%g4	! %g4 = i32 input cursor
   1.340 +/* 0x0030	 142 */		add	%g2,%lo(.L_const_seg_900000201),%g2
   1.341 +/* 0x0034	     */		sub	%o2,1,%g3	! %g3 = len-1, the last index
   1.342 +/* 0x0038	     */		ld	[%g1+%g2],%g2	! %g2 = address of the 2^52 constant, read from the GOT
   1.343 +/* 0x003c	     */		cmp	%o2,9	! fewer than 9 elements?
   1.344 +/* 0x0040	     */		bl,pn	%icc,.L77000144	! yes: use the one-element-at-a-time loop
   1.345 +/* 0x0044	     */		ldd	[%g2],%f8	! (delay slot) %f8:%f9 = 2^52
   1.346 +/* 0x0048	     */		add	%o1,16,%g4	! input cursor past the four words preloaded below
   1.347 +/* 0x004c	     */		sub	%o2,5,%g1	! %g1 = len-5, bound for the unrolled loop
   1.348 +/* 0x0050	     */		ld	[%o1],%f7	! i32[0] -> low word of %f6:%f7
   1.349 +/* 0x0054	     */		or	%g0,4,%o5	! i = 4 (four elements are in flight)
   1.350 +/* 0x0058	     */		ld	[%o1+4],%f5	! i32[1] -> low word of %f4:%f5
   1.351 +/* 0x005c	     */		ld	[%o1+8],%f3	! i32[2] -> low word of %f2:%f3
   1.352 +/* 0x0060	     */		fmovs	%f8,%f6	! high word of 2^52 -> %f6, so %f6:%f7 = 2^52 + i32[0]
   1.353 +/* 0x0064	     */		ld	[%o1+12],%f1	! i32[3] -> low word of %f0:%f1
   1.354 +                       .L900000205:	! unrolled loop: five elements per iteration
   1.355 +/* 0x0068	     */		ld	[%g4],%f11
   1.356 +/* 0x006c	     */		add	%o5,5,%o5	! i += 5
   1.357 +/* 0x0070	     */		add	%g4,20,%g4	! input cursor += 5 words
   1.358 +/* 0x0074	     */		fsubd	%f6,%f8,%f6	! subtract 2^52, leaving the element as a double
   1.359 +/* 0x0078	     */		std	%f6,[%g5]
   1.360 +/* 0x007c	     */		cmp	%o5,%g1	! compare i with len-5
   1.361 +/* 0x0080	     */		add	%g5,40,%g5	! output cursor += 5 doubles
   1.362 +/* 0x0084	     */		fmovs	%f8,%f4
   1.363 +/* 0x0088	     */		ld	[%g4-16],%f7
   1.364 +/* 0x008c	     */		fsubd	%f4,%f8,%f12
   1.365 +/* 0x0090	     */		fmovs	%f8,%f2
   1.366 +/* 0x0094	     */		std	%f12,[%g5-32]
   1.367 +/* 0x0098	     */		ld	[%g4-12],%f5
   1.368 +/* 0x009c	     */		fsubd	%f2,%f8,%f12
   1.369 +/* 0x00a0	     */		fmovs	%f8,%f0
   1.370 +/* 0x00a4	     */		std	%f12,[%g5-24]
   1.371 +/* 0x00a8	     */		ld	[%g4-8],%f3
   1.372 +/* 0x00ac	     */		fsubd	%f0,%f8,%f12
   1.373 +/* 0x00b0	     */		fmovs	%f8,%f10
   1.374 +/* 0x00b4	     */		std	%f12,[%g5-16]
   1.375 +/* 0x00b8	     */		ld	[%g4-4],%f1
   1.376 +/* 0x00bc	     */		fsubd	%f10,%f8,%f10
   1.377 +/* 0x00c0	     */		fmovs	%f8,%f6
   1.378 +/* 0x00c4	     */		ble,pt	%icc,.L900000205	! loop while i <= len-5
   1.379 +/* 0x00c8	     */		std	%f10,[%g5-8]	! (delay slot) store the fifth result of this iteration
   1.380 +                       .L900000208:	! drain: convert and store the four elements still in flight
   1.381 +/* 0x00cc	     */		fmovs	%f8,%f4
   1.382 +/* 0x00d0	     */		add	%g5,32,%g5	! output cursor += 4 doubles
   1.383 +/* 0x00d4	     */		cmp	%o5,%g3	! compare i with len-1
   1.384 +/* 0x00d8	     */		fmovs	%f8,%f2
   1.385 +/* 0x00dc	     */		fmovs	%f8,%f0
   1.386 +/* 0x00e0	     */		fsubd	%f6,%f8,%f6
   1.387 +/* 0x00e4	     */		std	%f6,[%g5-32]
   1.388 +/* 0x00e8	     */		fsubd	%f4,%f8,%f4
   1.389 +/* 0x00ec	     */		std	%f4,[%g5-24]
   1.390 +/* 0x00f0	     */		fsubd	%f2,%f8,%f2
   1.391 +/* 0x00f4	     */		std	%f2,[%g5-16]
   1.392 +/* 0x00f8	     */		fsubd	%f0,%f8,%f0
   1.393 +/* 0x00fc	     */		bg,pn	%icc,.L77000140	! done when i > len-1
   1.394 +/* 0x0100	     */		std	%f0,[%g5-8]	! (delay slot) store the last drained result
   1.395 +                       .L77000144:	! remainder loop: one element per iteration
   1.396 +/* 0x0104	     */		ld	[%g4],%f1	! i32[i] -> low word of %f0:%f1
   1.397 +                       .L900000211:
   1.398 +/* 0x0108	     */		ldd	[%g2],%f8	! %f8:%f9 = 2^52
   1.399 +/* 0x010c	     */		add	%o5,1,%o5	! i++
   1.400 +/* 0x0110	     */		add	%g4,4,%g4	! advance the input cursor
   1.401 +/* 0x0114	     */		cmp	%o5,%g3	! compare i with len-1
   1.402 +/* 0x0118	     */		fmovs	%f8,%f0	! %f0:%f1 = 2^52 + i32[i]
   1.403 +/* 0x011c	     */		fsubd	%f0,%f8,%f0	! (double)i32[i]
   1.404 +/* 0x0120	     */		std	%f0,[%g5]	! store it at the d32 cursor
   1.405 +/* 0x0124	     */		add	%g5,8,%g5	! advance the output cursor
   1.406 +/* 0x0128	     */		ble,a,pt	%icc,.L900000211	! loop while i <= len-1
   1.407 +/* 0x012c	     */		ld	[%g4],%f1	! (delay slot, annulled on loop exit) next input word
   1.408 +                       .L77000140:
   1.409 +/* 0x0130	     */		retl	! Result = 
   1.410 +/* 0x0134	     */		nop
   1.411 +/* 0x0138	   0 */		.type	conv_i32_to_d32,2
   1.412 +/* 0x0138	     */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
   1.413 +
   1.414 +	.section	".text",#alloc,#execinstr
   1.415 +/* 000000	   0 */		.align	8
   1.416 +!
   1.417 +! CONSTANT POOL
   1.418 +!
   1.419 +                       .L_const_seg_900000301:
   1.420 +/* 000000	   0 */		.word	1127219200,0	! 0x43300000:00000000 = 2^52 as an IEEE-754 double
   1.421 +/* 0x0008	   0 */		.align	4
   1.422 +! void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
   1.423 +! SUBROUTINE conv_i32_to_d16
   1.424 +! In: %i0 = d16 (output), %i1 = i32 (input), %i2 = len.
   1.425 +! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
   1.426 +
   1.427 +                       	.global conv_i32_to_d16
   1.428 +                       conv_i32_to_d16:
   1.429 +/* 000000	     */		save	%sp,-104,%sp	! new register window with a 104-byte frame
   1.430 +/* 0x0004	     */		or	%g0,%i2,%o0	! %o0 = len
   1.431 +
   1.432 +!  143		      !}
   1.433 +!  146		      !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
   1.434 +!  147		      !{
   1.435 +!  148		      !int i;
   1.436 +!  149		      !unsigned int a;
   1.437 +!  151		      !#pragma pipeloop(0)
   1.438 +!  152		      ! for(i=0;i<len;i++)
   1.439 +!  153		      !   {
   1.440 +!  154		      !     a=i32[i];
   1.441 +!  155		      !     d16[2*i]=(double)(a&0xffff);
   1.442 +!  156		      !     d16[2*i+1]=(double)(a>>16);
   1.443 +
   1.444 +/* 0x0008	 156 */		sethi	%hi(.L_const_seg_900000301),%g2
   1.445 +                       .L900000310:
   1.446 +/* 0x000c	     */		call	.+8	! puts the address of this call in %o7 (PIC base)
   1.447 +/* 0x0010	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
   1.448 +/* 0x0014	 152 */		cmp	%o0,0	! len <= 0?
   1.449 +/* 0x0018	 147 */		add	%g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
   1.450 +/* 0x001c	 152 */		ble,pt	%icc,.L77000150	! yes: return immediately
   1.451 +/* 0x0020	     */		add	%g3,%o7,%o2	! (delay slot) %o2 = address of _GLOBAL_OFFSET_TABLE_
   1.452 +/* 0x0024	     */		sub	%i2,1,%o5	! %o5 = len-1, the last index
   1.453 +/* 0x0028	 156 */		add	%g2,%lo(.L_const_seg_900000301),%o1
   1.454 +/* 0x002c	 152 */		sethi	%hi(0xfc00),%o0	! %o0 = 0xfc00
   1.455 +/* 0x0030	     */		ld	[%o2+%o1],%o3	! %o3 = address of the 2^52 constant, read from the GOT
   1.456 +/* 0x0034	     */		add	%o5,1,%g2	! %g2 = len
   1.457 +/* 0x0038	     */		or	%g0,0,%g1	! i = 0
   1.458 +/* 0x003c	     */		cmp	%g2,3	! fewer than 3 elements?
   1.459 +/* 0x0040	     */		or	%g0,%i1,%o7	! %o7 = i32 input cursor
   1.460 +/* 0x0044	     */		add	%o0,1023,%o4	! %o4 = 0xffff mask
   1.461 +/* 0x0048	     */		or	%g0,%i0,%g3	! %g3 = d16 output cursor
   1.462 +/* 0x004c	     */		bl,pn	%icc,.L77000154	! yes: use the simple loop
   1.463 +/* 0x0050	     */		add	%o7,4,%o0	! (delay slot) %o0 = i32 + 4 bytes
   1.464 +/* 0x0054	 155 */		ldd	[%o3],%f0	! %f0:%f1 = 2^52
   1.465 +/* 0x0058	 156 */		or	%g0,1,%g1	! i = 1 (first element is in flight)
   1.466 +/* 0x005c	 154 */		ld	[%o0-4],%o1	! a = i32[0]
   1.467 +/* 0x0060	   0 */		or	%g0,%o0,%o7	! input cursor now points at i32[1]
   1.468 +/* 0x0064	 155 */		and	%o1,%o4,%o0	! a & 0xffff
   1.469 +                       .L900000306:	! pipelined loop: finish element i-1, load element i
   1.470 +/* 0x0068	 155 */		st	%o0,[%sp+96]	! a & 0xffff goes through a stack slot to reach an FP register
   1.471 +/* 0x006c	 156 */		add	%g1,1,%g1	! i++
   1.472 +/* 0x0070	     */		add	%g3,16,%g3	! output cursor += 2 doubles
   1.473 +/* 0x0074	     */		cmp	%g1,%o5	! compare i with len-1
   1.474 +/* 0x0078	     */		add	%o7,4,%o7	! advance the input cursor
   1.475 +/* 0x007c	 155 */		ld	[%sp+96],%f3	! low word of %f2:%f3 = a & 0xffff
   1.476 +/* 0x0080	     */		fmovs	%f0,%f2	! high word of 2^52, so %f2:%f3 = 2^52 + (a & 0xffff)
   1.477 +/* 0x0084	     */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
   1.478 +/* 0x0088	 156 */		srl	%o1,16,%o0	! a >> 16
   1.479 +/* 0x008c	 155 */		std	%f2,[%g3-16]	! even slot: d16[2*k] for the element k being finished
   1.480 +/* 0x0090	 156 */		st	%o0,[%sp+92]
   1.481 +/* 0x0094	     */		ld	[%sp+92],%f3	! low word of %f2:%f3 = a >> 16
   1.482 +/* 0x0098	 154 */		ld	[%o7-4],%o1	! a = next i32 word
   1.483 +/* 0x009c	 156 */		fmovs	%f0,%f2
   1.484 +/* 0x00a0	     */		fsubd	%f2,%f0,%f2	! (double)(a >> 16)
   1.485 +/* 0x00a4	 155 */		and	%o1,%o4,%o0	! next a & 0xffff
   1.486 +/* 0x00a8	 156 */		ble,pt	%icc,.L900000306	! loop while i <= len-1
   1.487 +/* 0x00ac	     */		std	%f2,[%g3-8]	! (delay slot) odd slot: d16[2*k+1]
   1.488 +                       .L900000309:	! drain: finish the last element already loaded, then return
   1.489 +/* 0x00b0	 155 */		st	%o0,[%sp+96]
   1.490 +/* 0x00b4	     */		fmovs	%f0,%f2
   1.491 +/* 0x00b8	 156 */		add	%g3,16,%g3
   1.492 +/* 0x00bc	     */		srl	%o1,16,%o0	! a >> 16
   1.493 +/* 0x00c0	 155 */		ld	[%sp+96],%f3
   1.494 +/* 0x00c4	     */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
   1.495 +/* 0x00c8	     */		std	%f2,[%g3-16]
   1.496 +/* 0x00cc	 156 */		st	%o0,[%sp+92]
   1.497 +/* 0x00d0	     */		fmovs	%f0,%f2
   1.498 +/* 0x00d4	     */		ld	[%sp+92],%f3
   1.499 +/* 0x00d8	     */		fsubd	%f2,%f0,%f0	! (double)(a >> 16)
   1.500 +/* 0x00dc	     */		std	%f0,[%g3-8]
   1.501 +/* 0x00e0	     */		ret	! Result = 
   1.502 +/* 0x00e4	     */		restore	%g0,%g0,%g0	! (delay slot) pop the register window
   1.503 +                       .L77000154:	! simple loop, used when len is below 3
   1.504 +/* 0x00e8	 154 */		ld	[%o7],%o0	! a = i32[i]
   1.505 +                       .L900000311:
   1.506 +/* 0x00ec	 155 */		and	%o0,%o4,%o1	! a & 0xffff
   1.507 +/* 0x00f0	     */		st	%o1,[%sp+96]
   1.508 +/* 0x00f4	 156 */		add	%g1,1,%g1	! i++
   1.509 +/* 0x00f8	 155 */		ldd	[%o3],%f0	! %f0:%f1 = 2^52
   1.510 +/* 0x00fc	 156 */		srl	%o0,16,%o0	! a >> 16
   1.511 +/* 0x0100	     */		add	%o7,4,%o7	! advance the input cursor
   1.512 +/* 0x0104	     */		cmp	%g1,%o5	! compare i with len-1
   1.513 +/* 0x0108	 155 */		fmovs	%f0,%f2
   1.514 +/* 0x010c	     */		ld	[%sp+96],%f3
   1.515 +/* 0x0110	     */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
   1.516 +/* 0x0114	     */		std	%f2,[%g3]	! d16[2*i]
   1.517 +/* 0x0118	 156 */		st	%o0,[%sp+92]
   1.518 +/* 0x011c	     */		fmovs	%f0,%f2
   1.519 +/* 0x0120	     */		ld	[%sp+92],%f3
   1.520 +/* 0x0124	     */		fsubd	%f2,%f0,%f0	! (double)(a >> 16)
   1.521 +/* 0x0128	     */		std	%f0,[%g3+8]	! d16[2*i+1]
   1.522 +/* 0x012c	     */		add	%g3,16,%g3	! output cursor += 2 doubles
   1.523 +/* 0x0130	     */		ble,a,pt	%icc,.L900000311	! loop while i <= len-1
   1.524 +/* 0x0134	     */		ld	[%o7],%o0	! (delay slot, annulled on loop exit) next a
   1.525 +                       .L77000150:
   1.526 +/* 0x0138	     */		ret	! Result = 
   1.527 +/* 0x013c	     */		restore	%g0,%g0,%g0	! (delay slot) pop the register window
   1.528 +/* 0x0140	   0 */		.type	conv_i32_to_d16,2
   1.529 +/* 0x0140	     */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
   1.530 +
   1.531 +	.section	".text",#alloc,#execinstr
   1.532 +/* 000000	   0 */		.align	8
   1.533 +!
   1.534 +! CONSTANT POOL
   1.535 +!
   1.536 +                       .L_const_seg_900000401:
   1.537 +/* 000000	   0 */		.word	1127219200,0	! 0x43300000:00000000 = 2^52 as an IEEE-754 double
   1.538 +/* 0x0008	   0 */		.align	4
   1.539 +/* 0x0008	     */		.skip	16
   1.540 +! void conv_i32_to_d32_and_d16(double *d32, double *d16, unsigned int *i32, int len)
   1.541 +! SUBROUTINE conv_i32_to_d32_and_d16
   1.542 +! In: %i0 = d32 (output), %i1 = d16 (output), %i2 = i32 (input), %i3 = len.
   1.543 +! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
   1.544 +
   1.545 +                       	.global conv_i32_to_d32_and_d16
   1.546 +                       conv_i32_to_d32_and_d16:
   1.547 +/* 000000	     */		save	%sp,-120,%sp	! new register window with a 120-byte frame
   1.548 +                       .L900000415:
   1.549 +/* 0x0004	     */		call	.+8	! puts the address of this call in %o7 (PIC base)
   1.550 +/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
   1.551 +
   1.552 +!  157		      !   }
   1.553 +!  158		      !}
   1.554 +!  161		      !void conv_i32_to_d32_and_d16(double *d32, double *d16, 
   1.555 +!  162		      !			     unsigned int *i32, int len)
   1.556 +!  163		      !{
   1.557 +!  164		      !int i = 0;
   1.558 +!  165		      !unsigned int a;
   1.559 +!  167		      !#pragma pipeloop(0)
   1.560 +!  168		      !#ifdef RF_INLINE_MACROS
   1.561 +!  169		      ! for(;i<len-3;i+=4)
   1.562 +
   1.563 +/* 0x000c	 169 */		sub	%i3,3,%g2	! %g2 = len-3
   1.564 +/* 0x0010	     */		cmp	%g2,0	! flags for the 4-wide loop entry test; consumed by the ble below
   1.565 +/* 0x0014	 163 */		add	%g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
   1.566 +
   1.567 +!  170		      !   {
   1.568 +!  171		      !     i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
   1.569 +!  172		      !			  &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
   1.570 +
   1.571 +/* 0x0018	 172 */		sethi	%hi(Zero),%g2
   1.572 +/* 0x001c	 163 */		add	%g4,%o7,%o4	! %o4 = address of _GLOBAL_OFFSET_TABLE_
   1.573 +/* 0x0020	 172 */		add	%g2,%lo(Zero),%g2
   1.574 +/* 0x0024	     */		sethi	%hi(TwoToMinus16),%g3
   1.575 +/* 0x0028	     */		ld	[%o4+%g2],%o1	! %o1 = &Zero, read from the GOT
   1.576 +/* 0x002c	     */		sethi	%hi(TwoTo16),%g4
   1.577 +/* 0x0030	     */		add	%g3,%lo(TwoToMinus16),%g2
   1.578 +/* 0x0034	     */		ld	[%o4+%g2],%o3	! %o3 = &TwoToMinus16, read from the GOT
   1.579 +/* 0x0038	 164 */		or	%g0,0,%g5	! i = 0
   1.580 +/* 0x003c	 172 */		add	%g4,%lo(TwoTo16),%g3
   1.581 +/* 0x0040	     */		ld	[%o4+%g3],%o2	! %o2 = &TwoTo16, read from the GOT
   1.582 +/* 0x0044	 163 */		or	%g0,%i0,%i4	! %i4 = d32
   1.583 +/* 0x0048	 169 */		or	%g0,%i2,%o7	! %o7 = i32 input cursor
   1.584 +/* 0x004c	     */		ble,pt	%icc,.L900000418	! len-3 <= 0: skip the 4-wide loop
   1.585 +/* 0x0050	     */		cmp	%g5,%i3	! (delay slot) compare i with len for the remainder test
   1.586 +/* 0x0054	 172 */		stx	%o7,[%sp+104]	! keep a copy of the input cursor on the stack
   1.587 +/* 0x0058	 169 */		sub	%i3,4,%o5	! %o5 = len-4, bound for the 4-wide loop
   1.588 +/* 0x005c	     */		or	%g0,0,%g4	! byte offset into d32
   1.589 +/* 0x0060	     */		or	%g0,0,%g1	! byte offset into d16
   1.590 +                       .L900000417:	! 4-wide loop (inlined i16_to_d16_and_d32x4): four i32 words per iteration
   1.591 +/* 0x0064	     */		ldd	[%o1],%f2	! %f2:%f3 = Zero
   1.592 +/* 0x0068	 172 */		add	%i4,%g4,%g2	! %g2 = &d32[i]
   1.593 +/* 0x006c	     */		add	%i1,%g1,%g3	! %g3 = &d16[2*i]
   1.594 +/* 0x0070	     */		ldd	[%o3],%f0	! %f0 = 2^-16
   1.595 +/* 0x0074	     */		add	%g5,4,%g5	! i += 4
   1.596 +/* 0x0078	     */		fmovd	%f2,%f14	! clear %f14:%f15
   1.597 +/* 0x007c	     */		ld	[%o7],%f15	! low word = i32[i]: %f14:%f15 holds it as a 64-bit integer
   1.598 +/* 0x0080	     */		cmp	%g5,%o5	! compare i with len-4
   1.599 +/* 0x0084	     */		fmovd	%f2,%f10
   1.600 +/* 0x0088	     */		ld	[%o7+4],%f11	! i32[i+1]
   1.601 +/* 0x008c	     */		add	%o7,16,%o7	! input cursor += 4 words
   1.602 +/* 0x0090	     */		ldx	[%sp+104],%o0	! %o0 = cursor value from the start of this iteration
   1.603 +/* 0x0094	     */		fmovd	%f2,%f6
   1.604 +/* 0x0098	     */		stx	%o7,[%sp+112]
   1.605 +/* 0x009c	     */		fxtod	%f14,%f14	! x0 = (double)i32[i]
   1.606 +/* 0x00a0	     */		ld	[%o0+8],%f7	! i32[i+2]
   1.607 +/* 0x00a4	     */		fxtod	%f10,%f10	! x1 = (double)i32[i+1]
   1.608 +/* 0x00a8	     */		ld	[%o0+12],%f3	! i32[i+3]
   1.609 +/* 0x00ac	     */		fxtod	%f6,%f6	! x2 = (double)i32[i+2]
   1.610 +/* 0x00b0	     */		ldd	[%o2],%f16	! %f16 = 2^16
   1.611 +/* 0x00b4	     */		fmuld	%f0,%f14,%f12	! x0 * 2^-16
   1.612 +/* 0x00b8	     */		fxtod	%f2,%f2	! x3 = (double)i32[i+3]
   1.613 +/* 0x00bc	     */		fmuld	%f0,%f10,%f8	! x1 * 2^-16
   1.614 +/* 0x00c0	     */		std	%f14,[%i4+%g4]	! d32[i] = x0
   1.615 +/* 0x00c4	     */		ldx	[%sp+112],%o7
   1.616 +/* 0x00c8	     */		add	%g4,32,%g4	! d32 offset += 4 doubles
   1.617 +/* 0x00cc	     */		fmuld	%f0,%f6,%f4	! x2 * 2^-16
   1.618 +/* 0x00d0	     */		fdtox	%f12,%f12	! to integer: the upper 16 bits of the word
   1.619 +/* 0x00d4	     */		std	%f10,[%g2+8]	! d32[i+1] = x1
   1.620 +/* 0x00d8	     */		fmuld	%f0,%f2,%f0	! x3 * 2^-16
   1.621 +/* 0x00dc	     */		fdtox	%f8,%f8
   1.622 +/* 0x00e0	     */		std	%f6,[%g2+16]	! d32[i+2] = x2
   1.623 +/* 0x00e4	     */		std	%f2,[%g2+24]	! d32[i+3] = x3
   1.624 +/* 0x00e8	     */		fdtox	%f4,%f4
   1.625 +/* 0x00ec	     */		fdtox	%f0,%f0
   1.626 +/* 0x00f0	     */		fxtod	%f12,%f12	! hi0 as a double
   1.627 +/* 0x00f4	     */		std	%f12,[%g3+8]	! d16[2*i+1] = hi0
   1.628 +/* 0x00f8	     */		fxtod	%f8,%f8
   1.629 +/* 0x00fc	     */		std	%f8,[%g3+24]	! d16[2*i+3] = hi1
   1.630 +/* 0x0100	     */		fxtod	%f4,%f4
   1.631 +/* 0x0104	     */		std	%f4,[%g3+40]	! d16[2*i+5] = hi2
   1.632 +/* 0x0108	     */		fxtod	%f0,%f0
   1.633 +/* 0x010c	     */		std	%f0,[%g3+56]	! d16[2*i+7] = hi3
   1.634 +/* 0x0110	     */		fmuld	%f12,%f16,%f12	! hi0 * 2^16
   1.635 +/* 0x0114	     */		fmuld	%f8,%f16,%f8
   1.636 +/* 0x0118	     */		fmuld	%f4,%f16,%f4
   1.637 +/* 0x011c	     */		fsubd	%f14,%f12,%f12	! lo0 = x0 - hi0 * 2^16
   1.638 +/* 0x0120	     */		std	%f12,[%i1+%g1]	! d16[2*i] = lo0
   1.639 +/* 0x0124	     */		fmuld	%f0,%f16,%f0
   1.640 +/* 0x0128	     */		fsubd	%f10,%f8,%f8
   1.641 +/* 0x012c	     */		std	%f8,[%g3+16]	! d16[2*i+2] = lo1
   1.642 +/* 0x0130	     */		add	%g1,64,%g1	! d16 offset += 8 doubles
   1.643 +/* 0x0134	     */		fsubd	%f6,%f4,%f4
   1.644 +/* 0x0138	     */		std	%f4,[%g3+32]	! d16[2*i+4] = lo2
   1.645 +/* 0x013c	     */		fsubd	%f2,%f0,%f0
   1.646 +/* 0x0140	     */		std	%f0,[%g3+48]	! d16[2*i+6] = lo3
   1.647 +/* 0x0144	     */		ble,a,pt	%icc,.L900000417	! loop while i <= len-4
   1.648 +/* 0x0148	     */		stx	%o7,[%sp+104]	! (delay slot, annulled on loop exit) save the advanced cursor
   1.649 +                       .L77000159:
   1.650 +
   1.651 +!  173		      !   }
   1.652 +!  174		      !#endif
   1.653 +!  175		      ! for(;i<len;i++)
   1.654 +
   1.655 +/* 0x014c	 175 */		cmp	%g5,%i3	! compare i with len
   1.656 +                       .L900000418:
   1.657 +/* 0x0150	 175 */		bge,pt	%icc,.L77000164	! i >= len: nothing left, return
   1.658 +/* 0x0154	     */		nop
   1.659 +
   1.660 +!  176		      !   {
   1.661 +!  177		      !     a=i32[i];
   1.662 +!  178		      !     d32[i]=(double)(i32[i]);
   1.663 +!  179		      !     d16[2*i]=(double)(a&0xffff);
   1.664 +!  180		      !     d16[2*i+1]=(double)(a>>16);
   1.665 +
   1.666 +/* 0x0158	 180 */		sethi	%hi(.L_const_seg_900000401),%g2
   1.667 +/* 0x015c	     */		add	%g2,%lo(.L_const_seg_900000401),%o1
   1.668 +/* 0x0160	 175 */		sethi	%hi(0xfc00),%o0	! %o0 = 0xfc00
   1.669 +/* 0x0164	     */		ld	[%o4+%o1],%o2	! %o2 = address of the 2^52 constant, read from the GOT
   1.670 +/* 0x0168	     */		sll	%g5,2,%o3	! i*4
   1.671 +/* 0x016c	     */		sub	%i3,%g5,%g3	! %g3 = len-i, elements remaining
   1.672 +/* 0x0170	     */		sll	%g5,3,%g2	! i*8
   1.673 +/* 0x0174	     */		add	%o0,1023,%o4	! %o4 = 0xffff mask (the GOT base is no longer needed)
   1.674 +/* 0x0178	 178 */		ldd	[%o2],%f0	! %f0:%f1 = 2^52
   1.675 +/* 0x017c	     */		add	%i2,%o3,%o0	! %o0 = &i32[i]
   1.676 +/* 0x0180	 175 */		cmp	%g3,3	! fewer than 3 elements remaining?
   1.677 +/* 0x0184	     */		add	%i4,%g2,%o3	! %o3 = &d32[i]
   1.678 +/* 0x0188	     */		sub	%i3,1,%o1	! %o1 = len-1
   1.679 +/* 0x018c	     */		sll	%g5,4,%g4	! i*16
   1.680 +/* 0x0190	     */		bl,pn	%icc,.L77000161	! yes: use the simple loop
   1.681 +/* 0x0194	     */		add	%i1,%g4,%o5	! (delay slot) %o5 = &d16[2*i]
   1.682 +/* 0x0198	 178 */		ld	[%o0],%f3	! peeled element: i32[i] -> low word of %f2:%f3
   1.683 +/* 0x019c	 180 */		add	%o3,8,%o3	! d32 cursor += 1 double
   1.684 +/* 0x01a0	 177 */		ld	[%o0],%o7	! a = i32[i]
   1.685 +/* 0x01a4	 180 */		add	%o5,16,%o5	! d16 cursor += 2 doubles
   1.686 +/* 0x01a8	     */		add	%g5,1,%g5	! i++
   1.687 +/* 0x01ac	 178 */		fmovs	%f0,%f2	! %f2:%f3 = 2^52 + i32[i]
   1.688 +/* 0x01b0	 180 */		add	%o0,4,%o0	! advance the input cursor
   1.689 +/* 0x01b4	 179 */		and	%o7,%o4,%g1	! a & 0xffff
   1.690 +/* 0x01b8	 178 */		fsubd	%f2,%f0,%f2	! (double)i32[i]
   1.691 +/* 0x01bc	     */		std	%f2,[%o3-8]	! d32[i]
   1.692 +/* 0x01c0	 180 */		srl	%o7,16,%o7	! a >> 16
   1.693 +/* 0x01c4	 179 */		st	%g1,[%sp+96]	! integer goes through a stack slot to reach an FP register
   1.694 +/* 0x01c8	     */		fmovs	%f0,%f2
   1.695 +/* 0x01cc	     */		ld	[%sp+96],%f3
   1.696 +/* 0x01d0	     */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
   1.697 +/* 0x01d4	     */		std	%f2,[%o5-16]	! d16[2*i]
   1.698 +/* 0x01d8	 180 */		st	%o7,[%sp+92]
   1.699 +/* 0x01dc	     */		fmovs	%f0,%f2
   1.700 +/* 0x01e0	     */		ld	[%sp+92],%f3
   1.701 +/* 0x01e4	     */		fsubd	%f2,%f0,%f2	! (double)(a >> 16)
   1.702 +/* 0x01e8	     */		std	%f2,[%o5-8]	! d16[2*i+1]
   1.703 +                       .L900000411:	! unrolled remainder loop: two elements per iteration
   1.704 +/* 0x01ec	 178 */		ld	[%o0],%f3
   1.705 +/* 0x01f0	 180 */		add	%g5,2,%g5	! i += 2
   1.706 +/* 0x01f4	     */		add	%o5,32,%o5	! d16 cursor += 4 doubles
   1.707 +/* 0x01f8	 177 */		ld	[%o0],%o7	! a = first word of the pair
   1.708 +/* 0x01fc	 180 */		cmp	%g5,%o1	! compare i with len-1
   1.709 +/* 0x0200	     */		add	%o3,16,%o3	! d32 cursor += 2 doubles
   1.710 +/* 0x0204	 178 */		fmovs	%f0,%f2
   1.711 +/* 0x0208	     */		fsubd	%f2,%f0,%f2
   1.712 +/* 0x020c	     */		std	%f2,[%o3-16]	! d32 entry for the first word
   1.713 +/* 0x0210	 179 */		and	%o7,%o4,%g1	! a & 0xffff
   1.714 +/* 0x0214	     */		st	%g1,[%sp+96]
   1.715 +/* 0x0218	     */		ld	[%sp+96],%f3
   1.716 +/* 0x021c	     */		fmovs	%f0,%f2
   1.717 +/* 0x0220	     */		fsubd	%f2,%f0,%f2
   1.718 +/* 0x0224	 180 */		srl	%o7,16,%o7	! a >> 16
   1.719 +/* 0x0228	 179 */		std	%f2,[%o5-32]	! low-half d16 entry for the first word
   1.720 +/* 0x022c	 180 */		st	%o7,[%sp+92]
   1.721 +/* 0x0230	     */		ld	[%sp+92],%f3
   1.722 +/* 0x0234	     */		fmovs	%f0,%f2
   1.723 +/* 0x0238	     */		fsubd	%f2,%f0,%f2
   1.724 +/* 0x023c	     */		std	%f2,[%o5-24]	! high-half d16 entry for the first word
   1.725 +/* 0x0240	     */		add	%o0,4,%o0	! advance the input cursor
   1.726 +/* 0x0244	 178 */		ld	[%o0],%f3
   1.727 +/* 0x0248	 177 */		ld	[%o0],%o7	! a = second word of the pair
   1.728 +/* 0x024c	 178 */		fmovs	%f0,%f2
   1.729 +/* 0x0250	     */		fsubd	%f2,%f0,%f2
   1.730 +/* 0x0254	     */		std	%f2,[%o3-8]	! d32 entry for the second word
   1.731 +/* 0x0258	 179 */		and	%o7,%o4,%g1	! a & 0xffff
   1.732 +/* 0x025c	     */		st	%g1,[%sp+96]
   1.733 +/* 0x0260	     */		ld	[%sp+96],%f3
   1.734 +/* 0x0264	     */		fmovs	%f0,%f2
   1.735 +/* 0x0268	     */		fsubd	%f2,%f0,%f2
   1.736 +/* 0x026c	 180 */		srl	%o7,16,%o7	! a >> 16
   1.737 +/* 0x0270	 179 */		std	%f2,[%o5-16]	! low-half d16 entry for the second word
   1.738 +/* 0x0274	 180 */		st	%o7,[%sp+92]
   1.739 +/* 0x0278	     */		ld	[%sp+92],%f3
   1.740 +/* 0x027c	     */		fmovs	%f0,%f2
   1.741 +/* 0x0280	     */		fsubd	%f2,%f0,%f2
   1.742 +/* 0x0284	     */		std	%f2,[%o5-8]	! high-half d16 entry for the second word
   1.743 +/* 0x0288	     */		bl,pt	%icc,.L900000411	! loop while i < len-1
   1.744 +/* 0x028c	     */		add	%o0,4,%o0	! (delay slot) advance the input cursor
   1.745 +                       .L900000414:
   1.746 +/* 0x0290	 180 */		cmp	%g5,%i3	! compare i with len
   1.747 +/* 0x0294	     */		bge,pn	%icc,.L77000164	! i >= len: done
   1.748 +/* 0x0298	     */		nop
   1.749 +                       .L77000161:	! simple remainder loop: one element per iteration
   1.750 +/* 0x029c	 178 */		ld	[%o0],%f3	! i32[i] -> low word of %f2:%f3
   1.751 +                       .L900000416:
   1.752 +/* 0x02a0	 178 */		ldd	[%o2],%f0	! %f0:%f1 = 2^52
   1.753 +/* 0x02a4	 180 */		add	%g5,1,%g5	! i++
   1.754 +/* 0x02a8	 177 */		ld	[%o0],%o1	! a = i32[i]
   1.755 +/* 0x02ac	 180 */		add	%o0,4,%o0	! advance the input cursor
   1.756 +/* 0x02b0	     */		cmp	%g5,%i3	! compare i with len
   1.757 +/* 0x02b4	 178 */		fmovs	%f0,%f2
   1.758 +/* 0x02b8	 179 */		and	%o1,%o4,%o7	! a & 0xffff
   1.759 +/* 0x02bc	 178 */		fsubd	%f2,%f0,%f2	! (double)i32[i]
   1.760 +/* 0x02c0	     */		std	%f2,[%o3]	! d32[i]
   1.761 +/* 0x02c4	 180 */		srl	%o1,16,%o1	! a >> 16
   1.762 +/* 0x02c8	 179 */		st	%o7,[%sp+96]
   1.763 +/* 0x02cc	 180 */		add	%o3,8,%o3	! d32 cursor += 1 double
   1.764 +/* 0x02d0	 179 */		fmovs	%f0,%f2
   1.765 +/* 0x02d4	     */		ld	[%sp+96],%f3
   1.766 +/* 0x02d8	     */		fsubd	%f2,%f0,%f2	! (double)(a & 0xffff)
   1.767 +/* 0x02dc	     */		std	%f2,[%o5]	! d16[2*i]
   1.768 +/* 0x02e0	 180 */		st	%o1,[%sp+92]
   1.769 +/* 0x02e4	     */		fmovs	%f0,%f2
   1.770 +/* 0x02e8	     */		ld	[%sp+92],%f3
   1.771 +/* 0x02ec	     */		fsubd	%f2,%f0,%f0	! (double)(a >> 16)
   1.772 +/* 0x02f0	     */		std	%f0,[%o5+8]	! d16[2*i+1]
   1.773 +/* 0x02f4	     */		add	%o5,16,%o5	! d16 cursor += 2 doubles
   1.774 +/* 0x02f8	     */		bl,a,pt	%icc,.L900000416	! loop while i < len
   1.775 +/* 0x02fc	     */		ld	[%o0],%f3	! (delay slot, annulled on loop exit) next input word
   1.776 +                       .L77000164:
   1.777 +/* 0x0300	     */		ret	! Result = 
   1.778 +/* 0x0304	     */		restore	%g0,%g0,%g0	! (delay slot) pop the register window
   1.779 +/* 0x0308	   0 */		.type	conv_i32_to_d32_and_d16,2
   1.780 +/* 0x0308	     */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
   1.781 +
   1.782 +	.section	".text",#alloc,#execinstr
   1.783 +/* 000000	   0 */		.align	4
   1.784 +!
   1.785 +! SUBROUTINE adjust_montf_result
   1.786 +! Purpose: if i32[len] != 0, or i32[0..len-1] >= nint[0..len-1] (unsigned words compared from index len-1 down), subtract nint from i32 in place over len words, propagating the borrow through a 64-bit accumulator.
   1.787 +! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
   1.788 +! In: %o0 = i32, %o1 = nint, %o2 = len (per the C prototype in the listing below). No save/restore; returns with retl. Overwrites %g1-%g5 and %o1-%o5.
   1.789 +                       	.global adjust_montf_result
   1.790 +                       adjust_montf_result:
   1.791 +/* 000000	     */		or	%g0,%o2,%g5	! %g5 = len (kept because %o2 is reused as scratch later)
   1.792 +
   1.793 +!  181		      !   }
   1.794 +!  182		      !}
   1.795 +!  185		      !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
   1.796 +!  186		      !{
   1.797 +!  187		      !long long acc;
   1.798 +!  188		      !int i;
   1.799 +!  190		      ! if(i32[len]>0) i=-1;
   1.800 +
   1.801 +/* 0x0004	 190 */		or	%g0,-1,%g4	! %g4 = i = -1 (value used if i32[len] > 0)
   1.802 +/* 0x0008	     */		sll	%o2,2,%g1	! %g1 = len*4, byte offset of i32[len]
   1.803 +/* 0x000c	     */		ld	[%o0+%g1],%g1	! %g1 = i32[len]
   1.804 +/* 0x0010	     */		cmp	%g1,0
   1.805 +/* 0x0014	     */		bleu,pn	%icc,.L77000175	! unsigned <= 0, i.e. i32[len] == 0: go compare the words
   1.806 +/* 0x0018	     */		or	%g0,%o1,%o3	! delay slot (runs on both paths): %o3 = nint, read pointer for the subtract loops
   1.807 +/* 0x001c	     */		ba	.L900000511	! i32[len] > 0: skip the compare loop with i = -1
   1.808 +/* 0x0020	     */		cmp	%g4,0	! delay slot: set condition codes for the bl at .L900000511
   1.809 +                       .L77000175:
   1.810 +
   1.811 +!  191		      ! else
   1.812 +!  192		      !   {
   1.813 +!  193		      !     for(i=len-1; i>=0; i--)
   1.814 +
   1.815 +/* 0x0024	 193 */		sub	%o2,1,%g4	! %g4 = i = len-1
   1.816 +/* 0x0028	     */		sll	%g4,2,%g1	! %g1 = i*4
   1.817 +/* 0x002c	     */		cmp	%g4,0
   1.818 +/* 0x0030	     */		bl,pt	%icc,.L900000511	! i < 0 (len <= 0): no words to compare
   1.819 +/* 0x0034	     */		cmp	%g4,0	! delay slot: condition codes for the bl at .L900000511
   1.820 +/* 0x0038	     */		add	%o1,%g1,%g2	! %g2 = &nint[i]
   1.821 +
   1.822 +!  194		      !       {
   1.823 +!  195		      !	 if(i32[i]!=nint[i]) break;
   1.824 +
   1.825 +/* 0x003c	 195 */		ld	[%g2],%o5	! %o5 = nint[i] for the first iteration
   1.826 +/* 0x0040	 193 */		add	%o0,%g1,%g3	! %g3 = &i32[i]
   1.827 +                       .L900000510:
   1.828 +/* 0x0044	 195 */		ld	[%g3],%o2	! %o2 = i32[i]
   1.829 +/* 0x0048	     */		sub	%g4,1,%g1	! %g1 = i-1 (candidate next index)
   1.830 +/* 0x004c	     */		sub	%g2,4,%g2	! step nint pointer down one word
   1.831 +/* 0x0050	     */		sub	%g3,4,%g3	! step i32 pointer down one word
   1.832 +/* 0x0054	     */		cmp	%o2,%o5
   1.833 +/* 0x0058	     */		bne,pn	%icc,.L77000182	! words differ: break, %g4 still holds the differing index i
   1.834 +/* 0x005c	     */		nop
   1.835 +/* 0x0060	   0 */		or	%g0,%g1,%g4	! i = i-1
   1.836 +/* 0x0064	 195 */		cmp	%g1,0
   1.837 +/* 0x0068	     */		bge,a,pt	%icc,.L900000510	! continue while i >= 0
   1.838 +/* 0x006c	     */		ld	[%g2],%o5	! annulled delay slot (taken path only): %o5 = nint[i] for the next iteration
   1.839 +                       .L77000182:
   1.840 +
   1.841 +!  196		      !       }
   1.842 +!  197		      !   }
   1.843 +!  198		      ! if((i<0)||(i32[i]>nint[i]))
   1.844 +
   1.845 +/* 0x0070	 198 */		cmp	%g4,0
   1.846 +                       .L900000511:
   1.847 +/* 0x0074	 198 */		bl,pn	%icc,.L77000198	! i < 0: subtract unconditionally (condition codes were set by the cmp reaching this label)
   1.848 +/* 0x0078	     */		sll	%g4,2,%g2	! delay slot: %g2 = i*4
   1.849 +/* 0x007c	     */		ld	[%o1+%g2],%g1	! %g1 = nint[i]
   1.850 +/* 0x0080	     */		ld	[%o0+%g2],%g2	! %g2 = i32[i]
   1.851 +/* 0x0084	     */		cmp	%g2,%g1
   1.852 +/* 0x0088	     */		bleu,pt	%icc,.L77000191	! i32[i] <= nint[i] (unsigned): nothing to subtract, return
   1.853 +/* 0x008c	     */		nop
   1.854 +                       .L77000198:
   1.855 +
   1.856 +!  199		      !   {
   1.857 +!  200		      !     acc=0;
   1.858 +!  201		      !     for(i=0;i<len;i++)
   1.859 +
   1.860 +/* 0x0090	 201 */		cmp	%g5,0
   1.861 +/* 0x0094	     */		ble,pt	%icc,.L77000191	! len <= 0: loop body never runs, return
   1.862 +/* 0x0098	     */		nop
   1.863 +/* 0x009c	     */		or	%g0,%g5,%g1	! %g1 = len
   1.864 +/* 0x00a0	 198 */		or	%g0,-1,%g2
   1.865 +/* 0x00a4	     */		srl	%g2,0,%g3	! %g3 = low-32-bit mask (0xffffffff) used for acc&0xffffffff
   1.866 +/* 0x00a8	     */		sub	%g5,1,%g4	! %g4 = len-1, loop bound
   1.867 +/* 0x00ac	 200 */		or	%g0,0,%g5	! %g5 = acc = 0
   1.868 +/* 0x00b0	 201 */		or	%g0,0,%o5	! %o5 = i = 0
   1.869 +/* 0x00b4	 198 */		or	%g0,%o0,%o4	! %o4 = i32, pointer used by the short loop at .L77000199
   1.870 +/* 0x00b8	     */		cmp	%g1,3
   1.871 +/* 0x00bc	 201 */		bl,pn	%icc,.L77000199	! len < 3: use the simple one-word-per-iteration loop
   1.872 +/* 0x00c0	     */		add	%o0,8,%g1	! delay slot: %g1 = &i32[2]
   1.873 +/* 0x00c4	     */		add	%o1,4,%g2	! %g2 = &nint[1]
   1.874 +
   1.875 +!  202		      !       {
   1.876 +!  203		      !	 acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
   1.877 +
   1.878 +/* 0x00c8	 203 */		ld	[%o0],%o2	! %o2 = i32[0]
   1.879 +/* 0x00cc	     */		ld	[%o1],%o1	! %o1 = nint[0] (the nint base pointer in %o1 is no longer needed)
   1.880 +/* 0x00d0	   0 */		or	%g0,%g1,%o4	! %o4 = &i32[2]; stores below use negative offsets from it
   1.881 +/* 0x00d4	     */		or	%g0,%g2,%o3	! %o3 = &nint[1]
   1.882 +/* 0x00d8	 203 */		ld	[%o0+4],%g1	! %g1 = i32[1], preloaded for the first loop iteration
   1.883 +
   1.884 +!  204		      !	 i32[i]=acc&0xffffffff;
   1.885 +!  205		      !	 acc=acc>>32;
   1.886 +
   1.887 +/* 0x00dc	 205 */		or	%g0,2,%o5	! loop counter starts at 2 (word 0 is handled here, the last word after the loop)
   1.888 +/* 0x00e0	 201 */		sub	%o2,%o1,%o2	! i32[0] - nint[0]
   1.889 +/* 0x00e4	     */		or	%g0,%o2,%g5	! acc = that difference
   1.890 +/* 0x00e8	 204 */		and	%o2,%g3,%o2	! keep low 32 bits
   1.891 +/* 0x00ec	     */		st	%o2,[%o0]	! i32[0] = acc & 0xffffffff
   1.892 +/* 0x00f0	 205 */		srax	%g5,32,%g5	! acc >>= 32 (arithmetic shift, so a borrow stays negative)
   1.893 +                       .L900000505:
   1.894 +/* 0x00f4	 203 */		ld	[%o3],%o2	! %o2 = nint word paired with the i32 word already in %g1
   1.895 +/* 0x00f8	 205 */		add	%o5,1,%o5	! counter++
   1.896 +/* 0x00fc	     */		add	%o3,4,%o3	! advance nint pointer
   1.897 +/* 0x0100	     */		cmp	%o5,%g4	! counter vs len-1; consumed by the ble below
   1.898 +/* 0x0104	     */		add	%o4,4,%o4	! advance i32 pointer
   1.899 +/* 0x0108	 201 */		sub	%g1,%o2,%g1	! i32 word - nint word
   1.900 +/* 0x010c	     */		add	%g1,%g5,%g5	! acc = acc + difference
   1.901 +/* 0x0110	 204 */		and	%g5,%g3,%o2	! low 32 bits of acc
   1.902 +/* 0x0114	 203 */		ld	[%o4-4],%g1	! preload the next i32 word
   1.903 +/* 0x0118	 204 */		st	%o2,[%o4-8]	! store result into the word just processed
   1.904 +/* 0x011c	 205 */		ble,pt	%icc,.L900000505	! loop while counter <= len-1
   1.905 +/* 0x0120	     */		srax	%g5,32,%g5	! delay slot (always executed): acc >>= 32
   1.906 +                       .L900000508:
   1.907 +/* 0x0124	 203 */		ld	[%o3],%g2	! last word: %g2 = nint word, matching i32 word is already in %g1
   1.908 +/* 0x0128	 201 */		sub	%g1,%g2,%g1
   1.909 +/* 0x012c	     */		add	%g1,%g5,%g1	! add carried acc
   1.910 +/* 0x0130	 204 */		and	%g1,%g3,%g2	! low 32 bits
   1.911 +/* 0x0134	     */		retl	! Result = 
   1.912 +/* 0x0138	     */		st	%g2,[%o4-4]	! delay slot: store the final word before returning
   1.913 +                       .L77000199:
   1.914 +/* 0x013c	 203 */		ld	[%o4],%g1	! short path (len is 1 or 2): %g1 = i32[0]
   1.915 +                       .L900000509:
   1.916 +/* 0x0140	 203 */		ld	[%o3],%g2	! %g2 = nint[i]
   1.917 +/* 0x0144	     */		add	%g5,%g1,%g1	! acc + i32[i]
   1.918 +/* 0x0148	 205 */		add	%o5,1,%o5	! i++
   1.919 +/* 0x014c	     */		add	%o3,4,%o3	! advance nint pointer
   1.920 +/* 0x0150	     */		cmp	%o5,%g4	! i vs len-1; consumed by the ble,a below
   1.921 +/* 0x0154	 203 */		sub	%g1,%g2,%g1	! ... - nint[i]
   1.922 +/* 0x0158	 204 */		and	%g1,%g3,%g2	! low 32 bits
   1.923 +/* 0x015c	     */		st	%g2,[%o4]	! i32[i] = acc & 0xffffffff
   1.924 +/* 0x0160	 205 */		add	%o4,4,%o4	! advance i32 pointer
   1.925 +/* 0x0164	     */		srax	%g1,32,%g5	! acc = acc >> 32 (arithmetic)
   1.926 +/* 0x0168	     */		ble,a,pt	%icc,.L900000509	! loop while i <= len-1
   1.927 +/* 0x016c	     */		ld	[%o4],%g1	! annulled delay slot (taken path only): load next i32 word
   1.928 +                       .L77000191:
   1.929 +/* 0x0170	     */		retl	! Result = 
   1.930 +/* 0x0174	     */		nop
   1.931 +/* 0x0178	   0 */		.type	adjust_montf_result,2
   1.932 +/* 0x0178	     */		.size	adjust_montf_result,(.-adjust_montf_result)
   1.933 +
   1.934 +	.section	".text",#alloc,#execinstr
   1.935 +/* 000000	   0 */		.align	4
   1.936 +/* 000000	     */		.skip	16
   1.937 +!
   1.938 +! SUBROUTINE mont_mulf_noconv
   1.939 +!
   1.940 +! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
   1.941 +
   1.942 +                       	.global mont_mulf_noconv
   1.943 +                       mont_mulf_noconv:
   1.944 +/* 000000	     */		save	%sp,-144,%sp
   1.945 +                       .L900000646:
   1.946 +/* 0x0004	     */		call	.+8
   1.947 +/* 0x0008	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
   1.948 +
   1.949 +!  206		      !       }
   1.950 +!  207		      !   }
   1.951 +!  208		      !}
   1.952 +!  213		      !/*
   1.953 +!  214		      !** the lengths of the input arrays should be at least the following:
   1.954 +!  215		      !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
   1.955 +!  216		      !** all of them should be different from one another
   1.956 +!  217		      !**
   1.957 +!  218		      !*/
   1.958 +!  219		      !void mont_mulf_noconv(unsigned int *result,
   1.959 +!  220		      !		     double *dm1, double *dm2, double *dt,
   1.960 +!  221		      !		     double *dn, unsigned int *nint,
   1.961 +!  222		      !		     int nlen, double dn0)
   1.962 +!  223		      !{
   1.963 +!  224		      ! int i, j, jj;
   1.964 +!  225		      ! int tmp;
   1.965 +!  226		      ! double digit, m2j, nextm2j, a, b;
   1.966 +!  227		      ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
   1.967 +!  229		      ! pdm1=&(dm1[0]);
   1.968 +!  230		      ! pdm2=&(dm2[0]);
   1.969 +!  231		      ! pdn=&(dn[0]);
   1.970 +!  232		      ! pdm2[2*nlen]=Zero;
   1.971 +
   1.972 +/* 0x000c	 232 */		ld	[%fp+92],%o1
   1.973 +/* 0x0010	     */		sethi	%hi(Zero),%g2
   1.974 +/* 0x0014	 223 */		ldd	[%fp+96],%f2
   1.975 +/* 0x0018	     */		add	%g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
   1.976 +/* 0x001c	 232 */		add	%g2,%lo(Zero),%g2
   1.977 +/* 0x0020	 223 */		st	%i0,[%fp+68]
   1.978 +/* 0x0024	     */		add	%g5,%o7,%o3
   1.979 +
   1.980 +!  234		      ! if (nlen!=16)
   1.981 +!  235		      !   {
   1.982 +!  236		      !     for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
   1.983 +!  238		      !     a=dt[0]=pdm1[0]*pdm2[0];
   1.984 +!  239		      !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
   1.985 +
   1.986 +/* 0x0028	 239 */		sethi	%hi(TwoToMinus16),%g3
   1.987 +/* 0x002c	 232 */		ld	[%o3+%g2],%l0
   1.988 +/* 0x0030	 239 */		sethi	%hi(TwoTo16),%g4
   1.989 +/* 0x0034	 223 */		or	%g0,%i2,%o2
   1.990 +/* 0x0038	     */		fmovd	%f2,%f16
   1.991 +/* 0x003c	     */		st	%i5,[%fp+88]
   1.992 +/* 0x0040	 239 */		add	%g3,%lo(TwoToMinus16),%g2
   1.993 +/* 0x0044	 223 */		or	%g0,%i1,%i2
   1.994 +/* 0x0048	 232 */		ldd	[%l0],%f0
   1.995 +/* 0x004c	 239 */		add	%g4,%lo(TwoTo16),%g3
   1.996 +/* 0x0050	 223 */		or	%g0,%i3,%o0
   1.997 +/* 0x0054	 232 */		sll	%o1,4,%g4
   1.998 +/* 0x0058	 239 */		ld	[%o3+%g2],%g5
   1.999 +/* 0x005c	 223 */		or	%g0,%i3,%i1
  1.1000 +/* 0x0060	 239 */		ld	[%o3+%g3],%g1
  1.1001 +/* 0x0064	 232 */		or	%g0,%o1,%i0
  1.1002 +/* 0x0068	     */		or	%g0,%o2,%i3
  1.1003 +/* 0x006c	 234 */		cmp	%o1,16
  1.1004 +/* 0x0070	     */		be,pn	%icc,.L77000279
  1.1005 +/* 0x0074	     */		std	%f0,[%o2+%g4]
  1.1006 +/* 0x0078	 236 */		sll	%o1,2,%g2
  1.1007 +/* 0x007c	     */		or	%g0,%o0,%o3
  1.1008 +/* 0x0080	 232 */		sll	%o1,1,%o1
  1.1009 +/* 0x0084	 236 */		add	%g2,2,%o2
  1.1010 +/* 0x0088	     */		cmp	%o2,0
  1.1011 +/* 0x008c	     */		ble,a,pt	%icc,.L900000660
  1.1012 +/* 0x0090	     */		ldd	[%i2],%f0
  1.1013 +
  1.1014 +!  241		      !     pdtj=&(dt[0]);
  1.1015 +!  242		      !     for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
  1.1016 +!  243		      !       {
  1.1017 +!  244		      !	 m2j=pdm2[j];
  1.1018 +!  245		      !	 a=pdtj[0]+pdn[0]*digit;
  1.1019 +!  246		      !	 b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
  1.1020 +!  247		      !	 pdtj[1]=b;
  1.1021 +!  249		      !#pragma pipeloop(0)
  1.1022 +!  250		      !	 for(i=1;i<nlen;i++)
  1.1023 +!  251		      !	   {
  1.1024 +!  252		      !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
  1.1025 +!  253		      !	   }
  1.1026 +!  254		      ! 	 if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
  1.1027 +!  255		      !	 
  1.1028 +!  256		      !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
  1.1029 +!  257		      !       }
  1.1030 +!  258		      !   }
  1.1031 +!  259		      ! else
  1.1032 +!  260		      !   {
  1.1033 +!  261		      !     a=dt[0]=pdm1[0]*pdm2[0];
  1.1034 +!  263		      !     dt[65]=     dt[64]=     dt[63]=     dt[62]=     dt[61]=     dt[60]=
  1.1035 +!  264		      !     dt[59]=     dt[58]=     dt[57]=     dt[56]=     dt[55]=     dt[54]=
  1.1036 +!  265		      !     dt[53]=     dt[52]=     dt[51]=     dt[50]=     dt[49]=     dt[48]=
  1.1037 +!  266		      !     dt[47]=     dt[46]=     dt[45]=     dt[44]=     dt[43]=     dt[42]=
  1.1038 +!  267		      !     dt[41]=     dt[40]=     dt[39]=     dt[38]=     dt[37]=     dt[36]=
  1.1039 +!  268		      !     dt[35]=     dt[34]=     dt[33]=     dt[32]=     dt[31]=     dt[30]=
  1.1040 +!  269		      !     dt[29]=     dt[28]=     dt[27]=     dt[26]=     dt[25]=     dt[24]=
  1.1041 +!  270		      !     dt[23]=     dt[22]=     dt[21]=     dt[20]=     dt[19]=     dt[18]=
  1.1042 +!  271		      !     dt[17]=     dt[16]=     dt[15]=     dt[14]=     dt[13]=     dt[12]=
  1.1043 +!  272		      !     dt[11]=     dt[10]=     dt[ 9]=     dt[ 8]=     dt[ 7]=     dt[ 6]=
  1.1044 +!  273		      !     dt[ 5]=     dt[ 4]=     dt[ 3]=     dt[ 2]=     dt[ 1]=Zero;
  1.1045 +!  275		      !     pdn_0=pdn[0];
  1.1046 +!  276		      !     pdm1_0=pdm1[0];
  1.1047 +!  278		      !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
  1.1048 +!  279		      !     pdtj=&(dt[0]);
  1.1049 +!  281		      !     for(j=0;j<32;j++,pdtj++)
  1.1050 +
  1.1051 +/* 0x0094	 281 */		add	%g2,2,%o0
  1.1052 +/* 0x0098	 236 */		add	%g2,1,%o2
  1.1053 +/* 0x009c	 281 */		cmp	%o0,3
  1.1054 +/* 0x00a0	     */		bl,pn	%icc,.L77000280
  1.1055 +/* 0x00a4	     */		or	%g0,1,%o0
  1.1056 +/* 0x00a8	     */		add	%o3,8,%o3
  1.1057 +/* 0x00ac	     */		or	%g0,1,%o4
  1.1058 +/* 0x00b0	     */		std	%f0,[%o3-8]
  1.1059 +                       .L900000630:
  1.1060 +/* 0x00b4	     */		std	%f0,[%o3]
  1.1061 +/* 0x00b8	     */		add	%o4,2,%o4
  1.1062 +/* 0x00bc	     */		add	%o3,16,%o3
  1.1063 +/* 0x00c0	     */		cmp	%o4,%g2
  1.1064 +/* 0x00c4	     */		ble,pt	%icc,.L900000630
  1.1065 +/* 0x00c8	     */		std	%f0,[%o3-8]
  1.1066 +                       .L900000633:
  1.1067 +/* 0x00cc	     */		cmp	%o4,%o2
  1.1068 +/* 0x00d0	     */		bg,pn	%icc,.L77000285
  1.1069 +/* 0x00d4	     */		add	%o4,1,%o0
  1.1070 +                       .L77000280:
  1.1071 +/* 0x00d8	     */		std	%f0,[%o3]
  1.1072 +                       .L900000659:
  1.1073 +/* 0x00dc	     */		ldd	[%l0],%f0
  1.1074 +/* 0x00e0	     */		cmp	%o0,%o2
  1.1075 +/* 0x00e4	     */		add	%o3,8,%o3
  1.1076 +/* 0x00e8	     */		add	%o0,1,%o0
  1.1077 +/* 0x00ec	     */		ble,a,pt	%icc,.L900000659
  1.1078 +/* 0x00f0	     */		std	%f0,[%o3]
  1.1079 +                       .L77000285:
  1.1080 +/* 0x00f4	 238 */		ldd	[%i2],%f0
  1.1081 +                       .L900000660:
  1.1082 +/* 0x00f8	 238 */		ldd	[%i3],%f2
  1.1083 +/* 0x00fc	     */		add	%o1,1,%o2
  1.1084 +/* 0x0100	 242 */		cmp	%o1,0
  1.1085 +/* 0x0104	     */		sll	%o2,1,%o0
  1.1086 +/* 0x0108	     */		sub	%o1,1,%o1
  1.1087 +/* 0x010c	 238 */		fmuld	%f0,%f2,%f0
  1.1088 +/* 0x0110	     */		std	%f0,[%i1]
  1.1089 +/* 0x0114	   0 */		or	%g0,0,%l1
  1.1090 +/* 0x0118	     */		ldd	[%l0],%f6
  1.1091 +/* 0x011c	     */		or	%g0,0,%g4
  1.1092 +/* 0x0120	     */		or	%g0,%o2,%i5
  1.1093 +/* 0x0124	     */		ldd	[%g5],%f2
  1.1094 +/* 0x0128	     */		or	%g0,%o1,%g3
  1.1095 +/* 0x012c	     */		or	%g0,%o0,%o3
  1.1096 +/* 0x0130	     */		fdtox	%f0,%f0
  1.1097 +/* 0x0134	     */		ldd	[%g1],%f4
  1.1098 +/* 0x0138	 246 */		add	%i3,8,%o4
  1.1099 +/* 0x013c	     */		or	%g0,0,%l2
  1.1100 +/* 0x0140	     */		or	%g0,%i1,%o5
  1.1101 +/* 0x0144	     */		sub	%i0,1,%o7
  1.1102 +/* 0x0148	     */		fmovs	%f6,%f0
  1.1103 +/* 0x014c	     */		fxtod	%f0,%f0
  1.1104 +/* 0x0150	 239 */		fmuld	%f0,%f16,%f0
  1.1105 +/* 0x0154	     */		fmuld	%f0,%f2,%f2
  1.1106 +/* 0x0158	     */		fdtox	%f2,%f2
  1.1107 +/* 0x015c	     */		fxtod	%f2,%f2
  1.1108 +/* 0x0160	     */		fmuld	%f2,%f4,%f2
  1.1109 +/* 0x0164	     */		fsubd	%f0,%f2,%f22
  1.1110 +/* 0x0168	 242 */		ble,pt	%icc,.L900000653
  1.1111 +/* 0x016c	     */		sll	%i0,4,%g2
  1.1112 +/* 0x0170	 246 */		ldd	[%i4],%f0
  1.1113 +                       .L900000654:
  1.1114 +/* 0x0174	 246 */		fmuld	%f0,%f22,%f8
  1.1115 +/* 0x0178	     */		ldd	[%i2],%f0
  1.1116 +/* 0x017c	 250 */		cmp	%i0,1
  1.1117 +/* 0x0180	 246 */		ldd	[%o4+%l2],%f6
  1.1118 +/* 0x0184	     */		add	%i2,8,%o0
  1.1119 +/* 0x0188	 250 */		or	%g0,1,%o1
  1.1120 +/* 0x018c	 246 */		ldd	[%o5],%f2
  1.1121 +/* 0x0190	     */		add	%o5,16,%l3
  1.1122 +/* 0x0194	     */		fmuld	%f0,%f6,%f6
  1.1123 +/* 0x0198	     */		ldd	[%g5],%f4
  1.1124 +/* 0x019c	     */		faddd	%f2,%f8,%f2
  1.1125 +/* 0x01a0	     */		ldd	[%o5+8],%f0
  1.1126 +/* 0x01a4	 244 */		ldd	[%i3+%l2],%f20
  1.1127 +/* 0x01a8	 246 */		faddd	%f0,%f6,%f0
  1.1128 +/* 0x01ac	     */		fmuld	%f2,%f4,%f2
  1.1129 +/* 0x01b0	     */		faddd	%f0,%f2,%f18
  1.1130 +/* 0x01b4	 247 */		std	%f18,[%o5+8]
  1.1131 +/* 0x01b8	 250 */		ble,pt	%icc,.L900000658
  1.1132 +/* 0x01bc	     */		srl	%g4,31,%g2
  1.1133 +/* 0x01c0	     */		cmp	%o7,7
  1.1134 +/* 0x01c4	 246 */		add	%i4,8,%g2
  1.1135 +/* 0x01c8	 250 */		bl,pn	%icc,.L77000284
  1.1136 +/* 0x01cc	     */		add	%g2,24,%o2
  1.1137 +/* 0x01d0	 252 */		ldd	[%o0+24],%f12
  1.1138 +/* 0x01d4	     */		add	%o5,48,%l3
  1.1139 +/* 0x01d8	     */		ldd	[%o0],%f2
  1.1140 +/* 0x01dc	   0 */		or	%g0,%o2,%g2
  1.1141 +/* 0x01e0	 250 */		sub	%o7,2,%o2
  1.1142 +/* 0x01e4	 252 */		ldd	[%g2-24],%f0
  1.1143 +/* 0x01e8	     */		or	%g0,5,%o1
  1.1144 +/* 0x01ec	     */		ldd	[%o0+8],%f6
  1.1145 +/* 0x01f0	     */		fmuld	%f2,%f20,%f2
  1.1146 +/* 0x01f4	     */		ldd	[%o0+16],%f14
  1.1147 +/* 0x01f8	     */		fmuld	%f0,%f22,%f4
  1.1148 +/* 0x01fc	     */		add	%o0,32,%o0
  1.1149 +/* 0x0200	     */		ldd	[%g2-16],%f8
  1.1150 +/* 0x0204	     */		fmuld	%f6,%f20,%f10
  1.1151 +/* 0x0208	     */		ldd	[%o5+16],%f0
  1.1152 +/* 0x020c	     */		ldd	[%g2-8],%f6
  1.1153 +/* 0x0210	     */		faddd	%f2,%f4,%f4
  1.1154 +/* 0x0214	     */		ldd	[%o5+32],%f2
  1.1155 +                       .L900000642:
  1.1156 +/* 0x0218	 252 */		ldd	[%g2],%f24
  1.1157 +/* 0x021c	     */		add	%o1,3,%o1
  1.1158 +/* 0x0220	     */		add	%g2,24,%g2
  1.1159 +/* 0x0224	     */		fmuld	%f8,%f22,%f8
  1.1160 +/* 0x0228	     */		ldd	[%l3],%f28
  1.1161 +/* 0x022c	     */		cmp	%o1,%o2
  1.1162 +/* 0x0230	     */		add	%o0,24,%o0
  1.1163 +/* 0x0234	     */		ldd	[%o0-24],%f26
  1.1164 +/* 0x0238	     */		faddd	%f0,%f4,%f0
  1.1165 +/* 0x023c	     */		add	%l3,48,%l3
  1.1166 +/* 0x0240	     */		faddd	%f10,%f8,%f10
  1.1167 +/* 0x0244	     */		fmuld	%f14,%f20,%f4
  1.1168 +/* 0x0248	     */		std	%f0,[%l3-80]
  1.1169 +/* 0x024c	     */		ldd	[%g2-16],%f8
  1.1170 +/* 0x0250	     */		fmuld	%f6,%f22,%f6
  1.1171 +/* 0x0254	     */		ldd	[%l3-32],%f0
  1.1172 +/* 0x0258	     */		ldd	[%o0-16],%f14
  1.1173 +/* 0x025c	     */		faddd	%f2,%f10,%f2
  1.1174 +/* 0x0260	     */		faddd	%f4,%f6,%f10
  1.1175 +/* 0x0264	     */		fmuld	%f12,%f20,%f4
  1.1176 +/* 0x0268	     */		std	%f2,[%l3-64]
  1.1177 +/* 0x026c	     */		ldd	[%g2-8],%f6
  1.1178 +/* 0x0270	     */		fmuld	%f24,%f22,%f24
  1.1179 +/* 0x0274	     */		ldd	[%l3-16],%f2
  1.1180 +/* 0x0278	     */		ldd	[%o0-8],%f12
  1.1181 +/* 0x027c	     */		faddd	%f28,%f10,%f10
  1.1182 +/* 0x0280	     */		std	%f10,[%l3-48]
  1.1183 +/* 0x0284	     */		fmuld	%f26,%f20,%f10
  1.1184 +/* 0x0288	     */		ble,pt	%icc,.L900000642
  1.1185 +/* 0x028c	     */		faddd	%f4,%f24,%f4
  1.1186 +                       .L900000645:
  1.1187 +/* 0x0290	 252 */		fmuld	%f8,%f22,%f28
  1.1188 +/* 0x0294	     */		ldd	[%g2],%f24
  1.1189 +/* 0x0298	     */		faddd	%f0,%f4,%f26
  1.1190 +/* 0x029c	     */		fmuld	%f12,%f20,%f8
  1.1191 +/* 0x02a0	     */		add	%l3,32,%l3
  1.1192 +/* 0x02a4	     */		cmp	%o1,%o7
  1.1193 +/* 0x02a8	     */		fmuld	%f14,%f20,%f14
  1.1194 +/* 0x02ac	     */		ldd	[%l3-32],%f4
  1.1195 +/* 0x02b0	     */		add	%g2,8,%g2
  1.1196 +/* 0x02b4	     */		faddd	%f10,%f28,%f12
  1.1197 +/* 0x02b8	     */		fmuld	%f6,%f22,%f6
  1.1198 +/* 0x02bc	     */		ldd	[%l3-16],%f0
  1.1199 +/* 0x02c0	     */		fmuld	%f24,%f22,%f10
  1.1200 +/* 0x02c4	     */		std	%f26,[%l3-64]
  1.1201 +/* 0x02c8	     */		faddd	%f2,%f12,%f2
  1.1202 +/* 0x02cc	     */		std	%f2,[%l3-48]
  1.1203 +/* 0x02d0	     */		faddd	%f14,%f6,%f6
  1.1204 +/* 0x02d4	     */		faddd	%f8,%f10,%f2
  1.1205 +/* 0x02d8	     */		faddd	%f4,%f6,%f4
  1.1206 +/* 0x02dc	     */		std	%f4,[%l3-32]
  1.1207 +/* 0x02e0	     */		faddd	%f0,%f2,%f0
  1.1208 +/* 0x02e4	     */		bg,pn	%icc,.L77000213
  1.1209 +/* 0x02e8	     */		std	%f0,[%l3-16]
  1.1210 +                       .L77000284:
  1.1211 +/* 0x02ec	 252 */		ldd	[%o0],%f0
  1.1212 +                       .L900000657:
  1.1213 +/* 0x02f0	 252 */		ldd	[%g2],%f4
  1.1214 +/* 0x02f4	     */		fmuld	%f0,%f20,%f2
  1.1215 +/* 0x02f8	     */		add	%o1,1,%o1
  1.1216 +/* 0x02fc	     */		ldd	[%l3],%f0
  1.1217 +/* 0x0300	     */		add	%o0,8,%o0
  1.1218 +/* 0x0304	     */		add	%g2,8,%g2
  1.1219 +/* 0x0308	     */		fmuld	%f4,%f22,%f4
  1.1220 +/* 0x030c	     */		cmp	%o1,%o7
  1.1221 +/* 0x0310	     */		faddd	%f2,%f4,%f2
  1.1222 +/* 0x0314	     */		faddd	%f0,%f2,%f0
  1.1223 +/* 0x0318	     */		std	%f0,[%l3]
  1.1224 +/* 0x031c	     */		add	%l3,16,%l3
  1.1225 +/* 0x0320	     */		ble,a,pt	%icc,.L900000657
  1.1226 +/* 0x0324	     */		ldd	[%o0],%f0
  1.1227 +                       .L77000213:
  1.1228 +/* 0x0328	     */		srl	%g4,31,%g2
  1.1229 +                       .L900000658:
  1.1230 +/* 0x032c	 254 */		cmp	%l1,30
  1.1231 +/* 0x0330	     */		bne,a,pt	%icc,.L900000656
  1.1232 +/* 0x0334	     */		fdtox	%f18,%f0
  1.1233 +/* 0x0338	     */		add	%g4,%g2,%g2
  1.1234 +/* 0x033c	     */		sra	%g2,1,%o0
  1.1235 +/* 0x0340	 281 */		ldd	[%l0],%f0
  1.1236 +/* 0x0344	     */		sll	%i5,1,%o2
  1.1237 +/* 0x0348	     */		add	%o0,1,%g2
  1.1238 +/* 0x034c	     */		sll	%g2,1,%o0
  1.1239 +/* 0x0350	 254 */		sub	%o2,1,%o2
  1.1240 +/* 0x0354	 281 */		fmovd	%f0,%f2
  1.1241 +/* 0x0358	     */		sll	%g2,4,%o1
  1.1242 +/* 0x035c	     */		cmp	%o0,%o3
  1.1243 +/* 0x0360	     */		bge,pt	%icc,.L77000215
  1.1244 +/* 0x0364	     */		or	%g0,0,%l1
  1.1245 +/* 0x0368	 254 */		add	%i1,%o1,%o1
  1.1246 +/* 0x036c	 281 */		ldd	[%o1],%f6
  1.1247 +                       .L900000655:
  1.1248 +/* 0x0370	     */		fdtox	%f6,%f10
  1.1249 +/* 0x0374	     */		ldd	[%o1+8],%f4
  1.1250 +/* 0x0378	     */		add	%o0,2,%o0
  1.1251 +/* 0x037c	     */		ldd	[%l0],%f12
  1.1252 +/* 0x0380	     */		fdtox	%f6,%f6
  1.1253 +/* 0x0384	     */		cmp	%o0,%o2
  1.1254 +/* 0x0388	     */		fdtox	%f4,%f8
  1.1255 +/* 0x038c	     */		fdtox	%f4,%f4
  1.1256 +/* 0x0390	     */		fmovs	%f12,%f10
  1.1257 +/* 0x0394	     */		fmovs	%f12,%f8
  1.1258 +/* 0x0398	     */		fxtod	%f10,%f10
  1.1259 +/* 0x039c	     */		fxtod	%f8,%f8
  1.1260 +/* 0x03a0	     */		faddd	%f10,%f2,%f2
  1.1261 +/* 0x03a4	     */		std	%f2,[%o1]
  1.1262 +/* 0x03a8	     */		faddd	%f8,%f0,%f0
  1.1263 +/* 0x03ac	     */		std	%f0,[%o1+8]
  1.1264 +/* 0x03b0	     */		add	%o1,16,%o1
  1.1265 +/* 0x03b4	     */		fitod	%f6,%f2
  1.1266 +/* 0x03b8	     */		fitod	%f4,%f0
  1.1267 +/* 0x03bc	     */		ble,a,pt	%icc,.L900000655
  1.1268 +/* 0x03c0	     */		ldd	[%o1],%f6
  1.1269 +                       .L77000233:
  1.1270 +/* 0x03c4	     */		or	%g0,0,%l1
  1.1271 +                       .L77000215:
  1.1272 +/* 0x03c8	     */		fdtox	%f18,%f0
  1.1273 +                       .L900000656:
  1.1274 +/* 0x03cc	     */		ldd	[%l0],%f6
  1.1275 +/* 0x03d0	 256 */		add	%g4,1,%g4
  1.1276 +/* 0x03d4	     */		add	%l2,8,%l2
  1.1277 +/* 0x03d8	     */		ldd	[%g5],%f2
  1.1278 +/* 0x03dc	     */		add	%l1,1,%l1
  1.1279 +/* 0x03e0	     */		add	%o5,8,%o5
  1.1280 +/* 0x03e4	     */		fmovs	%f6,%f0
  1.1281 +/* 0x03e8	     */		ldd	[%g1],%f4
  1.1282 +/* 0x03ec	     */		cmp	%g4,%g3
  1.1283 +/* 0x03f0	     */		fxtod	%f0,%f0
  1.1284 +/* 0x03f4	     */		fmuld	%f0,%f16,%f0
  1.1285 +/* 0x03f8	     */		fmuld	%f0,%f2,%f2
  1.1286 +/* 0x03fc	     */		fdtox	%f2,%f2
  1.1287 +/* 0x0400	     */		fxtod	%f2,%f2
  1.1288 +/* 0x0404	     */		fmuld	%f2,%f4,%f2
  1.1289 +/* 0x0408	     */		fsubd	%f0,%f2,%f22
  1.1290 +/* 0x040c	     */		ble,a,pt	%icc,.L900000654
  1.1291 +/* 0x0410	     */		ldd	[%i4],%f0
  1.1292 +                       .L900000629:
  1.1293 +/* 0x0414	 256 */		ba	.L900000653
  1.1294 +/* 0x0418	     */		sll	%i0,4,%g2
  1.1295 +                       .L77000279:
  1.1296 +/* 0x041c	 261 */		ldd	[%o2],%f6
  1.1297 +/* 0x0420	 279 */		or	%g0,%o0,%o4
  1.1298 +/* 0x0424	 281 */		or	%g0,0,%o3
  1.1299 +/* 0x0428	 261 */		ldd	[%i2],%f4
  1.1300 +/* 0x042c	 273 */		std	%f0,[%o0+8]
  1.1301 +/* 0x0430	     */		std	%f0,[%o0+16]
  1.1302 +/* 0x0434	 261 */		fmuld	%f4,%f6,%f4
  1.1303 +/* 0x0438	     */		std	%f4,[%o0]
  1.1304 +/* 0x043c	 273 */		std	%f0,[%o0+24]
  1.1305 +/* 0x0440	     */		std	%f0,[%o0+32]
  1.1306 +/* 0x0444	     */		fdtox	%f4,%f4
  1.1307 +/* 0x0448	     */		std	%f0,[%o0+40]
  1.1308 +/* 0x044c	     */		std	%f0,[%o0+48]
  1.1309 +/* 0x0450	     */		std	%f0,[%o0+56]
  1.1310 +/* 0x0454	     */		std	%f0,[%o0+64]
  1.1311 +/* 0x0458	     */		std	%f0,[%o0+72]
  1.1312 +/* 0x045c	     */		std	%f0,[%o0+80]
  1.1313 +/* 0x0460	     */		std	%f0,[%o0+88]
  1.1314 +/* 0x0464	     */		std	%f0,[%o0+96]
  1.1315 +/* 0x0468	     */		std	%f0,[%o0+104]
  1.1316 +/* 0x046c	     */		std	%f0,[%o0+112]
  1.1317 +/* 0x0470	     */		std	%f0,[%o0+120]
  1.1318 +/* 0x0474	     */		std	%f0,[%o0+128]
  1.1319 +/* 0x0478	     */		std	%f0,[%o0+136]
  1.1320 +/* 0x047c	     */		std	%f0,[%o0+144]
  1.1321 +/* 0x0480	     */		std	%f0,[%o0+152]
  1.1322 +/* 0x0484	     */		std	%f0,[%o0+160]
  1.1323 +/* 0x0488	     */		std	%f0,[%o0+168]
  1.1324 +/* 0x048c	     */		fmovs	%f0,%f4
  1.1325 +/* 0x0490	     */		std	%f0,[%o0+176]
  1.1326 +/* 0x0494	 281 */		or	%g0,0,%o1
  1.1327 +/* 0x0498	 273 */		std	%f0,[%o0+184]
  1.1328 +/* 0x049c	     */		fxtod	%f4,%f4
  1.1329 +/* 0x04a0	     */		std	%f0,[%o0+192]
  1.1330 +/* 0x04a4	     */		std	%f0,[%o0+200]
  1.1331 +/* 0x04a8	     */		std	%f0,[%o0+208]
  1.1332 +/* 0x04ac	 278 */		fmuld	%f4,%f2,%f2
  1.1333 +/* 0x04b0	 273 */		std	%f0,[%o0+216]
  1.1334 +/* 0x04b4	     */		std	%f0,[%o0+224]
  1.1335 +/* 0x04b8	     */		std	%f0,[%o0+232]
  1.1336 +/* 0x04bc	     */		std	%f0,[%o0+240]
  1.1337 +/* 0x04c0	     */		std	%f0,[%o0+248]
  1.1338 +/* 0x04c4	     */		std	%f0,[%o0+256]
  1.1339 +/* 0x04c8	     */		std	%f0,[%o0+264]
  1.1340 +/* 0x04cc	     */		std	%f0,[%o0+272]
  1.1341 +/* 0x04d0	     */		std	%f0,[%o0+280]
  1.1342 +/* 0x04d4	     */		std	%f0,[%o0+288]
  1.1343 +/* 0x04d8	     */		std	%f0,[%o0+296]
  1.1344 +/* 0x04dc	     */		std	%f0,[%o0+304]
  1.1345 +/* 0x04e0	     */		std	%f0,[%o0+312]
  1.1346 +/* 0x04e4	     */		std	%f0,[%o0+320]
  1.1347 +/* 0x04e8	     */		std	%f0,[%o0+328]
  1.1348 +/* 0x04ec	     */		std	%f0,[%o0+336]
  1.1349 +/* 0x04f0	     */		std	%f0,[%o0+344]
  1.1350 +/* 0x04f4	     */		std	%f0,[%o0+352]
  1.1351 +/* 0x04f8	     */		std	%f0,[%o0+360]
  1.1352 +/* 0x04fc	     */		std	%f0,[%o0+368]
  1.1353 +/* 0x0500	     */		std	%f0,[%o0+376]
  1.1354 +/* 0x0504	     */		std	%f0,[%o0+384]
  1.1355 +/* 0x0508	     */		std	%f0,[%o0+392]
  1.1356 +/* 0x050c	     */		std	%f0,[%o0+400]
  1.1357 +/* 0x0510	     */		std	%f0,[%o0+408]
  1.1358 +/* 0x0514	     */		std	%f0,[%o0+416]
  1.1359 +/* 0x0518	     */		std	%f0,[%o0+424]
  1.1360 +/* 0x051c	     */		std	%f0,[%o0+432]
  1.1361 +/* 0x0520	     */		std	%f0,[%o0+440]
  1.1362 +/* 0x0524	     */		std	%f0,[%o0+448]
  1.1363 +/* 0x0528	     */		std	%f0,[%o0+456]
  1.1364 +/* 0x052c	     */		std	%f0,[%o0+464]
  1.1365 +/* 0x0530	     */		std	%f0,[%o0+472]
  1.1366 +/* 0x0534	     */		std	%f0,[%o0+480]
  1.1367 +/* 0x0538	     */		std	%f0,[%o0+488]
  1.1368 +/* 0x053c	     */		std	%f0,[%o0+496]
  1.1369 +/* 0x0540	     */		std	%f0,[%o0+504]
  1.1370 +/* 0x0544	     */		std	%f0,[%o0+512]
  1.1371 +/* 0x0548	     */		std	%f0,[%o0+520]
  1.1372 +/* 0x054c	     */		ldd	[%g5],%f0
  1.1373 +/* 0x0550	     */		ldd	[%g1],%f8
  1.1374 +/* 0x0554	     */		fmuld	%f2,%f0,%f6
  1.1375 +/* 0x0558	 275 */		ldd	[%i4],%f4
  1.1376 +/* 0x055c	 276 */		ldd	[%i2],%f0
  1.1377 +/* 0x0560	     */		fdtox	%f6,%f6
  1.1378 +/* 0x0564	     */		fxtod	%f6,%f6
  1.1379 +/* 0x0568	     */		fmuld	%f6,%f8,%f6
  1.1380 +/* 0x056c	     */		fsubd	%f2,%f6,%f2
  1.1381 +/* 0x0570	 286 */		fmuld	%f4,%f2,%f12
  1.1382 +
  1.1383 +!  282		      !       {
  1.1384 +!  284		      !	 m2j=pdm2[j];
  1.1385 +!  285		      !	 a=pdtj[0]+pdn_0*digit;
  1.1386 +!  286		      !	 b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
  1.1387 +
  1.1388 +!  287		      !	 pdtj[1]=b;
  1.1389 +!  289		      !	 /**** this loop will be fully unrolled:
  1.1390 +!  290		      !	 for(i=1;i<16;i++)
  1.1391 +!  291		      !	   {
  1.1392 +!  292		      !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
  1.1393 +!  293		      !	   }
  1.1394 +!  294		      !	 *************************************/
  1.1395 +!  295		      !	     pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
  1.1396 +!  296		      !	     pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
  1.1397 +!  297		      !	     pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
  1.1398 +!  298		      !	     pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
  1.1399 +!  299		      !	     pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
  1.1400 +!  300		      !	     pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
  1.1401 +!  301		      !	     pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
  1.1402 +!  302		      !	     pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
  1.1403 +!  303		      !	     pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
  1.1404 +!  304		      !	     pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
  1.1405 +!  305		      !	     pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
  1.1406 +!  306		      !	     pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
  1.1407 +!  307		      !	     pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
  1.1408 +!  308		      !	     pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
  1.1409 +!  309		      !	     pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
  1.1410 +!  310		      !	 /* no need for cleenup, cannot overflow */
  1.1411 +!  311		      !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
  1.1412 +
  1.1413 +	fmovd %f2,%f0		! hand modified
  1.1414 +	fmovd %f16,%f18			! hand modified
  1.1415 +	ldd [%i4],%f2
  1.1416 +	ldd [%o4],%f8
  1.1417 +	ldd [%i2],%f10
  1.1418 +	ldd [%g5],%f14		! hand modified
  1.1419 +	ldd [%g1],%f16		! hand modified
  1.1420 +	ldd [%i3],%f24
  1.1421 +
  1.1422 +	ldd [%i2+8],%f26
  1.1423 +	ldd [%i2+16],%f40
  1.1424 +	ldd [%i2+48],%f46
  1.1425 +	ldd [%i2+56],%f30
  1.1426 +	ldd [%i2+64],%f54
  1.1427 +	ldd [%i2+104],%f34
  1.1428 +	ldd [%i2+112],%f58
  1.1429 +
  1.1430 +	ldd [%i4+8],%f28	
  1.1431 +	ldd [%i4+104],%f38
  1.1432 +	ldd [%i4+112],%f60
  1.1433 +
  1.1434 +	.L99999999: 			!1
  1.1435 +	ldd	[%i2+24],%f32
  1.1436 +	fmuld	%f0,%f2,%f4 	!2
  1.1437 +	ldd	[%i4+24],%f36
  1.1438 +	fmuld	%f26,%f24,%f20 	!3
  1.1439 +	ldd	[%i2+40],%f42
  1.1440 +	fmuld	%f28,%f0,%f22 	!4
  1.1441 +	ldd	[%i4+40],%f44
  1.1442 +	fmuld	%f32,%f24,%f32 	!5
  1.1443 +	ldd	[%i3+8],%f6
  1.1444 +	faddd	%f4,%f8,%f4
  1.1445 +	fmuld	%f36,%f0,%f36 	!6
  1.1446 +	add	%i3,8,%i3
  1.1447 +	ldd	[%i4+56],%f50
  1.1448 +	fmuld	%f42,%f24,%f42 	!7
  1.1449 +	ldd	[%i2+72],%f52
  1.1450 +	faddd	%f20,%f22,%f20
  1.1451 +	fmuld	%f44,%f0,%f44 	!8
  1.1452 +	ldd	[%o4+16],%f22
  1.1453 +	fmuld	%f10,%f6,%f12 	!9
  1.1454 +	ldd	[%i4+72],%f56
  1.1455 +	faddd	%f32,%f36,%f32
  1.1456 +	fmuld	%f14,%f4,%f4 !10
  1.1457 +	ldd	[%o4+48],%f36
  1.1458 +	fmuld	%f30,%f24,%f48 	!11
  1.1459 +	ldd	[%o4+8],%f8
  1.1460 +	faddd	%f20,%f22,%f20
  1.1461 +	fmuld	%f50,%f0,%f50	!12
  1.1462 +	std	%f20,[%o4+16]
  1.1463 +	faddd	%f42,%f44,%f42
  1.1464 +	fmuld	%f52,%f24,%f52 	!13
  1.1465 +	ldd	[%o4+80],%f44
  1.1466 +	faddd	%f4,%f12,%f4
  1.1467 +	fmuld	%f56,%f0,%f56 	!14
  1.1468 +	ldd	[%i2+88],%f20
  1.1469 +	faddd	%f32,%f36,%f32 	!15
  1.1470 +	ldd	[%i4+88],%f22
  1.1471 +	faddd	%f48,%f50,%f48 	!16
  1.1472 +	ldd	[%o4+112],%f50
  1.1473 +	faddd	%f52,%f56,%f52 	!17
  1.1474 +	ldd	[%o4+144],%f56
  1.1475 +	faddd	%f4,%f8,%f8
  1.1476 +	fmuld	%f20,%f24,%f20 	!18
  1.1477 +	std	%f32,[%o4+48]
  1.1478 +	faddd	%f42,%f44,%f42
  1.1479 +	fmuld	%f22,%f0,%f22 	!19
  1.1480 +	std	%f42,[%o4+80]
  1.1481 +	faddd	%f48,%f50,%f48
  1.1482 +	fmuld	%f34,%f24,%f32 	!20
  1.1483 +	std	%f48,[%o4+112]
  1.1484 +	faddd	%f52,%f56,%f52
  1.1485 +	fmuld	%f38,%f0,%f36 	!21
  1.1486 +	ldd	[%i2+120],%f42
  1.1487 +	fdtox	%f8,%f4 		!22
  1.1488 +	std	%f52,[%o4+144]
  1.1489 +	faddd	%f20,%f22,%f20 	!23
  1.1490 +	ldd	[%i4+120],%f44 	!24
  1.1491 +	ldd	[%o4+176],%f22
  1.1492 +	faddd	%f32,%f36,%f32
  1.1493 +	fmuld	%f42,%f24,%f42 	!25
  1.1494 +	ldd	[%i4+16],%f50
  1.1495 +	fmovs	%f17,%f4 	!26
  1.1496 +	ldd	[%i2+32],%f52
  1.1497 +	fmuld	%f44,%f0,%f44 	!27
  1.1498 +	ldd	[%i4+32],%f56
  1.1499 +	fmuld	%f40,%f24,%f48 	!28
  1.1500 +	ldd	[%o4+208],%f36
  1.1501 +	faddd	%f20,%f22,%f20
  1.1502 +	fmuld	%f50,%f0,%f50 	!29
  1.1503 +	std	%f20,[%o4+176]
  1.1504 +	fxtod	%f4,%f4
  1.1505 +	fmuld	%f52,%f24,%f52 	!30
  1.1506 +	ldd	[%i4+48],%f22
  1.1507 +	faddd	%f42,%f44,%f42
  1.1508 +	fmuld	%f56,%f0,%f56 	!31
  1.1509 +	ldd	[%o4+240],%f44
  1.1510 +	faddd	%f32,%f36,%f32 	!32
  1.1511 +	std	%f32,[%o4+208]
  1.1512 +	faddd	%f48,%f50,%f48
  1.1513 +	fmuld	%f46,%f24,%f20 	!33
  1.1514 +	ldd	[%o4+32],%f50
  1.1515 +	fmuld	%f4,%f18,%f12 	!34
  1.1516 +	ldd	[%i4+64],%f36
  1.1517 +	faddd	%f52,%f56,%f52
  1.1518 +	fmuld	%f22,%f0,%f22 	!35
  1.1519 +	ldd	[%o4+64],%f56
  1.1520 +	faddd	%f42,%f44,%f42 	!36
  1.1521 +	std	%f42,[%o4+240]
  1.1522 +	faddd	%f48,%f50,%f48
  1.1523 +	fmuld	%f54,%f24,%f32 	!37
  1.1524 +	std	%f48,[%o4+32]
  1.1525 +	fmuld	%f12,%f14,%f4 !38
  1.1526 +	ldd	[%i2+80],%f42
  1.1527 +	faddd	%f52,%f56,%f56	! yes, tmp52!
  1.1528 +	fmuld	%f36,%f0,%f36 	!39
  1.1529 +	ldd	[%i4+80],%f44
  1.1530 +	faddd	%f20,%f22,%f20 	!40
  1.1531 +	ldd	[%i2+96],%f48
  1.1532 +	fmuld	%f58,%f24,%f52 	!41
  1.1533 +	ldd	[%i4+96],%f50
  1.1534 +	fdtox	%f4,%f4
  1.1535 +	fmuld	%f42,%f24,%f42 	!42
  1.1536 +	std	%f56,[%o4+64]	! yes, tmp52!
  1.1537 +	faddd	%f32,%f36,%f32
  1.1538 +	fmuld	%f44,%f0,%f44 	!43
  1.1539 +	ldd	[%o4+96],%f22
  1.1540 +	fmuld	%f48,%f24,%f48 	!44
  1.1541 +	ldd	[%o4+128],%f36
  1.1542 +	fmovd	%f6,%f24
  1.1543 +	fmuld	%f50,%f0,%f50 	!45
  1.1544 +	fxtod	%f4,%f4
  1.1545 +	fmuld	%f60,%f0,%f56 	!46
  1.1546 +	add	%o4,8,%o4
  1.1547 +	faddd	%f42,%f44,%f42 	!47
  1.1548 +	ldd	[%o4+160-8],%f44
  1.1549 +	faddd	%f20,%f22,%f20 	!48
  1.1550 +	std	%f20,[%o4+96-8]
  1.1551 +	faddd	%f48,%f50,%f48 	!49
  1.1552 +	ldd	[%o4+192-8],%f50
  1.1553 +	faddd	%f52,%f56,%f52
  1.1554 +	fmuld	%f4,%f16,%f4 	!50
  1.1555 +	ldd	[%o4+224-8],%f56
  1.1556 +	faddd	%f32,%f36,%f32 	!51
  1.1557 +	std	%f32,[%o4+128-8]
  1.1558 +	faddd	%f42,%f44,%f42 	!52
  1.1559 +	add	%o3,1,%o3
  1.1560 +	std	%f42,[%o4+160-8]
  1.1561 +	faddd	%f48,%f50,%f48 	!53
  1.1562 +	cmp	%o3,31
  1.1563 +	std	%f48,[%o4+192-8]
  1.1564 +	fsubd	%f12,%f4,%f0 	!54
  1.1565 +	faddd	%f52,%f56,%f52
  1.1566 +	ble,pt	%icc,.L99999999
  1.1567 +	std	%f52,[%o4+224-8] 	!55
  1.1568 +	std %f8,[%o4]
  1.1569 +
  1.1570 +!  312		      !       }
  1.1571 +!  313		      !   }
  1.1572 +!  315		      ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
  1.1573 +
  1.1574 +/* 0x07c8	 315 */		sll	%i0,4,%g2
  1.1575 +                       .L900000653:
  1.1576 +/* 0x07cc	 315 */		add	%i1,%g2,%i1
  1.1577 +/* 0x07d0	 242 */		ld	[%fp+68],%o0
  1.1578 +/* 0x07d4	 315 */		or	%g0,0,%o4
  1.1579 +/* 0x07d8	     */		ldd	[%i1],%f0
  1.1580 +/* 0x07dc	     */		or	%g0,0,%g5
  1.1581 +/* 0x07e0	     */		cmp	%i0,0
  1.1582 +/* 0x07e4	 242 */		or	%g0,%o0,%o3
  1.1583 +/* 0x07e8	 311 */		sub	%i0,1,%g1
  1.1584 +/* 0x07ec	 315 */		fdtox	%f0,%f0
  1.1585 +/* 0x07f0	     */		std	%f0,[%sp+120]
  1.1586 +/* 0x07f4	 311 */		sethi	%hi(0xfc00),%o1
  1.1587 +/* 0x07f8	     */		add	%g1,1,%g3
  1.1588 +/* 0x07fc	     */		or	%g0,%o0,%g4
  1.1589 +/* 0x0800	 315 */		ldd	[%i1+8],%f0
  1.1590 +/* 0x0804	     */		add	%o1,1023,%o1
  1.1591 +/* 0x0808	     */		fdtox	%f0,%f0
  1.1592 +/* 0x080c	     */		std	%f0,[%sp+112]
  1.1593 +/* 0x0810	     */		ldx	[%sp+112],%o5
  1.1594 +/* 0x0814	     */		ldx	[%sp+120],%o7
  1.1595 +/* 0x0818	     */		ble,pt	%icc,.L900000651
  1.1596 +/* 0x081c	     */		sethi	%hi(0xfc00),%g2
  1.1597 +/* 0x0820	 311 */		or	%g0,-1,%g2
  1.1598 +/* 0x0824	 315 */		cmp	%g3,3
  1.1599 +/* 0x0828	 311 */		srl	%g2,0,%o2
  1.1600 +/* 0x082c	 315 */		bl,pn	%icc,.L77000287
  1.1601 +/* 0x0830	     */		or	%g0,%i1,%g2
  1.1602 +/* 0x0834	     */		ldd	[%i1+16],%f0
  1.1603 +/* 0x0838	     */		and	%o5,%o1,%o0
  1.1604 +/* 0x083c	     */		add	%i1,16,%g2
  1.1605 +/* 0x0840	     */		sllx	%o0,16,%g3
  1.1606 +/* 0x0844	     */		and	%o7,%o2,%o0
  1.1607 +/* 0x0848	     */		fdtox	%f0,%f0
  1.1608 +/* 0x084c	     */		std	%f0,[%sp+104]
  1.1609 +/* 0x0850	     */		add	%o0,%g3,%o4
  1.1610 +/* 0x0854	     */		ldd	[%i1+24],%f2
  1.1611 +/* 0x0858	     */		srax	%o5,16,%o0
  1.1612 +/* 0x085c	     */		add	%o3,4,%g4
  1.1613 +/* 0x0860	     */		stx	%o0,[%sp+128]
  1.1614 +/* 0x0864	     */		and	%o4,%o2,%o0
  1.1615 +/* 0x0868	     */		stx	%o0,[%sp+112]
  1.1616 +/* 0x086c	     */		srax	%o4,32,%o0
  1.1617 +/* 0x0870	     */		fdtox	%f2,%f0
  1.1618 +/* 0x0874	     */		stx	%o0,[%sp+136]
  1.1619 +/* 0x0878	     */		srax	%o7,32,%o4
  1.1620 +/* 0x087c	     */		std	%f0,[%sp+96]
  1.1621 +/* 0x0880	     */		ldx	[%sp+128],%g5
  1.1622 +/* 0x0884	     */		ldx	[%sp+136],%o7
  1.1623 +/* 0x0888	     */		ldx	[%sp+104],%g3
  1.1624 +/* 0x088c	     */		add	%g5,%o7,%o0
  1.1625 +/* 0x0890	     */		or	%g0,1,%g5
  1.1626 +/* 0x0894	     */		ldx	[%sp+112],%o7
  1.1627 +/* 0x0898	     */		add	%o4,%o0,%o4
  1.1628 +/* 0x089c	     */		ldx	[%sp+96],%o5
  1.1629 +/* 0x08a0	     */		st	%o7,[%o3]
  1.1630 +/* 0x08a4	     */		or	%g0,%g3,%o7
  1.1631 +                       .L900000634:
  1.1632 +/* 0x08a8	     */		ldd	[%g2+16],%f0
  1.1633 +/* 0x08ac	     */		add	%g5,1,%g5
  1.1634 +/* 0x08b0	     */		add	%g4,4,%g4
  1.1635 +/* 0x08b4	     */		cmp	%g5,%g1
  1.1636 +/* 0x08b8	     */		add	%g2,16,%g2
  1.1637 +/* 0x08bc	     */		fdtox	%f0,%f0
  1.1638 +/* 0x08c0	     */		std	%f0,[%sp+104]
  1.1639 +/* 0x08c4	     */		ldd	[%g2+8],%f0
  1.1640 +/* 0x08c8	     */		fdtox	%f0,%f0
  1.1641 +/* 0x08cc	     */		std	%f0,[%sp+96]
  1.1642 +/* 0x08d0	     */		and	%o5,%o1,%g3
  1.1643 +/* 0x08d4	     */		sllx	%g3,16,%g3
  1.1644 +/* 0x08d8	     */		stx	%g3,[%sp+120]
  1.1645 +/* 0x08dc	     */		and	%o7,%o2,%g3
  1.1646 +/* 0x08e0	     */		stx	%o7,[%sp+128]
  1.1647 +/* 0x08e4	     */		ldx	[%sp+120],%o7
  1.1648 +/* 0x08e8	     */		add	%g3,%o7,%g3
  1.1649 +/* 0x08ec	     */		ldx	[%sp+128],%o7
  1.1650 +/* 0x08f0	     */		srax	%o5,16,%o5
  1.1651 +/* 0x08f4	     */		add	%g3,%o4,%g3
  1.1652 +/* 0x08f8	     */		srax	%g3,32,%o4
  1.1653 +/* 0x08fc	     */		stx	%o4,[%sp+112]
  1.1654 +/* 0x0900	     */		srax	%o7,32,%o4
  1.1655 +/* 0x0904	     */		ldx	[%sp+112],%o7
  1.1656 +/* 0x0908	     */		add	%o5,%o7,%o7
  1.1657 +/* 0x090c	     */		ldx	[%sp+96],%o5
  1.1658 +/* 0x0910	     */		add	%o4,%o7,%o4
  1.1659 +/* 0x0914	     */		and	%g3,%o2,%g3
  1.1660 +/* 0x0918	     */		ldx	[%sp+104],%o7
  1.1661 +/* 0x091c	     */		ble,pt	%icc,.L900000634
  1.1662 +/* 0x0920	     */		st	%g3,[%g4-4]
  1.1663 +                       .L900000637:
  1.1664 +/* 0x0924	     */		ba	.L900000651
  1.1665 +/* 0x0928	     */		sethi	%hi(0xfc00),%g2
  1.1666 +                       .L77000287:
  1.1667 +/* 0x092c	     */		ldd	[%g2+16],%f0
  1.1668 +                       .L900000650:
  1.1669 +/* 0x0930	     */		and	%o7,%o2,%o0
  1.1670 +/* 0x0934	     */		and	%o5,%o1,%g3
  1.1671 +/* 0x0938	     */		fdtox	%f0,%f0
  1.1672 +/* 0x093c	     */		add	%o4,%o0,%o0
  1.1673 +/* 0x0940	     */		std	%f0,[%sp+104]
  1.1674 +/* 0x0944	     */		add	%g5,1,%g5
  1.1675 +/* 0x0948	     */		sllx	%g3,16,%o4
  1.1676 +/* 0x094c	     */		ldd	[%g2+24],%f2
  1.1677 +/* 0x0950	     */		add	%g2,16,%g2
  1.1678 +/* 0x0954	     */		add	%o0,%o4,%o4
  1.1679 +/* 0x0958	     */		cmp	%g5,%g1
  1.1680 +/* 0x095c	     */		srax	%o5,16,%o0
  1.1681 +/* 0x0960	     */		stx	%o0,[%sp+112]
  1.1682 +/* 0x0964	     */		and	%o4,%o2,%g3
  1.1683 +/* 0x0968	     */		srax	%o4,32,%o5
  1.1684 +/* 0x096c	     */		fdtox	%f2,%f0
  1.1685 +/* 0x0970	     */		std	%f0,[%sp+96]
  1.1686 +/* 0x0974	     */		srax	%o7,32,%o4
  1.1687 +/* 0x0978	     */		ldx	[%sp+112],%o7
  1.1688 +/* 0x097c	     */		add	%o7,%o5,%o7
  1.1689 +/* 0x0980	     */		ldx	[%sp+104],%o5
  1.1690 +/* 0x0984	     */		add	%o4,%o7,%o4
  1.1691 +/* 0x0988	     */		ldx	[%sp+96],%o0
  1.1692 +/* 0x098c	     */		st	%g3,[%g4]
  1.1693 +/* 0x0990	     */		or	%g0,%o5,%o7
  1.1694 +/* 0x0994	     */		add	%g4,4,%g4
  1.1695 +/* 0x0998	     */		or	%g0,%o0,%o5
  1.1696 +/* 0x099c	     */		ble,a,pt	%icc,.L900000650
  1.1697 +/* 0x09a0	     */		ldd	[%g2+16],%f0
  1.1698 +                       .L77000236:
  1.1699 +/* 0x09a4	     */		sethi	%hi(0xfc00),%g2
  1.1700 +                       .L900000651:
  1.1701 +/* 0x09a8	     */		or	%g0,-1,%o0
  1.1702 +/* 0x09ac	     */		add	%g2,1023,%g2
  1.1703 +/* 0x09b0	     */		ld	[%fp+88],%o1
  1.1704 +/* 0x09b4	     */		srl	%o0,0,%g3
  1.1705 +/* 0x09b8	     */		and	%o5,%g2,%g2
  1.1706 +/* 0x09bc	     */		and	%o7,%g3,%g4
  1.1707 +
  1.1708 +!  317		      ! adjust_montf_result(result,nint,nlen); 
  1.1709 +
  1.1710 +/* 0x09c0	 317 */		or	%g0,-1,%o5
  1.1711 +/* 0x09c4	 311 */		sllx	%g2,16,%g2
  1.1712 +/* 0x09c8	     */		add	%o4,%g4,%g4
  1.1713 +/* 0x09cc	     */		add	%g4,%g2,%g2
  1.1714 +/* 0x09d0	     */		sll	%g5,2,%g4
  1.1715 +/* 0x09d4	     */		and	%g2,%g3,%g2
  1.1716 +/* 0x09d8	     */		st	%g2,[%o3+%g4]
  1.1717 +/* 0x09dc	 317 */		sll	%i0,2,%g2
  1.1718 +/* 0x09e0	     */		ld	[%o3+%g2],%g2
  1.1719 +/* 0x09e4	     */		cmp	%g2,0
  1.1720 +/* 0x09e8	     */		bleu,pn	%icc,.L77000241
  1.1721 +/* 0x09ec	     */		or	%g0,%o1,%o2
  1.1722 +/* 0x09f0	     */		ba	.L900000649
  1.1723 +/* 0x09f4	     */		cmp	%o5,0
  1.1724 +                       .L77000241:
  1.1725 +/* 0x09f8	     */		sub	%i0,1,%o5
  1.1726 +/* 0x09fc	     */		sll	%o5,2,%g2
  1.1727 +/* 0x0a00	     */		cmp	%o5,0
  1.1728 +/* 0x0a04	     */		bl,pt	%icc,.L900000649
  1.1729 +/* 0x0a08	     */		cmp	%o5,0
  1.1730 +/* 0x0a0c	     */		add	%o1,%g2,%o1
  1.1731 +/* 0x0a10	     */		add	%o3,%g2,%o4
  1.1732 +/* 0x0a14	     */		ld	[%o1],%g2
  1.1733 +                       .L900000648:
  1.1734 +/* 0x0a18	     */		ld	[%o4],%g3
  1.1735 +/* 0x0a1c	     */		sub	%o5,1,%o0
  1.1736 +/* 0x0a20	     */		sub	%o1,4,%o1
  1.1737 +/* 0x0a24	     */		sub	%o4,4,%o4
  1.1738 +/* 0x0a28	     */		cmp	%g3,%g2
  1.1739 +/* 0x0a2c	     */		bne,pn	%icc,.L77000244
  1.1740 +/* 0x0a30	     */		nop
  1.1741 +/* 0x0a34	   0 */		or	%g0,%o0,%o5
  1.1742 +/* 0x0a38	 317 */		cmp	%o0,0
  1.1743 +/* 0x0a3c	     */		bge,a,pt	%icc,.L900000648
  1.1744 +/* 0x0a40	     */		ld	[%o1],%g2
  1.1745 +                       .L77000244:
  1.1746 +/* 0x0a44	     */		cmp	%o5,0
  1.1747 +                       .L900000649:
  1.1748 +/* 0x0a48	     */		bl,pn	%icc,.L77000288
  1.1749 +/* 0x0a4c	     */		sll	%o5,2,%g2
  1.1750 +/* 0x0a50	     */		ld	[%o2+%g2],%g3
  1.1751 +/* 0x0a54	     */		ld	[%o3+%g2],%g2
  1.1752 +/* 0x0a58	     */		cmp	%g2,%g3
  1.1753 +/* 0x0a5c	     */		bleu,pt	%icc,.L77000224
  1.1754 +/* 0x0a60	     */		nop
  1.1755 +                       .L77000288:
  1.1756 +/* 0x0a64	     */		cmp	%i0,0
  1.1757 +/* 0x0a68	     */		ble,pt	%icc,.L77000224
  1.1758 +/* 0x0a6c	     */		nop
  1.1759 +/* 0x0a70	 317 */		sub	%i0,1,%o7
  1.1760 +/* 0x0a74	     */		or	%g0,-1,%g2
  1.1761 +/* 0x0a78	     */		srl	%g2,0,%o4
  1.1762 +/* 0x0a7c	     */		add	%o7,1,%o0
  1.1763 +/* 0x0a80	 315 */		or	%g0,0,%o5
  1.1764 +/* 0x0a84	     */		or	%g0,0,%g1
  1.1765 +/* 0x0a88	     */		cmp	%o0,3
  1.1766 +/* 0x0a8c	     */		bl,pn	%icc,.L77000289
  1.1767 +/* 0x0a90	     */		add	%o3,8,%o1
  1.1768 +/* 0x0a94	     */		add	%o2,4,%o0
  1.1769 +/* 0x0a98	     */		ld	[%o1-8],%g2
  1.1770 +/* 0x0a9c	   0 */		or	%g0,%o1,%o3
  1.1771 +/* 0x0aa0	 315 */		ld	[%o0-4],%g3
  1.1772 +/* 0x0aa4	   0 */		or	%g0,%o0,%o2
  1.1773 +/* 0x0aa8	 315 */		or	%g0,2,%g1
  1.1774 +/* 0x0aac	     */		ld	[%o3-4],%o0
  1.1775 +/* 0x0ab0	     */		sub	%g2,%g3,%g2
  1.1776 +/* 0x0ab4	     */		or	%g0,%g2,%o5
  1.1777 +/* 0x0ab8	     */		and	%g2,%o4,%g2
  1.1778 +/* 0x0abc	     */		st	%g2,[%o3-8]
  1.1779 +/* 0x0ac0	     */		srax	%o5,32,%o5
  1.1780 +                       .L900000638:
  1.1781 +/* 0x0ac4	     */		ld	[%o2],%g2
  1.1782 +/* 0x0ac8	     */		add	%g1,1,%g1
  1.1783 +/* 0x0acc	     */		add	%o2,4,%o2
  1.1784 +/* 0x0ad0	     */		cmp	%g1,%o7
  1.1785 +/* 0x0ad4	     */		add	%o3,4,%o3
  1.1786 +/* 0x0ad8	     */		sub	%o0,%g2,%o0
  1.1787 +/* 0x0adc	     */		add	%o0,%o5,%o5
  1.1788 +/* 0x0ae0	     */		and	%o5,%o4,%g2
  1.1789 +/* 0x0ae4	     */		ld	[%o3-4],%o0
  1.1790 +/* 0x0ae8	     */		st	%g2,[%o3-8]
  1.1791 +/* 0x0aec	     */		ble,pt	%icc,.L900000638
  1.1792 +/* 0x0af0	     */		srax	%o5,32,%o5
  1.1793 +                       .L900000641:
  1.1794 +/* 0x0af4	     */		ld	[%o2],%o1
  1.1795 +/* 0x0af8	     */		sub	%o0,%o1,%o0
  1.1796 +/* 0x0afc	     */		add	%o0,%o5,%o0
  1.1797 +/* 0x0b00	     */		and	%o0,%o4,%o1
  1.1798 +/* 0x0b04	     */		st	%o1,[%o3-4]
  1.1799 +/* 0x0b08	     */		ret	! Result = 
  1.1800 +/* 0x0b0c	     */		restore	%g0,%g0,%g0
  1.1801 +                       .L77000289:
  1.1802 +/* 0x0b10	     */		ld	[%o3],%o0
  1.1803 +                       .L900000647:
  1.1804 +/* 0x0b14	     */		ld	[%o2],%o1
  1.1805 +/* 0x0b18	     */		add	%o5,%o0,%o0
  1.1806 +/* 0x0b1c	     */		add	%g1,1,%g1
  1.1807 +/* 0x0b20	     */		add	%o2,4,%o2
  1.1808 +/* 0x0b24	     */		cmp	%g1,%o7
  1.1809 +/* 0x0b28	     */		sub	%o0,%o1,%o0
  1.1810 +/* 0x0b2c	     */		and	%o0,%o4,%o1
  1.1811 +/* 0x0b30	     */		st	%o1,[%o3]
  1.1812 +/* 0x0b34	     */		add	%o3,4,%o3
  1.1813 +/* 0x0b38	     */		srax	%o0,32,%o5
  1.1814 +/* 0x0b3c	     */		ble,a,pt	%icc,.L900000647
  1.1815 +/* 0x0b40	     */		ld	[%o3],%o0
  1.1816 +                       .L77000224:
  1.1817 +/* 0x0b44	     */		ret	! Result = 
  1.1818 +/* 0x0b48	     */		restore	%g0,%g0,%g0
  1.1819 +/* 0x0b4c	   0 */		.type	mont_mulf_noconv,2
  1.1820 +/* 0x0b4c	     */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)
  1.1821 +

mercurial