security/nss/lib/freebl/mpi/montmulf.s

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 !  
     2 ! This Source Code Form is subject to the terms of the Mozilla Public
     3 ! License, v. 2.0. If a copy of the MPL was not distributed with this
     4 ! file, You can obtain one at http://mozilla.org/MPL/2.0/.
     6 	.section	".text",#alloc,#execinstr
     7 	.file	"montmulf.c"
     9 	.section	".data",#alloc,#write
    10 	.align	8
       /* Each constant below is an 8-byte IEEE double written as two 32-bit words, */
       /* most-significant word first; values match the C initialisers listed further down. */
    11 TwoTo16:		/* frequency 1.0 confidence 0.0 */
    12 	.word	1089470464	/* 0x40F00000: high word of 65536.0 (2^16) */
    13 	.word	0	/* low word */
    14 	.type	TwoTo16,#object
    15 	.size	TwoTo16,8
    16 TwoToMinus16:		/* frequency 1.0 confidence 0.0 */
    17 	.word	1055916032	/* 0x3EF00000: high word of 1.0/65536.0 (2^-16) */
    18 	.word	0	/* low word */
    19 	.type	TwoToMinus16,#object
    20 	.size	TwoToMinus16,8
    21 Zero:		/* frequency 1.0 confidence 0.0 */
    22 	.word	0	/* 0.0 */
    23 	.word	0
    24 	.type	Zero,#object
    25 	.size	Zero,8
    26 TwoTo32:		/* frequency 1.0 confidence 0.0 */
    27 	.word	1106247680	/* 0x41F00000: high word of 65536.0*65536.0 (2^32) */
    28 	.word	0	/* low word */
    29 	.type	TwoTo32,#object
    30 	.size	TwoTo32,8
    31 TwoToMinus32:		/* frequency 1.0 confidence 0.0 */
    32 	.word	1039138816	/* 0x3DF00000: high word of 1.0/(65536.0*65536.0) (2^-32) */
    33 	.word	0	/* low word */
    34 	.type	TwoToMinus32,#object
    35 	.size	TwoToMinus32,8
    37 	.section	".text",#alloc,#execinstr
    38 /* 000000	   0 ( 0  0) */		.align	4
    39 !
    40 ! SUBROUTINE cleanup
    41 !
    42 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
       /* cleanup(dt, from, tlen): leaf routine. For i = 2*from, 2*from+2, ... while i < 2*tlen, */
       /* it rewrites dt[i] and dt[i+1] as the low 32 bits of their integer value plus the */
       /* carry (tmp / tmp1) taken from the high 32 bits of the entry two slots earlier. */
       /* Registers: %o0 = dt, %o1 = from, %o2 = tlen, %g1 = &dt[i], %g2 = &Zero, */
       /* %g3 = loop bound, %g4 = i, %f2 = tmp, %f4 = tmp1, %f8 = x, %f10 = x1. */
       /* lower32()/upper32() are expanded inline with fdtox, fmovs, fxtod and fitod. */
    44                                    	.global cleanup
    45                                    cleanup:		/* frequency 1.0 confidence 0.0 */
    46 ! FILE montmulf.c
    48 !    1		                    !#define RF_INLINE_MACROS
    49 !    3		                    !static double TwoTo16=65536.0;
    50 !    4		                    !static double TwoToMinus16=1.0/65536.0;
    51 !    5		                    !static double Zero=0.0;
    52 !    6		                    !static double TwoTo32=65536.0*65536.0;
    53 !    7		                    !static double TwoToMinus32=1.0/(65536.0*65536.0);
    54 !    9		                    !#ifdef RF_INLINE_MACROS
    55 !   11		                    !double upper32(double);
    56 !   12		                    !double lower32(double, double);
    57 !   13		                    !double mod(double, double, double);
    58 !   15		                    !#else
    59 !   17		                    !static double upper32(double x)
    60 !   18		                    !{
    61 !   19		                    !  return floor(x*TwoToMinus32);
    62 !   20		                    !}
    63 !   22		                    !static double lower32(double x, double y)
    64 !   23		                    !{
    65 !   24		                    !  return x-TwoTo32*floor(x*TwoToMinus32);
    66 !   25		                    !}
    67 !   27		                    !static double mod(double x, double oneoverm, double m)
    68 !   28		                    !{
    69 !   29		                    !  return x-m*floor(x*oneoverm);
    70 !   30		                    !}
    71 !   32		                    !#endif
    72 !   35		                    !void cleanup(double *dt, int from, int tlen)
    73 !   36		                    !{
    74 !   37		                    ! int i;
    75 !   38		                    ! double tmp,tmp1,x,x1;
    76 !   40		                    ! tmp=tmp1=Zero;
    78 /* 000000	  40 ( 0  1) */		sethi	%hi(Zero),%g2	/* upper bits of the address of Zero */
    80 !   41		                    ! /* original code **
    81 !   42		                    ! for(i=2*from;i<2*tlen-2;i++)
    82 !   43		                    !   {
    83 !   44		                    !     x=dt[i];
    84 !   45		                    !     dt[i]=lower32(x,Zero)+tmp1;
    85 !   46		                    !     tmp1=tmp;
    86 !   47		                    !     tmp=upper32(x);
    87 !   48		                    !   }
    88 !   49		                    ! dt[tlen-2]+=tmp1;
    89 !   50		                    ! dt[tlen-1]+=tmp;
    90 !   51		                    ! **end original code ***/
    91 !   52		                    ! /* new code ***/
    92 !   53		                    ! for(i=2*from;i<2*tlen;i+=2)
    94 /* 0x0004	  53 ( 1  2) */		sll	%o2,1,%g3	/* %g3 = 2*tlen */
    95 /* 0x0008	  40 ( 1  4) */		ldd	[%g2+%lo(Zero)],%f0	/* %f0 = Zero */
    96 /* 0x000c	     ( 1  2) */		add	%g2,%lo(Zero),%g2	/* %g2 = &Zero, reloaded from inside the loop */
    97 /* 0x0010	  53 ( 2  3) */		sll	%o1,1,%g4	/* %g4 = i = 2*from */
    98 /* 0x0014	  36 ( 3  4) */		sll	%o1,4,%g1	/* %g1 = 16*from = byte offset of dt[2*from] */
    99 /* 0x0018	  40 ( 3  4) */		fmovd	%f0,%f4	/* tmp1 = Zero */
   100 /* 0x001c	  53 ( 3  4) */		cmp	%g4,%g3	/* skip the loop when 2*from >= 2*tlen */
   101 /* 0x0020	     ( 3  4) */		bge,pt	%icc,.L77000116	! tprob=0.56
   102 /* 0x0024	     ( 4  5) */		fmovd	%f0,%f2	/* delay slot: tmp = Zero */
   103 /* 0x0028	  36 ( 4  5) */		add	%o0,%g1,%g1	/* %g1 = &dt[2*from] */
   104 /* 0x002c	     ( 4  5) */		sub	%g3,1,%g3	/* %g3 = 2*tlen-1; loop repeats while i <= %g3 */
   106 !   54		                    !   {
   107 !   55		                    !     x=dt[i];
   109 /* 0x0030	  55 ( 5  8) */		ldd	[%g1],%f8	/* x = dt[i] for the first pass */
   110                                    .L900000114:		/* frequency 6.4 confidence 0.0 */
   111 /* 0x0034	     ( 0  3) */		fdtox	%f8,%f6	/* %f6:%f7 = x converted to a 64-bit integer */
   113 !   56		                    !     x1=dt[i+1];
   115 /* 0x0038	  56 ( 0  3) */		ldd	[%g1+8],%f10	/* x1 = dt[i+1] */
   117 !   57		                    !     dt[i]=lower32(x,Zero)+tmp;
   118 !   58		                    !     dt[i+1]=lower32(x1,Zero)+tmp1;
   119 !   59		                    !     tmp=upper32(x);
   120 !   60		                    !     tmp1=upper32(x1);
   122 /* 0x003c	  60 ( 0  1) */		add	%g4,2,%g4	/* i += 2 */
   123 /* 0x0040	     ( 1  4) */		fdtox	%f8,%f8	/* %f8:%f9 = integer form of x; %f8 holds its high word */
   124 /* 0x0044	     ( 1  2) */		cmp	%g4,%g3	/* condition for the loop branch at the bottom */
   125 /* 0x0048	     ( 5  6) */		fmovs	%f0,%f6	/* replace the high word with that of Zero: keep only the low 32 bits */
   126 /* 0x004c	     ( 7 10) */		fxtod	%f6,%f6	/* %f6 = lower32(x) as a double */
   127 /* 0x0050	     ( 8 11) */		fdtox	%f10,%f0	/* %f0:%f1 = integer form of x1 */
   128 /* 0x0054	  57 (10 13) */		faddd	%f6,%f2,%f2	/* lower32(x) + tmp */
   129 /* 0x0058	     (10 11) */		std	%f2,[%g1]	/* dt[i] = lower32(x) + tmp */
   130 /* 0x005c	     (12 15) */		ldd	[%g2],%f2	/* reload Zero into %f2 */
   131 /* 0x0060	     (14 15) */		fmovs	%f2,%f0	/* clear the high word of the integer form of x1 */
   132 /* 0x0064	     (16 19) */		fxtod	%f0,%f6	/* %f6 = lower32(x1) as a double */
   133 /* 0x0068	     (17 20) */		fdtox	%f10,%f0	/* recompute integer form of x1; %f0 holds its high word */
   134 /* 0x006c	     (18 21) */		fitod	%f8,%f2	/* tmp = high 32 bits of x's integer form, as a double */
   135 /* 0x0070	  58 (19 22) */		faddd	%f6,%f4,%f4	/* lower32(x1) + tmp1 */
   136 /* 0x0074	     (19 20) */		std	%f4,[%g1+8]	/* dt[i+1] = lower32(x1) + tmp1 */
   137 /* 0x0078	  60 (19 20) */		add	%g1,16,%g1	/* advance to the next pair */
   138 /* 0x007c	     (20 23) */		fitod	%f0,%f4	/* tmp1 = high 32 bits of x1's integer form, as a double */
   139 /* 0x0080	     (20 23) */		ldd	[%g2],%f0	/* restore %f0 = Zero for the next pass */
   140 /* 0x0084	     (20 21) */		ble,a,pt	%icc,.L900000114	! tprob=0.86
   141 /* 0x0088	     (21 24) */		ldd	[%g1],%f8	/* annulled delay slot: x = dt[i] for the next pass */
   142                                    .L77000116:		/* frequency 1.0 confidence 0.0 */
   143 /* 0x008c	     ( 0  2) */		retl	! Result = 
   144 /* 0x0090	     ( 1  2) */		nop
   145 /* 0x0094	   0 ( 0  0) */		.type	cleanup,2
   146 /* 0x0094	     ( 0  0) */		.size	cleanup,(.-cleanup)
   148 	.section	".text",#alloc,#execinstr
   149 /* 000000	   0 ( 0  0) */		.align	4
   150 !
   151 ! SUBROUTINE conv_d16_to_i32
   152 !
   153 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
       /* conv_d16_to_i32(i32, d16, tmp, ilen): combines each pair d16[2*i], d16[2*i+1] */
       /* (truncated to 64-bit integers) into one 32-bit word i32[i], carrying everything */
       /* above bit 31 into the following word; the C listing is interleaved below. */
       /* After save: %i0 = i32, %i1 = d16, %i3 = ilen; %i2 (tmp) is not referenced here. */
       /* Working registers: %g4 = a, %g1 = b, %o4 = t1, %o7 = i, %o1 = 0xffff, */
       /* %o3 = 0xffffffff, %g5 = i32 base, and inside the loops %g2 / %o5 walk d16 / i32. */
       /* fdtox results reach the integer registers through the stack slots at %sp+96..+120. */
       /* Layout: a peeled first iteration plus pipelined loop (.L900000209) when ilen-1 >= 3, */
       /* a plain loop (.L900000213) otherwise, and a shared final step at .L900000214. */
   155                                    	.global conv_d16_to_i32
   156                                    conv_d16_to_i32:		/* frequency 1.0 confidence 0.0 */
   157 /* 000000	     ( 0  1) */		save	%sp,-136,%sp
   159 !   61		                    !   }
   160 !   62		                    !  /** end new code **/
   161 !   63		                    !}
   162 !   66		                    !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
   163 !   67		                    !{
   164 !   68		                    !int i;
   165 !   69		                    !long long t, t1, a, b, c, d;
   166 !   71		                    ! t1=0;
   167 !   72		                    ! a=(long long)d16[0];
   169 /* 0x0004	  72 ( 1  4) */		ldd	[%i1],%f0	/* %f0 = d16[0] */
   171 !   73		                    ! b=(long long)d16[1];
   172 !   74		                    ! for(i=0; i<ilen-1; i++)
   174 /* 0x0008	  74 ( 1  2) */		sub	%i3,1,%g2	/* %g2 = ilen-1 */
   175 /* 0x000c	  67 ( 1  2) */		or	%g0,%i0,%g5	/* %g5 = i32 */
   176 /* 0x0010	  74 ( 2  3) */		cmp	%g2,0	/* condition codes for the ilen-1 <= 0 test below */
   177 /* 0x0014	  71 ( 2  3) */		or	%g0,0,%o4	/* t1 = 0 */
   178 /* 0x0018	  72 ( 3  6) */		fdtox	%f0,%f0	/* (long long)d16[0] */
   179 /* 0x001c	     ( 3  4) */		std	%f0,[%sp+120]	/* pass it to the integer side via the stack */
   180 /* 0x0020	  74 ( 3  4) */		or	%g0,0,%o7	/* i = 0 */
   181 /* 0x0024	  67 ( 4  5) */		or	%g0,%i3,%o0	/* %o0 = ilen; overwritten by the sethi below before any use */
   182 /* 0x0028	     ( 4  5) */		sub	%i3,2,%o2	/* %o2 = ilen-2, the last index the loops handle */
   183 /* 0x002c	  73 ( 5  8) */		ldd	[%i1+8],%f0	/* %f0 = d16[1] */
   184 /* 0x0030	  67 ( 5  6) */		sethi	%hi(0xfc00),%o0
   185 /* 0x0034	     ( 5  6) */		add	%o2,1,%g3	/* %g3 = ilen-1 = loop trip count */
   186 /* 0x0038	     ( 6  7) */		add	%o0,1023,%o1	/* %o1 = 0xfc00 + 0x3ff = 0xffff */
   187 /* 0x003c	     ( 6  7) */		or	%g0,%g5,%o5	/* %o5 = i32 store cursor */
   188 /* 0x0040	  73 ( 7 10) */		fdtox	%f0,%f0	/* (long long)d16[1] */
   189 /* 0x0044	     ( 7  8) */		std	%f0,[%sp+112]
   190 /* 0x0048	  72 (11 13) */		ldx	[%sp+120],%g4	/* a = (long long)d16[0] */
   191 /* 0x004c	  73 (12 14) */		ldx	[%sp+112],%g1	/* b = (long long)d16[1] */
   192 /* 0x0050	  74 (12 13) */		ble,pt	%icc,.L900000214	! tprob=0.56
   193 /* 0x0054	     (12 13) */		sethi	%hi(0xfc00),%g2	/* delay slot: %g2 = 0xfc00, completed to 0xffff at .L900000214 */
   194 /* 0x0058	  67 (13 14) */		or	%g0,-1,%g2
   195 /* 0x005c	  74 (13 14) */		cmp	%g3,3	/* fewer than 3 iterations: take the plain loop */
   196 /* 0x0060	  67 (14 15) */		srl	%g2,0,%o3	/* %o3 = 0xffffffff (-1 with the upper 32 bits cleared) */
   197 /* 0x0064	     (14 15) */		or	%g0,%i1,%g2	/* %g2 = d16 read cursor */
   198 /* 0x0068	  74 (14 15) */		bl,pn	%icc,.L77000134	! tprob=0.44
   199 /* 0x006c	     (15 18) */		ldd	[%g2+16],%f0	/* delay slot: %f0 = d16[2] */
   201 !   75		                    !   {
   202 !   76		                    !     c=(long long)d16[2*i+2];
   203 !   77		                    !     t1+=a&0xffffffff;
   204 !   78		                    !     t=(a>>32);
   205 !   79		                    !     d=(long long)d16[2*i+3];
   206 !   80		                    !     t1+=(b&0xffff)<<16;
   208 /* 0x0070	  80 (15 16) */		and	%g1,%o1,%o0	/* peeled iteration i = 0: b & 0xffff */
   210 !   81		                    !     t+=(b>>16)+(t1>>32);
   211 !   82		                    !     i32[i]=t1&0xffffffff;
   212 !   83		                    !     t1=t;
   213 !   84		                    !     a=c;
   214 !   85		                    !     b=d;
   216 /* 0x0074	  85 (15 16) */		add	%g2,16,%g2	/* advance the d16 cursor by one pair */
   217 /* 0x0078	  80 (16 17) */		sllx	%o0,16,%g3	/* (b & 0xffff) << 16 */
   218 /* 0x007c	  77 (16 17) */		and	%g4,%o3,%o0	/* a & 0xffffffff */
   219 /* 0x0080	  76 (17 20) */		fdtox	%f0,%f0	/* c = (long long)d16[2] */
   220 /* 0x0084	     (17 18) */		std	%f0,[%sp+104]
   221 /* 0x0088	  74 (17 18) */		add	%o0,%g3,%o4	/* t1 = (a & 0xffffffff) + ((b & 0xffff) << 16); t1 started at 0 */
   222 /* 0x008c	  79 (18 21) */		ldd	[%g2+8],%f2	/* %f2 = d16[3] */
   223 /* 0x0090	  81 (18 19) */		srax	%g1,16,%o0	/* b >> 16 */
   224 /* 0x0094	  82 (18 19) */		and	%o4,%o3,%o7	/* t1 & 0xffffffff, stored to i32[0] below */
   225 /* 0x0098	  81 (19 20) */		stx	%o0,[%sp+112]
   226 /* 0x009c	     (19 20) */		srax	%o4,32,%o0	/* t1 >> 32 */
   227 /* 0x00a0	  85 (19 20) */		add	%g5,4,%o5	/* %o5 = &i32[1] */
   228 /* 0x00a4	  81 (20 21) */		stx	%o0,[%sp+120]
   229 /* 0x00a8	  78 (20 21) */		srax	%g4,32,%o4	/* t = a >> 32 */
   230 /* 0x00ac	  79 (20 23) */		fdtox	%f2,%f0	/* d = (long long)d16[3] */
   231 /* 0x00b0	     (21 22) */		std	%f0,[%sp+96]
   232 /* 0x00b4	  81 (22 24) */		ldx	[%sp+112],%o0	/* reload b >> 16 */
   233 /* 0x00b8	     (23 25) */		ldx	[%sp+120],%g4	/* reload t1 >> 32 */
   234 /* 0x00bc	  76 (25 27) */		ldx	[%sp+104],%g3	/* c */
   235 /* 0x00c0	  81 (25 26) */		add	%o0,%g4,%g4	/* (b >> 16) + (t1 >> 32) */
   236 /* 0x00c4	  79 (26 28) */		ldx	[%sp+96],%g1	/* b = d */
   237 /* 0x00c8	  81 (26 27) */		add	%o4,%g4,%o4	/* t1 = t + (b >> 16) + (t1 >> 32) */
   238 /* 0x00cc	  82 (27 28) */		st	%o7,[%g5]	/* i32[0] = low 32 bits */
   239 /* 0x00d0	     (27 28) */		or	%g0,1,%o7	/* i = 1 */
   240 /* 0x00d4	  84 (27 28) */		or	%g0,%g3,%g4	/* a = c */
   241                                    .L900000209:		/* frequency 64.0 confidence 0.0 */
   242 /* 0x00d8	  76 (17 19) */		ldd	[%g2+16],%f0	/* %f0 = d16[2*i+2] */
   243 /* 0x00dc	  85 (17 18) */		add	%o7,1,%o7	/* i++ (done early; the store below uses %o5-4) */
   244 /* 0x00e0	     (17 18) */		add	%o5,4,%o5	/* advance the i32 cursor */
   245 /* 0x00e4	     (18 18) */		cmp	%o7,%o2	/* loop again while the incremented i <= ilen-2 */
   246 /* 0x00e8	     (18 19) */		add	%g2,16,%g2	/* advance the d16 cursor by one pair */
   247 /* 0x00ec	  76 (19 22) */		fdtox	%f0,%f0	/* c = (long long)d16[2*i+2] */
   248 /* 0x00f0	     (20 21) */		std	%f0,[%sp+104]
   249 /* 0x00f4	  79 (21 23) */		ldd	[%g2+8],%f0	/* next odd entry (d16[2*i+3] of this iteration) */
   250 /* 0x00f8	     (23 26) */		fdtox	%f0,%f0	/* d */
   251 /* 0x00fc	     (24 25) */		std	%f0,[%sp+96]
   252 /* 0x0100	  80 (25 26) */		and	%g1,%o1,%g3	/* b & 0xffff */
   253 /* 0x0104	     (26 27) */		sllx	%g3,16,%g3	/* (b & 0xffff) << 16 */
   254 /* 0x0108	     ( 0  0) */		stx	%g3,[%sp+120]
   255 /* 0x010c	  77 (26 27) */		and	%g4,%o3,%g3	/* a & 0xffffffff */
   256 /* 0x0110	  74 ( 0  0) */		stx	%o7,[%sp+128]	/* spill i so %o7 can serve as a scratch register */
   257 /* 0x0114	     ( 0  0) */		ldx	[%sp+120],%o7	/* reload (b & 0xffff) << 16 */
   258 /* 0x0118	     (27 27) */		add	%g3,%o7,%g3
   259 /* 0x011c	     ( 0  0) */		ldx	[%sp+128],%o7	/* restore i */
   260 /* 0x0120	  81 (28 29) */		srax	%g1,16,%g1	/* b >> 16 */
   261 /* 0x0124	  74 (28 28) */		add	%g3,%o4,%g3	/* t1 after both additions */
   262 /* 0x0128	  81 (29 30) */		srax	%g3,32,%o4	/* t1 >> 32 */
   263 /* 0x012c	     ( 0  0) */		stx	%o4,[%sp+112]
   264 /* 0x0130	  78 (30 31) */		srax	%g4,32,%o4	/* t = a >> 32 */
   265 /* 0x0134	  81 ( 0  0) */		ldx	[%sp+112],%g4	/* reload t1 >> 32 */
   266 /* 0x0138	     (30 31) */		add	%g1,%g4,%g4	/* (b >> 16) + (t1 >> 32) */
   267 /* 0x013c	  79 (31 33) */		ldx	[%sp+96],%g1	/* b = d */
   268 /* 0x0140	  81 (31 32) */		add	%o4,%g4,%o4	/* t1 = t + (b >> 16) + (t1 >> 32) */
   269 /* 0x0144	  82 (32 33) */		and	%g3,%o3,%g3	/* low 32 bits to store */
   270 /* 0x0148	  84 ( 0  0) */		ldx	[%sp+104],%g4	/* a = c */
   271 /* 0x014c	  85 (33 34) */		ble,pt	%icc,.L900000209	! tprob=0.50
   272 /* 0x0150	     (33 34) */		st	%g3,[%o5-4]	/* delay slot: store the word for the iteration just finished */
   273                                    .L900000212:		/* frequency 8.0 confidence 0.0 */
   274 /* 0x0154	  85 ( 0  1) */		ba	.L900000214	! tprob=1.00
   275 /* 0x0158	     ( 0  1) */		sethi	%hi(0xfc00),%g2	/* delay slot: start rebuilding the 0xffff mask */
   276                                    .L77000134:		/* frequency 0.7 confidence 0.0 */
   277                                    .L900000213:		/* frequency 6.4 confidence 0.0 */
   278 /* 0x015c	  77 ( 0  1) */		and	%g4,%o3,%o0	/* plain loop: a & 0xffffffff */
   279 /* 0x0160	  80 ( 0  1) */		and	%g1,%o1,%g3	/* b & 0xffff */
   280 /* 0x0164	  76 ( 0  3) */		fdtox	%f0,%f0	/* c = (long long)d16[2*i+2] */
   281 /* 0x0168	  77 ( 1  2) */		add	%o4,%o0,%o0	/* t1 + (a & 0xffffffff) */
   282 /* 0x016c	  76 ( 1  2) */		std	%f0,[%sp+104]
   283 /* 0x0170	  85 ( 1  2) */		add	%o7,1,%o7	/* i++ */
   284 /* 0x0174	  80 ( 2  3) */		sllx	%g3,16,%o4	/* (b & 0xffff) << 16 */
   285 /* 0x0178	  79 ( 2  5) */		ldd	[%g2+24],%f2	/* odd entry three doubles past the cursor (d16[2*i+3] of this iteration) */
   286 /* 0x017c	  85 ( 2  3) */		add	%g2,16,%g2	/* advance the d16 cursor by one pair */
   287 /* 0x0180	  80 ( 3  4) */		add	%o0,%o4,%o4	/* t1 after both additions */
   288 /* 0x0184	  81 ( 3  4) */		stx	%o7,[%sp+128]	/* spill i so %o7 can serve as a scratch register */
   289 /* 0x0188	     ( 4  5) */		srax	%g1,16,%o0	/* b >> 16 */
   290 /* 0x018c	     ( 4  5) */		stx	%o0,[%sp+112]
   291 /* 0x0190	  82 ( 4  5) */		and	%o4,%o3,%g3	/* low 32 bits to store */
   292 /* 0x0194	  81 ( 5  6) */		srax	%o4,32,%o0	/* t1 >> 32 */
   293 /* 0x0198	     ( 5  6) */		stx	%o0,[%sp+120]
   294 /* 0x019c	  79 ( 5  8) */		fdtox	%f2,%f0	/* d */
   295 /* 0x01a0	     ( 6  7) */		std	%f0,[%sp+96]
   296 /* 0x01a4	  78 ( 6  7) */		srax	%g4,32,%o4	/* t = a >> 32 */
   297 /* 0x01a8	  81 ( 7  9) */		ldx	[%sp+120],%o7	/* reload t1 >> 32 */
   298 /* 0x01ac	     ( 8 10) */		ldx	[%sp+112],%g4	/* reload b >> 16 */
   299 /* 0x01b0	  76 (10 12) */		ldx	[%sp+104],%g1	/* c */
   300 /* 0x01b4	  81 (10 11) */		add	%g4,%o7,%g4	/* (b >> 16) + (t1 >> 32) */
   301 /* 0x01b8	     (11 13) */		ldx	[%sp+128],%o7	/* restore i */
   302 /* 0x01bc	     (11 12) */		add	%o4,%g4,%o4	/* t1 = t + (b >> 16) + (t1 >> 32) */
   303 /* 0x01c0	  79 (12 14) */		ldx	[%sp+96],%o0	/* d */
   304 /* 0x01c4	  84 (12 13) */		or	%g0,%g1,%g4	/* a = c */
   305 /* 0x01c8	  82 (13 14) */		st	%g3,[%o5]	/* store the 32-bit word for this iteration */
   306 /* 0x01cc	  85 (13 14) */		add	%o5,4,%o5	/* advance the i32 cursor */
   307 /* 0x01d0	     (13 14) */		cmp	%o7,%o2	/* loop again while i <= ilen-2 */
   308 /* 0x01d4	     (14 15) */		or	%g0,%o0,%g1	/* b = d */
   309 /* 0x01d8	     (14 15) */		ble,a,pt	%icc,.L900000213	! tprob=0.86
   310 /* 0x01dc	     (14 17) */		ldd	[%g2+16],%f0	/* annulled delay slot: next even entry */
   311                                    .L77000127:		/* frequency 1.0 confidence 0.0 */
   313 !   86		                    !   }
   314 !   87		                    !     t1+=a&0xffffffff;
   315 !   88		                    !     t=(a>>32);
   316 !   89		                    !     t1+=(b&0xffff)<<16;
   317 !   90		                    !     i32[i]=t1&0xffffffff;
   319 /* 0x01e0	  90 ( 0  1) */		sethi	%hi(0xfc00),%g2
   320                                    .L900000214:		/* frequency 1.0 confidence 0.0 */
   321 /* 0x01e4	  90 ( 0  1) */		or	%g0,-1,%g3	/* final step: rebuild both masks locally */
   322 /* 0x01e8	     ( 0  1) */		add	%g2,1023,%g2	/* %g2 = 0xffff */
   323 /* 0x01ec	     ( 1  2) */		srl	%g3,0,%g3	/* %g3 = 0xffffffff */
   324 /* 0x01f0	     ( 1  2) */		and	%g1,%g2,%g2	/* b & 0xffff */
   325 /* 0x01f4	     ( 2  3) */		and	%g4,%g3,%g4	/* a & 0xffffffff */
   326 /* 0x01f8	     ( 3  4) */		sllx	%g2,16,%g2	/* (b & 0xffff) << 16 */
   327 /* 0x01fc	     ( 3  4) */		add	%o4,%g4,%g4	/* t1 += a & 0xffffffff */
   328 /* 0x0200	     ( 4  5) */		add	%g4,%g2,%g2	/* t1 += (b & 0xffff) << 16 */
   329 /* 0x0204	     ( 5  6) */		sll	%o7,2,%g4	/* byte offset of i32[i] */
   330 /* 0x0208	     ( 5  6) */		and	%g2,%g3,%g2	/* t1 & 0xffffffff */
   331 /* 0x020c	     ( 6  7) */		st	%g2,[%g5+%g4]	/* i32[i] = t1 & 0xffffffff */
   332 /* 0x0210	     ( 7  9) */		ret	! Result = 
   333 /* 0x0214	     ( 9 10) */		restore	%g0,%g0,%g0
   334 /* 0x0218	   0 ( 0  0) */		.type	conv_d16_to_i32,2
   335 /* 0x0218	     ( 0  0) */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
   337 	.section	".text",#alloc,#execinstr
   338 /* 000000	   0 ( 0  0) */		.align	8
   339 !
   340 ! CONSTANT POOL
   341 !
   342                                    .L_const_seg_900000301:		/* frequency 1.0 confidence 0.0 */
   343 /* 000000	   0 ( 0  0) */		.word	1127219200,0	/* 0x43300000,0: the double 2^52 */
   344 /* 0x0008	   0 ( 0  0) */		.align	4
   345 !
   346 ! SUBROUTINE conv_i32_to_d32
   347 !
   348 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
       /* conv_i32_to_d32(d32, i32, len): leaf routine, d32[i] = (double)i32[i] for 0 <= i < len. */
       /* Registers: %o0 = d32, %o1 = i32, %o2 = len, %g4 = read cursor, %g5 = write cursor, */
       /* %o5 = count of words loaded so far, %g3 = len-1, %f8:%f9 = 2^52. */
       /* Conversion: each 32-bit word is loaded into the odd (low) half of a double register */
       /* pair, the even (high) half is set to 0x43300000 with fmovs, and 2^52 is subtracted. */
       /* For len >= 9 an unrolled loop converts five words per pass; otherwise, and for any */
       /* leftover words, the one-word loop at .L900000309 is used. */
   350                                    	.global conv_i32_to_d32
   351                                    conv_i32_to_d32:		/* frequency 1.0 confidence 0.0 */
   352 /* 000000	     ( 0  1) */		orcc	%g0,%o2,%g1	/* set condition codes from len */
   354 !   92		                    !}
   355 !   94		                    !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
   356 !   95		                    !{
   357 !   96		                    !int i;
   358 !   98		                    !#pragma pipeloop(0)
   359 !   99		                    ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
   361 /* 0x0004	  99 ( 0  1) */		ble,pt	%icc,.L77000140	! tprob=0.56
   362 /* 0x0008	     ( 0  1) */		nop
   363 /* 0x000c	     ( 1  2) */		sethi	%hi(.L_const_seg_900000301),%g2
   364 /* 0x0010	  95 ( 1  2) */		or	%g0,%o1,%g4	/* %g4 = i32 read cursor */
   365 /* 0x0014	  99 ( 2  3) */		add	%g2,%lo(.L_const_seg_900000301),%g2	/* %g2 = address of the 2^52 constant */
   366 /* 0x0018	     ( 2  3) */		or	%g0,0,%o5	/* i = 0 */
   367 /* 0x001c	  95 ( 3  4) */		or	%g0,%o0,%g5	/* %g5 = d32 write cursor */
   368 /* 0x0020	  99 ( 3  4) */		sub	%o2,1,%g3	/* %g3 = len-1 */
   369 /* 0x0024	     ( 4  5) */		cmp	%o2,9	/* len < 9: go straight to the one-word loop */
   370 /* 0x0028	     ( 4  5) */		bl,pn	%icc,.L77000144	! tprob=0.44
   371 /* 0x002c	     ( 4  7) */		ldd	[%g2],%f8	/* delay slot: %f8:%f9 = 2^52 */
   372 /* 0x0030	     ( 5  8) */		ld	[%o1],%f7	/* low half of %f6 pair = i32[0] */
   373 /* 0x0034	     ( 5  6) */		add	%o1,16,%g4	/* %g4 = &i32[4] */
   374 /* 0x0038	     ( 5  6) */		sub	%o2,5,%g1	/* %g1 = len-5, bound for the unrolled loop */
   375 /* 0x003c	     ( 6  9) */		ld	[%o1+4],%f5	/* low half of %f4 pair = i32[1] */
   376 /* 0x0040	     ( 6  7) */		or	%g0,4,%o5	/* four words preloaded */
   377 /* 0x0044	     ( 7 10) */		ld	[%o1+8],%f3	/* low half of %f2 pair = i32[2] */
   378 /* 0x0048	     ( 7  8) */		fmovs	%f8,%f6	/* high half of %f6 pair = 0x43300000 */
   379 /* 0x004c	     ( 8 11) */		ld	[%o1+12],%f1	/* low half of %f0 pair = i32[3] */
   380                                    .L900000305:		/* frequency 64.0 confidence 0.0 */
   381 /* 0x0050	     ( 8 16) */		ld	[%g4],%f11	/* fifth word of this group into the %f10 pair */
   382 /* 0x0054	     ( 8  9) */		add	%o5,5,%o5	/* five more words accounted for */
   383 /* 0x0058	     ( 8  9) */		add	%g4,20,%g4	/* read cursor += 5 words */
   384 /* 0x005c	     ( 8 11) */		fsubd	%f6,%f8,%f6	/* (2^52 + n) - 2^52 = (double)n */
   385 /* 0x0060	     ( 9 10) */		std	%f6,[%g5]	/* first result of the group */
   386 /* 0x0064	     ( 9  9) */		cmp	%o5,%g1	/* repeat while the loaded count <= len-5 */
   387 /* 0x0068	     ( 9 10) */		add	%g5,40,%g5	/* write cursor += 5 doubles; stores below use negative offsets */
   388 /* 0x006c	     ( 0  0) */		fmovs	%f8,%f4
   389 /* 0x0070	     (10 18) */		ld	[%g4-16],%f7	/* preload the first word of the next group */
   390 /* 0x0074	     (10 13) */		fsubd	%f4,%f8,%f12
   391 /* 0x0078	     ( 0  0) */		fmovs	%f8,%f2
   392 /* 0x007c	     (11 12) */		std	%f12,[%g5-32]	/* second result */
   393 /* 0x0080	     (12 20) */		ld	[%g4-12],%f5	/* preload the second word of the next group */
   394 /* 0x0084	     (12 15) */		fsubd	%f2,%f8,%f12
   395 /* 0x0088	     ( 0  0) */		fmovs	%f8,%f0
   396 /* 0x008c	     (13 14) */		std	%f12,[%g5-24]	/* third result */
   397 /* 0x0090	     (14 22) */		ld	[%g4-8],%f3	/* preload the third word of the next group */
   398 /* 0x0094	     (14 17) */		fsubd	%f0,%f8,%f12
   399 /* 0x0098	     ( 0  0) */		fmovs	%f8,%f10
   400 /* 0x009c	     (15 16) */		std	%f12,[%g5-16]	/* fourth result */
   401 /* 0x00a0	     (16 24) */		ld	[%g4-4],%f1	/* preload the fourth word of the next group */
   402 /* 0x00a4	     (16 19) */		fsubd	%f10,%f8,%f10
   403 /* 0x00a8	     ( 0  0) */		fmovs	%f8,%f6	/* high half for the next group's first word */
   404 /* 0x00ac	     (17 18) */		ble,pt	%icc,.L900000305	! tprob=0.50
   405 /* 0x00b0	     (17 18) */		std	%f10,[%g5-8]	/* delay slot: fifth result */
   406                                    .L900000308:		/* frequency 8.0 confidence 0.0 */
   407 /* 0x00b4	     ( 0  1) */		fmovs	%f8,%f4	/* drain: convert the four words still held in registers */
   408 /* 0x00b8	     ( 0  1) */		add	%g5,32,%g5	/* write cursor += 4 doubles */
   409 /* 0x00bc	     ( 0  1) */		cmp	%o5,%g3	/* loaded count > len-1 means nothing is left */
   410 /* 0x00c0	     ( 1  2) */		fmovs	%f8,%f2
   411 /* 0x00c4	     ( 2  3) */		fmovs	%f8,%f0
   412 /* 0x00c8	     ( 4  7) */		fsubd	%f6,%f8,%f6
   413 /* 0x00cc	     ( 4  5) */		std	%f6,[%g5-32]
   414 /* 0x00d0	     ( 5  8) */		fsubd	%f4,%f8,%f4
   415 /* 0x00d4	     ( 5  6) */		std	%f4,[%g5-24]
   416 /* 0x00d8	     ( 6  9) */		fsubd	%f2,%f8,%f2
   417 /* 0x00dc	     ( 6  7) */		std	%f2,[%g5-16]
   418 /* 0x00e0	     ( 7 10) */		fsubd	%f0,%f8,%f0
   419 /* 0x00e4	     ( 7  8) */		bg,pn	%icc,.L77000140	! tprob=0.14
   420 /* 0x00e8	     ( 7  8) */		std	%f0,[%g5-8]	/* delay slot: last drained result */
   421                                    .L77000144:		/* frequency 0.7 confidence 0.0 */
   422 /* 0x00ec	     ( 0  3) */		ld	[%g4],%f1	/* one-word loop: low half of %f0 pair = next word */
   423                                    .L900000309:		/* frequency 6.4 confidence 0.0 */
   424 /* 0x00f0	     ( 0  3) */		ldd	[%g2],%f8	/* %f8:%f9 = 2^52 */
   425 /* 0x00f4	     ( 0  1) */		add	%o5,1,%o5	/* i++ */
   426 /* 0x00f8	     ( 0  1) */		add	%g4,4,%g4	/* advance the read cursor */
   427 /* 0x00fc	     ( 1  2) */		cmp	%o5,%g3	/* repeat while i <= len-1 */
   428 /* 0x0100	     ( 2  3) */		fmovs	%f8,%f0	/* high half = 0x43300000 */
   429 /* 0x0104	     ( 4  7) */		fsubd	%f0,%f8,%f0	/* (2^52 + n) - 2^52 = (double)n */
   430 /* 0x0108	     ( 4  5) */		std	%f0,[%g5]	/* store the converted value */
   431 /* 0x010c	     ( 4  5) */		add	%g5,8,%g5	/* advance the write cursor */
   432 /* 0x0110	     ( 4  5) */		ble,a,pt	%icc,.L900000309	! tprob=0.86
   433 /* 0x0114	     ( 6  9) */		ld	[%g4],%f1	/* annulled delay slot: load the next word */
   434                                    .L77000140:		/* frequency 1.0 confidence 0.0 */
   435 /* 0x0118	     ( 0  2) */		retl	! Result = 
   436 /* 0x011c	     ( 1  2) */		nop
   437 /* 0x0120	   0 ( 0  0) */		.type	conv_i32_to_d32,2
   438 /* 0x0120	     ( 0  0) */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
   440 	.section	".text",#alloc,#execinstr
   441 /* 000000	   0 ( 0  0) */		.align	8
   442 !
   443 ! CONSTANT POOL
   444 !
   445                                    .L_const_seg_900000401:		/* frequency 1.0 confidence 0.0 */
   446 /* 000000	   0 ( 0  0) */		.word	1127219200,0	/* 0x43300000,0: the double 2^52 */
   447 /* 0x0008	   0 ( 0  0) */		.align	4
   448 !
   449 ! SUBROUTINE conv_i32_to_d16
   450 !
   451 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
       /* conv_i32_to_d16(d16, i32, len): for 0 <= i < len, with a = i32[i], */
       /* d16[2*i] = (double)(a & 0xffff) and d16[2*i+1] = (double)(a >> 16). */
       /* After save: %i0 = d16, %i1 = i32, %i2 = len. */
       /* Registers: %g1 = i, %g2 = d16 write cursor, %o7 = i32 read cursor, %o4 = 0xffff, */
       /* %o5 = len-1, %o3 = address of the 2^52 constant, %f0:%f1 = 2^52. */
       /* Each 16-bit half is written to a stack slot, reloaded into %f3, given the high */
       /* word 0x43300000 with fmovs, and converted by subtracting 2^52. */
       /* len >= 3 uses the pipelined loop at .L900000406; smaller len uses .L900000410. */
   453                                    	.global conv_i32_to_d16
   454                                    conv_i32_to_d16:		/* frequency 1.0 confidence 0.0 */
   455 /* 000000	     ( 0  1) */		save	%sp,-104,%sp
   456 /* 0x0004	     ( 1  2) */		orcc	%g0,%i2,%o0	/* %o0 = len, and set condition codes from it */
   458 !  100		                    !}
   459 !  103		                    !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
   460 !  104		                    !{
   461 !  105		                    !int i;
   462 !  106		                    !unsigned int a;
   463 !  108		                    !#pragma pipeloop(0)
   464 !  109		                    ! for(i=0;i<len;i++)
   466 /* 0x0008	 109 ( 1  2) */		ble,pt	%icc,.L77000150	! tprob=0.56
   467 /* 0x000c	     ( 1  2) */		nop
   468 /* 0x0010	     ( 2  3) */		sub	%o0,1,%o5	/* %o5 = len-1 */
   469 /* 0x0014	     ( 2  3) */		sethi	%hi(0xfc00),%g2
   471 !  110		                    !   {
   472 !  111		                    !     a=i32[i];
   473 !  112		                    !     d16[2*i]=(double)(a&0xffff);
   474 !  113		                    !     d16[2*i+1]=(double)(a>>16);
   476 /* 0x0018	 113 ( 3  4) */		sethi	%hi(.L_const_seg_900000401),%o0
   477 /* 0x001c	     ( 3  4) */		add	%o5,1,%g3	/* %g3 = len */
   478 /* 0x0020	     ( 4  5) */		add	%g2,1023,%o4	/* %o4 = 0xfc00 + 0x3ff = 0xffff */
   479 /* 0x0024	 109 ( 4  5) */		or	%g0,0,%g1	/* i = 0 */
   480 /* 0x0028	     ( 5  6) */		cmp	%g3,3	/* len < 3: use the plain loop */
   481 /* 0x002c	     ( 5  6) */		or	%g0,%i1,%o7	/* %o7 = i32 read cursor */
   482 /* 0x0030	     ( 6  7) */		add	%o0,%lo(.L_const_seg_900000401),%o3	/* %o3 = address of the 2^52 constant */
   483 /* 0x0034	     ( 6  7) */		or	%g0,%i0,%g2	/* %g2 = d16 write cursor */
   484 /* 0x0038	     ( 6  7) */		bl,pn	%icc,.L77000154	! tprob=0.44
   485 /* 0x003c	     ( 7  8) */		add	%o7,4,%o0	/* delay slot: %o0 = &i32[1] */
   486 /* 0x0040	 112 ( 7 10) */		ldd	[%o3],%f0	/* %f0:%f1 = 2^52 */
   487 /* 0x0044	 113 ( 7  8) */		or	%g0,1,%g1	/* i = 1: the first word is fetched just below */
   488 /* 0x0048	 111 ( 8 11) */		ld	[%o0-4],%o1	/* a = i32[0] */
   489 /* 0x004c	   0 ( 8  9) */		or	%g0,%o0,%o7	/* read cursor = &i32[1] */
   490 /* 0x0050	 112 (10 11) */		and	%o1,%o4,%o0	/* a & 0xffff */
   491                                    .L900000406:		/* frequency 64.0 confidence 0.0 */
   492 /* 0x0054	 112 (22 23) */		st	%o0,[%sp+96]	/* low half goes through a stack slot to the FPU */
   493 /* 0x0058	 113 (22 23) */		add	%g1,1,%g1	/* i++ */
   494 /* 0x005c	     (22 23) */		add	%g2,16,%g2	/* write cursor += 2 doubles; stores below use negative offsets */
   495 /* 0x0060	     (23 23) */		cmp	%g1,%o5	/* repeat while i <= len-1 */
   496 /* 0x0064	     (23 24) */		add	%o7,4,%o7	/* advance the read cursor */
   497 /* 0x0068	 112 (29 31) */		ld	[%sp+96],%f3	/* low word of %f2 pair = a & 0xffff */
   498 /* 0x006c	     ( 0  0) */		fmovs	%f0,%f2	/* high word = 0x43300000 */
   499 /* 0x0070	     (31 34) */		fsubd	%f2,%f0,%f2	/* (double)(a & 0xffff) */
   500 /* 0x0074	 113 (32 33) */		srl	%o1,16,%o0	/* a >> 16 */
   501 /* 0x0078	 112 (32 33) */		std	%f2,[%g2-16]	/* even slot of the pair */
   502 /* 0x007c	 113 (33 34) */		st	%o0,[%sp+92]
   503 /* 0x0080	     (40 42) */		ld	[%sp+92],%f3	/* low word of %f2 pair = a >> 16 */
   504 /* 0x0084	 111 (41 43) */		ld	[%o7-4],%o1	/* a = next input word */
   505 /* 0x0088	 113 ( 0  0) */		fmovs	%f0,%f2
   506 /* 0x008c	     (42 45) */		fsubd	%f2,%f0,%f2	/* (double)(a >> 16) for the previous word */
   507 /* 0x0090	 112 (43 44) */		and	%o1,%o4,%o0	/* a & 0xffff for the next pass */
   508 /* 0x0094	 113 (43 44) */		ble,pt	%icc,.L900000406	! tprob=0.50
   509 /* 0x0098	     (43 44) */		std	%f2,[%g2-8]	/* delay slot: odd slot of the pair */
   510                                    .L900000409:		/* frequency 8.0 confidence 0.0 */
   511 /* 0x009c	 112 ( 0  1) */		st	%o0,[%sp+96]	/* drain: convert the last word already held in %o1/%o0 */
   512 /* 0x00a0	     ( 0  1) */		fmovs	%f0,%f2
   513 /* 0x00a4	 113 ( 0  1) */		add	%g2,16,%g2
   514 /* 0x00a8	     ( 1  2) */		srl	%o1,16,%o0	/* a >> 16 */
   515 /* 0x00ac	 112 ( 4  7) */		ld	[%sp+96],%f3
   516 /* 0x00b0	     ( 6  9) */		fsubd	%f2,%f0,%f2	/* (double)(a & 0xffff) */
   517 /* 0x00b4	     ( 6  7) */		std	%f2,[%g2-16]
   518 /* 0x00b8	 113 ( 7  8) */		st	%o0,[%sp+92]
   519 /* 0x00bc	     (10 11) */		fmovs	%f0,%f2
   520 /* 0x00c0	     (11 14) */		ld	[%sp+92],%f3
   521 /* 0x00c4	     (13 16) */		fsubd	%f2,%f0,%f0	/* (double)(a >> 16) */
   522 /* 0x00c8	     (13 14) */		std	%f0,[%g2-8]
   523 /* 0x00cc	     (14 16) */		ret	! Result = 
   524 /* 0x00d0	     (16 17) */		restore	%g0,%g0,%g0
   525                                    .L77000154:		/* frequency 0.7 confidence 0.0 */
   526 /* 0x00d4	 111 ( 0  3) */		ld	[%o7],%o0	/* plain loop: a = i32[i] */
   527                                    .L900000410:		/* frequency 6.4 confidence 0.0 */
   528 /* 0x00d8	 112 ( 0  1) */		and	%o0,%o4,%o1	/* a & 0xffff */
   529 /* 0x00dc	     ( 0  1) */		st	%o1,[%sp+96]
   530 /* 0x00e0	 113 ( 0  1) */		add	%g1,1,%g1	/* i++ */
   531 /* 0x00e4	 112 ( 1  4) */		ldd	[%o3],%f0	/* %f0:%f1 = 2^52 (overwritten by the second fsubd below) */
   532 /* 0x00e8	 113 ( 1  2) */		srl	%o0,16,%o0	/* a >> 16 */
   533 /* 0x00ec	     ( 1  2) */		add	%o7,4,%o7	/* advance the read cursor */
   534 /* 0x00f0	     ( 2  3) */		cmp	%g1,%o5	/* repeat while i <= len-1 */
   535 /* 0x00f4	 112 ( 3  4) */		fmovs	%f0,%f2	/* high word = 0x43300000 */
   536 /* 0x00f8	     ( 4  7) */		ld	[%sp+96],%f3	/* low word = a & 0xffff */
   537 /* 0x00fc	     ( 6  9) */		fsubd	%f2,%f0,%f2	/* (double)(a & 0xffff) */
   538 /* 0x0100	     ( 6  7) */		std	%f2,[%g2]	/* even slot of the pair */
   539 /* 0x0104	 113 ( 7  8) */		st	%o0,[%sp+92]
   540 /* 0x0108	     (10 11) */		fmovs	%f0,%f2
   541 /* 0x010c	     (11 14) */		ld	[%sp+92],%f3	/* low word = a >> 16 */
   542 /* 0x0110	     (13 16) */		fsubd	%f2,%f0,%f0	/* (double)(a >> 16) */
   543 /* 0x0114	     (13 14) */		std	%f0,[%g2+8]	/* odd slot of the pair */
   544 /* 0x0118	     (13 14) */		add	%g2,16,%g2	/* advance the write cursor */
   545 /* 0x011c	     (13 14) */		ble,a,pt	%icc,.L900000410	! tprob=0.86
   546 /* 0x0120	     (14 17) */		ld	[%o7],%o0	/* annulled delay slot: load the next word */
   547                                    .L77000150:		/* frequency 1.0 confidence 0.0 */
   548 /* 0x0124	     ( 0  2) */		ret	! Result = 
   549 /* 0x0128	     ( 2  3) */		restore	%g0,%g0,%g0
   550 /* 0x012c	   0 ( 0  0) */		.type	conv_i32_to_d16,2
   551 /* 0x012c	     ( 0  0) */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
   553 	.section	".text",#alloc,#execinstr
   554 /* 000000	   0 ( 0  0) */		.align	8
   555 !
   556 ! CONSTANT POOL
   557 !
   558                                    .L_const_seg_900000501:		/* frequency 1.0 confidence 0.0 */
   559 /* 000000	   0 ( 0  0) */		.word	1127219200,0
   560 /* 0x0008	   0 ( 0  0) */		.align	4
   561 !
   562 ! SUBROUTINE conv_i32_to_d32_and_d16
   563 !
   564 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
   566                                    	.global conv_i32_to_d32_and_d16
   567                                    conv_i32_to_d32_and_d16:		/* frequency 1.0 confidence 0.0 */
   568 /* 000000	     ( 0  1) */		save	%sp,-104,%sp
   569 /* 0x0004	     ( 1  2) */		or	%g0,%i3,%i4
   570 /* 0x0008	     ( 1  2) */		or	%g0,%i2,%g1
   572 !  114		                    !   }
   573 !  115		                    !}
   574 !  118		                    !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
   575 !  119		                    !			  double * /* 0 */,
   576 !  120		                    !			  double * /*result16*/, double * /* result32 */,
   577 !  121		                    !			  float *  /*source - should be unsigned int*
   578 !  122		                    !		          	       converted to float* */);
   579 !  126		                    !void conv_i32_to_d32_and_d16(double *d32, double *d16, 
   580 !  127		                    !			     unsigned int *i32, int len)
   581 !  128		                    !{
   582 !  129		                    !int i;
   583 !  130		                    !unsigned int a;
   584 !  132		                    !#pragma pipeloop(0)
   585 !  133		                    ! for(i=0;i<len-3;i+=4)
   587 /* 0x000c	 133 ( 2  3) */		sub	%i4,3,%g2
   588 /* 0x0010	     ( 2  3) */		or	%g0,0,%o7
   589 /* 0x0014	     ( 3  4) */		cmp	%g2,0
   590 /* 0x0018	 128 ( 3  4) */		or	%g0,%i0,%i3
   591 /* 0x001c	 133 ( 3  4) */		ble,pt	%icc,.L900000515	! tprob=0.56
   592 /* 0x0020	     ( 4  5) */		cmp	%o7,%i4
   594 !  134		                    !   {
   595 !  135		                    !     i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
   596 !  136		                    !			  &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
   598 /* 0x0024	 136 ( 4  5) */		sethi	%hi(Zero),%g2
   599 /* 0x0028	 133 ( 5  6) */		or	%g0,%g1,%o3
   600 /* 0x002c	     ( 5  6) */		sub	%i4,4,%o2
   601 /* 0x0030	 136 ( 6  7) */		add	%g2,%lo(Zero),%o1
   602 /* 0x0034	 133 ( 6  7) */		or	%g0,0,%o5
   603 /* 0x0038	     ( 7  8) */		or	%g0,0,%o4
   604 /* 0x003c	 136 ( 7  8) */		or	%g0,%o3,%g4
   605                                    .L900000514:		/* frequency 6.4 confidence 0.0 */
   606 /* 0x0040	     ( 0  3) */		ldd	[%o1],%f2
   607 /* 0x0044	 136 ( 0  1) */		add	%i3,%o5,%g2
   608 /* 0x0048	     ( 0  1) */		add	%i1,%o4,%g3
   609 /* 0x004c	     ( 1  4) */		ldd	[%o1-8],%f0
   610 /* 0x0050	     ( 1  2) */		add	%o7,4,%o7
   611 /* 0x0054	     ( 1  2) */		add	%o3,16,%o3
   612 /* 0x0058	     ( 2  3) */		fmovd	%f2,%f14
   613 /* 0x005c	     ( 2  5) */		ld	[%g4],%f15
   614 /* 0x0060	     ( 2  3) */		cmp	%o7,%o2
   615 /* 0x0064	     ( 3  4) */		fmovd	%f2,%f10
   616 /* 0x0068	     ( 3  6) */		ld	[%g4+4],%f11
   617 /* 0x006c	     ( 4  5) */		fmovd	%f2,%f6
   618 /* 0x0070	     ( 4  7) */		ld	[%g4+8],%f7
   619 /* 0x0074	     ( 5  8) */		ld	[%g4+12],%f3
   620 /* 0x0078	     ( 5  8) */		fxtod	%f14,%f14
   621 /* 0x007c	     ( 6  9) */		fxtod	%f10,%f10
   622 /* 0x0080	     ( 6  9) */		ldd	[%o1-16],%f16
   623 /* 0x0084	     ( 7 10) */		fxtod	%f6,%f6
   624 /* 0x0088	     ( 7  8) */		std	%f14,[%i3+%o5]
   625 /* 0x008c	     ( 7  8) */		add	%o5,32,%o5
   626 /* 0x0090	     ( 8 11) */		fxtod	%f2,%f2
   627 /* 0x0094	     ( 8 11) */		fmuld	%f0,%f14,%f12
   628 /* 0x0098	     ( 8  9) */		std	%f10,[%g2+8]
   629 /* 0x009c	     ( 9 12) */		fmuld	%f0,%f10,%f8
   630 /* 0x00a0	     ( 9 10) */		std	%f6,[%g2+16]
   631 /* 0x00a4	     (10 13) */		fmuld	%f0,%f6,%f4
   632 /* 0x00a8	     (10 11) */		std	%f2,[%g2+24]
   633 /* 0x00ac	     (11 14) */		fmuld	%f0,%f2,%f0
   634 /* 0x00b0	     (11 14) */		fdtox	%f12,%f12
   635 /* 0x00b4	     (12 15) */		fdtox	%f8,%f8
   636 /* 0x00b8	     (13 16) */		fdtox	%f4,%f4
   637 /* 0x00bc	     (14 17) */		fdtox	%f0,%f0
   638 /* 0x00c0	     (15 18) */		fxtod	%f12,%f12
   639 /* 0x00c4	     (15 16) */		std	%f12,[%g3+8]
   640 /* 0x00c8	     (16 19) */		fxtod	%f8,%f8
   641 /* 0x00cc	     (16 17) */		std	%f8,[%g3+24]
   642 /* 0x00d0	     (17 20) */		fxtod	%f4,%f4
   643 /* 0x00d4	     (17 18) */		std	%f4,[%g3+40]
   644 /* 0x00d8	     (18 21) */		fxtod	%f0,%f0
   645 /* 0x00dc	     (18 21) */		fmuld	%f12,%f16,%f12
   646 /* 0x00e0	     (18 19) */		std	%f0,[%g3+56]
   647 /* 0x00e4	     (19 22) */		fmuld	%f8,%f16,%f8
   648 /* 0x00e8	     (20 23) */		fmuld	%f4,%f16,%f4
   649 /* 0x00ec	     (21 24) */		fmuld	%f0,%f16,%f0
   650 /* 0x00f0	     (21 24) */		fsubd	%f14,%f12,%f12
   651 /* 0x00f4	     (21 22) */		std	%f12,[%i1+%o4]
   652 /* 0x00f8	     (22 25) */		fsubd	%f10,%f8,%f8
   653 /* 0x00fc	     (22 23) */		std	%f8,[%g3+16]
   654 /* 0x0100	     (22 23) */		add	%o4,64,%o4
   655 /* 0x0104	     (23 26) */		fsubd	%f6,%f4,%f4
   656 /* 0x0108	     (23 24) */		std	%f4,[%g3+32]
   657 /* 0x010c	     (24 27) */		fsubd	%f2,%f0,%f0
   658 /* 0x0110	     (24 25) */		std	%f0,[%g3+48]
   659 /* 0x0114	     (24 25) */		ble,pt	%icc,.L900000514	! tprob=0.86
   660 /* 0x0118	     (25 26) */		or	%g0,%o3,%g4
   661                                    .L77000159:		/* frequency 1.0 confidence 0.0 */
   663 !  137		                    !   }
   664 !  138		                    ! for(;i<len;i++)
   666 /* 0x011c	 138 ( 0  1) */		cmp	%o7,%i4
   667                                    .L900000515:		/* frequency 1.0 confidence 0.0 */
   668 /* 0x0120	 138 ( 0  1) */		bge,pt	%icc,.L77000164	! tprob=0.56
   669 /* 0x0124	     ( 0  1) */		nop
   671 !  139		                    !   {
   672 !  140		                    !     a=i32[i];
   673 !  141		                    !     d32[i]=(double)(i32[i]);
   674 !  142		                    !     d16[2*i]=(double)(a&0xffff);
   675 !  143		                    !     d16[2*i+1]=(double)(a>>16);
   677 /* 0x0128	 143 ( 0  1) */		sethi	%hi(.L_const_seg_900000501),%o1
   678 /* 0x012c	 138 ( 1  2) */		sethi	%hi(0xfc00),%o0
   679 /* 0x0130	 141 ( 1  4) */		ldd	[%o1+%lo(.L_const_seg_900000501)],%f0
   680 /* 0x0134	 138 ( 1  2) */		sub	%i4,%o7,%g3
   681 /* 0x0138	     ( 2  3) */		sll	%o7,2,%g2
   682 /* 0x013c	     ( 2  3) */		add	%o0,1023,%o3
   683 /* 0x0140	     ( 3  4) */		sll	%o7,3,%g4
   684 /* 0x0144	     ( 3  4) */		cmp	%g3,3
   685 /* 0x0148	     ( 4  5) */		add	%g1,%g2,%o0
   686 /* 0x014c	     ( 4  5) */		add	%o1,%lo(.L_const_seg_900000501),%o2
   687 /* 0x0150	     ( 5  6) */		add	%i3,%g4,%o4
   688 /* 0x0154	     ( 5  6) */		sub	%i4,1,%o1
   689 /* 0x0158	     ( 6  7) */		sll	%o7,4,%g5
   690 /* 0x015c	     ( 6  7) */		bl,pn	%icc,.L77000161	! tprob=0.44
   691 /* 0x0160	     ( 7  8) */		add	%i1,%g5,%o5
   692 /* 0x0164	 141 ( 7 10) */		ld	[%g1+%g2],%f3
   693 /* 0x0168	 143 ( 7  8) */		add	%o4,8,%o4
   694 /* 0x016c	 140 ( 8 11) */		ld	[%g1+%g2],%g1
   695 /* 0x0170	 143 ( 8  9) */		add	%o5,16,%o5
   696 /* 0x0174	     ( 8  9) */		add	%o7,1,%o7
   697 /* 0x0178	 141 ( 9 10) */		fmovs	%f0,%f2
   698 /* 0x017c	 143 ( 9 10) */		add	%o0,4,%o0
   699 /* 0x0180	 142 (10 11) */		and	%g1,%o3,%g2
   700 /* 0x0184	 141 (11 14) */		fsubd	%f2,%f0,%f2
   701 /* 0x0188	     (11 12) */		std	%f2,[%o4-8]
   702 /* 0x018c	 143 (11 12) */		srl	%g1,16,%g1
   703 /* 0x0190	 142 (12 13) */		st	%g2,[%sp+96]
   704 /* 0x0194	     (15 16) */		fmovs	%f0,%f2
   705 /* 0x0198	     (16 19) */		ld	[%sp+96],%f3
   706 /* 0x019c	     (18 21) */		fsubd	%f2,%f0,%f2
   707 /* 0x01a0	     (18 19) */		std	%f2,[%o5-16]
   708 /* 0x01a4	 143 (19 20) */		st	%g1,[%sp+92]
   709 /* 0x01a8	     (22 23) */		fmovs	%f0,%f2
   710 /* 0x01ac	     (23 26) */		ld	[%sp+92],%f3
   711 /* 0x01b0	     (25 28) */		fsubd	%f2,%f0,%f2
   712 /* 0x01b4	     (25 26) */		std	%f2,[%o5-8]
   713                                    .L900000509:		/* frequency 64.0 confidence 0.0 */
   714 /* 0x01b8	 141 (26 28) */		ld	[%o0],%f3
   715 /* 0x01bc	 143 (26 27) */		add	%o7,2,%o7
   716 /* 0x01c0	     (26 27) */		add	%o5,32,%o5
   717 /* 0x01c4	 140 (27 29) */		ld	[%o0],%g1
   718 /* 0x01c8	 143 (27 27) */		cmp	%o7,%o1
   719 /* 0x01cc	     (27 28) */		add	%o4,16,%o4
   720 /* 0x01d0	 141 ( 0  0) */		fmovs	%f0,%f2
   721 /* 0x01d4	     (28 31) */		fsubd	%f2,%f0,%f2
   722 /* 0x01d8	     (29 30) */		std	%f2,[%o4-16]
   723 /* 0x01dc	 142 (29 30) */		and	%g1,%o3,%g2
   724 /* 0x01e0	     (30 31) */		st	%g2,[%sp+96]
   725 /* 0x01e4	     (37 39) */		ld	[%sp+96],%f3
   726 /* 0x01e8	     ( 0  0) */		fmovs	%f0,%f2
   727 /* 0x01ec	     (39 42) */		fsubd	%f2,%f0,%f2
   728 /* 0x01f0	 143 (40 41) */		srl	%g1,16,%g1
   729 /* 0x01f4	 142 (40 41) */		std	%f2,[%o5-32]
   730 /* 0x01f8	 143 (41 42) */		st	%g1,[%sp+92]
   731 /* 0x01fc	     (48 50) */		ld	[%sp+92],%f3
   732 /* 0x0200	     ( 0  0) */		fmovs	%f0,%f2
   733 /* 0x0204	     (50 53) */		fsubd	%f2,%f0,%f2
   734 /* 0x0208	     (51 52) */		std	%f2,[%o5-24]
   735 /* 0x020c	     (51 52) */		add	%o0,4,%o0
   736 /* 0x0210	 141 (52 54) */		ld	[%o0],%f3
   737 /* 0x0214	 140 (53 55) */		ld	[%o0],%g1
   738 /* 0x0218	 141 ( 0  0) */		fmovs	%f0,%f2
   739 /* 0x021c	     (54 57) */		fsubd	%f2,%f0,%f2
   740 /* 0x0220	     (55 56) */		std	%f2,[%o4-8]
   741 /* 0x0224	 142 (55 56) */		and	%g1,%o3,%g2
   742 /* 0x0228	     (56 57) */		st	%g2,[%sp+96]
   743 /* 0x022c	     (63 65) */		ld	[%sp+96],%f3
   744 /* 0x0230	     ( 0  0) */		fmovs	%f0,%f2
   745 /* 0x0234	     (65 68) */		fsubd	%f2,%f0,%f2
   746 /* 0x0238	 143 (66 67) */		srl	%g1,16,%g1
   747 /* 0x023c	 142 (66 67) */		std	%f2,[%o5-16]
   748 /* 0x0240	 143 (67 68) */		st	%g1,[%sp+92]
   749 /* 0x0244	     (74 76) */		ld	[%sp+92],%f3
   750 /* 0x0248	     ( 0  0) */		fmovs	%f0,%f2
   751 /* 0x024c	     (76 79) */		fsubd	%f2,%f0,%f2
   752 /* 0x0250	     (77 78) */		std	%f2,[%o5-8]
   753 /* 0x0254	     (77 78) */		bl,pt	%icc,.L900000509	! tprob=0.50
   754 /* 0x0258	     (77 78) */		add	%o0,4,%o0
   755                                    .L900000512:		/* frequency 8.0 confidence 0.0 */
   756 /* 0x025c	 143 ( 0  1) */		cmp	%o7,%i4
   757 /* 0x0260	     ( 0  1) */		bge,pn	%icc,.L77000164	! tprob=0.14
   758 /* 0x0264	     ( 0  1) */		nop
   759                                    .L77000161:		/* frequency 0.7 confidence 0.0 */
   760 /* 0x0268	 141 ( 0  3) */		ld	[%o0],%f3
   761                                    .L900000513:		/* frequency 6.4 confidence 0.0 */
   762 /* 0x026c	 141 ( 0  3) */		ldd	[%o2],%f0
   763 /* 0x0270	 143 ( 0  1) */		add	%o7,1,%o7
   764 /* 0x0274	 140 ( 1  4) */		ld	[%o0],%o1
   765 /* 0x0278	 143 ( 1  2) */		add	%o0,4,%o0
   766 /* 0x027c	     ( 1  2) */		cmp	%o7,%i4
   767 /* 0x0280	 141 ( 2  3) */		fmovs	%f0,%f2
   768 /* 0x0284	 142 ( 3  4) */		and	%o1,%o3,%g1
   769 /* 0x0288	 141 ( 4  7) */		fsubd	%f2,%f0,%f2
   770 /* 0x028c	     ( 4  5) */		std	%f2,[%o4]
   771 /* 0x0290	 143 ( 4  5) */		srl	%o1,16,%o1
   772 /* 0x0294	 142 ( 5  6) */		st	%g1,[%sp+96]
   773 /* 0x0298	 143 ( 5  6) */		add	%o4,8,%o4
   774 /* 0x029c	 142 ( 8  9) */		fmovs	%f0,%f2
   775 /* 0x02a0	     ( 9 12) */		ld	[%sp+96],%f3
   776 /* 0x02a4	     (11 14) */		fsubd	%f2,%f0,%f2
   777 /* 0x02a8	     (11 12) */		std	%f2,[%o5]
   778 /* 0x02ac	 143 (12 13) */		st	%o1,[%sp+92]
   779 /* 0x02b0	     (15 16) */		fmovs	%f0,%f2
   780 /* 0x02b4	     (16 19) */		ld	[%sp+92],%f3
   781 /* 0x02b8	     (18 21) */		fsubd	%f2,%f0,%f0
   782 /* 0x02bc	     (18 19) */		std	%f0,[%o5+8]
   783 /* 0x02c0	     (18 19) */		add	%o5,16,%o5
   784 /* 0x02c4	     (18 19) */		bl,a,pt	%icc,.L900000513	! tprob=0.86
   785 /* 0x02c8	     (19 22) */		ld	[%o0],%f3
   786                                    .L77000164:		/* frequency 1.0 confidence 0.0 */
   787 /* 0x02cc	     ( 0  2) */		ret	! Result = 
   788 /* 0x02d0	     ( 2  3) */		restore	%g0,%g0,%g0
   789 /* 0x02d4	   0 ( 0  0) */		.type	conv_i32_to_d32_and_d16,2
   790 /* 0x02d4	     ( 0  0) */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
   792 	.section	".text",#alloc,#execinstr
   793 /* 000000	   0 ( 0  0) */		.align	4
   794 !
   795 ! SUBROUTINE adjust_montf_result
   796 !
       ! void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
       !
       ! Leaf routine: no save/restore, returns with retl.
       !   In:    %o0 = i32, %o1 = nint, %o2 = len
       !   Out:   no return value; i32[0..len-1] is rewritten in place when
       !          the subtraction is performed
       !   Uses:  %g1-%g5, %o1-%o5, %icc (%o1/%o2 are overwritten on the
       !          len >= 3 path)
       !
       ! Behaviour: if i32[len] is non-zero, or if i32[0..len-1] compared word
       ! by word from index len-1 down to 0 is >= nint[0..len-1], then nint is
       ! subtracted from i32, with the borrow carried between words in a
       ! 64-bit accumulator (srax by 32).
       !
       ! FIX: the compare loop used to step i and both word pointers upward
       ! (i++), so after equal top words it compared i32[len], i32[len+1], ...
       ! against words beyond nint[len-1] and never examined the lower words.
       ! It now steps downward (i--); only the three stepping instructions
       ! changed, so offsets and scheduling are as generated.
       !
   797 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
   799                                    	.global adjust_montf_result
   800                                    adjust_montf_result:		/* frequency 1.0 confidence 0.0 */
   802 !  144		                    !   }
   803 !  145		                    !}
   804 !  148		                    !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
   805 !  149		                    !{
   806 !  150		                    !long long acc;
   807 !  151		                    !int i;
   808 !  153		                    ! if(i32[len]>0) i=-1;
   810 /* 000000	 153 ( 0  1) */		sll	%o2,2,%g1	! %g1 = len*4, byte offset of i32[len]
   811 /* 0x0004	     ( 0  1) */		or	%g0,-1,%g3	! %g3 = i = -1
   812 /* 0x0008	     ( 1  4) */		ld	[%o0+%g1],%g1	! %g1 = i32[len]
   813 /* 0x000c	     ( 3  4) */		cmp	%g1,0
   814 /* 0x0010	     ( 3  4) */		bleu,pn	%icc,.L77000175	! tprob=0.50; taken when i32[len] == 0
   815 /* 0x0014	     ( 3  4) */		or	%g0,%o1,%o3	! delay slot (always executed): %o3 = nint
   816 /* 0x0018	     ( 4  5) */		ba	.L900000611	! tprob=1.00; i32[len] != 0, keep i = -1
   817 /* 0x001c	     ( 4  5) */		cmp	%g3,0	! delay slot: set flags for the i < 0 test
   818                                    .L77000175:		/* frequency 0.8 confidence 0.0 */
   820 !  154		                    ! else
   821 !  155		                    !   {
   822 !  156		                    !     for(i=len-1; i>=0; i--)
   824 /* 0x0020	 156 ( 0  1) */		subcc	%o2,1,%g3	! %g3 = i = len-1
   825 /* 0x0024	     ( 0  1) */		bneg,pt	%icc,.L900000611	! tprob=0.60; i already negative, skip the compare loop
   826 /* 0x0028	     ( 1  2) */		cmp	%g3,0	! delay slot: set flags for the i < 0 test
   827 /* 0x002c	     ( 1  2) */		sll	%g3,2,%g1	! %g1 = i*4
   828 /* 0x0030	     ( 2  3) */		add	%o0,%g1,%g2	! %g2 = &i32[i]
   829 /* 0x0034	     ( 2  3) */		add	%o1,%g1,%g1	! %g1 = &nint[i]
   831 !  157		                    !       {
   832 !  158		                    !	 if(i32[i]!=nint[i]) break;
   834 /* 0x0038	 158 ( 3  6) */		ld	[%g1],%g5	! %g5 = nint[i]
   835                                    .L900000610:		/* frequency 5.3 confidence 0.0 */
   836 /* 0x003c	 158 ( 0  3) */		ld	[%g2],%o5	! %o5 = i32[i]
   837 /* 0x0040	     ( 0  1) */		sub	%g1,4,%g1	! step &nint[i] down one word (was add: the i++ bug)
   838 /* 0x0044	     ( 0  1) */		sub	%g2,4,%g2	! step &i32[i] down one word (was add: the i++ bug)
   839 /* 0x0048	     ( 2  3) */		cmp	%o5,%g5
   840 /* 0x004c	     ( 2  3) */		bne,pn	%icc,.L77000182	! tprob=0.16; words differ: break with i unchanged
   841 /* 0x0050	     ( 2  3) */		nop
   842 /* 0x0054	     ( 3  4) */		subcc	%g3,1,%g3	! i-- (was addcc %g3,1,%g3)
   843 /* 0x0058	     ( 3  4) */		bpos,a,pt	%icc,.L900000610	! tprob=0.84; loop while i >= 0
   844 /* 0x005c	     ( 3  6) */		ld	[%g1],%g5	! annulled delay slot: next nint[i], only when looping
   845                                    .L77000182:		/* frequency 1.0 confidence 0.0 */
   847 !  159		                    !       }
   848 !  160		                    !   }
   849 !  161		                    ! if((i<0)||(i32[i]>nint[i]))
   851 /* 0x0060	 161 ( 0  1) */		cmp	%g3,0
   852                                    .L900000611:		/* frequency 1.0 confidence 0.0 */
   853 /* 0x0064	 161 ( 0  1) */		bl,pn	%icc,.L77000198	! tprob=0.50; i < 0: go subtract
   854 /* 0x0068	     ( 0  1) */		sll	%g3,2,%g2	! delay slot: %g2 = i*4
   855 /* 0x006c	     ( 1  4) */		ld	[%o1+%g2],%g1	! %g1 = nint[i]
   856 /* 0x0070	     ( 2  5) */		ld	[%o0+%g2],%g2	! %g2 = i32[i]
   857 /* 0x0074	     ( 4  5) */		cmp	%g2,%g1
   858 /* 0x0078	     ( 4  5) */		bleu,pt	%icc,.L77000191	! tprob=0.56; i32[i] <= nint[i] (unsigned): return, nothing to do
   859 /* 0x007c	     ( 4  5) */		nop
   860                                    .L77000198:		/* frequency 0.8 confidence 0.0 */
   862 !  162		                    !   {
   863 !  163		                    !     acc=0;
   864 !  164		                    !     for(i=0;i<len;i++)
   866 /* 0x0080	 164 ( 0  1) */		cmp	%o2,0
   867 /* 0x0084	     ( 0  1) */		ble,pt	%icc,.L77000191	! tprob=0.60; len <= 0: no words to process
   868 /* 0x0088	     ( 0  1) */		nop
   869 /* 0x008c	 161 ( 1  2) */		or	%g0,-1,%g2
   870 /* 0x0090	     ( 1  2) */		sub	%o2,1,%g4	! %g4 = len-1, loop bound
   871 /* 0x0094	     ( 2  3) */		srl	%g2,0,%g3	! %g3 = 0xffffffff: srl by 0 zero-extends the low 32 bits of -1
   872 /* 0x0098	 163 ( 2  3) */		or	%g0,0,%g5	! %g5 = acc = 0
   873 /* 0x009c	 164 ( 3  4) */		or	%g0,0,%o5	! %o5 = loop counter, starts at 0
   874 /* 0x00a0	 161 ( 3  4) */		or	%g0,%o0,%o4	! %o4 = i32 cursor
   875 /* 0x00a4	     ( 4  5) */		cmp	%o2,3
   876 /* 0x00a8	     ( 4  5) */		add	%o1,4,%g2	! %g2 = &nint[1]
   877 /* 0x00ac	 164 ( 4  5) */		bl,pn	%icc,.L77000199	! tprob=0.40; len < 3: use the simple one-word-per-pass loop
   878 /* 0x00b0	     ( 5  6) */		add	%o0,8,%g1	! delay slot: %g1 = &i32[2]
   880 !  165		                    !       {
   881 !  166		                    !	 acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
   883 /* 0x00b4	 166 ( 5  8) */		ld	[%o0],%o2	! len >= 3 path, word 0 peeled: %o2 = i32[0] (len no longer needed)
   884 /* 0x00b8	   0 ( 5  6) */		or	%g0,%g2,%o3	! %o3 = &nint[1]
   885 /* 0x00bc	 166 ( 6  9) */		ld	[%o1],%o1	! %o1 = nint[0]
   886 /* 0x00c0	   0 ( 6  7) */		or	%g0,%g1,%o4	! %o4 = &i32[2]
   888 !  167		                    !	 i32[i]=acc&0xffffffff;
   889 !  168		                    !	 acc=acc>>32;
   891 /* 0x00c4	 168 ( 6  7) */		or	%g0,2,%o5	! counter = 2
   892 /* 0x00c8	 166 ( 7 10) */		ld	[%o0+4],%g1	! %g1 = i32[1], preloaded for the loop
   893 /* 0x00cc	 164 ( 8  9) */		sub	%o2,%o1,%o2	! acc = i32[0] - nint[0] (acc was 0)
   894 /* 0x00d0	     ( 9 10) */		or	%g0,%o2,%g5
   895 /* 0x00d4	 167 ( 9 10) */		and	%o2,%g3,%o2
   896 /* 0x00d8	     ( 9 10) */		st	%o2,[%o0]	! i32[0] = acc & 0xffffffff
   897 /* 0x00dc	 168 (10 11) */		srax	%g5,32,%g5	! acc >>= 32 (arithmetic: 0 or -1 borrow)
   898                                    .L900000605:		/* frequency 64.0 confidence 0.0 */
   899 /* 0x00e0	 166 (12 20) */		ld	[%o3],%o2	! next nint word
   900 /* 0x00e4	 168 (12 13) */		add	%o5,1,%o5
   901 /* 0x00e8	     (12 13) */		add	%o3,4,%o3
   902 /* 0x00ec	     (13 13) */		cmp	%o5,%g4	! counter vs len-1
   903 /* 0x00f0	     (13 14) */		add	%o4,4,%o4
   904 /* 0x00f4	 164 (14 14) */		sub	%g1,%o2,%g1	! preloaded i32 word - nint word
   905 /* 0x00f8	     (15 15) */		add	%g1,%g5,%g5	! acc = difference + previous borrow
   906 /* 0x00fc	 167 (16 17) */		and	%g5,%g3,%o2
   907 /* 0x0100	 166 (16 24) */		ld	[%o4-4],%g1	! preload the following i32 word
   908 /* 0x0104	 167 (17 18) */		st	%o2,[%o4-8]	! store low 32 bits of acc to the current i32 word
   909 /* 0x0108	 168 (17 18) */		ble,pt	%icc,.L900000605	! tprob=0.50
   910 /* 0x010c	     (17 18) */		srax	%g5,32,%g5	! delay slot (always executed): acc >>= 32
   911                                    .L900000608:		/* frequency 8.0 confidence 0.0 */
   912 /* 0x0110	 166 ( 0  3) */		ld	[%o3],%g2	! final word: last nint word
   913 /* 0x0114	 164 ( 2  3) */		sub	%g1,%g2,%g1
   914 /* 0x0118	     ( 3  4) */		add	%g1,%g5,%g1
   915 /* 0x011c	 167 ( 4  5) */		and	%g1,%g3,%g2
   916 /* 0x0120	     ( 5  7) */		retl	! Result = 
   917 /* 0x0124	     ( 6  7) */		st	%g2,[%o4-4]	! delay slot: store the last i32 word
   918                                    .L77000199:		/* frequency 0.6 confidence 0.0 */
   919 /* 0x0128	 166 ( 0  3) */		ld	[%o4],%g1	! len < 3 path: %g1 = i32[0]
   920                                    .L900000609:		/* frequency 5.3 confidence 0.0 */
   921 /* 0x012c	 166 ( 0  3) */		ld	[%o3],%g2	! %g2 = current nint word (%o3 = nint cursor set at entry)
   922 /* 0x0130	     ( 0  1) */		add	%g5,%g1,%g1	! acc + i32 word
   923 /* 0x0134	 168 ( 0  1) */		add	%o5,1,%o5
   924 /* 0x0138	     ( 1  2) */		add	%o3,4,%o3
   925 /* 0x013c	     ( 1  2) */		cmp	%o5,%g4	! counter vs len-1
   926 /* 0x0140	 166 ( 2  3) */		sub	%g1,%g2,%g1	! ... - nint word
   927 /* 0x0144	 167 ( 3  4) */		and	%g1,%g3,%g2
   928 /* 0x0148	     ( 3  4) */		st	%g2,[%o4]	! store low 32 bits back to the i32 word
   929 /* 0x014c	 168 ( 3  4) */		add	%o4,4,%o4
   930 /* 0x0150	     ( 4  5) */		srax	%g1,32,%g5	! acc = borrow for the next word
   931 /* 0x0154	     ( 4  5) */		ble,a,pt	%icc,.L900000609	! tprob=0.84
   932 /* 0x0158	     ( 4  7) */		ld	[%o4],%g1	! annulled delay slot: next i32 word, only when looping
   933                                    .L77000191:		/* frequency 1.0 confidence 0.0 */
   934 /* 0x015c	     ( 0  2) */		retl	! Result = 
   935 /* 0x0160	     ( 1  2) */		nop
   936 /* 0x0164	   0 ( 0  0) */		.type	adjust_montf_result,2
   937 /* 0x0164	     ( 0  0) */		.size	adjust_montf_result,(.-adjust_montf_result)
   939 	.section	".text",#alloc,#execinstr
   940 /* 000000	   0 ( 0  0) */		.align	32
   941 !
   942 ! SUBROUTINE mont_mulf_noconv
   943 !
   944 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION	(ISSUE TIME)	(COMPLETION TIME)
   946                                    	.global mont_mulf_noconv
   947                                    mont_mulf_noconv:		/* frequency 1.0 confidence 0.0 */
   948 /* 000000	     ( 0  1) */		save	%sp,-144,%sp
   949 /* 0x0004	     ( 1  2) */		st	%i0,[%fp+68]
   951 !  169		                    !       }
   952 !  170		                    !   }
   953 !  171		                    !}
   954 !  175		                    !void cleanup(double *dt, int from, int tlen);
   955 !  177		                    !/*
   956 !  178		                    !** the lengths of the input arrays should be at least the following:
   957 !  179		                    !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
   958 !  180		                    !** all of them should be different from one another
   959 !  181		                    !**
   960 !  182		                    !*/
   961 !  183		                    !void mont_mulf_noconv(unsigned int *result,
   962 !  184		                    !		     double *dm1, double *dm2, double *dt,
   963 !  185		                    !		     double *dn, unsigned int *nint,
   964 !  186		                    !		     int nlen, double dn0)
   965 !  187		                    !{
   966 !  188		                    ! int i, j, jj;
   967 !  189		                    ! int tmp;
   968 !  190		                    ! double digit, m2j, nextm2j, a, b;
   969 !  191		                    ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
   970 !  193		                    ! pdm1=&(dm1[0]);
   971 !  194		                    ! pdm2=&(dm2[0]);
   972 !  195		                    ! pdn=&(dn[0]);
   973 !  196		                    ! pdm2[2*nlen]=Zero;
   975 /* 0x0008	 196 ( 1  2) */		sethi	%hi(Zero),%g2
   976 /* 0x000c	 187 ( 1  2) */		or	%g0,%i2,%o1
   977 /* 0x0010	     ( 2  3) */		st	%i5,[%fp+88]
   978 /* 0x0014	     ( 2  3) */		or	%g0,%i3,%o2
   979 /* 0x0018	 196 ( 2  3) */		add	%g2,%lo(Zero),%g4
   980 /* 0x001c	     ( 3  6) */		ldd	[%g2+%lo(Zero)],%f2
   981 /* 0x0020	 187 ( 3  4) */		or	%g0,%o2,%g5
   982 /* 0x0024	 196 ( 3  4) */		or	%g0,%o1,%i0
   983 /* 0x0028	 187 ( 4  5) */		or	%g0,%i4,%i2
   985 !  198		                    ! if (nlen!=16)
   986 !  199		                    !   {
   987 !  200		                    !     for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
   988 !  202		                    !     a=dt[0]=pdm1[0]*pdm2[0];
   989 !  203		                    !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
   990 !  205		                    !     pdtj=&(dt[0]);
   991 !  206		                    !     for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
   992 !  207		                    !       {
   993 !  208		                    !	 m2j=pdm2[j];
   994 !  209		                    !	 a=pdtj[0]+pdn[0]*digit;
   995 !  210		                    !	 b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
   996 !  211		                    !	 pdtj[1]=b;
   997 !  213		                    !#pragma pipeloop(0)
   998 !  214		                    !	 for(i=1;i<nlen;i++)
   999 !  215		                    !	   {
  1000 !  216		                    !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
  1001 !  217		                    !	   }
  1002 !  218		                    ! 	 if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
  1003 !  219		                    !	 
  1004 !  220		                    !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
  1005 !  221		                    !       }
  1006 !  222		                    !   }
  1007 !  223		                    ! else
  1008 !  224		                    !   {
  1009 !  225		                    !     a=dt[0]=pdm1[0]*pdm2[0];
  1010 !  227		                    !     dt[65]=     dt[64]=     dt[63]=     dt[62]=     dt[61]=     dt[60]=
  1011 !  228		                    !     dt[59]=     dt[58]=     dt[57]=     dt[56]=     dt[55]=     dt[54]=
  1012 !  229		                    !     dt[53]=     dt[52]=     dt[51]=     dt[50]=     dt[49]=     dt[48]=
  1013 !  230		                    !     dt[47]=     dt[46]=     dt[45]=     dt[44]=     dt[43]=     dt[42]=
  1014 !  231		                    !     dt[41]=     dt[40]=     dt[39]=     dt[38]=     dt[37]=     dt[36]=
  1015 !  232		                    !     dt[35]=     dt[34]=     dt[33]=     dt[32]=     dt[31]=     dt[30]=
  1016 !  233		                    !     dt[29]=     dt[28]=     dt[27]=     dt[26]=     dt[25]=     dt[24]=
  1017 !  234		                    !     dt[23]=     dt[22]=     dt[21]=     dt[20]=     dt[19]=     dt[18]=
  1018 !  235		                    !     dt[17]=     dt[16]=     dt[15]=     dt[14]=     dt[13]=     dt[12]=
  1019 !  236		                    !     dt[11]=     dt[10]=     dt[ 9]=     dt[ 8]=     dt[ 7]=     dt[ 6]=
  1020 !  237		                    !     dt[ 5]=     dt[ 4]=     dt[ 3]=     dt[ 2]=     dt[ 1]=Zero;
  1021 !  239		                    !     pdn_0=pdn[0];
  1022 !  240		                    !     pdm1_0=pdm1[0];
  1023 !  242		                    !     digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
  1024 !  243		                    !     pdtj=&(dt[0]);
  1025 !  245		                    !     for(j=0;j<32;j++,pdtj++)
  1026 !  246		                    !       {
  1027 !  248		                    !	 m2j=pdm2[j];
  1028 !  249		                    !	 a=pdtj[0]+pdn_0*digit;
  1029 !  250		                    !	 b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
  1030 !  251		                    !	 pdtj[1]=b;
  1031 !  253		                    !	 /**** this loop will be fully unrolled:
  1032 !  254		                    !	 for(i=1;i<16;i++)
  1033 !  255		                    !	   {
  1034 !  256		                    !	     pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
  1035 !  257		                    !	   }
  1036 !  258		                    !	 *************************************/
  1037 !  259		                    !	     pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
  1038 !  260		                    !	     pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
  1039 !  261		                    !	     pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
  1040 !  262		                    !	     pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
  1041 !  263		                    !	     pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
  1042 !  264		                    !	     pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
  1043 !  265		                    !	     pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
  1044 !  266		                    !	     pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
  1045 !  267		                    !	     pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
  1046 !  268		                    !	     pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
  1047 !  269		                    !	     pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
  1048 !  270		                    !	     pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
  1049 !  271		                    !	     pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
  1050 !  272		                    !	     pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
  1051 !  273		                    !	     pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
  1052 !  274		                    !	 /* no need for cleenup, cannot overflow */
  1053 !  275		                    !	 digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
  1054 !  276		                    !       }
  1055 !  277		                    !   }
  1056 !  279		                    ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
  1057 !  281		                    ! adjust_montf_result(result,nint,nlen); 
  1059 /* 0x002c	 281 ( 4  5) */		or	%g0,1,%o4
  1060 /* 0x0030	 187 ( 6  9) */		ldd	[%fp+96],%f0
  1061 /* 0x0034	 196 ( 7 10) */		ld	[%fp+92],%o0
  1062 /* 0x0038	 187 ( 8  9) */		fmovd	%f0,%f16
  1063 /* 0x003c	 196 ( 9 10) */		sll	%o0,4,%g2
  1064 /* 0x0040	     ( 9 10) */		or	%g0,%o0,%g1
  1065 /* 0x0044	 198 (10 11) */		cmp	%o0,16
  1066 /* 0x0048	     (10 11) */		be,pn	%icc,.L77000289	! tprob=0.50
  1067 /* 0x004c	     (10 11) */		std	%f2,[%o1+%g2]
  1068 /* 0x0050	 200 (11 12) */		sll	%o0,2,%g2
  1069 /* 0x0054	     (11 14) */		ldd	[%g4],%f2
  1070 /* 0x0058	     (12 13) */		add	%g2,2,%o1
  1071 /* 0x005c	     (12 13) */		add	%g2,1,%o3
  1072 /* 0x0060	 196 (13 14) */		sll	%o0,1,%o7
  1073 /* 0x0064	 200 (13 14) */		cmp	%o1,0
  1074 /* 0x0068	     (13 14) */		ble,a,pt	%icc,.L900000755	! tprob=0.55
  1075 /* 0x006c	     (14 17) */		ldd	[%i1],%f0
  1076 /* 0x0070	     (14 15) */		cmp	%o1,3
  1077 /* 0x0074	 281 (14 15) */		or	%g0,1,%o1
  1078 /* 0x0078	     (14 15) */		bl,pn	%icc,.L77000279	! tprob=0.40
  1079 /* 0x007c	     (15 16) */		add	%o2,8,%o0
  1080 /* 0x0080	     (15 16) */		std	%f2,[%g5]
  1081 /* 0x0084	   0 (16 17) */		or	%g0,%o0,%o2
  1082                                    .L900000726:		/* frequency 64.0 confidence 0.0 */
  1083 /* 0x0088	     ( 3  5) */		ldd	[%g4],%f0
  1084 /* 0x008c	     ( 3  4) */		add	%o4,1,%o4
  1085 /* 0x0090	     ( 3  4) */		add	%o2,8,%o2
  1086 /* 0x0094	     ( 4  4) */		cmp	%o4,%o3
  1087 /* 0x0098	     ( 5  6) */		ble,pt	%icc,.L900000726	! tprob=0.50
  1088 /* 0x009c	     ( 5  6) */		std	%f0,[%o2-8]
  1089                                    .L900000729:		/* frequency 8.0 confidence 0.0 */
  1090 /* 0x00a0	     ( 0  1) */		ba	.L900000755	! tprob=1.00
  1091 /* 0x00a4	     ( 0  3) */		ldd	[%i1],%f0
  1092                                    .L77000279:		/* frequency 0.6 confidence 0.0 */
  1093 /* 0x00a8	     ( 0  1) */		std	%f2,[%o2]
  1094                                    .L900000754:		/* frequency 5.3 confidence 0.0 */
  1095 /* 0x00ac	     ( 0  3) */		ldd	[%g4],%f2
  1096 /* 0x00b0	     ( 0  1) */		cmp	%o1,%o3
  1097 /* 0x00b4	     ( 0  1) */		add	%o2,8,%o2
  1098 /* 0x00b8	     ( 1  2) */		add	%o1,1,%o1
  1099 /* 0x00bc	     ( 1  2) */		ble,a,pt	%icc,.L900000754	! tprob=0.87
  1100 /* 0x00c0	     ( 3  4) */		std	%f2,[%o2]
  1101                                    .L77000284:		/* frequency 0.8 confidence 0.0 */
  1102 /* 0x00c4	 202 ( 0  3) */		ldd	[%i1],%f0
  1103                                    .L900000755:		/* frequency 0.8 confidence 0.0 */
  1104 /* 0x00c8	 202 ( 0  3) */		ldd	[%i0],%f2
  1105 /* 0x00cc	     ( 0  1) */		add	%o7,1,%o2
  1106 /* 0x00d0	 206 ( 0  1) */		cmp	%o7,0
  1107 /* 0x00d4	     ( 1  2) */		sll	%o2,1,%o0
  1108 /* 0x00d8	     ( 1  2) */		sub	%o7,1,%o1
  1109 /* 0x00dc	 202 ( 2  5) */		fmuld	%f0,%f2,%f0
  1110 /* 0x00e0	     ( 2  3) */		std	%f0,[%g5]
  1111 /* 0x00e4	     ( 2  3) */		sub	%g1,1,%o7
  1112 /* 0x00e8	     ( 3  6) */		ldd	[%g4],%f6
  1113 /* 0x00ec	   0 ( 3  4) */		or	%g0,%o7,%g3
  1114 /* 0x00f0	     ( 3  4) */		or	%g0,0,%l0
  1115 /* 0x00f4	     ( 4  7) */		ldd	[%g4-8],%f2
  1116 /* 0x00f8	     ( 4  5) */		or	%g0,0,%i5
  1117 /* 0x00fc	     ( 4  5) */		or	%g0,%o1,%o5
  1118 /* 0x0100	     ( 5  8) */		fdtox	%f0,%f0
  1119 /* 0x0104	     ( 5  8) */		ldd	[%g4-16],%f4
  1120 /* 0x0108	     ( 5  6) */		or	%g0,%o0,%o3
  1121 /* 0x010c	 210 ( 6  7) */		add	%i0,8,%o4
  1122 /* 0x0110	     ( 6  7) */		or	%g0,0,%i4
  1123 /* 0x0114	     ( 9 10) */		fmovs	%f6,%f0
  1124 /* 0x0118	     (11 14) */		fxtod	%f0,%f0
  1125 /* 0x011c	 203 (14 17) */		fmuld	%f0,%f16,%f0
  1126 /* 0x0120	     (17 20) */		fmuld	%f0,%f2,%f2
  1127 /* 0x0124	     (20 23) */		fdtox	%f2,%f2
  1128 /* 0x0128	     (23 26) */		fxtod	%f2,%f2
  1129 /* 0x012c	     (26 29) */		fmuld	%f2,%f4,%f2
  1130 /* 0x0130	     (29 32) */		fsubd	%f0,%f2,%f22
  1131 /* 0x0134	 206 (29 30) */		ble,pt	%icc,.L900000748	! tprob=0.60
  1132 /* 0x0138	     (29 30) */		sll	%g1,4,%g2
  1133 /* 0x013c	 210 (30 33) */		ldd	[%i2],%f0
  1134                                    .L900000749:		/* frequency 5.3 confidence 0.0 */
  1135 /* 0x0140	 210 ( 0  3) */		fmuld	%f0,%f22,%f8
  1136 /* 0x0144	     ( 0  3) */		ldd	[%i1],%f0
  1137 /* 0x0148	 214 ( 0  1) */		cmp	%g1,1
  1138 /* 0x014c	 210 ( 1  4) */		ldd	[%o4+%i4],%f6
  1139 /* 0x0150	     ( 1  2) */		add	%i1,8,%o0
  1140 /* 0x0154	 214 ( 1  2) */		or	%g0,1,%o1
  1141 /* 0x0158	 210 ( 2  5) */		ldd	[%i3],%f2
  1142 /* 0x015c	     ( 2  3) */		add	%i3,16,%l1
  1143 /* 0x0160	     ( 3  6) */		fmuld	%f0,%f6,%f6
  1144 /* 0x0164	     ( 3  6) */		ldd	[%g4-8],%f4
  1145 /* 0x0168	     ( 4  7) */		faddd	%f2,%f8,%f2
  1146 /* 0x016c	     ( 4  7) */		ldd	[%i3+8],%f0
  1147 /* 0x0170	 208 ( 5  8) */		ldd	[%i0+%i4],%f20
  1148 /* 0x0174	 210 ( 6  9) */		faddd	%f0,%f6,%f0
  1149 /* 0x0178	     ( 7 10) */		fmuld	%f2,%f4,%f2
  1150 /* 0x017c	     (10 13) */		faddd	%f0,%f2,%f18
  1151 /* 0x0180	 211 (10 11) */		std	%f18,[%i3+8]
  1152 /* 0x0184	 214 (10 11) */		ble,pt	%icc,.L900000753	! tprob=0.54
  1153 /* 0x0188	     (11 12) */		srl	%i5,31,%g2
  1154 /* 0x018c	     (11 12) */		cmp	%g3,7
  1155 /* 0x0190	 210 (12 13) */		add	%i2,8,%g2
  1156 /* 0x0194	 214 (12 13) */		bl,pn	%icc,.L77000281	! tprob=0.36
  1157 /* 0x0198	     (13 14) */		add	%g2,24,%o2
  1158 /* 0x019c	 216 (13 16) */		ldd	[%o0+16],%f14
  1159 /* 0x01a0	     (13 14) */		add	%i3,48,%l1
  1160 /* 0x01a4	     (14 17) */		ldd	[%o0+24],%f12
  1161 /* 0x01a8	   0 (14 15) */		or	%g0,%o2,%g2
  1162 /* 0x01ac	 214 (14 15) */		sub	%g1,3,%o2
  1163 /* 0x01b0	 216 (15 18) */		ldd	[%o0],%f2
  1164 /* 0x01b4	     (15 16) */		or	%g0,5,%o1
  1165 /* 0x01b8	     (16 19) */		ldd	[%g2-24],%f0
  1166 /* 0x01bc	     (17 20) */		ldd	[%o0+8],%f6
  1167 /* 0x01c0	     (17 20) */		fmuld	%f2,%f20,%f2
  1168 /* 0x01c4	     (17 18) */		add	%o0,32,%o0
  1169 /* 0x01c8	     (18 21) */		ldd	[%g2-16],%f8
  1170 /* 0x01cc	     (18 21) */		fmuld	%f0,%f22,%f4
  1171 /* 0x01d0	     (19 22) */		ldd	[%i3+16],%f0
  1172 /* 0x01d4	     (19 22) */		fmuld	%f6,%f20,%f10
  1173 /* 0x01d8	     (20 23) */		ldd	[%g2-8],%f6
  1174 /* 0x01dc	     (21 24) */		faddd	%f2,%f4,%f4
  1175 /* 0x01e0	     (21 24) */		ldd	[%i3+32],%f2
  1176                                    .L900000738:		/* frequency 512.0 confidence 0.0 */
  1177 /* 0x01e4	 216 (16 24) */		ldd	[%g2],%f24
  1178 /* 0x01e8	     (16 17) */		add	%o1,3,%o1
  1179 /* 0x01ec	     (16 17) */		add	%g2,24,%g2
  1180 /* 0x01f0	     (16 19) */		fmuld	%f8,%f22,%f8
  1181 /* 0x01f4	     (17 25) */		ldd	[%l1],%f28
  1182 /* 0x01f8	     (17 17) */		cmp	%o1,%o2
  1183 /* 0x01fc	     (17 18) */		add	%o0,24,%o0
  1184 /* 0x0200	     (18 26) */		ldd	[%o0-24],%f26
  1185 /* 0x0204	     (18 21) */		faddd	%f0,%f4,%f0
  1186 /* 0x0208	     (18 19) */		add	%l1,48,%l1
  1187 /* 0x020c	     (19 22) */		faddd	%f10,%f8,%f10
  1188 /* 0x0210	     (19 22) */		fmuld	%f14,%f20,%f4
  1189 /* 0x0214	     (19 20) */		std	%f0,[%l1-80]
  1190 /* 0x0218	     (20 28) */		ldd	[%g2-16],%f8
  1191 /* 0x021c	     (20 23) */		fmuld	%f6,%f22,%f6
  1192 /* 0x0220	     (21 29) */		ldd	[%l1-32],%f0
  1193 /* 0x0224	     (22 30) */		ldd	[%o0-16],%f14
  1194 /* 0x0228	     (22 25) */		faddd	%f2,%f10,%f2
  1195 /* 0x022c	     (23 26) */		faddd	%f4,%f6,%f10
  1196 /* 0x0230	     (23 26) */		fmuld	%f12,%f20,%f4
  1197 /* 0x0234	     (23 24) */		std	%f2,[%l1-64]
  1198 /* 0x0238	     (24 32) */		ldd	[%g2-8],%f6
  1199 /* 0x023c	     (24 27) */		fmuld	%f24,%f22,%f24
  1200 /* 0x0240	     (25 33) */		ldd	[%l1-16],%f2
  1201 /* 0x0244	     (26 34) */		ldd	[%o0-8],%f12
  1202 /* 0x0248	     (26 29) */		faddd	%f28,%f10,%f10
  1203 /* 0x024c	     (27 28) */		std	%f10,[%l1-48]
  1204 /* 0x0250	     (27 30) */		fmuld	%f26,%f20,%f10
  1205 /* 0x0254	     (27 28) */		ble,pt	%icc,.L900000738	! tprob=0.50
  1206 /* 0x0258	     (27 30) */		faddd	%f4,%f24,%f4
  1207                                    .L900000741:		/* frequency 64.0 confidence 0.0 */
  1208 /* 0x025c	 216 ( 0  3) */		fmuld	%f8,%f22,%f28
  1209 /* 0x0260	     ( 0  3) */		ldd	[%g2],%f24
  1210 /* 0x0264	     ( 0  3) */		faddd	%f0,%f4,%f26
  1211 /* 0x0268	     ( 1  4) */		fmuld	%f12,%f20,%f8
  1212 /* 0x026c	     ( 1  2) */		add	%l1,32,%l1
  1213 /* 0x0270	     ( 1  2) */		cmp	%o1,%g3
  1214 /* 0x0274	     ( 2  5) */		fmuld	%f14,%f20,%f14
  1215 /* 0x0278	     ( 2  5) */		ldd	[%l1-32],%f4
  1216 /* 0x027c	     ( 2  3) */		add	%g2,8,%g2
  1217 /* 0x0280	     ( 3  6) */		faddd	%f10,%f28,%f12
  1218 /* 0x0284	     ( 3  6) */		fmuld	%f6,%f22,%f6
  1219 /* 0x0288	     ( 3  6) */		ldd	[%l1-16],%f0
  1220 /* 0x028c	     ( 4  7) */		fmuld	%f24,%f22,%f10
  1221 /* 0x0290	     ( 4  5) */		std	%f26,[%l1-64]
  1222 /* 0x0294	     ( 6  9) */		faddd	%f2,%f12,%f2
  1223 /* 0x0298	     ( 6  7) */		std	%f2,[%l1-48]
  1224 /* 0x029c	     ( 7 10) */		faddd	%f14,%f6,%f6
  1225 /* 0x02a0	     ( 8 11) */		faddd	%f8,%f10,%f2
  1226 /* 0x02a4	     (10 13) */		faddd	%f4,%f6,%f4
  1227 /* 0x02a8	     (10 11) */		std	%f4,[%l1-32]
  1228 /* 0x02ac	     (11 14) */		faddd	%f0,%f2,%f0
  1229 /* 0x02b0	     (11 12) */		bg,pn	%icc,.L77000213	! tprob=0.13
  1230 /* 0x02b4	     (11 12) */		std	%f0,[%l1-16]
  1231                                    .L77000281:		/* frequency 4.0 confidence 0.0 */
  1232 /* 0x02b8	 216 ( 0  3) */		ldd	[%o0],%f0
  1233                                    .L900000752:		/* frequency 36.6 confidence 0.0 */
  1234 /* 0x02bc	 216 ( 0  3) */		ldd	[%g2],%f4
  1235 /* 0x02c0	     ( 0  3) */		fmuld	%f0,%f20,%f2
  1236 /* 0x02c4	     ( 0  1) */		add	%o1,1,%o1
  1237 /* 0x02c8	     ( 1  4) */		ldd	[%l1],%f0
  1238 /* 0x02cc	     ( 1  2) */		add	%o0,8,%o0
  1239 /* 0x02d0	     ( 1  2) */		add	%g2,8,%g2
  1240 /* 0x02d4	     ( 2  5) */		fmuld	%f4,%f22,%f4
  1241 /* 0x02d8	     ( 2  3) */		cmp	%o1,%g3
  1242 /* 0x02dc	     ( 5  8) */		faddd	%f2,%f4,%f2
  1243 /* 0x02e0	     ( 8 11) */		faddd	%f0,%f2,%f0
  1244 /* 0x02e4	     ( 8  9) */		std	%f0,[%l1]
  1245 /* 0x02e8	     ( 8  9) */		add	%l1,16,%l1
  1246 /* 0x02ec	     ( 8  9) */		ble,a,pt	%icc,.L900000752	! tprob=0.87
  1247 /* 0x02f0	     (10 13) */		ldd	[%o0],%f0
  1248                                    .L77000213:		/* frequency 5.3 confidence 0.0 */
  1249 /* 0x02f4	     ( 0  1) */		srl	%i5,31,%g2
  1250                                    .L900000753:		/* frequency 5.3 confidence 0.0 */
  1251 /* 0x02f8	 218 ( 0  1) */		cmp	%l0,30
  1252 /* 0x02fc	     ( 0  1) */		bne,a,pt	%icc,.L900000751	! tprob=0.54
  1253 /* 0x0300	     ( 0  3) */		fdtox	%f18,%f0
  1254 /* 0x0304	     ( 1  2) */		add	%i5,%g2,%g2
  1255 /* 0x0308	     ( 1  2) */		sub	%o3,1,%o2
  1256 /* 0x030c	     ( 2  3) */		sra	%g2,1,%o0
  1257 /* 0x0310	 216 ( 2  5) */		ldd	[%g4],%f0
  1258 /* 0x0314	     ( 3  4) */		add	%o0,1,%g2
  1259 /* 0x0318	     ( 4  5) */		sll	%g2,1,%o0
  1260 /* 0x031c	     ( 4  5) */		fmovd	%f0,%f2
  1261 /* 0x0320	     ( 5  6) */		sll	%g2,4,%o1
  1262 /* 0x0324	     ( 5  6) */		cmp	%o0,%o3
  1263 /* 0x0328	     ( 5  6) */		bge,pt	%icc,.L77000215	! tprob=0.53
  1264 /* 0x032c	     ( 6  7) */		or	%g0,0,%l0
  1265 /* 0x0330	 218 ( 6  7) */		add	%g5,%o1,%o1
  1266 /* 0x0334	 216 ( 7 10) */		ldd	[%o1],%f8
  1267                                    .L900000750:		/* frequency 32.0 confidence 0.0 */
  1268 /* 0x0338	     ( 0  3) */		fdtox	%f8,%f6
  1269 /* 0x033c	     ( 0  3) */		ldd	[%g4],%f10
  1270 /* 0x0340	     ( 0  1) */		add	%o0,2,%o0
  1271 /* 0x0344	     ( 1  4) */		ldd	[%o1+8],%f4
  1272 /* 0x0348	     ( 1  4) */		fdtox	%f8,%f8
  1273 /* 0x034c	     ( 1  2) */		cmp	%o0,%o2
  1274 /* 0x0350	     ( 5  6) */		fmovs	%f10,%f6
  1275 /* 0x0354	     ( 7 10) */		fxtod	%f6,%f10
  1276 /* 0x0358	     ( 8 11) */		fdtox	%f4,%f6
  1277 /* 0x035c	     ( 9 12) */		fdtox	%f4,%f4
  1278 /* 0x0360	     (10 13) */		faddd	%f10,%f2,%f2
  1279 /* 0x0364	     (10 11) */		std	%f2,[%o1]
  1280 /* 0x0368	     (12 15) */		ldd	[%g4],%f2
  1281 /* 0x036c	     (14 15) */		fmovs	%f2,%f6
  1282 /* 0x0370	     (16 19) */		fxtod	%f6,%f6
  1283 /* 0x0374	     (17 20) */		fitod	%f8,%f2
  1284 /* 0x0378	     (19 22) */		faddd	%f6,%f0,%f0
  1285 /* 0x037c	     (19 20) */		std	%f0,[%o1+8]
  1286 /* 0x0380	     (19 20) */		add	%o1,16,%o1
  1287 /* 0x0384	     (20 23) */		fitod	%f4,%f0
  1288 /* 0x0388	     (20 21) */		ble,a,pt	%icc,.L900000750	! tprob=0.87
  1289 /* 0x038c	     (20 23) */		ldd	[%o1],%f8
  1290                                    .L77000233:		/* frequency 4.6 confidence 0.0 */
  1291 /* 0x0390	     ( 0  0) */		or	%g0,0,%l0
  1292                                    .L77000215:		/* frequency 5.3 confidence 0.0 */
  1293 /* 0x0394	     ( 0  3) */		fdtox	%f18,%f0
  1294                                    .L900000751:		/* frequency 5.3 confidence 0.0 */
  1295 /* 0x0398	     ( 0  3) */		ldd	[%g4],%f6
  1296 /* 0x039c	 220 ( 0  1) */		add	%i5,1,%i5
  1297 /* 0x03a0	     ( 0  1) */		add	%i4,8,%i4
  1298 /* 0x03a4	     ( 1  4) */		ldd	[%g4-8],%f2
  1299 /* 0x03a8	     ( 1  2) */		add	%l0,1,%l0
  1300 /* 0x03ac	     ( 1  2) */		add	%i3,8,%i3
  1301 /* 0x03b0	     ( 2  3) */		fmovs	%f6,%f0
  1302 /* 0x03b4	     ( 2  5) */		ldd	[%g4-16],%f4
  1303 /* 0x03b8	     ( 2  3) */		cmp	%i5,%o5
  1304 /* 0x03bc	     ( 4  7) */		fxtod	%f0,%f0
  1305 /* 0x03c0	     ( 7 10) */		fmuld	%f0,%f16,%f0
  1306 /* 0x03c4	     (10 13) */		fmuld	%f0,%f2,%f2
  1307 /* 0x03c8	     (13 16) */		fdtox	%f2,%f2
  1308 /* 0x03cc	     (16 19) */		fxtod	%f2,%f2
  1309 /* 0x03d0	     (19 22) */		fmuld	%f2,%f4,%f2
  1310 /* 0x03d4	     (22 25) */		fsubd	%f0,%f2,%f22
  1311 /* 0x03d8	     (22 23) */		ble,a,pt	%icc,.L900000749	! tprob=0.89
  1312 /* 0x03dc	     (22 25) */		ldd	[%i2],%f0
  1313                                    .L900000725:		/* frequency 0.7 confidence 0.0 */
  1314 /* 0x03e0	 220 ( 0  1) */		ba	.L900000748	! tprob=1.00
  1315 /* 0x03e4	     ( 0  1) */		sll	%g1,4,%g2
  1318                                    .L77000289:		/* frequency 0.8 confidence 0.0 */
  1319 /* 0x03e8	 225 ( 0  3) */		ldd	[%o1],%f6
  1320 /* 0x03ec	 242 ( 0  1) */		add	%g4,-8,%g2
  1321 /* 0x03f0	     ( 0  1) */		add	%g4,-16,%g3
  1322 /* 0x03f4	 225 ( 1  4) */		ldd	[%i1],%f2
  1323 /* 0x03f8	 245 ( 1  2) */		or	%g0,0,%o3
  1324 /* 0x03fc	     ( 1  2) */		or	%g0,0,%o0
  1325 /* 0x0400	 225 ( 3  6) */		fmuld	%f2,%f6,%f2
  1326 /* 0x0404	     ( 3  4) */		std	%f2,[%o2]
  1327 /* 0x0408	     ( 4  7) */		ldd	[%g4],%f6
  1328 /* 0x040c	 237 ( 7  8) */		std	%f6,[%o2+8]
  1329 /* 0x0410	     ( 8  9) */		std	%f6,[%o2+16]
  1330 /* 0x0414	     ( 9 10) */		std	%f6,[%o2+24]
  1331 /* 0x0418	     (10 11) */		std	%f6,[%o2+32]
  1332 /* 0x041c	     (11 12) */		std	%f6,[%o2+40]
  1333 /* 0x0420	     (12 13) */		std	%f6,[%o2+48]
  1334 /* 0x0424	     (13 14) */		std	%f6,[%o2+56]
  1335 /* 0x0428	     (14 15) */		std	%f6,[%o2+64]
  1336 /* 0x042c	     (15 16) */		std	%f6,[%o2+72]
  1337 !	prefetch	[%i4],0
  1338 !	prefetch	[%i4+32],0
  1339 !	prefetch	[%i4+64],0
  1340 !	prefetch	[%i4+96],0
  1341 !	prefetch	[%i4+120],0
  1342 !	prefetch	[%i1],0
  1343 !	prefetch	[%i1+32],0
  1344 !	prefetch	[%i1+64],0
  1345 !	prefetch	[%i1+96],0
  1346 !	prefetch	[%i1+120],0
  1347 /* 0x0430	     (16 17) */		std	%f6,[%o2+80]
  1348 /* 0x0434	     (17 18) */		std	%f6,[%o2+88]
  1349 /* 0x0438	     (18 19) */		std	%f6,[%o2+96]
  1350 /* 0x043c	     (19 20) */		std	%f6,[%o2+104]
  1351 /* 0x0440	     (20 21) */		std	%f6,[%o2+112]
  1352 /* 0x0444	     (21 22) */		std	%f6,[%o2+120]
  1353 /* 0x0448	     (22 23) */		std	%f6,[%o2+128]
  1354 /* 0x044c	     (23 24) */		std	%f6,[%o2+136]
  1355 /* 0x0450	     (24 25) */		std	%f6,[%o2+144]
  1356 /* 0x0454	     (25 26) */		std	%f6,[%o2+152]
  1357 /* 0x0458	     (26 27) */		std	%f6,[%o2+160]
  1358 /* 0x045c	     (27 28) */		std	%f6,[%o2+168]
  1359 /* 0x0460	     (27 30) */		fdtox	%f2,%f2
  1360 /* 0x0464	     (28 29) */		std	%f6,[%o2+176]
  1361 /* 0x0468	     (29 30) */		std	%f6,[%o2+184]
  1362 /* 0x046c	     (30 31) */		std	%f6,[%o2+192]
  1363 /* 0x0470	     (31 32) */		std	%f6,[%o2+200]
  1364 /* 0x0474	     (32 33) */		std	%f6,[%o2+208]
  1365 /* 0x0478	     (33 34) */		std	%f6,[%o2+216]
  1366 /* 0x047c	     (34 35) */		std	%f6,[%o2+224]
  1367 /* 0x0480	     (35 36) */		std	%f6,[%o2+232]
  1368 /* 0x0484	     (36 37) */		std	%f6,[%o2+240]
  1369 /* 0x0488	     (37 38) */		std	%f6,[%o2+248]
  1370 /* 0x048c	     (38 39) */		std	%f6,[%o2+256]
  1371 /* 0x0490	     (39 40) */		std	%f6,[%o2+264]
  1372 /* 0x0494	     (40 41) */		std	%f6,[%o2+272]
  1373 /* 0x0498	     (41 42) */		std	%f6,[%o2+280]
  1374 /* 0x049c	     (42 43) */		std	%f6,[%o2+288]
  1375 /* 0x04a0	     (43 44) */		std	%f6,[%o2+296]
  1376 /* 0x04a4	     (44 45) */		std	%f6,[%o2+304]
  1377 /* 0x04a8	     (45 46) */		std	%f6,[%o2+312]
  1378 /* 0x04ac	     (46 47) */		std	%f6,[%o2+320]
  1379 /* 0x04b0	     (47 48) */		std	%f6,[%o2+328]
  1380 /* 0x04b4	     (48 49) */		std	%f6,[%o2+336]
  1381 /* 0x04b8	     (49 50) */		std	%f6,[%o2+344]
  1382 /* 0x04bc	     (50 51) */		std	%f6,[%o2+352]
  1383 /* 0x04c0	     (51 52) */		std	%f6,[%o2+360]
  1384 /* 0x04c4	     (52 53) */		std	%f6,[%o2+368]
  1385 /* 0x04c8	     (53 54) */		std	%f6,[%o2+376]
  1386 /* 0x04cc	     (54 55) */		std	%f6,[%o2+384]
  1387 /* 0x04d0	     (55 56) */		std	%f6,[%o2+392]
  1388 /* 0x04d4	     (56 57) */		std	%f6,[%o2+400]
  1389 /* 0x04d8	     (57 58) */		std	%f6,[%o2+408]
  1390 /* 0x04dc	     (58 59) */		std	%f6,[%o2+416]
  1391 /* 0x04e0	     (59 60) */		std	%f6,[%o2+424]
  1392 /* 0x04e4	     (60 61) */		std	%f6,[%o2+432]
  1393 /* 0x04e8	     (61 62) */		std	%f6,[%o2+440]
  1394 /* 0x04ec	     (62 63) */		std	%f6,[%o2+448]
  1395 /* 0x04f0	     (63 64) */		std	%f6,[%o2+456]
  1396 /* 0x04f4	     (64 65) */		std	%f6,[%o2+464]
  1397 /* 0x04f8	     (65 66) */		std	%f6,[%o2+472]
  1398 /* 0x04fc	     (66 67) */		std	%f6,[%o2+480]
  1399 /* 0x0500	     (67 68) */		std	%f6,[%o2+488]
  1400 /* 0x0504	     (68 69) */		std	%f6,[%o2+496]
  1401 /* 0x0508	     (69 70) */		std	%f6,[%o2+504]
  1402 /* 0x050c	     (70 71) */		std	%f6,[%o2+512]
  1403 /* 0x0510	     (71 72) */		std	%f6,[%o2+520]
  1404 /* 0x0514	 242 (72 75) */		ld	[%g4],%f2 ! dalign
  1405 /* 0x0518	     (73 76) */		ld	[%g2],%f6 ! dalign
  1406 /* 0x051c	     (74 77) */		fxtod	%f2,%f10
  1407 /* 0x0520	     (74 77) */		ld	[%g2+4],%f7
  1408 /* 0x0524	     (75 78) */		ld	[%g3],%f8 ! dalign
  1409 /* 0x0528	     (76 79) */		ld	[%g3+4],%f9
  1410 /* 0x052c	     (77 80) */		fmuld	%f10,%f0,%f0
  1411 /* 0x0530	 239 (77 80) */		ldd	[%i4],%f4
  1412 /* 0x0534	 240 (78 81) */		ldd	[%i1],%f2
  1413 /* 0x0538	     (80 83) */		fmuld	%f0,%f6,%f6
  1414 /* 0x053c	     (83 86) */		fdtox	%f6,%f6
  1415 /* 0x0540	     (86 89) */		fxtod	%f6,%f6
  1416 /* 0x0544	     (89 92) */		fmuld	%f6,%f8,%f6
  1417 /* 0x0548	     (92 95) */		fsubd	%f0,%f6,%f0
  1418 /* 0x054c	 250 (95 98) */		fmuld	%f4,%f0,%f10
  1419                                    .L900000747:		/* frequency 6.4 confidence 0.0 */
  1422 	fmovd %f0,%f0
  1423 	fmovd %f16,%f18
  1424 	ldd [%i4],%f2
  1425 	ldd [%o2],%f8
  1426 	ldd [%i1],%f10
  1427 	ldd [%g4-8],%f14
  1428 	ldd [%g4-16],%f16
  1429 	ldd [%o1],%f24
  1431 	ldd [%i1+8],%f26
  1432 	ldd [%i1+16],%f40
  1433 	ldd [%i1+48],%f46
  1434 	ldd [%i1+56],%f30
  1435 	ldd [%i1+64],%f54
  1436 	ldd [%i1+104],%f34
  1437 	ldd [%i1+112],%f58
  1439 	ldd [%i4+112],%f60
  1440 	ldd [%i4+8],%f28	
  1441 	ldd [%i4+104],%f38
  1443 	nop
  1444 	nop
  1446 	.L99999999:
  1447 !1
  1448 !!!
  1449 	ldd	[%i1+24],%f32
  1450 	fmuld	%f0,%f2,%f4
  1451 !2
  1452 !!!
  1453 	ldd	[%i4+24],%f36
  1454 	fmuld	%f26,%f24,%f20
  1455 !3
  1456 !!!
  1457 	ldd	[%i1+40],%f42
  1458 	fmuld	%f28,%f0,%f22
  1459 !4
  1460 !!!
  1461 	ldd	[%i4+40],%f44
  1462 	fmuld	%f32,%f24,%f32
  1463 !5
  1464 !!!
  1465 	ldd	[%o1+8],%f6
  1466 	faddd	%f4,%f8,%f4
  1467 	fmuld	%f36,%f0,%f36
  1468 !6
  1469 !!!
  1470 	add	%o1,8,%o1
  1471 	ldd	[%i4+56],%f50
  1472 	fmuld	%f42,%f24,%f42
  1473 !7
  1474 !!!
  1475 	ldd	[%i1+72],%f52
  1476 	faddd	%f20,%f22,%f20
  1477 	fmuld	%f44,%f0,%f44
  1478 !8
  1479 !!!
  1480 	ldd	[%o2+16],%f22
  1481 	fmuld	%f10,%f6,%f12
  1482 !9
  1483 !!!
  1484 	ldd	[%i4+72],%f56
  1485 	faddd	%f32,%f36,%f32
  1486 	fmuld	%f14,%f4,%f4
  1487 !10
  1488 !!!
  1489 	ldd	[%o2+48],%f36
  1490 	fmuld	%f30,%f24,%f48
  1491 !11
  1492 !!!
  1493 	ldd	[%o2+8],%f8
  1494 	faddd	%f20,%f22,%f20
  1495 	fmuld	%f50,%f0,%f50	
  1496 !12
  1497 !!!
  1498 	std	%f20,[%o2+16]
  1499 	faddd	%f42,%f44,%f42
  1500 	fmuld	%f52,%f24,%f52
  1501 !13
  1502 !!!
  1503 	ldd	[%o2+80],%f44
  1504 	faddd	%f4,%f12,%f4
  1505 	fmuld	%f56,%f0,%f56
  1506 !14
  1507 !!!
  1508 	ldd	[%i1+88],%f20
  1509 	faddd	%f32,%f36,%f32
  1510 !15
  1511 !!!
  1512 	ldd	[%i4+88],%f22
  1513 	faddd	%f48,%f50,%f48
  1514 !16
  1515 !!!
  1516 	ldd	[%o2+112],%f50
  1517 	faddd	%f52,%f56,%f52
  1518 !17
  1519 !!!
  1520 	ldd	[%o2+144],%f56
  1521 	faddd	%f4,%f8,%f8
  1522 	fmuld	%f20,%f24,%f20
  1523 !18
  1524 !!!
  1525 	std	%f32,[%o2+48]
  1526 	faddd	%f42,%f44,%f42
  1527 	fmuld	%f22,%f0,%f22
  1528 !19
  1529 !!!
  1530 	std	%f42,[%o2+80]
  1531 	faddd	%f48,%f50,%f48
  1532 	fmuld	%f34,%f24,%f32
  1533 !20
  1534 !!!
  1535 	std	%f48,[%o2+112]
  1536 	faddd	%f52,%f56,%f52
  1537 	fmuld	%f38,%f0,%f36
  1538 !21
  1539 !!!
  1540 	ldd	[%i1+120],%f42
  1541 	fdtox	%f8,%f4
  1542 !22
  1543 !!!
  1544 	std	%f52,[%o2+144]
  1545 	faddd	%f20,%f22,%f20
  1546 !23
  1547 !!!
  1548 	ldd	[%i4+120],%f44
  1549 !24
  1550 !!!
  1551 	ldd	[%o2+176],%f22
  1552 	faddd	%f32,%f36,%f32
  1553 	fmuld	%f42,%f24,%f42
  1554 !25
  1555 !!!
  1556 	ldd	[%i4+16],%f50
  1557 	fmovs	%f17,%f4
  1558 !26
  1559 !!!
  1560 	ldd	[%i1+32],%f52
  1561 	fmuld	%f44,%f0,%f44
  1562 !27
  1563 !!!
  1564 	ldd	[%i4+32],%f56
  1565 	fmuld	%f40,%f24,%f48
  1566 !28
  1567 !!!
  1568 	ldd	[%o2+208],%f36
  1569 	faddd	%f20,%f22,%f20
  1570 	fmuld	%f50,%f0,%f50
  1571 !29
  1572 !!!
  1573 	std	%f20,[%o2+176]
  1574 	fxtod	%f4,%f4
  1575 	fmuld	%f52,%f24,%f52
  1576 !30
  1577 !!!
  1578 	ldd	[%i4+48],%f22
  1579 	faddd	%f42,%f44,%f42
  1580 	fmuld	%f56,%f0,%f56
  1581 !31
  1582 !!!
  1583 	ldd	[%o2+240],%f44
  1584 	faddd	%f32,%f36,%f32
  1585 !32
  1586 !!!
  1587 	std	%f32,[%o2+208]
  1588 	faddd	%f48,%f50,%f48
  1589 	fmuld	%f46,%f24,%f20
  1590 !33
  1591 !!!
  1592 	ldd	[%o2+32],%f50
  1593 	fmuld	%f4,%f18,%f12
  1594 !34
  1595 !!!
  1596 	ldd	[%i4+64],%f36
  1597 	faddd	%f52,%f56,%f52
  1598 	fmuld	%f22,%f0,%f22
  1599 !35
  1600 !!!
  1601 	ldd	[%o2+64],%f56
  1602 	faddd	%f42,%f44,%f42
  1603 !36
  1604 !!!
  1605 	std	%f42,[%o2+240]
  1606 	faddd	%f48,%f50,%f48
  1607 	fmuld	%f54,%f24,%f32
  1608 !37
  1609 !!!
  1610 	std	%f48,[%o2+32]
  1611 	fmuld	%f12,%f14,%f4
  1612 !38
  1613 !!!
  1614 	ldd	[%i1+80],%f42
  1615 	faddd	%f52,%f56,%f56	! yes, tmp52!
  1616 	fmuld	%f36,%f0,%f36
  1617 !39
  1618 !!!
  1619 	ldd	[%i4+80],%f44
  1620 	faddd	%f20,%f22,%f20
  1621 !40
  1622 !!!
  1623 	ldd	[%i1+96],%f48
  1624 	fmuld	%f58,%f24,%f52
  1625 !41
  1626 !!!
  1627 	ldd	[%i4+96],%f50
  1628 	fdtox	%f4,%f4
  1629 	fmuld	%f42,%f24,%f42
  1630 !42
  1631 !!!
  1632 	std	%f56,[%o2+64]	! yes, tmp52!
  1633 	faddd	%f32,%f36,%f32
  1634 	fmuld	%f44,%f0,%f44
  1635 !43
  1636 !!!
  1637 	ldd	[%o2+96],%f22
  1638 	fmuld	%f48,%f24,%f48
  1639 !44
  1640 !!!
  1641 	ldd	[%o2+128],%f36
  1642 	fmovd	%f6,%f24
  1643 	fmuld	%f50,%f0,%f50
  1644 !45
  1645 !!!
  1646 	fxtod	%f4,%f4
  1647 	fmuld	%f60,%f0,%f56
  1648 !46
  1649 !!!
  1650 	add	%o2,8,%o2
  1651 	faddd	%f42,%f44,%f42
  1652 !47
  1653 !!!
  1654 	ldd	[%o2+160-8],%f44
  1655 	faddd	%f20,%f22,%f20
  1656 !48
  1657 !!!
  1658 	std	%f20,[%o2+96-8]
  1659 	faddd	%f48,%f50,%f48
  1660 !49
  1661 !!!
  1662 	ldd	[%o2+192-8],%f50
  1663 	faddd	%f52,%f56,%f52
  1664 	fmuld	%f4,%f16,%f4
  1665 !50
  1666 !!!
  1667 	ldd	[%o2+224-8],%f56
  1668 	faddd	%f32,%f36,%f32
  1669 !51
  1670 !!!
  1671 	std	%f32,[%o2+128-8]
  1672 	faddd	%f42,%f44,%f42
  1673 !52
  1674 	add	%o3,1,%o3
  1675 	std	%f42,[%o2+160-8]
  1676 	faddd	%f48,%f50,%f48
  1677 !53
  1678 !!!
  1679 	cmp	%o3,31
  1680 	std	%f48,[%o2+192-8]
  1681 	faddd	%f52,%f56,%f52
  1682 !54
  1683 	std	%f52,[%o2+224-8]
  1684 	ble,pt	%icc,.L99999999
  1685 	fsubd	%f12,%f4,%f0
  1689 !55
  1690 	std %f8,[%o2]
  1697 	                                   .L77000285:		/* frequency 1.0 confidence 0.0 */
  1698 /* 0x07a8	 279 ( 0  1) */		sll	%g1,4,%g2
  1699                                    .L900000748:		/* frequency 1.0 confidence 0.0 */
  1700 /* 0x07ac	 279 ( 0  3) */		ldd	[%g5+%g2],%f0
  1701 /* 0x07b0	     ( 0  1) */		add	%g5,%g2,%i1
  1702 /* 0x07b4	     ( 0  1) */		or	%g0,0,%o4
  1703 /* 0x07b8	 206 ( 1  4) */		ld	[%fp+68],%o0
  1704 /* 0x07bc	 279 ( 1  2) */		or	%g0,0,%i0
  1705 /* 0x07c0	     ( 1  2) */		cmp	%g1,0
  1706 /* 0x07c4	     ( 2  5) */		fdtox	%f0,%f0
  1707 /* 0x07c8	     ( 2  3) */		std	%f0,[%sp+120]
  1708 /* 0x07cc	 275 ( 2  3) */		sethi	%hi(0xfc00),%o1
  1709 /* 0x07d0	 206 ( 3  4) */		or	%g0,%o0,%o3
  1710 /* 0x07d4	 275 ( 3  4) */		sub	%g1,1,%g4
  1711 /* 0x07d8	 279 ( 4  7) */		ldd	[%i1+8],%f0
  1712 /* 0x07dc	     ( 4  5) */		or	%g0,%o0,%g5
  1713 /* 0x07e0	     ( 4  5) */		add	%o1,1023,%o1
  1714 /* 0x07e4	     ( 6  9) */		fdtox	%f0,%f0
  1715 /* 0x07e8	     ( 6  7) */		std	%f0,[%sp+112]
  1716 /* 0x07ec	     (10 12) */		ldx	[%sp+112],%o5
  1717 /* 0x07f0	     (11 13) */		ldx	[%sp+120],%o7
  1718 /* 0x07f4	     (11 12) */		ble,pt	%icc,.L900000746	! tprob=0.56
  1719 /* 0x07f8	     (11 12) */		sethi	%hi(0xfc00),%g2
  1720 /* 0x07fc	 275 (12 13) */		or	%g0,-1,%g2
  1721 /* 0x0800	 279 (12 13) */		cmp	%g1,3
  1722 /* 0x0804	 275 (13 14) */		srl	%g2,0,%o2
  1723 /* 0x0808	 279 (13 14) */		bl,pn	%icc,.L77000286	! tprob=0.44
  1724 /* 0x080c	     (13 14) */		or	%g0,%i1,%g2
  1725 /* 0x0810	     (14 17) */		ldd	[%i1+16],%f0
  1726 /* 0x0814	     (14 15) */		and	%o5,%o1,%o0
  1727 /* 0x0818	     (14 15) */		add	%i1,16,%g2
  1728 /* 0x081c	     (15 16) */		sllx	%o0,16,%g3
  1729 /* 0x0820	     (15 16) */		and	%o7,%o2,%o0
  1730 /* 0x0824	     (16 19) */		fdtox	%f0,%f0
  1731 /* 0x0828	     (16 17) */		std	%f0,[%sp+104]
  1732 /* 0x082c	     (16 17) */		add	%o0,%g3,%o4
  1733 /* 0x0830	     (17 20) */		ldd	[%i1+24],%f2
  1734 /* 0x0834	     (17 18) */		srax	%o5,16,%o0
  1735 /* 0x0838	     (17 18) */		add	%o3,4,%g5
  1736 /* 0x083c	     (18 19) */		stx	%o0,[%sp+128]
  1737 /* 0x0840	     (18 19) */		and	%o4,%o2,%o0
  1738 /* 0x0844	     (18 19) */		or	%g0,1,%i0
  1739 /* 0x0848	     (19 20) */		stx	%o0,[%sp+112]
  1740 /* 0x084c	     (19 20) */		srax	%o4,32,%o0
  1741 /* 0x0850	     (19 22) */		fdtox	%f2,%f0
  1742 /* 0x0854	     (20 21) */		stx	%o0,[%sp+136]
  1743 /* 0x0858	     (20 21) */		srax	%o7,32,%o4
  1744 /* 0x085c	     (21 22) */		std	%f0,[%sp+96]
  1745 /* 0x0860	     (22 24) */		ldx	[%sp+136],%o7
  1746 /* 0x0864	     (23 25) */		ldx	[%sp+128],%o0
  1747 /* 0x0868	     (25 27) */		ldx	[%sp+104],%g3
  1748 /* 0x086c	     (25 26) */		add	%o0,%o7,%o0
  1749 /* 0x0870	     (26 28) */		ldx	[%sp+112],%o7
  1750 /* 0x0874	     (26 27) */		add	%o4,%o0,%o4
  1751 /* 0x0878	     (27 29) */		ldx	[%sp+96],%o5
  1752 /* 0x087c	     (28 29) */		st	%o7,[%o3]
  1753 /* 0x0880	     (28 29) */		or	%g0,%g3,%o7
  1754                                    .L900000730:		/* frequency 64.0 confidence 0.0 */
  1755 /* 0x0884	     (17 19) */		ldd	[%g2+16],%f0
  1756 /* 0x0888	     (17 18) */		add	%i0,1,%i0
  1757 /* 0x088c	     (17 18) */		add	%g5,4,%g5
  1758 /* 0x0890	     (18 18) */		cmp	%i0,%g4
  1759 /* 0x0894	     (18 19) */		add	%g2,16,%g2
  1760 /* 0x0898	     (19 22) */		fdtox	%f0,%f0
  1761 /* 0x089c	     (20 21) */		std	%f0,[%sp+104]
  1762 /* 0x08a0	     (21 23) */		ldd	[%g2+8],%f0
  1763 /* 0x08a4	     (23 26) */		fdtox	%f0,%f0
  1764 /* 0x08a8	     (24 25) */		std	%f0,[%sp+96]
  1765 /* 0x08ac	     (25 26) */		and	%o5,%o1,%g3
  1766 /* 0x08b0	     (26 27) */		sllx	%g3,16,%g3
  1767 /* 0x08b4	     ( 0  0) */		stx	%g3,[%sp+120]
  1768 /* 0x08b8	     (26 27) */		and	%o7,%o2,%g3
  1769 /* 0x08bc	     ( 0  0) */		stx	%o7,[%sp+128]
  1770 /* 0x08c0	     ( 0  0) */		ldx	[%sp+120],%o7
  1771 /* 0x08c4	     (27 27) */		add	%g3,%o7,%g3
  1772 /* 0x08c8	     ( 0  0) */		ldx	[%sp+128],%o7
  1773 /* 0x08cc	     (28 29) */		srax	%o5,16,%o5
  1774 /* 0x08d0	     (28 28) */		add	%g3,%o4,%g3
  1775 /* 0x08d4	     (29 30) */		srax	%g3,32,%o4
  1776 /* 0x08d8	     ( 0  0) */		stx	%o4,[%sp+112]
  1777 /* 0x08dc	     (30 31) */		srax	%o7,32,%o4
  1778 /* 0x08e0	     ( 0  0) */		ldx	[%sp+112],%o7
  1779 /* 0x08e4	     (30 31) */		add	%o5,%o7,%o7
  1780 /* 0x08e8	     (31 33) */		ldx	[%sp+96],%o5
  1781 /* 0x08ec	     (31 32) */		add	%o4,%o7,%o4
  1782 /* 0x08f0	     (32 33) */		and	%g3,%o2,%g3
  1783 /* 0x08f4	     ( 0  0) */		ldx	[%sp+104],%o7
  1784 /* 0x08f8	     (33 34) */		ble,pt	%icc,.L900000730	! tprob=0.50
  1785 /* 0x08fc	     (33 34) */		st	%g3,[%g5-4]
  1786                                    .L900000733:		/* frequency 8.0 confidence 0.0 */
  1787 /* 0x0900	     ( 0  1) */		ba	.L900000746	! tprob=1.00
  1788 /* 0x0904	     ( 0  1) */		sethi	%hi(0xfc00),%g2
  1789                                    .L77000286:		/* frequency 0.7 confidence 0.0 */
  1790 /* 0x0908	     ( 0  3) */		ldd	[%g2+16],%f0
  1791                                    .L900000745:		/* frequency 6.4 confidence 0.0 */
  1792 /* 0x090c	     ( 0  1) */		and	%o7,%o2,%o0
  1793 /* 0x0910	     ( 0  1) */		and	%o5,%o1,%g3
  1794 /* 0x0914	     ( 0  3) */		fdtox	%f0,%f0
  1795 /* 0x0918	     ( 1  2) */		add	%o4,%o0,%o0
  1796 /* 0x091c	     ( 1  2) */		std	%f0,[%sp+104]
  1797 /* 0x0920	     ( 1  2) */		add	%i0,1,%i0
  1798 /* 0x0924	     ( 2  3) */		sllx	%g3,16,%o4
  1799 /* 0x0928	     ( 2  5) */		ldd	[%g2+24],%f2
  1800 /* 0x092c	     ( 2  3) */		add	%g2,16,%g2
  1801 /* 0x0930	     ( 3  4) */		add	%o0,%o4,%o4
  1802 /* 0x0934	     ( 3  4) */		cmp	%i0,%g4
  1803 /* 0x0938	     ( 4  5) */		srax	%o5,16,%o0
  1804 /* 0x093c	     ( 4  5) */		stx	%o0,[%sp+112]
  1805 /* 0x0940	     ( 4  5) */		and	%o4,%o2,%g3
  1806 /* 0x0944	     ( 5  6) */		srax	%o4,32,%o5
  1807 /* 0x0948	     ( 5  8) */		fdtox	%f2,%f0
  1808 /* 0x094c	     ( 5  6) */		std	%f0,[%sp+96]
  1809 /* 0x0950	     ( 6  7) */		srax	%o7,32,%o4
  1810 /* 0x0954	     ( 6  8) */		ldx	[%sp+112],%o7
  1811 /* 0x0958	     ( 8  9) */		add	%o7,%o5,%o7
  1812 /* 0x095c	     ( 9 11) */		ldx	[%sp+104],%o5
  1813 /* 0x0960	     ( 9 10) */		add	%o4,%o7,%o4
  1814 /* 0x0964	     (10 12) */		ldx	[%sp+96],%o0
  1815 /* 0x0968	     (11 12) */		st	%g3,[%g5]
  1816 /* 0x096c	     (11 12) */		or	%g0,%o5,%o7
  1817 /* 0x0970	     (11 12) */		add	%g5,4,%g5
  1818 /* 0x0974	     (12 13) */		or	%g0,%o0,%o5
  1819 /* 0x0978	     (12 13) */		ble,a,pt	%icc,.L900000745	! tprob=0.86
  1820 /* 0x097c	     (12 15) */		ldd	[%g2+16],%f0
  1821                                    .L77000236:		/* frequency 1.0 confidence 0.0 */
  1822 /* 0x0980	     ( 0  1) */		sethi	%hi(0xfc00),%g2
  1823                                    .L900000746:		/* frequency 1.0 confidence 0.0 */
  1824 /* 0x0984	     ( 0  1) */		or	%g0,-1,%o0
  1825 /* 0x0988	     ( 0  1) */		add	%g2,1023,%g2
  1826 /* 0x098c	     ( 0  3) */		ld	[%fp+88],%o1
  1827 /* 0x0990	     ( 1  2) */		srl	%o0,0,%g3
  1828 /* 0x0994	     ( 1  2) */		and	%o5,%g2,%g2
  1829 /* 0x0998	     ( 2  3) */		and	%o7,%g3,%g4
  1830 /* 0x099c	 281 ( 2  3) */		or	%g0,-1,%o5
  1831 /* 0x09a0	 275 ( 3  4) */		sllx	%g2,16,%g2
  1832 /* 0x09a4	     ( 3  4) */		add	%o4,%g4,%g4
  1833 /* 0x09a8	     ( 4  5) */		add	%g4,%g2,%g2
  1834 /* 0x09ac	     ( 5  6) */		sll	%i0,2,%g4
  1835 /* 0x09b0	     ( 5  6) */		and	%g2,%g3,%g2
  1836 /* 0x09b4	     ( 6  7) */		st	%g2,[%o3+%g4]
  1837 /* 0x09b8	 281 ( 6  7) */		sll	%g1,2,%g2
  1838 /* 0x09bc	     ( 7 10) */		ld	[%o3+%g2],%g2
  1839 /* 0x09c0	     ( 9 10) */		cmp	%g2,0
  1840 /* 0x09c4	     ( 9 10) */		bleu,pn	%icc,.L77000241	! tprob=0.50
  1841 /* 0x09c8	     ( 9 10) */		or	%g0,%o1,%o2
  1842 /* 0x09cc	     (10 11) */		ba	.L900000744	! tprob=1.00
  1843 /* 0x09d0	     (10 11) */		cmp	%o5,0
  1844                                    .L77000241:		/* frequency 0.8 confidence 0.0 */
/* Decide whether the final subtraction is needed by comparing the two  */
/* 32-bit word arrays at [%o3] and [%o1] (== [%o2]) from the most       */
/* significant word (index %g1-1) DOWN to index 0.                      */
/*   %o5 = word index i, %o0 -> [%o1] word i, %o4 -> [%o3] word i.      */
/* Exit state consumed at .L900000744:                                  */
/*   %o5 <  0 : every word matched (or %g1 <= 0)   -> .L77000287        */
/*   %o5 >= 0 : first differing word; subtract only if the [%o3] word   */
/*              is the larger one (unsigned compare).                   */
/* NOTE(review): %g1 is presumably nlen, %o3 the result and %o1/%o2 the */
/* modulus words; %g1 is set before this listing chunk - confirm.       */
/* FIX: the scan previously stepped the index and both pointers UP      */
/* (add 4 / addcc 1) although it starts at the top word, so equal top   */
/* words made it read past the end of both arrays.  It now steps down.  */
  1845 /* 0x09d4	     ( 0  1) */		subcc	%g1,1,%o5
  1846 /* 0x09d8	     ( 0  1) */		bneg,pt	%icc,.L900000744	! tprob=0.60
  1847 /* 0x09dc	     ( 1  2) */		cmp	%o5,0
  1848 /* 0x09e0	     ( 1  2) */		sll	%o5,2,%g2
  1849 /* 0x09e4	     ( 2  3) */		add	%o1,%g2,%o0
  1850 /* 0x09e8	     ( 2  3) */		add	%o3,%g2,%o4
  1851 /* 0x09ec	     ( 3  6) */		ld	[%o0],%g2
  1852                                    .L900000743:		/* frequency 5.3 confidence 0.0 */
  1853 /* 0x09f0	     ( 0  3) */		ld	[%o4],%g3
/* step both pointers one word toward index 0 */
  1854 /* 0x09f4	     ( 0  1) */		sub	%o0,4,%o0
  1855 /* 0x09f8	     ( 0  1) */		sub	%o4,4,%o4
  1856 /* 0x09fc	     ( 2  3) */		cmp	%g3,%g2
  1857 /* 0x0a00	     ( 2  3) */		bne,pn	%icc,.L77000244	! tprob=0.16
  1858 /* 0x0a04	     ( 2  3) */		nop
/* i--; keep looping while i >= 0.  The delay-slot load is annulled on  */
/* loop exit, so no word below index 0 is ever read.                    */
  1859 /* 0x0a08	     ( 3  4) */		subcc	%o5,1,%o5
  1860 /* 0x0a0c	     ( 3  4) */		bpos,a,pt	%icc,.L900000743	! tprob=0.84
  1861 /* 0x0a10	     ( 3  6) */		ld	[%o0],%g2
  1862                                    .L77000244:		/* frequency 1.0 confidence 0.0 */
  1863 /* 0x0a14	     ( 0  1) */		cmp	%o5,0
  1864                                    .L900000744:		/* frequency 1.0 confidence 0.0 */
/* Entered with the condition codes already set from "cmp %o5,0".       */
  1865 /* 0x0a18	     ( 0  1) */		bl,pn	%icc,.L77000287	! tprob=0.50
  1866 /* 0x0a1c	     ( 0  1) */		sll	%o5,2,%g2
  1867 /* 0x0a20	     ( 1  4) */		ld	[%o2+%g2],%g3
  1868 /* 0x0a24	     ( 2  5) */		ld	[%o3+%g2],%g2
  1869 /* 0x0a28	     ( 4  5) */		cmp	%g2,%g3
/* [%o3] word <= [%o2] word (unsigned): nothing to subtract, return.    */
  1870 /* 0x0a2c	     ( 4  5) */		bleu,pt	%icc,.L77000224	! tprob=0.56
  1871 /* 0x0a30	     ( 4  5) */		nop
  1872                                    .L77000287:		/* frequency 0.8 confidence 0.0 */
/* In-place multi-word subtraction: for each of %g1 words,              */
/*   [%o3] word i = ([%o3] word i - [%o2] word i + carry) & 0xffffffff  */
/* where carry is the arithmetic >>32 of the previous 64-bit sum        */
/* (0 or -1, i.e. a borrow).  Returns immediately when %g1 <= 0.        */
/* Counts below 3 use the simple loop at .L77000288; otherwise the      */
/* software-pipelined form below is used: word 0 in the prologue,       */
/* words 1..count-2 in the loop, the last word in the epilogue.         */
/* NOTE(review): relies on "ld" zero-extending into the 64-bit          */
/* register (V9 behaviour) so that the 64-bit "sub" yields the borrow.  */
  1873 /* 0x0a34	     ( 0  1) */		cmp	%g1,0
  1874 /* 0x0a38	     ( 0  1) */		ble,pt	%icc,.L77000224	! tprob=0.60
  1875 /* 0x0a3c	     ( 0  1) */		nop
/* %o7 = index of last word, %o4 = 0x00000000ffffffff mask,             */
/* %o5 = running carry (starts at 0), %g1 reused as loop counter.       */
  1876 /* 0x0a40	 281 ( 1  2) */		sub	%g1,1,%o7
  1877 /* 0x0a44	     ( 1  2) */		or	%g0,-1,%g2
  1878 /* 0x0a48	     ( 2  3) */		srl	%g2,0,%o4
  1879 /* 0x0a4c	     ( 2  3) */		add	%o7,1,%o0
  1880 /* 0x0a50	 279 ( 3  4) */		or	%g0,0,%o5
  1881 /* 0x0a54	     ( 3  4) */		or	%g0,0,%g1
  1882 /* 0x0a58	     ( 4  5) */		cmp	%o0,3
/* The delay slot is not annulled, so %o1 = %o3+8 is computed on both   */
/* paths; only the pipelined path below uses it.                        */
  1883 /* 0x0a5c	     ( 4  5) */		bl,pn	%icc,.L77000288	! tprob=0.40
  1884 /* 0x0a60	     ( 4  5) */		add	%o3,8,%o1
/* Prologue: handle word 0, pre-load word 1 of [%o3] into %o0, and      */
/* bias the pointers (%o3 by +8, %o2 by +4) for the loop body.          */
  1885 /* 0x0a64	     ( 5  6) */		add	%o2,4,%o0
  1886 /* 0x0a68	     ( 5  8) */		ld	[%o1-8],%g2
  1887 /* 0x0a6c	   0 ( 5  6) */		or	%g0,%o1,%o3
  1888 /* 0x0a70	 279 ( 6  9) */		ld	[%o0-4],%g3
  1889 /* 0x0a74	   0 ( 6  7) */		or	%g0,%o0,%o2
  1890 /* 0x0a78	 279 ( 6  7) */		or	%g0,2,%g1
  1891 /* 0x0a7c	     ( 7 10) */		ld	[%o3-4],%o0
  1892 /* 0x0a80	     ( 8  9) */		sub	%g2,%g3,%g2
  1893 /* 0x0a84	     ( 9 10) */		or	%g0,%g2,%o5
  1894 /* 0x0a88	     ( 9 10) */		and	%g2,%o4,%g2
  1895 /* 0x0a8c	     ( 9 10) */		st	%g2,[%o3-8]
  1896 /* 0x0a90	     (10 11) */		srax	%o5,32,%o5
  1897                                    .L900000734:		/* frequency 64.0 confidence 0.0 */
/* Steady state: %o0 already holds the current [%o3] word; subtract the */
/* matching [%o2] word, add the carry, store the low 32 bits, and       */
/* pre-load the next [%o3] word.  The srax in the (non-annulled) delay  */
/* slot produces the next carry on every pass, including the last.      */
  1898 /* 0x0a94	     (12 20) */		ld	[%o2],%g2
  1899 /* 0x0a98	     (12 13) */		add	%g1,1,%g1
  1900 /* 0x0a9c	     (12 13) */		add	%o2,4,%o2
  1901 /* 0x0aa0	     (13 13) */		cmp	%g1,%o7
  1902 /* 0x0aa4	     (13 14) */		add	%o3,4,%o3
  1903 /* 0x0aa8	     (14 14) */		sub	%o0,%g2,%o0
  1904 /* 0x0aac	     (15 15) */		add	%o0,%o5,%o5
  1905 /* 0x0ab0	     (16 17) */		and	%o5,%o4,%g2
  1906 /* 0x0ab4	     (16 24) */		ld	[%o3-4],%o0
  1907 /* 0x0ab8	     (17 18) */		st	%g2,[%o3-8]
  1908 /* 0x0abc	     (17 18) */		ble,pt	%icc,.L900000734	! tprob=0.50
  1909 /* 0x0ac0	     (17 18) */		srax	%o5,32,%o5
  1910                                    .L900000737:		/* frequency 8.0 confidence 0.0 */
/* Epilogue: last word (already in %o0), then return to the caller;     */
/* the final carry out is discarded.                                    */
  1911 /* 0x0ac4	     ( 0  3) */		ld	[%o2],%o1
  1912 /* 0x0ac8	     ( 2  3) */		sub	%o0,%o1,%o0
  1913 /* 0x0acc	     ( 3  4) */		add	%o0,%o5,%o0
  1914 /* 0x0ad0	     ( 4  5) */		and	%o0,%o4,%o1
  1915 /* 0x0ad4	     ( 4  5) */		st	%o1,[%o3-4]
  1916 /* 0x0ad8	     ( 5  7) */		ret	! Result = 
  1917 /* 0x0adc	     ( 7  8) */		restore	%g0,%g0,%g0
  1918                                    .L77000288:		/* frequency 0.6 confidence 0.0 */
/* Simple (non-pipelined) form of the in-place subtraction, reached     */
/* from .L77000287 when fewer than 3 words are to be processed.         */
/* On entry: %o3 -> minuend/result words, %o2 -> subtrahend words,      */
/* %o4 = 0xffffffff mask, %o5 = carry (0), %g1 = 0, %o7 = last index.   */
  1919 /* 0x0ae0	     ( 0  3) */		ld	[%o3],%o0
  1920                                    .L900000742:		/* frequency 5.3 confidence 0.0 */
/* One word per pass: sum = carry + [%o3] word - [%o2] word; store the  */
/* low 32 bits back to [%o3]; new carry = sum >> 32 (arithmetic).       */
  1921 /* 0x0ae4	     ( 0  3) */		ld	[%o2],%o1
  1922 /* 0x0ae8	     ( 0  1) */		add	%o5,%o0,%o0
  1923 /* 0x0aec	     ( 0  1) */		add	%g1,1,%g1
  1924 /* 0x0af0	     ( 1  2) */		add	%o2,4,%o2
  1925 /* 0x0af4	     ( 1  2) */		cmp	%g1,%o7
  1926 /* 0x0af8	     ( 2  3) */		sub	%o0,%o1,%o0
  1927 /* 0x0afc	     ( 3  4) */		and	%o0,%o4,%o1
  1928 /* 0x0b00	     ( 3  4) */		st	%o1,[%o3]
  1929 /* 0x0b04	     ( 3  4) */		add	%o3,4,%o3
  1930 /* 0x0b08	     ( 4  5) */		srax	%o0,32,%o5
/* Annulled delay slot: the next [%o3] word is loaded only when the     */
/* loop continues, so nothing past the last word is read.               */
  1931 /* 0x0b0c	     ( 4  5) */		ble,a,pt	%icc,.L900000742	! tprob=0.84
  1932 /* 0x0b10	     ( 4  7) */		ld	[%o3],%o0
  1933                                    .L77000224:		/* frequency 1.0 confidence 0.0 */
/* Common exit: return to the caller; "restore" in the delay slot pops  */
/* the register window.                                                 */
  1934 /* 0x0b14	     ( 0  2) */		ret	! Result = 
  1935 /* 0x0b18	     ( 2  3) */		restore	%g0,%g0,%g0
  1936 /* 0x0b1c	   0 ( 0  0) */		.type	mont_mulf_noconv,2
  1937 /* 0x0b1c	     ( 0  0) */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)

mercurial