security/nss/lib/freebl/mpi/vis_64.il

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/vis_64.il	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,997 @@
     1.4 +! 
     1.5 +! This Source Code Form is subject to the terms of the Mozilla Public
     1.6 +! License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 +! file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.8 +
     1.9 +! This file is to be used in place of vis.il in 64-bit builds.
    1.10 +
    1.11 +!--------------------------------------------------------------------
    1.12 +! Pure edge handling instructions
    1.13 +!
    1.14 +! int vis_edge8(void */*rs1*/, void */*rs2*/);
    1.15 +!
    1.16 +	.inline vis_edge8,16
    1.17 +	edge8	%o0,%o1,%o0	! integer-register operands %o0/%o1; result replaces %o0
    1.18 +	.end
    1.19 +!
    1.20 +! int vis_edge8l(void */*rs1*/, void */*rs2*/);
    1.21 +!
    1.22 +	.inline vis_edge8l,16
    1.23 +	edge8l	%o0,%o1,%o0	! integer-register operands %o0/%o1; result replaces %o0
    1.24 +	.end
    1.25 +!
    1.26 +! int vis_edge16(void */*rs1*/, void */*rs2*/);
    1.27 +!
    1.28 +	.inline vis_edge16,16
    1.29 +	edge16	%o0,%o1,%o0	! integer-register operands %o0/%o1; result replaces %o0
    1.30 +	.end
    1.31 +!
    1.32 +! int vis_edge16l(void */*rs1*/, void */*rs2*/);
    1.33 +!
    1.34 +	.inline vis_edge16l,16
    1.35 +	edge16l	%o0,%o1,%o0	! integer-register operands %o0/%o1; result replaces %o0
    1.36 +	.end
    1.37 +!
    1.38 +! int vis_edge32(void */*rs1*/, void */*rs2*/);
    1.39 +!
    1.40 +	.inline vis_edge32,16
    1.41 +	edge32	%o0,%o1,%o0	! integer-register operands %o0/%o1; result replaces %o0
    1.42 +	.end
    1.43 +!
    1.44 +! int vis_edge32l(void */*rs1*/, void */*rs2*/);
    1.45 +!
    1.46 +	.inline vis_edge32l,16
    1.47 +	edge32l	%o0,%o1,%o0	! integer-register operands %o0/%o1; result replaces %o0
    1.48 +	.end
    1.49 +
    1.50 +!--------------------------------------------------------------------
    1.51 +! Edge handling instructions with negative return values if cc set
    1.52 +!
    1.53 +! int vis_edge8cc(void */*rs1*/, void */*rs2*/);
    1.54 +!
    1.55 +	.inline vis_edge8cc,16
    1.56 +	edge8	%o0,%o1,%o0	! %o0 = edge8 result for the two pointers
    1.57 +	mov     0,%o1		! %o1 = 0 by default (rs2 is no longer needed)
    1.58 +	movgu   %xcc,-1024,%o1	! %o1 = -1024 when %xcc reads "greater, unsigned"
    1.59 +	or      %o1,%o0,%o0	! OR it in, so the return value is negative in that case
    1.60 +	.end
    1.61 +!
    1.62 +! int vis_edge8lcc(void */*rs1*/, void */*rs2*/);
    1.63 +!
    1.64 +	.inline vis_edge8lcc,16
    1.65 +	edge8l	%o0,%o1,%o0	! %o0 = edge8l result for the two pointers
    1.66 +	mov     0,%o1		! %o1 = 0 by default (rs2 is no longer needed)
    1.67 +	movgu   %xcc,-1024,%o1	! %o1 = -1024 when %xcc reads "greater, unsigned"
    1.68 +	or      %o1,%o0,%o0	! OR it in, so the return value is negative in that case
    1.69 +	.end
    1.70 +!
    1.71 +! int vis_edge16cc(void */*rs1*/, void */*rs2*/);
    1.72 +!
    1.73 +	.inline vis_edge16cc,16
    1.74 +	edge16	%o0,%o1,%o0	! %o0 = edge16 result for the two pointers
    1.75 +	mov     0,%o1		! %o1 = 0 by default (rs2 is no longer needed)
    1.76 +	movgu   %xcc,-1024,%o1	! %o1 = -1024 when %xcc reads "greater, unsigned"
    1.77 +	or      %o1,%o0,%o0	! OR it in, so the return value is negative in that case
    1.78 +	.end
    1.79 +!
    1.80 +! int vis_edge16lcc(void */*rs1*/, void */*rs2*/);
    1.81 +!
    1.82 +	.inline vis_edge16lcc,16
    1.83 +	edge16l	%o0,%o1,%o0	! %o0 = edge16l result for the two pointers
    1.84 +	mov     0,%o1		! %o1 = 0 by default (rs2 is no longer needed)
    1.85 +	movgu   %xcc,-1024,%o1	! %o1 = -1024 when %xcc reads "greater, unsigned"
    1.86 +	or      %o1,%o0,%o0	! OR it in, so the return value is negative in that case
    1.87 +	.end
    1.88 +!
    1.89 +! int vis_edge32cc(void */*rs1*/, void */*rs2*/);
    1.90 +!
    1.91 +	.inline vis_edge32cc,16
    1.92 +	edge32	%o0,%o1,%o0	! %o0 = edge32 result for the two pointers
    1.93 +	mov     0,%o1		! %o1 = 0 by default (rs2 is no longer needed)
    1.94 +	movgu   %xcc,-1024,%o1	! %o1 = -1024 when %xcc reads "greater, unsigned"
    1.95 +	or      %o1,%o0,%o0	! OR it in, so the return value is negative in that case
    1.96 +	.end
    1.97 +!
    1.98 +! int vis_edge32lcc(void */*rs1*/, void */*rs2*/);
    1.99 +!
   1.100 +	.inline vis_edge32lcc,16
   1.101 +	edge32l	%o0,%o1,%o0	! %o0 = edge32l result for the two pointers
   1.102 +	mov     0,%o1		! %o1 = 0 by default (rs2 is no longer needed)
   1.103 +	movgu   %xcc,-1024,%o1	! %o1 = -1024 when %xcc reads "greater, unsigned"
   1.104 +	or      %o1,%o0,%o0	! OR it in, so the return value is negative in that case
   1.105 +	.end
   1.106 +
   1.107 +!--------------------------------------------------------------------
   1.108 +! Alignment instructions
   1.109 +!
   1.110 +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
   1.111 +!
   1.112 +	.inline vis_alignaddr,12
   1.113 +	alignaddr	%o0,%o1,%o0	! pointer in %o0, int in %o1; pointer result returned in %o0
   1.114 +	.end
   1.115 +!
   1.116 +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
   1.117 +!
   1.118 +	.inline vis_alignaddrl,12
   1.119 +	alignaddrl	%o0,%o1,%o0	! pointer in %o0, int in %o1; pointer result returned in %o0
   1.120 +	.end
   1.121 +!
   1.122 +! double vis_faligndata(double /*frs1*/, double /*frs2*/);
   1.123 +!
   1.124 +	.inline vis_faligndata,16
   1.125 +	faligndata	%f0,%f2,%f0	! double operands in %f0 and %f2; result returned in %f0
   1.126 +	.end
   1.127 +
   1.128 +!--------------------------------------------------------------------
   1.129 +! Partitioned comparison instructions
   1.130 +!
   1.131 +! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
   1.132 +!
   1.133 +	.inline vis_fcmple16,16
   1.134 +	fcmple16	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.135 +	.end
   1.136 +!
   1.137 +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
   1.138 +!
   1.139 +	.inline vis_fcmpne16,16
   1.140 +	fcmpne16	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.141 +	.end
   1.142 +!
   1.143 +! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
   1.144 +!
   1.145 +	.inline vis_fcmple32,16
   1.146 +	fcmple32	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.147 +	.end
   1.148 +!
   1.149 +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
   1.150 +!
   1.151 +	.inline vis_fcmpne32,16
   1.152 +	fcmpne32	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.153 +	.end
   1.154 +!
   1.155 +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
   1.156 +!
   1.157 +	.inline vis_fcmpgt16,16
   1.158 +	fcmpgt16	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.159 +	.end
   1.160 +!
   1.161 +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
   1.162 +!
   1.163 +	.inline vis_fcmpeq16,16
   1.164 +	fcmpeq16	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.165 +	.end
   1.166 +!
   1.167 +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
   1.168 +!
   1.169 +	.inline vis_fcmpgt32,16
   1.170 +	fcmpgt32	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.171 +	.end
   1.172 +!
   1.173 +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
   1.174 +!
   1.175 +	.inline vis_fcmpeq32,16
   1.176 +	fcmpeq32	%f0,%f2,%o0	! FP operands %f0/%f2; int result goes to integer register %o0
   1.177 +	.end
   1.178 +
   1.179 +!--------------------------------------------------------------------
   1.180 +! Partitioned arithmetic
   1.181 +!
   1.182 +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
   1.183 +!
   1.184 +	.inline vis_fmul8x16,12
   1.185 +	fmul8x16	%f1,%f2,%f0	! float operand read from %f1, double from %f2; double result in %f0
   1.186 +	.end
   1.187 +!
   1.188 +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
   1.189 +!
   1.190 +	.inline vis_fmul8x16_dummy,16
   1.191 +	fmul8x16	%f1,%f4,%f0	! double operand is read from %f4 here, presumably because the dummy int takes the second argument slot
   1.192 +	.end
   1.193 +!
   1.194 +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
   1.195 +!
   1.196 +	.inline vis_fmul8x16au,8
   1.197 +	fmul8x16au	%f1,%f3,%f0	! float operands read from %f1 and %f3; double result in %f0
   1.198 +	.end
   1.199 +!
   1.200 +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
   1.201 +!
   1.202 +	.inline vis_fmul8x16al,8
   1.203 +	fmul8x16al	%f1,%f3,%f0	! float operands read from %f1 and %f3; double result in %f0
   1.204 +	.end
   1.205 +!
   1.206 +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
   1.207 +!
   1.208 +	.inline vis_fmul8sux16,16
   1.209 +	fmul8sux16	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.210 +	.end
   1.211 +!
   1.212 +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
   1.213 +!
   1.214 +	.inline vis_fmul8ulx16,16
   1.215 +	fmul8ulx16	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.216 +	.end
   1.217 +!
   1.218 +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
   1.219 +!
   1.220 +	.inline vis_fmuld8sux16,8
   1.221 +	fmuld8sux16	%f1,%f3,%f0	! float operands read from %f1 and %f3; double result in %f0
   1.222 +	.end
   1.223 +!
   1.224 +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
   1.225 +!
   1.226 +	.inline vis_fmuld8ulx16,8
   1.227 +	fmuld8ulx16	%f1,%f3,%f0	! float operands read from %f1 and %f3; double result in %f0
   1.228 +	.end
   1.229 +!
   1.230 +! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
   1.231 +!
   1.232 +	.inline vis_fpadd16,16
   1.233 +	fpadd16	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.234 +	.end
   1.235 +!
   1.236 +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
   1.237 +!
   1.238 +	.inline vis_fpadd16s,8
   1.239 +	fpadd16s	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.240 +	.end
   1.241 +!
   1.242 +! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
   1.243 +!
   1.244 +	.inline vis_fpadd32,16
   1.245 +	fpadd32	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.246 +	.end
   1.247 +!
   1.248 +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
   1.249 +!
   1.250 +	.inline vis_fpadd32s,8
   1.251 +	fpadd32s	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.252 +	.end
   1.253 +!
   1.254 +! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
   1.255 +!
   1.256 +	.inline vis_fpsub16,16
   1.257 +	fpsub16	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.258 +	.end
   1.259 +!
   1.260 +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
   1.261 +!
   1.262 +	.inline vis_fpsub16s,8
   1.263 +	fpsub16s	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.264 +	.end
   1.265 +!
   1.266 +! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
   1.267 +!
   1.268 +	.inline vis_fpsub32,16
   1.269 +	fpsub32	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.270 +	.end
   1.271 +!
   1.272 +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
   1.273 +!
   1.274 +	.inline vis_fpsub32s,8
   1.275 +	fpsub32s	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.276 +	.end
   1.277 +
   1.278 +!--------------------------------------------------------------------
   1.279 +! Pixel packing
   1.280 +!
   1.281 +! float vis_fpack16(double /*frs2*/);
   1.282 +!
   1.283 +	.inline vis_fpack16,8
   1.284 +	fpack16	%f0,%f0		! double source in %f0; float result overwrites %f0
   1.285 +	.end
   1.286 +!
   1.287 +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
   1.288 +!
   1.289 +	.inline vis_fpack16_pair,16
   1.290 +	fpack16	%f0,%f0		! first argument packed into %f0 (high word of the returned pair)
   1.291 +	fpack16	%f2,%f1		! second argument packed into %f1 (low word of the returned pair)
   1.292 +	.end
   1.293 +!
   1.294 +! void vis_st2_fpack16(double, double, double *);
   1.295 +!
   1.296 +	.inline vis_st2_fpack16,24
   1.297 + 	fpack16	%f0,%f0		! pack first double into %f0
   1.298 + 	fpack16	%f2,%f1		! pack second double into %f1
   1.299 + 	st	%f0,[%o2+0]	! first 32-bit result stored at offset 0 of the pointer in %o2
   1.300 + 	st	%f1,[%o2+4]	! second 32-bit result stored at offset 4
   1.301 + 	.end
   1.302 +!
   1.303 +! void vis_std_fpack16(double, double, double *);
   1.304 +!
   1.305 +	.inline vis_std_fpack16,24
   1.306 +	fpack16	%f0,%f0		! pack first double into %f0
   1.307 +	fpack16	%f2,%f1		! pack second double into %f1
   1.308 +	std	%f0,[%o2]	! one 64-bit store of the %f0:%f1 pair to the pointer in %o2
   1.309 +	.end
   1.310 +!
   1.311 +! void vis_st2_fpackfix(double, double, double *);
   1.312 +!
   1.313 +	.inline vis_st2_fpackfix,24
   1.314 + 	fpackfix %f0,%f0	! pack first double into %f0
   1.315 + 	fpackfix %f2,%f1	! pack second double into %f1
   1.316 + 	st	%f0,[%o2+0]	! first 32-bit result stored at offset 0 of the pointer in %o2
   1.317 + 	st	%f1,[%o2+4]	! second 32-bit result stored at offset 4
   1.318 + 	.end
   1.319 +!
   1.320 +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
   1.321 +!
   1.322 +	.inline vis_fpack16_to_hi,16
   1.323 +	fpack16	%f2,%f0		! packed frs2 replaces %f0 only; %f1 (low word of frs1) is not written
   1.324 +	.end
   1.325 +
   1.326 +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
   1.327 +!
   1.328 +	.inline vis_fpack16_to_lo,16
   1.329 +	fpack16	%f2,%f3		! packed frs2 lands in %f3 first, not directly in %f1
   1.330 +	fmovs	%f3,%f1		/* without this, optimizer goes wrong */
   1.331 +	.end
   1.332 +
   1.333 +!
   1.334 +! double vis_fpack32(double /*frs1*/, double /*frs2*/);
   1.335 +!
   1.336 +	.inline vis_fpack32,16
   1.337 +	fpack32	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.338 +	.end
   1.339 +!
   1.340 +! float vis_fpackfix(double /*frs2*/);
   1.341 +!
   1.342 +	.inline vis_fpackfix,8
   1.343 +	fpackfix	%f0,%f0		! double source in %f0; float result overwrites %f0
   1.344 +	.end
   1.345 +!
   1.346 +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
   1.347 +!
   1.348 +	.inline vis_fpackfix_pair,16
   1.349 +	fpackfix	%f0,%f0		! first argument packed into %f0 (high word of the returned pair)
   1.350 +	fpackfix	%f2,%f1		! second argument packed into %f1 (low word of the returned pair)
   1.351 +	.end
   1.352 +
   1.353 +!--------------------------------------------------------------------
   1.354 +! Motion estimation
   1.355 +!
   1.356 +! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/,
   1.357 +!		       double pxls2 /*frs2*/);
   1.358 +!
   1.359 +	.inline vis_pxldist64,24
   1.360 +	pdist	%f2,%f4,%f0	! pxls1 in %f2, pxls2 in %f4; %f0 (accum, frd) is the destination
   1.361 +	.end
   1.362 +
   1.363 +!--------------------------------------------------------------------
   1.364 +! Channel merging
   1.365 +!
   1.366 +! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
   1.367 +!
   1.368 +	.inline vis_fpmerge,8
   1.369 +	fpmerge	%f1,%f3,%f0	! float operands read from %f1 and %f3; double result in %f0
   1.370 +	.end
   1.371 +
   1.372 +!--------------------------------------------------------------------
   1.373 +! Pixel expansion
   1.374 +!
   1.375 +! double vis_fexpand(float /*frs2*/);
   1.376 +!
   1.377 +	.inline vis_fexpand,4
   1.378 +	fexpand	%f1,%f0		! float operand read from %f1; double result in %f0
   1.379 +	.end
   1.380 +
   1.381 +! double vis_fexpand_hi(double /*frs2*/);
   1.382 +!
   1.383 +	.inline vis_fexpand_hi,8
   1.384 +	fexpand	%f0,%f0		! source is %f0, the high word of the double argument
   1.385 +	.end
   1.386 +
   1.387 +! double vis_fexpand_lo(double /*frs2*/);
   1.388 +!
   1.389 +	.inline vis_fexpand_lo,8
   1.390 +	fexpand	%f1,%f0		! source is %f1, the low word of the double argument
   1.391 +	.end
   1.392 +
   1.393 +!--------------------------------------------------------------------
   1.394 +! Bitwise logical operations
   1.395 +!
   1.396 +! double vis_fnor(double /*frs1*/, double /*frs2*/);
   1.397 +!
   1.398 +	.inline vis_fnor,16
   1.399 +	fnor	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.400 +	.end
   1.401 +!
   1.402 +! float vis_fnors(float /*frs1*/, float /*frs2*/);
   1.403 +!
   1.404 +	.inline vis_fnors,8
   1.405 +	fnors	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.406 +	.end
   1.407 +!
   1.408 +! double vis_fandnot(double /*frs1*/, double /*frs2*/);
   1.409 +!
   1.410 +	.inline vis_fandnot,16
   1.411 +	fandnot1 %f0,%f2,%f0	! uses the "1" form (presumably complements frs1 - confirm in the VIS manual); result in %f0
   1.412 +	.end
   1.413 +!
   1.414 +! float vis_fandnots(float /*frs1*/, float /*frs2*/);
   1.415 +!
   1.416 +	.inline vis_fandnots,8
   1.417 +	fandnot1s %f1,%f3,%f0	! single-precision "1" form; float operands from %f1 and %f3, result in %f0
   1.418 +	.end
   1.419 +!
   1.420 +! double vis_fnot(double /*frs1*/);
   1.421 +!
   1.422 +	.inline vis_fnot,8
   1.423 +	fnot1	%f0,%f0		! double operand in %f0; result overwrites %f0
   1.424 +	.end
   1.425 +!
   1.426 +! float vis_fnots(float /*frs1*/);
   1.427 +!
   1.428 +	.inline vis_fnots,4
   1.429 +	fnot1s	%f1,%f0		! float operand read from %f1; float result in %f0
   1.430 +	.end
   1.431 +!
   1.432 +! double vis_fxor(double /*frs1*/, double /*frs2*/);
   1.433 +!
   1.434 +	.inline vis_fxor,16
   1.435 +	fxor	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.436 +	.end
   1.437 +!
   1.438 +! float vis_fxors(float /*frs1*/, float /*frs2*/);
   1.439 +!
   1.440 +	.inline vis_fxors,8
   1.441 +	fxors	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.442 +	.end
   1.443 +!
   1.444 +! double vis_fnand(double /*frs1*/, double /*frs2*/);
   1.445 +!
   1.446 +	.inline vis_fnand,16
   1.447 +	fnand	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.448 +	.end
   1.449 +!
   1.450 +! float vis_fnands(float /*frs1*/, float /*frs2*/);
   1.451 +!
   1.452 +	.inline vis_fnands,8
   1.453 +	fnands	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.454 +	.end
   1.455 +!
   1.456 +! double vis_fand(double /*frs1*/, double /*frs2*/);
   1.457 +!
   1.458 +	.inline vis_fand,16
   1.459 +	fand	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.460 +	.end
   1.461 +!
   1.462 +! float vis_fands(float /*frs1*/, float /*frs2*/);
   1.463 +!
   1.464 +	.inline vis_fands,8
   1.465 +	fands	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.466 +	.end
   1.467 +!
   1.468 +! double vis_fxnor(double /*frs1*/, double /*frs2*/);
   1.469 +!
   1.470 +	.inline vis_fxnor,16
   1.471 +	fxnor	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.472 +	.end
   1.473 +!
   1.474 +! float vis_fxnors(float /*frs1*/, float /*frs2*/);
   1.475 +!
   1.476 +	.inline vis_fxnors,8
   1.477 +	fxnors	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.478 +	.end
   1.479 +!
   1.480 +! double vis_fsrc(double /*frs1*/);
   1.481 +!
   1.482 +	.inline vis_fsrc,8
   1.483 +	fsrc1	%f0,%f0		! source and destination are both %f0
   1.484 +	.end
   1.485 +!
   1.486 +! float vis_fsrcs(float /*frs1*/);
   1.487 +!
   1.488 +	.inline vis_fsrcs,4
   1.489 +	fsrc1s	%f1,%f0		! float operand read from %f1; float result in %f0
   1.490 +	.end
   1.491 +!
   1.492 +! double vis_fornot(double /*frs1*/, double /*frs2*/);
   1.493 +!
   1.494 +	.inline vis_fornot,16
   1.495 +	fornot1	%f0,%f2,%f0	! uses the "1" form (presumably complements frs1 - confirm in the VIS manual); result in %f0
   1.496 +	.end
   1.497 +!
   1.498 +! float vis_fornots(float /*frs1*/, float /*frs2*/);
   1.499 +!
   1.500 +	.inline vis_fornots,8
   1.501 +	fornot1s %f1,%f3,%f0	! single-precision "1" form; float operands from %f1 and %f3, result in %f0
   1.502 +	.end
   1.503 +!
   1.504 +! double vis_for(double /*frs1*/, double /*frs2*/);
   1.505 +!
   1.506 +	.inline vis_for,16
   1.507 +	for	%f0,%f2,%f0	! double operands in %f0 and %f2; result overwrites %f0
   1.508 +	.end
   1.509 +!
   1.510 +! float vis_fors(float /*frs1*/, float /*frs2*/);
   1.511 +!
   1.512 +	.inline vis_fors,8
   1.513 +	fors	%f1,%f3,%f0	! float operands read from %f1 and %f3; float result in %f0
   1.514 +	.end
   1.515 +!
   1.516 +! double vis_fzero(/* void */);
   1.517 +!
   1.518 +	.inline	vis_fzero,0
   1.519 +	fzero	%f0		! no inputs; the double result is written at %f0
   1.520 +	.end
   1.521 +!
   1.522 +! float vis_fzeros(/* void */);
   1.523 +!
   1.524 +	.inline	vis_fzeros,0
   1.525 +	fzeros	%f0		! no inputs; the float result is written to %f0
   1.526 +	.end
   1.527 +!
   1.528 +! double vis_fone(/* void */);
   1.529 +!
   1.530 +	.inline	vis_fone,0
   1.531 +	fone	%f0		! no inputs; the double result is written at %f0
   1.532 +	.end
   1.533 +!
   1.534 +! float vis_fones(/* void */);
   1.535 +!
   1.536 +	.inline	vis_fones,0
   1.537 +	fones	%f0		! no inputs; the float result is written to %f0
   1.538 +	.end
   1.539 +
   1.540 +!--------------------------------------------------------------------
   1.541 +! Partial store instructions
   1.542 +!
   1.543 +! void vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask);
   1.544 +!
   1.545 +	.inline vis_stdfa_ASI_PST8P,20
   1.546 +	stda	%f0,[%o1]%o2,0xc0	! ASI_PST8_P; data %f0, address %o1, mask %o2
   1.547 +	.end
   1.548 +!
   1.549 +! void vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask);
   1.550 +!
   1.551 +	.inline vis_stdfa_ASI_PST8PL,20
   1.552 +	stda	%f0,[%o1]%o2,0xc8	! ASI_PST8_PL; data %f0, address %o1, mask %o2
   1.553 +	.end
   1.554 +!
   1.555 +! void vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
   1.556 +!
   1.557 +	.inline vis_stdfa_ASI_PST8P_int_pair,28
   1.558 +        ld	[%o0],%f4	! 32-bit word at rs1 -> %f4
   1.559 +        ld	[%o1],%f5	! 32-bit word at rs2 -> %f5
   1.560 +	stda	%f4,[%o2]%o3,0xc0	! ASI_PST8_P; stores the %f4:%f5 pair to rs3 under mask %o3
   1.561 +	.end
   1.562 +!
   1.563 +! void vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask);
   1.564 +!
   1.565 +	.inline vis_stdfa_ASI_PST8S,20
   1.566 +	stda	%f0,[%o1]%o2,0xc1	! ASI_PST8_S; data %f0, address %o1, mask %o2
   1.567 +	.end
   1.568 +!
   1.569 +! void vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask);
   1.570 +!
   1.571 +	.inline vis_stdfa_ASI_PST16P,20
   1.572 +	stda	%f0,[%o1]%o2,0xc2	! ASI_PST16_P; data %f0, address %o1, mask %o2
   1.573 +	.end
   1.574 +!
   1.575 +! void vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask);
   1.576 +!
   1.577 +	.inline vis_stdfa_ASI_PST16S,20
   1.578 +	stda	%f0,[%o1]%o2,0xc3	! ASI_PST16_S; data %f0, address %o1, mask %o2
   1.579 +	.end
   1.580 +!
   1.581 +! void vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask);
   1.582 +!
   1.583 +	.inline vis_stdfa_ASI_PST32P,20
   1.584 +	stda	%f0,[%o1]%o2,0xc4	! ASI_PST32_P; data %f0, address %o1, mask %o2
   1.585 +	.end
   1.586 +!
   1.587 +! void vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask);
   1.588 +!
   1.589 +	.inline vis_stdfa_ASI_PST32S,20
   1.590 +	stda	%f0,[%o1]%o2,0xc5	! ASI_PST32_S; data %f0, address %o1, mask %o2
   1.591 +	.end
   1.592 +
   1.593 +!--------------------------------------------------------------------
   1.594 +! Short store instructions
   1.595 +!
   1.596 +! void vis_stdfa_ASI_FL8P(double frd, void *rs1);
   1.597 +!
   1.598 +	.inline vis_stdfa_ASI_FL8P,16
   1.599 +	stda	%f0,[%o1]0xd0	! ASI_FL8_P; data %f0, address %o1
   1.600 +	.end
   1.601 +!
   1.602 +! void vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index);
   1.603 +!
   1.604 +	.inline vis_stdfa_ASI_FL8P_index,24
   1.605 +	stda	%f0,[%o1+%o2]0xd0 ! ASI_FL8_P; data %f0, address rs1 (%o1) + index (%o2)
   1.606 +	.end
   1.607 +!
   1.608 +! void vis_stdfa_ASI_FL8S(double frd, void *rs1);
   1.609 +!
   1.610 +	.inline vis_stdfa_ASI_FL8S,16
   1.611 +	stda	%f0,[%o1]0xd1	! ASI_FL8_S; data %f0, address %o1
   1.612 +	.end
   1.613 +!
   1.614 +! void vis_stdfa_ASI_FL16P(double frd, void *rs1);
   1.615 +!
   1.616 +	.inline vis_stdfa_ASI_FL16P,16
   1.617 +	stda	%f0,[%o1]0xd2	! ASI_FL16_P; data %f0, address %o1
   1.618 +	.end
   1.619 +!
   1.620 +! void vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index);
   1.621 +!
   1.622 +	.inline vis_stdfa_ASI_FL16P_index,24
   1.623 +	stda	%f0,[%o1+%o2]0xd2 ! ASI_FL16_P; data %f0, address rs1 (%o1) + index (%o2)
   1.624 +	.end
   1.625 +!
   1.626 +! void vis_stdfa_ASI_FL16S(double frd, void *rs1);
   1.627 +!
   1.628 +	.inline vis_stdfa_ASI_FL16S,16
   1.629 +	stda	%f0,[%o1]0xd3	! ASI_FL16_S; data %f0, address %o1
   1.630 +	.end
   1.631 +!
   1.632 +! void vis_stdfa_ASI_FL8PL(double frd, void *rs1);
   1.633 +!
   1.634 +	.inline vis_stdfa_ASI_FL8PL,16
   1.635 +	stda	%f0,[%o1]0xd8	! ASI_FL8_PL; data %f0, address %o1
   1.636 +	.end
   1.637 +!
   1.638 +! void vis_stdfa_ASI_FL8SL(double frd, void *rs1);
   1.639 +!
   1.640 +	.inline vis_stdfa_ASI_FL8SL,16
   1.641 +	stda	%f0,[%o1]0xd9	! ASI_FL8_SL; data %f0, address %o1
   1.642 +	.end
   1.643 +!
   1.644 +! void vis_stdfa_ASI_FL16PL(double frd, void *rs1);
   1.645 +!
   1.646 +	.inline vis_stdfa_ASI_FL16PL,16
   1.647 +	stda	%f0,[%o1]0xda	! ASI_FL16_PL; data %f0, address %o1
   1.648 +	.end
   1.649 +!
   1.650 +! void vis_stdfa_ASI_FL16SL(double frd, void *rs1);
   1.651 +!
   1.652 +	.inline vis_stdfa_ASI_FL16SL,16
   1.653 +	stda	%f0,[%o1]0xdb	! ASI_FL16_SL; data %f0, address %o1
   1.654 +	.end
   1.655 +
   1.656 +!--------------------------------------------------------------------
   1.657 +! Short load instructions
   1.658 +!
   1.659 +! double vis_lddfa_ASI_FL8P(void *rs1);
   1.660 +!
   1.661 +	.inline vis_lddfa_ASI_FL8P,8
   1.662 +	ldda	[%o0]0xd0,%f4	! ASI_FL8_P; loads into %f4 rather than the return register
   1.663 +	fmovd	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.664 +	.end
   1.665 +!
   1.666 +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index);
   1.667 +!
   1.668 +	.inline vis_lddfa_ASI_FL8P_index,16
   1.669 +	ldda	[%o0+%o1]0xd0,%f4	! ASI_FL8_P; address is rs1 (%o0) + index (%o1)
   1.670 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.671 +	.end
   1.672 +!
   1.673 +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index);
   1.674 +!
   1.675 +	.inline vis_lddfa_ASI_FL8P_hi,12
   1.676 +	sra     %o1,16,%o1	! offset = upper 16 bits of the 32-bit index, sign-extended
   1.677 +	ldda	[%o0+%o1]0xd0,%f4	! ASI_FL8_P; address is rs1 plus that offset
   1.678 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.679 +	.end
   1.680 +!
   1.681 +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index);
   1.682 +!
   1.683 +	.inline vis_lddfa_ASI_FL8P_lo,12
   1.684 +	sll     %o1,16,%o1	! move the low 16 bits of index up to bits 31..16 ...
   1.685 +	sra     %o1,16,%o1	! ... then shift back arithmetically: offset = low 16 bits, sign-extended
   1.686 +	ldda	[%o0+%o1]0xd0,%f4	! ASI_FL8_P; address is rs1 plus that offset
   1.687 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.688 +	.end
   1.689 +!
   1.690 +! double vis_lddfa_ASI_FL8S(void *rs1);
   1.691 +!
   1.692 +	.inline vis_lddfa_ASI_FL8S,8
   1.693 +	ldda	[%o0]0xd1,%f4	! ASI_FL8_S; loads into %f4
   1.694 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.695 +	.end
   1.696 +!
   1.697 +! double vis_lddfa_ASI_FL16P(void *rs1);
   1.698 +!
   1.699 +	.inline vis_lddfa_ASI_FL16P,8
   1.700 +	ldda	[%o0]0xd2,%f4	! ASI_FL16_P; loads into %f4
   1.701 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.702 +	.end
   1.703 +!
   1.704 +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index);
   1.705 +!
   1.706 +	.inline vis_lddfa_ASI_FL16P_index,16
   1.707 +	ldda	[%o0+%o1]0xd2,%f4 ! ASI_FL16_P; address is rs1 (%o0) + index (%o1)
   1.708 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.709 +	.end
   1.710 +!
   1.711 +! double vis_lddfa_ASI_FL16S(void *rs1);
   1.712 +!
   1.713 +	.inline vis_lddfa_ASI_FL16S,8
   1.714 +	ldda	[%o0]0xd3,%f4	! ASI_FL16_S; loads into %f4
   1.715 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.716 +	.end
   1.717 +!
   1.718 +! double vis_lddfa_ASI_FL8PL(void *rs1);
   1.719 +!
   1.720 +	.inline vis_lddfa_ASI_FL8PL,8
   1.721 +	ldda	[%o0]0xd8,%f4	! ASI_FL8_PL; loads into %f4
   1.722 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.723 +	.end
   1.724 +!
   1.725 +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index);
   1.726 +!
   1.727 +	.inline vis_lddfa_ASI_FL8PL_index,16
   1.728 +	ldda	[%o0+%o1]0xd8,%f4	! ASI_FL8_PL; address is rs1 (%o0) + index (%o1)
   1.729 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.730 +	.end
   1.731 +!
   1.732 +! double vis_lddfa_ASI_FL8SL(void *rs1);
   1.733 +!
   1.734 +	.inline vis_lddfa_ASI_FL8SL,8
   1.735 +	ldda	[%o0]0xd9,%f4	! ASI_FL8_SL; loads into %f4
   1.736 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.737 +	.end
   1.738 +!
   1.739 +! double vis_lddfa_ASI_FL16PL(void *rs1);
   1.740 +!
   1.741 +	.inline vis_lddfa_ASI_FL16PL,8
   1.742 +	ldda	[%o0]0xda,%f4	! ASI_FL16_PL; loads into %f4
   1.743 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.744 +	.end
   1.745 +!
   1.746 +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index);
   1.747 +!
   1.748 +	.inline vis_lddfa_ASI_FL16PL_index,16
   1.749 +	ldda	[%o0+%o1]0xda,%f4	! ASI_FL16_PL; address is rs1 (%o0) + index (%o1)
   1.750 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.751 +	.end
   1.752 +!
   1.753 +! double vis_lddfa_ASI_FL16SL(void *rs1);
   1.754 +!
   1.755 +	.inline vis_lddfa_ASI_FL16SL,8
   1.756 +	ldda	[%o0]0xdb,%f4	! ASI_FL16_SL; loads into %f4
   1.757 +	fmovd	%f4,%f0		! copy the loaded value into the return register
   1.758 +	.end
   1.759 +
   1.760 +!--------------------------------------------------------------------
   1.761 +! Graphics status register
   1.762 +!
   1.763 +! unsigned int vis_read_gsr(void);
   1.764 +!
   1.765 +	.inline vis_read_gsr,0
   1.766 +	rd	%gsr,%o0	! GSR contents returned in %o0
   1.767 +	.end
   1.768 +!
   1.769 +! void vis_write_gsr(unsigned int /* GSR */);
   1.770 +!
   1.771 +	.inline vis_write_gsr,4
   1.772 +	wr	%g0,%o0,%gsr	! first operand is the zero register %g0, so GSR receives the argument as is
   1.773 +	.end
   1.774 +
   1.775 +!--------------------------------------------------------------------
   1.776 +! Voxel texture mapping
   1.777 +!
   1.778 +! unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/);
   1.779 +!
   1.780 +	.inline	vis_array8,12
   1.781 +	array8	%o0,%o1,%o0	! integer operands %o0/%o1; result replaces %o0
   1.782 +	.end
   1.783 +!
   1.784 +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/);
   1.785 +!
   1.786 +	.inline	vis_array16,12
   1.787 +	array16	%o0,%o1,%o0	! integer operands %o0/%o1; result replaces %o0
   1.788 +	.end
   1.789 +!
   1.790 +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/);
   1.791 +!
   1.792 +	.inline	vis_array32,12
   1.793 +	array32	%o0,%o1,%o0	! integer operands %o0/%o1; result replaces %o0
   1.794 +	.end
   1.795 +
   1.796 +!--------------------------------------------------------------------
   1.797 +! Register aliasing and type casts
   1.798 +!
   1.799 +! float vis_read_hi(double /* frs1 */);
   1.800 +!
   1.801 +	.inline vis_read_hi,8
   1.802 +	fmovs	%f0,%f0		! self-move: the high word of frs1 is already in %f0
   1.803 +	.end
   1.804 +!
   1.805 +! float vis_read_lo(double /* frs1 */);
   1.806 +!
   1.807 +	.inline vis_read_lo,8
   1.808 +	fmovs	%f1,%f0		! %f1 = low word of frs1; copy it to %f0 and return %f0
   1.809 +	.end
   1.810 +!
   1.811 +! double vis_write_hi(double /* frs1 */, float /* frs2 */);
   1.812 +!
   1.813 +	.inline vis_write_hi,12
   1.814 +	fmovs	%f3,%f0		! %f3 = float frs2, written over the high word; return %f0:%f1
   1.815 +	.end
   1.816 +!
   1.817 +! double vis_write_lo(double /* frs1 */, float /* frs2 */);
   1.818 +!
   1.819 +	.inline vis_write_lo,12
   1.820 +	fmovs	%f3,%f1		! %f3 = float frs2, written over the low word; return %f0:%f1
   1.821 +	.end
   1.822 +!
   1.823 +! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
   1.824 +!
   1.825 +	.inline vis_freg_pair,8
   1.826 +	fmovs	%f1,%f0		! %f1 = float frs1; put in hi (must run first: the next line overwrites %f1)
   1.827 +	fmovs	%f3,%f1		! %f3 = float frs2; put in lo; return %f0:%f1
   1.828 +	.end
   1.829 +!
   1.830 +! float vis_to_float(unsigned int /*value*/);
   1.831 +!
   1.832 +	.inline vis_to_float,4
   1.833 +	st	%o0,[%sp+2183]	! write the 32-bit value to a stack slot; NOTE(review): 2183 presumably = 2047 stack bias + 136 - confirm
   1.834 +	ld	[%sp+2183],%f0	! read the same bits back into %f0 (moved through memory, not converted)
   1.835 +	.end
   1.836 +!
   1.837 +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
   1.838 +!
   1.839 +	.inline vis_to_double,8
   1.840 +	st	%o0,[%sp+2183]	! value1 -> stack slot
   1.841 +	ld	[%sp+2183],%f0	! slot -> %f0 (high word of the result)
   1.842 +	st	%o1,[%sp+2183]	! value2 -> the same stack slot, reused
   1.843 +	ld	[%sp+2183],%f1	! slot -> %f1 (low word of the result)
   1.844 +	.end
   1.845 +!
   1.846 +! double vis_to_double_dup(unsigned int /*value*/);
   1.847 +!
   1.848 +	.inline vis_to_double_dup,4
   1.849 +	st	%o0,[%sp+2183]	! value -> stack slot
   1.850 +	ld	[%sp+2183],%f1	! slot -> %f1 (low word of the result)
   1.851 +	fmovs	%f1,%f0		! duplicate value into the high word
   1.852 +	.end
   1.853 +!
   1.854 +! double vis_ll_to_double(unsigned long long /*value*/);
   1.855 +!
   1.856 +	.inline vis_ll_to_double,8
   1.857 +	stx     %o0,[%sp+2183]	! 64-bit store of the whole argument to the stack slot
   1.858 +	ldd     [%sp+2183],%f0	! 64-bit load of the same bits into the %f0:%f1 pair
   1.859 +        .end
   1.860 +
   1.861 +!--------------------------------------------------------------------
   1.862 +! Address space identifier (ASI) register
   1.863 +!
   1.864 +! unsigned int vis_read_asi(void);
   1.865 +!
   1.866 +	.inline vis_read_asi,0
   1.867 +	rd	%asi,%o0	! ASI register contents returned in %o0
   1.868 +	.end
   1.869 +!
   1.870 +! void vis_write_asi(unsigned int /* ASI */);
   1.871 +!
   1.872 +	.inline vis_write_asi,4
   1.873 +	wr	%g0,%o0,%asi	! first operand is the zero register %g0, so %asi receives the argument as is
   1.874 +	.end
   1.875 +
   1.876 +!--------------------------------------------------------------------
   1.877 +! Load/store from/into alternate space
   1.878 +!
   1.879 +! float vis_ldfa_ASI_REG(void *rs1);
   1.880 +!
   1.881 +	.inline vis_ldfa_ASI_REG,8
   1.882 +	lda	[%o0+0]%asi,%f4	! load via whatever ASI the %asi register currently holds
   1.883 +	fmovs	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.884 +	.end
   1.885 +!
   1.886 +! float vis_ldfa_ASI_P(void *rs1);
   1.887 +!
   1.888 +	.inline vis_ldfa_ASI_P,8
   1.889 +	lda	[%o0]0x80,%f4	! ASI_P; loads into %f4
   1.890 +	fmovs	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.891 +	.end
   1.892 +!
   1.893 +! float vis_ldfa_ASI_PL(void *rs1);
   1.894 +!
   1.895 +	.inline vis_ldfa_ASI_PL,8
   1.896 +	lda	[%o0]0x88,%f4	! ASI_PL; loads into %f4
   1.897 +	fmovs	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.898 +	.end
   1.899 +!
   1.900 +! double vis_lddfa_ASI_REG(void *rs1);
   1.901 +!
   1.902 +	.inline vis_lddfa_ASI_REG,8
   1.903 +	ldda	[%o0+0]%asi,%f4	! load via whatever ASI the %asi register currently holds
   1.904 +	fmovd	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.905 +	.end
   1.906 +!
   1.907 +! double vis_lddfa_ASI_P(void *rs1);
   1.908 +!
   1.909 +	.inline vis_lddfa_ASI_P,8
   1.910 +	ldda	[%o0]0x80,%f4	! ASI_P; loads into %f4
   1.911 +	fmovd	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.912 +	.end
   1.913 +!
   1.914 +! double vis_lddfa_ASI_PL(void *rs1);
   1.915 +!
   1.916 +	.inline vis_lddfa_ASI_PL,8
   1.917 +	ldda	[%o0]0x88,%f4	! ASI_PL; loads into %f4
   1.918 +	fmovd	%f4,%f0	        ! Copy to the return register; compiler can clean this up
   1.919 +	.end
   1.920 +!
   1.921 +! void vis_stfa_ASI_REG(float frs, void *rs1);
   1.922 +!
   1.923 +	.inline vis_stfa_ASI_REG,12
   1.924 +	sta	%f1,[%o1+0]%asi	! float data read from %f1, address %o1; ASI taken from the %asi register
   1.925 +	.end
   1.926 +!
   1.927 +! void vis_stfa_ASI_P(float frs, void *rs1);
   1.928 +!
   1.929 +	.inline vis_stfa_ASI_P,12
   1.930 +	sta	%f1,[%o1]0x80	! ASI_P; float data read from %f1, address %o1
   1.931 +	.end
   1.932 +!
   1.933 +! void vis_stfa_ASI_PL(float frs, void *rs1);
   1.934 +!
   1.935 +	.inline vis_stfa_ASI_PL,12
   1.936 +	sta	%f1,[%o1]0x88	! ASI_PL; float data read from %f1, address %o1
   1.937 +	.end
   1.938 +!
   1.939 +! void vis_stdfa_ASI_REG(double frd, void *rs1);
   1.940 +!
   1.941 +	.inline vis_stdfa_ASI_REG,16
   1.942 +	stda	%f0,[%o1+0]%asi	! double data in %f0, address %o1; ASI taken from the %asi register
   1.943 +	.end
   1.944 +!
   1.945 +! void vis_stdfa_ASI_P(double frd, void *rs1);
   1.946 +!
   1.947 +	.inline vis_stdfa_ASI_P,16
   1.948 +	stda	%f0,[%o1]0x80	! ASI_P; double data in %f0, address %o1
   1.949 +	.end
   1.950 +!
   1.951 +! void vis_stdfa_ASI_PL(double frd, void *rs1);
   1.952 +!
   1.953 +	.inline vis_stdfa_ASI_PL,16
   1.954 +	stda	%f0,[%o1]0x88	! ASI_PL; double data in %f0, address %o1
   1.955 +	.end
   1.956 +!
   1.957 +! unsigned short vis_lduha_ASI_REG(void *rs1);
   1.958 +!
   1.959 +	.inline vis_lduha_ASI_REG,8
   1.960 +	lduha	[%o0+0]%asi,%o0	! halfword load via the %asi register; result replaces the pointer in %o0
   1.961 +	.end
   1.962 +!
   1.963 +! unsigned short vis_lduha_ASI_P(void *rs1);
   1.964 +!
   1.965 +	.inline vis_lduha_ASI_P,8
   1.966 +	lduha	[%o0]0x80,%o0	! ASI_P; result replaces the pointer in %o0
   1.967 +	.end
   1.968 +!
   1.969 +! unsigned short vis_lduha_ASI_PL(void *rs1);
   1.970 +!
   1.971 +	.inline vis_lduha_ASI_PL,8
   1.972 +	lduha	[%o0]0x88,%o0	! ASI_PL; result replaces the pointer in %o0
   1.973 +	.end
   1.974 +!
   1.975 +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index);
   1.976 +!
   1.977 +	.inline vis_lduha_ASI_P_index,16
   1.978 +	lduha	[%o0+%o1]0x80,%o0	! ASI_P; address is rs1 (%o0) + index (%o1), result in %o0
   1.979 +	.end
   1.980 +!
   1.981 +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index);
   1.982 +!
   1.983 +	.inline vis_lduha_ASI_PL_index,16
   1.984 +	lduha	[%o0+%o1]0x88,%o0	! ASI_PL; address is rs1 (%o0) + index (%o1), result in %o0
   1.985 +	.end
   1.986 +
   1.987 +!--------------------------------------------------------------------
   1.988 +! Prefetch
   1.989 +!
   1.990 +! void vis_prefetch_read(void * /*address*/);
   1.991 +!
   1.992 +	.inline vis_prefetch_read,8
   1.993 +	prefetch	[%o0+0],0	! function code 0 (the read variant, going by the template name)
   1.994 +	.end
   1.995 +!
   1.996 +! void vis_prefetch_write(void * /*address*/);
   1.997 +!
   1.998 +	.inline vis_prefetch_write,8
   1.999 +	prefetch	[%o0+0],2	! function code 2 (the write variant, going by the template name)
  1.1000 +	.end

mercurial