security/nss/lib/freebl/mpi/vis_32.il

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/vis_32.il	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1291 @@
     1.4 +! 
     1.5 +! This Source Code Form is subject to the terms of the Mozilla Public
     1.6 +! License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 +! file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.8 +
     1.9 +! The interface to the VIS instructions as declared below (and in the VIS
    1.10 +! User's Manual) will not change, but the macro implementation might change
    1.11 +! in the future.
    1.12 +
    1.13 +!--------------------------------------------------------------------
    1.14 +! Pure edge handling instructions
    1.15 +!
    1.16 +! int vis_edge8(void */*frs1*/, void */*frs2*/);
    1.17 +! Inline template: a single edge8 on the two arguments; the result is left in %o0.
    1.18 +	.inline vis_edge8,8
    1.19 +	edge8	%o0,%o1,%o0
    1.20 +	.end
    1.21 +!
    1.22 +! int vis_edge8l(void */*frs1*/, void */*frs2*/);
    1.23 +! Inline template: a single edge8l on the two arguments; the result is left in %o0.
    1.24 +	.inline vis_edge8l,8
    1.25 +	edge8l	%o0,%o1,%o0
    1.26 +	.end
    1.27 +!
    1.28 +! int vis_edge16(void */*frs1*/, void */*frs2*/);
    1.29 +! Inline template: a single edge16 on the two arguments; the result is left in %o0.
    1.30 +	.inline vis_edge16,8
    1.31 +	edge16	%o0,%o1,%o0
    1.32 +	.end
    1.33 +!
    1.34 +! int vis_edge16l(void */*frs1*/, void */*frs2*/);
    1.35 +! Inline template: a single edge16l on the two arguments; the result is left in %o0.
    1.36 +	.inline vis_edge16l,8
    1.37 +	edge16l	%o0,%o1,%o0
    1.38 +	.end
    1.39 +!
    1.40 +! int vis_edge32(void */*frs1*/, void */*frs2*/);
    1.41 +! Inline template: a single edge32 on the two arguments; the result is left in %o0.
    1.42 +	.inline vis_edge32,8
    1.43 +	edge32	%o0,%o1,%o0
    1.44 +	.end
    1.45 +!
    1.46 +! int vis_edge32l(void */*frs1*/, void */*frs2*/);
    1.47 +! Inline template: a single edge32l on the two arguments; the result is left in %o0.
    1.48 +	.inline vis_edge32l,8
    1.49 +	edge32l	%o0,%o1,%o0
    1.50 +	.end
    1.51 +
    1.52 +!--------------------------------------------------------------------
    1.53 +! Edge handling instructions with negative return values if cc set
    1.54 +!
    1.55 +! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
    1.56 +! Template: edge8, then ORs -1024 into the result when the movgu test on %icc passes.
    1.57 +	.inline vis_edge8cc,8
    1.58 +	edge8	%o0,%o1,%o0
    1.59 +	mov     0,%o1	! default: nothing extra is ORed into the result
    1.60 +	movgu   %icc,-1024,%o1	! %icc presumably as left by the edge8 above - confirm
    1.61 +	or      %o1,%o0,%o0	! %o0 = edge result | (0 or -1024)
    1.62 +	.end
    1.63 +!
    1.64 +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
    1.65 +! Template: edge8l, then ORs -1024 into the result when the movgu test on %icc passes.
    1.66 +	.inline vis_edge8lcc,8
    1.67 +	edge8l	%o0,%o1,%o0
    1.68 +	mov     0,%o1	! default: nothing extra is ORed into the result
    1.69 +	movgu   %icc,-1024,%o1	! %icc presumably as left by the edge8l above - confirm
    1.70 +	or      %o1,%o0,%o0	! %o0 = edge result | (0 or -1024)
    1.71 +	.end
    1.72 +!
    1.73 +! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
    1.74 +! Template: edge16, then ORs -1024 into the result when the movgu test on %icc passes.
    1.75 +	.inline vis_edge16cc,8
    1.76 +	edge16	%o0,%o1,%o0
    1.77 +	mov     0,%o1	! default: nothing extra is ORed into the result
    1.78 +	movgu   %icc,-1024,%o1	! %icc presumably as left by the edge16 above - confirm
    1.79 +	or      %o1,%o0,%o0	! %o0 = edge result | (0 or -1024)
    1.80 +	.end
    1.81 +!
    1.82 +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
    1.83 +! Template: edge16l, then ORs -1024 into the result when the movgu test on %icc passes.
    1.84 +	.inline vis_edge16lcc,8
    1.85 +	edge16l	%o0,%o1,%o0
    1.86 +	mov     0,%o1	! default: nothing extra is ORed into the result
    1.87 +	movgu   %icc,-1024,%o1	! %icc presumably as left by the edge16l above - confirm
    1.88 +	or      %o1,%o0,%o0	! %o0 = edge result | (0 or -1024)
    1.89 +	.end
    1.90 +!
    1.91 +! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
    1.92 +! Template: edge32, then ORs -1024 into the result when the movgu test on %icc passes.
    1.93 +	.inline vis_edge32cc,8
    1.94 +	edge32	%o0,%o1,%o0
    1.95 +	mov     0,%o1	! default: nothing extra is ORed into the result
    1.96 +	movgu   %icc,-1024,%o1	! %icc presumably as left by the edge32 above - confirm
    1.97 +	or      %o1,%o0,%o0	! %o0 = edge result | (0 or -1024)
    1.98 +	.end
    1.99 +!
   1.100 +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
   1.101 +! Template: edge32l, then ORs -1024 into the result when the movgu test on %icc passes.
   1.102 +	.inline vis_edge32lcc,8
   1.103 +	edge32l	%o0,%o1,%o0
   1.104 +	mov     0,%o1	! default: nothing extra is ORed into the result
   1.105 +	movgu   %icc,-1024,%o1	! %icc presumably as left by the edge32l above - confirm
   1.106 +	or      %o1,%o0,%o0	! %o0 = edge result | (0 or -1024)
   1.107 +	.end
   1.108 +
   1.109 +!--------------------------------------------------------------------
   1.110 +! Alignment instructions
   1.111 +!
   1.112 +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
   1.113 +! Inline template: a single alignaddr on %o0,%o1; the result is left in %o0.
   1.114 +	.inline vis_alignaddr,8
   1.115 +	alignaddr	%o0,%o1,%o0
   1.116 +	.end
   1.117 +!
   1.118 +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
   1.119 +! Inline template: a single alignaddrl on %o0,%o1; the result is left in %o0.
   1.120 +	.inline vis_alignaddrl,8
   1.121 +	alignaddrl	%o0,%o1,%o0
   1.122 +	.end
   1.123 +!
   1.124 +! double vis_faligndata(double /*frs1*/, double /*frs2*/);
   1.125 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); faligndata result in %f0.
   1.126 +	.inline vis_faligndata,16
   1.127 +	std	%o0,[%sp+0x48]
   1.128 +	ldd	[%sp+0x48],%f4
   1.129 +	std	%o2,[%sp+0x48]
   1.130 +	ldd	[%sp+0x48],%f10
   1.131 +	faligndata	%f4,%f10,%f0
   1.132 +	.end
   1.133 +
   1.134 +!--------------------------------------------------------------------
   1.135 +! Partitioned comparison instructions
   1.136 +!
   1.137 +! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
   1.138 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmple16 result in %o0.
   1.139 +	.inline vis_fcmple16,16
   1.140 +	std	%o0,[%sp+0x48]
   1.141 +	ldd	[%sp+0x48],%f4
   1.142 +	std	%o2,[%sp+0x48]
   1.143 +	ldd	[%sp+0x48],%f10
   1.144 +	fcmple16	%f4,%f10,%o0
   1.145 +	.end
   1.146 +!
   1.147 +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
   1.148 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmpne16 result in %o0.
   1.149 +	.inline vis_fcmpne16,16
   1.150 +	std	%o0,[%sp+0x48]
   1.151 +	ldd	[%sp+0x48],%f4
   1.152 +	std	%o2,[%sp+0x48]
   1.153 +	ldd	[%sp+0x48],%f10
   1.154 +	fcmpne16	%f4,%f10,%o0
   1.155 +	.end
   1.156 +!
   1.157 +! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
   1.158 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmple32 result in %o0.
   1.159 +	.inline vis_fcmple32,16
   1.160 +	std	%o0,[%sp+0x48]
   1.161 +	ldd	[%sp+0x48],%f4
   1.162 +	std	%o2,[%sp+0x48]
   1.163 +	ldd	[%sp+0x48],%f10
   1.164 +	fcmple32	%f4,%f10,%o0
   1.165 +	.end
   1.166 +!
   1.167 +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
   1.168 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmpne32 result in %o0.
   1.169 +	.inline vis_fcmpne32,16
   1.170 +	std	%o0,[%sp+0x48]
   1.171 +	ldd	[%sp+0x48],%f4
   1.172 +	std	%o2,[%sp+0x48]
   1.173 +	ldd	[%sp+0x48],%f10
   1.174 +	fcmpne32	%f4,%f10,%o0
   1.175 +	.end
   1.176 +!
   1.177 +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
   1.178 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmpgt16 result in %o0.
   1.179 +	.inline vis_fcmpgt16,16
   1.180 +	std	%o0,[%sp+0x48]
   1.181 +	ldd	[%sp+0x48],%f4
   1.182 +	std	%o2,[%sp+0x48]
   1.183 +	ldd	[%sp+0x48],%f10
   1.184 +	fcmpgt16	%f4,%f10,%o0
   1.185 +	.end
   1.186 +!
   1.187 +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
   1.188 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmpeq16 result in %o0.
   1.189 +	.inline vis_fcmpeq16,16
   1.190 +	std	%o0,[%sp+0x48]
   1.191 +	ldd	[%sp+0x48],%f4
   1.192 +	std	%o2,[%sp+0x48]
   1.193 +	ldd	[%sp+0x48],%f10
   1.194 +	fcmpeq16	%f4,%f10,%o0
   1.195 +	.end
   1.196 +!
   1.197 +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
   1.198 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmpgt32 result in %o0.
   1.199 +	.inline vis_fcmpgt32,16
   1.200 +	std	%o0,[%sp+0x48]
   1.201 +	ldd	[%sp+0x48],%f4
   1.202 +	std	%o2,[%sp+0x48]
   1.203 +	ldd	[%sp+0x48],%f10
   1.204 +	fcmpgt32	%f4,%f10,%o0
   1.205 +	.end
   1.206 +!
   1.207 +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
   1.208 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fcmpeq32 result in %o0.
   1.209 +	.inline vis_fcmpeq32,16
   1.210 +	std	%o0,[%sp+0x48]
   1.211 +	ldd	[%sp+0x48],%f4
   1.212 +	std	%o2,[%sp+0x48]
   1.213 +	ldd	[%sp+0x48],%f10
   1.214 +	fcmpeq32	%f4,%f10,%o0
   1.215 +	.end
   1.216 +
   1.217 +!--------------------------------------------------------------------
   1.218 +! Partitioned arithmetic
   1.219 +!
   1.220 +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
   1.221 +! Template: float %o0 -> [%sp+0x44] -> %f4; double %o1,%o2 -> [%sp+0x48..0x4f] -> %f10; result in %f0.
   1.222 +	.inline vis_fmul8x16,12
   1.223 +	st	%o0,[%sp+0x44]
   1.224 +	ld	[%sp+0x44],%f4
   1.225 +	st	%o1,[%sp+0x48]	! double is stored word by word (presumably as %o1:%o2 is no std pair)
   1.226 +	st	%o2,[%sp+0x4c]
   1.227 +	ldd	[%sp+0x48],%f10
   1.228 +	fmul8x16	%f4,%f10,%f0
   1.229 +	.end
   1.230 +!
   1.231 +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
   1.232 +! Template: float %o0 -> [%sp+0x44] -> %f4; double %o2:%o3 -> [%sp+0x48] -> %f10; %o1 (dummy) is unused.
   1.233 +	.inline vis_fmul8x16_dummy,16
   1.234 +	st	%o0,[%sp+0x44]
   1.235 +	ld	[%sp+0x44],%f4
   1.236 +	std	%o2,[%sp+0x48]	! with the dummy in %o1 the double is read from %o2:%o3 by one std
   1.237 +	ldd	[%sp+0x48],%f10
   1.238 +	fmul8x16	%f4,%f10,%f0
   1.239 +	.end
   1.240 +!
   1.241 +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
   1.242 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fmul8x16au result in %f0.
   1.243 +	.inline vis_fmul8x16au,8
   1.244 +	st	%o0,[%sp+0x48]
   1.245 +	ld	[%sp+0x48],%f4
   1.246 +	st	%o1,[%sp+0x48]
   1.247 +	ld	[%sp+0x48],%f10
   1.248 +	fmul8x16au	%f4,%f10,%f0
   1.249 +	.end
   1.250 +!
   1.251 +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
   1.252 +! Template: %o0 -> [%sp+0x44] -> %f4 and %o1 -> [%sp+0x48] -> %f10; fmul8x16al result in %f0.
   1.253 +	.inline vis_fmul8x16al,8
   1.254 +	st	%o0,[%sp+0x44]
   1.255 +	ld	[%sp+0x44],%f4
   1.256 +	st	%o1,[%sp+0x48]
   1.257 +	ld	[%sp+0x48],%f10
   1.258 +	fmul8x16al	%f4,%f10,%f0
   1.259 +	.end
   1.260 +!
   1.261 +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
   1.262 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fmul8sux16 result in %f0.
   1.263 +	.inline vis_fmul8sux16,16
   1.264 +	std	%o0,[%sp+0x48]
   1.265 +	ldd	[%sp+0x48],%f4
   1.266 +	std	%o2,[%sp+0x48]
   1.267 +	ldd	[%sp+0x48],%f10
   1.268 +	fmul8sux16	%f4,%f10,%f0
   1.269 +	.end
   1.270 +!
   1.271 +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
   1.272 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fmul8ulx16 result in %f0.
   1.273 +	.inline vis_fmul8ulx16,16
   1.274 +	std	%o0,[%sp+0x48]
   1.275 +	ldd	[%sp+0x48],%f4
   1.276 +	std	%o2,[%sp+0x48]
   1.277 +	ldd	[%sp+0x48],%f10
   1.278 +	fmul8ulx16	%f4,%f10,%f0
   1.279 +	.end
   1.280 +!
   1.281 +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
   1.282 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fmuld8sux16 result in %f0.
   1.283 +	.inline vis_fmuld8sux16,8
   1.284 +	st	%o0,[%sp+0x48]
   1.285 +	ld	[%sp+0x48],%f4
   1.286 +	st	%o1,[%sp+0x48]
   1.287 +	ld	[%sp+0x48],%f10
   1.288 +	fmuld8sux16	%f4,%f10,%f0
   1.289 +	.end
   1.290 +!
   1.291 +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
   1.292 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fmuld8ulx16 result in %f0.
   1.293 +	.inline vis_fmuld8ulx16,8
   1.294 +	st	%o0,[%sp+0x48]
   1.295 +	ld	[%sp+0x48],%f4
   1.296 +	st	%o1,[%sp+0x48]
   1.297 +	ld	[%sp+0x48],%f10
   1.298 +	fmuld8ulx16	%f4,%f10,%f0
   1.299 +	.end
   1.300 +!
   1.301 +! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
   1.302 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fpadd16 result in %f0.
   1.303 +	.inline vis_fpadd16,16
   1.304 +	std	%o0,[%sp+0x48]	! scratch at 0x48 like the sibling templates; an 8-byte
   1.305 +	ldd	[%sp+0x48],%f4	! store at 0x40 would cover the struct-return word at %sp+64
   1.306 +	std	%o2,[%sp+0x48]
   1.307 +	ldd	[%sp+0x48],%f10
   1.308 +	fpadd16	%f4,%f10,%f0
   1.309 +	.end
   1.310 +!
   1.311 +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
   1.312 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fpadd16s result in %f0.
   1.313 +	.inline vis_fpadd16s,8
   1.314 +	st	%o0,[%sp+0x48]
   1.315 +	ld	[%sp+0x48],%f4
   1.316 +	st	%o1,[%sp+0x48]
   1.317 +	ld	[%sp+0x48],%f10
   1.318 +	fpadd16s	%f4,%f10,%f0
   1.319 +	.end
   1.320 +!
   1.321 +! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
   1.322 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fpadd32 result in %f0.
   1.323 +	.inline vis_fpadd32,16
   1.324 +	std	%o0,[%sp+0x48]
   1.325 +	ldd	[%sp+0x48],%f4
   1.326 +	std	%o2,[%sp+0x48]
   1.327 +	ldd	[%sp+0x48],%f10
   1.328 +	fpadd32	%f4,%f10,%f0
   1.329 +	.end
   1.330 +!
   1.331 +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
   1.332 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fpadd32s result in %f0.
   1.333 +	.inline vis_fpadd32s,8
   1.334 +	st	%o0,[%sp+0x48]
   1.335 +	ld	[%sp+0x48],%f4
   1.336 +	st	%o1,[%sp+0x48]
   1.337 +	ld	[%sp+0x48],%f10
   1.338 +	fpadd32s	%f4,%f10,%f0
   1.339 +	.end
   1.340 +!
   1.341 +! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
   1.342 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fpsub16 result in %f0.
   1.343 +	.inline vis_fpsub16,16
   1.344 +	std	%o0,[%sp+0x48]
   1.345 +	ldd	[%sp+0x48],%f4
   1.346 +	std	%o2,[%sp+0x48]
   1.347 +	ldd	[%sp+0x48],%f10
   1.348 +	fpsub16	%f4,%f10,%f0
   1.349 +	.end
   1.350 +!
   1.351 +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
   1.352 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fpsub16s result in %f0.
   1.353 +	.inline vis_fpsub16s,8
   1.354 +	st	%o0,[%sp+0x48]
   1.355 +	ld	[%sp+0x48],%f4
   1.356 +	st	%o1,[%sp+0x48]
   1.357 +	ld	[%sp+0x48],%f10
   1.358 +	fpsub16s	%f4,%f10,%f0
   1.359 +	.end
   1.360 +!
   1.361 +! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
   1.362 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fpsub32 result in %f0.
   1.363 +	.inline vis_fpsub32,16
   1.364 +	std	%o0,[%sp+0x48]
   1.365 +	ldd	[%sp+0x48],%f4
   1.366 +	std	%o2,[%sp+0x48]
   1.367 +	ldd	[%sp+0x48],%f10
   1.368 +	fpsub32	%f4,%f10,%f0
   1.369 +	.end
   1.370 +!
   1.371 +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
   1.372 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fpsub32s result in %f0.
   1.373 +	.inline vis_fpsub32s,8
   1.374 +	st	%o0,[%sp+0x48]
   1.375 +	ld	[%sp+0x48],%f4
   1.376 +	st	%o1,[%sp+0x48]
   1.377 +	ld	[%sp+0x48],%f10
   1.378 +	fpsub32s	%f4,%f10,%f0
   1.379 +	.end
   1.380 +
   1.381 +!--------------------------------------------------------------------
   1.382 +! Pixel packing
   1.383 +!
   1.384 +! float vis_fpack16(double /*frs2*/);
   1.385 +! Template: the double goes %o0:%o1 -> [%sp+0x48] -> %f4; fpack16 result in %f0.
   1.386 +	.inline vis_fpack16,8
   1.387 +	std	%o0,[%sp+0x48]
   1.388 +	ldd	[%sp+0x48],%f4
   1.389 +	fpack16	%f4,%f0
   1.390 +	.end
   1.391 +
   1.392 +!
   1.393 +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
   1.394 +! Template: both doubles go via [%sp+0x48] into %f4/%f10; each is packed, into %f0 and %f1.
   1.395 +	.inline vis_fpack16_pair,16
   1.396 +	std	%o0,[%sp+0x48]
   1.397 +	ldd	[%sp+0x48],%f4
   1.398 +	std	%o2,[%sp+0x48]
   1.399 +	ldd	[%sp+0x48],%f10
   1.400 +	fpack16	%f4,%f0	! first argument packed into %f0
   1.401 +	fpack16	%f10,%f1	! second argument packed into %f1
   1.402 +	.end
   1.403 +!
   1.404 +! void vis_st2_fpack16(double, double, double *)
   1.405 +! Template: packs both doubles (via %f4/%f10) and stores the two words at [%o4] and [%o4+4].
   1.406 +	.inline vis_st2_fpack16,20
   1.407 + 	std	%o0,[%sp+0x48]
   1.408 + 	ldd	[%sp+0x48],%f4
   1.409 + 	std	%o2,[%sp+0x48]
   1.410 + 	ldd	[%sp+0x48],%f10
   1.411 + 	fpack16	%f4,%f0
   1.412 + 	fpack16	%f10,%f1
   1.413 + 	st	%f0,[%o4+0]	! two separate word stores through the pointer in %o4
   1.414 + 	st	%f1,[%o4+4]
   1.415 + 	.end
   1.416 +!
   1.417 +! void vis_std_fpack16(double, double, double *)
   1.418 +! Template: packs both doubles (via %f4/%f10) into %f0/%f1 and stores the pair with one std at [%o4].
   1.419 +	.inline vis_std_fpack16,20
   1.420 +	std     %o0,[%sp+0x48]
   1.421 +	ldd     [%sp+0x48],%f4
   1.422 +	std     %o2,[%sp+0x48]
   1.423 +	ldd     [%sp+0x48],%f10
   1.424 +	fpack16 %f4,%f0
   1.425 +	fpack16 %f10,%f1
   1.426 +	std     %f0,[%o4]	! one doubleword store of %f0:%f1 through the pointer in %o4
   1.427 +	.end
   1.428 +!
   1.429 +! void vis_st2_fpackfix(double, double, double *)
   1.430 +! Template: fpackfix on both doubles (via %f4/%f10); stores the two words at [%o4] and [%o4+4].
   1.431 +	.inline vis_st2_fpackfix,20
   1.432 + 	std	%o0,[%sp+0x48]
   1.433 + 	ldd	[%sp+0x48],%f4
   1.434 + 	std	%o2,[%sp+0x48]
   1.435 + 	ldd	[%sp+0x48],%f10
   1.436 + 	fpackfix %f4,%f0
   1.437 + 	fpackfix %f10,%f1
   1.438 + 	st	%f0,[%o4+0]	! two separate word stores through the pointer in %o4
   1.439 + 	st	%f1,[%o4+4]
   1.440 + 	.end
   1.441 +!
   1.442 +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
   1.443 +! Template: frs1 is loaded into %f0:%f1, then fpack16 of frs2 (%f4) overwrites only %f0.
   1.444 +	.inline vis_fpack16_to_hi,16
   1.445 +	std	%o0,[%sp+0x48]
   1.446 +	ldd	[%sp+0x48],%f0	! %f0:%f1 = frs1
   1.447 +	std	%o2,[%sp+0x48]
   1.448 +	ldd	[%sp+0x48],%f4	! %f4:%f5 = frs2
   1.449 +	fpack16	%f4,%f0	! replaces %f0; %f1 still holds the low word of frs1
   1.450 +	.end
   1.451 +
   1.452 +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
   1.453 +! Template: frs1 is loaded into %f0:%f1, then fpack16 of frs2 (%f4) replaces %f1 by way of %f3.
   1.454 +	.inline vis_fpack16_to_lo,16
   1.455 +	std	%o0,[%sp+0x48]
   1.456 +	ldd	[%sp+0x48],%f0	! %f0:%f1 = frs1
   1.457 +	std	%o2,[%sp+0x48]
   1.458 +	ldd	[%sp+0x48],%f4	! %f4:%f5 = frs2
   1.459 +	fpack16	%f4,%f3	! packed into the temporary %f3, not straight into %f1
   1.460 +	fmovs	%f3,%f1		/* without this, optimizer goes wrong */
   1.461 +	.end
   1.462 +
   1.463 +!
   1.464 +! double vis_fpack32(double /*frs1*/, double /*frs2*/);
   1.465 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fpack32 result in %f0.
   1.466 +	.inline vis_fpack32,16
   1.467 +	std	%o0,[%sp+0x48]
   1.468 +	ldd	[%sp+0x48],%f4
   1.469 +	std	%o2,[%sp+0x48]
   1.470 +	ldd	[%sp+0x48],%f10
   1.471 +	fpack32	%f4,%f10,%f0
   1.472 +	.end
   1.473 +!
   1.474 +! float vis_fpackfix(double /*frs2*/);
   1.475 +! Template: the double goes %o0:%o1 -> [%sp+0x48] -> %f4; fpackfix result in %f0.
   1.476 +	.inline vis_fpackfix,8
   1.477 +	std	%o0,[%sp+0x48]
   1.478 +	ldd	[%sp+0x48],%f4
   1.479 +	fpackfix	%f4,%f0
   1.480 +	.end
   1.481 +!
   1.482 +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
   1.483 +! Template: both doubles go via [%sp+0x48] into %f4/%f6; fpackfix of each lands in %f0 and %f1.
   1.484 +	.inline vis_fpackfix_pair,16
   1.485 +	std	%o0,[%sp+0x48]
   1.486 +	ldd	[%sp+0x48],%f4
   1.487 +	std	%o2,[%sp+0x48]
   1.488 +	ldd	[%sp+0x48],%f6
   1.489 +	fpackfix	%f4,%f0	! first argument -> %f0
   1.490 +	fpackfix	%f6,%f1	! second argument -> %f1
   1.491 +	.end
   1.492 +
   1.493 +!--------------------------------------------------------------------
   1.494 +! Motion estimation
   1.495 +!
   1.496 +! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
   1.497 +! Template: frd (%o4:%o5) is preloaded into %f0, frs1/frs2 into %f4/%f10; pdist then writes %f0.
   1.498 +	.inline vis_pdist,24
   1.499 +	std	%o4,[%sp+0x48]
   1.500 +	ldd	[%sp+0x48],%f0	! %f0 = frd before pdist (presumably the running sum - confirm)
   1.501 +	std	%o0,[%sp+0x48]
   1.502 +	ldd	[%sp+0x48],%f4
   1.503 +	std	%o2,[%sp+0x48]
   1.504 +	ldd	[%sp+0x48],%f10
   1.505 +	pdist	%f4,%f10,%f0
   1.506 +	.end
   1.507 +
   1.508 +!--------------------------------------------------------------------
   1.509 +! Channel merging
   1.510 +!
   1.511 +! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
   1.512 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fpmerge result in %f0.
   1.513 +	.inline vis_fpmerge,8
   1.514 +	st	%o0,[%sp+0x48]
   1.515 +	ld	[%sp+0x48],%f4
   1.516 +	st	%o1,[%sp+0x48]
   1.517 +	ld	[%sp+0x48],%f10
   1.518 +	fpmerge	%f4,%f10,%f0
   1.519 +	.end
   1.520 +
   1.521 +!--------------------------------------------------------------------
   1.522 +! Pixel expansion
   1.523 +!
   1.524 +! double vis_fexpand(float /*frs2*/);
   1.525 +! Template: the float goes %o0 -> [%sp+0x48] -> %f4; fexpand result in %f0.
   1.526 +	.inline vis_fexpand,4
   1.527 +	st	%o0,[%sp+0x48]
   1.528 +	ld	[%sp+0x48],%f4
   1.529 +	fexpand	%f4,%f0
   1.530 +	.end
   1.531 +
   1.532 +! double vis_fexpand_hi(double /*frs2*/);
   1.533 +! Template: the double is loaded into %f4:%f5; fexpand of %f4 (its first word) goes to %f0.
   1.534 +	.inline vis_fexpand_hi,8
   1.535 +	std	%o0,[%sp+0x48]
   1.536 +	ldd	[%sp+0x48],%f4
   1.537 +	fexpand	%f4,%f0
   1.538 +	.end
   1.539 +
   1.540 +! double vis_fexpand_lo(double /*frs2*/);
   1.541 +! Template: the double is loaded into %f4:%f5; %f5 (its second word) is copied to %f2 and expanded.
   1.542 +	.inline vis_fexpand_lo,8
   1.543 +	std	%o0,[%sp+0x48]
   1.544 +	ldd	[%sp+0x48],%f4
   1.545 +	fmovs	%f5, %f2	! %f2 = second word of the argument
   1.546 +	fexpand	%f2,%f0
   1.547 +	.end
   1.548 +
   1.549 +!--------------------------------------------------------------------
   1.550 +! Bitwise logical operations
   1.551 +!
   1.552 +! double vis_fnor(double /*frs1*/, double /*frs2*/);
   1.553 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fnor result in %f0.
   1.554 +	.inline vis_fnor,16
   1.555 +	std	%o0,[%sp+0x48]
   1.556 +	ldd	[%sp+0x48],%f4
   1.557 +	std	%o2,[%sp+0x48]
   1.558 +	ldd	[%sp+0x48],%f10
   1.559 +	fnor	%f4,%f10,%f0
   1.560 +	.end
   1.561 +!
   1.562 +! float vis_fnors(float /*frs1*/, float /*frs2*/);
   1.563 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fnors result in %f0.
   1.564 +	.inline vis_fnors,8
   1.565 +	st	%o0,[%sp+0x48]
   1.566 +	ld	[%sp+0x48],%f4
   1.567 +	st	%o1,[%sp+0x48]
   1.568 +	ld	[%sp+0x48],%f10
   1.569 +	fnors	%f4,%f10,%f0
   1.570 +	.end
   1.571 +!
   1.572 +! double vis_fandnot(double /*frs1*/, double /*frs2*/);
   1.573 +! Template: each double goes %o-pair -> [%sp+0x48] -> %f4/%f10; implemented with fandnot1 -> %f0.
   1.574 +	.inline vis_fandnot,16
   1.575 +	std	%o0,[%sp+0x48]
   1.576 +	ldd	[%sp+0x48],%f4
   1.577 +	std	%o2,[%sp+0x48]
   1.578 +	ldd	[%sp+0x48],%f10
   1.579 +	fandnot1	%f4,%f10,%f0
   1.580 +	.end
   1.581 +!
   1.582 +! float vis_fandnots(float /*frs1*/, float /*frs2*/);
   1.583 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> %f4/%f10; implemented with fandnot1s -> %f0.
   1.584 +	.inline vis_fandnots,8
   1.585 +	st	%o0,[%sp+0x48]
   1.586 +	ld	[%sp+0x48],%f4
   1.587 +	st	%o1,[%sp+0x48]
   1.588 +	ld	[%sp+0x48],%f10
   1.589 +	fandnot1s	%f4,%f10,%f0
   1.590 +	.end
   1.591 +!
   1.592 +! double vis_fnot(double /*frs1*/);
   1.593 +! Template: the double goes %o0:%o1 -> [%sp+0x48] -> %f4; implemented with fnot1 -> %f0.
   1.594 +	.inline vis_fnot,8
   1.595 +	std	%o0,[%sp+0x48]
   1.596 +	ldd	[%sp+0x48],%f4
   1.597 +	fnot1	%f4,%f0
   1.598 +	.end
   1.599 +!
   1.600 +! float vis_fnots(float /*frs1*/);
   1.601 +! Template: the float goes %o0 -> [%sp+0x48] -> %f4; implemented with fnot1s -> %f0.
   1.602 +	.inline vis_fnots,4
   1.603 +	st	%o0,[%sp+0x48]
   1.604 +	ld	[%sp+0x48],%f4
   1.605 +	fnot1s	%f4,%f0
   1.606 +	.end
   1.607 +!
   1.608 +! double vis_fxor(double /*frs1*/, double /*frs2*/);
   1.609 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fxor result in %f0.
   1.610 +	.inline vis_fxor,16
   1.611 +	std	%o0,[%sp+0x48]
   1.612 +	ldd	[%sp+0x48],%f4
   1.613 +	std	%o2,[%sp+0x48]
   1.614 +	ldd	[%sp+0x48],%f10
   1.615 +	fxor	%f4,%f10,%f0
   1.616 +	.end
   1.617 +!
   1.618 +! float vis_fxors(float /*frs1*/, float /*frs2*/);
   1.619 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fxors result in %f0.
   1.620 +	.inline vis_fxors,8
   1.621 +	st	%o0,[%sp+0x48]
   1.622 +	ld	[%sp+0x48],%f4
   1.623 +	st	%o1,[%sp+0x48]
   1.624 +	ld	[%sp+0x48],%f10
   1.625 +	fxors	%f4,%f10,%f0
   1.626 +	.end
   1.627 +!
   1.628 +! double vis_fnand(double /*frs1*/, double /*frs2*/);
   1.629 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fnand result in %f0.
   1.630 +	.inline vis_fnand,16
   1.631 +	std	%o0,[%sp+0x48]
   1.632 +	ldd	[%sp+0x48],%f4
   1.633 +	std	%o2,[%sp+0x48]
   1.634 +	ldd	[%sp+0x48],%f10
   1.635 +	fnand	%f4,%f10,%f0
   1.636 +	.end
   1.637 +!
   1.638 +! float vis_fnands(float /*frs1*/, float /*frs2*/);
   1.639 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fnands result in %f0.
   1.640 +	.inline vis_fnands,8
   1.641 +	st	%o0,[%sp+0x48]
   1.642 +	ld	[%sp+0x48],%f4
   1.643 +	st	%o1,[%sp+0x48]
   1.644 +	ld	[%sp+0x48],%f10
   1.645 +	fnands	%f4,%f10,%f0
   1.646 +	.end
   1.647 +!
   1.648 +! double vis_fand(double /*frs1*/, double /*frs2*/);
   1.649 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fand result in %f0.
   1.650 +	.inline vis_fand,16
   1.651 +	std	%o0,[%sp+0x48]
   1.652 +	ldd	[%sp+0x48],%f4
   1.653 +	std	%o2,[%sp+0x48]
   1.654 +	ldd	[%sp+0x48],%f10
   1.655 +	fand	%f4,%f10,%f0
   1.656 +	.end
   1.657 +!
   1.658 +! float vis_fands(float /*frs1*/, float /*frs2*/);
   1.659 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fands result in %f0.
   1.660 +	.inline vis_fands,8
   1.661 +	st	%o0,[%sp+0x48]
   1.662 +	ld	[%sp+0x48],%f4
   1.663 +	st	%o1,[%sp+0x48]
   1.664 +	ld	[%sp+0x48],%f10
   1.665 +	fands	%f4,%f10,%f0
   1.666 +	.end
   1.667 +!
   1.668 +! double vis_fxnor(double /*frs1*/, double /*frs2*/);
   1.669 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); fxnor result in %f0.
   1.670 +	.inline vis_fxnor,16
   1.671 +	std	%o0,[%sp+0x48]
   1.672 +	ldd	[%sp+0x48],%f4
   1.673 +	std	%o2,[%sp+0x48]
   1.674 +	ldd	[%sp+0x48],%f10
   1.675 +	fxnor	%f4,%f10,%f0
   1.676 +	.end
   1.677 +!
   1.678 +! float vis_fxnors(float /*frs1*/, float /*frs2*/);
   1.679 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fxnors result in %f0.
   1.680 +	.inline vis_fxnors,8
   1.681 +	st	%o0,[%sp+0x48]
   1.682 +	ld	[%sp+0x48],%f4
   1.683 +	st	%o1,[%sp+0x48]
   1.684 +	ld	[%sp+0x48],%f10
   1.685 +	fxnors	%f4,%f10,%f0
   1.686 +	.end
   1.687 +!
   1.688 +! double vis_fsrc(double /*frs1*/);
   1.689 +! Template: the double goes %o0:%o1 -> [%sp+0x48] -> %f4; implemented with fsrc1 -> %f0.
   1.690 +	.inline vis_fsrc,8
   1.691 +	std	%o0,[%sp+0x48]
   1.692 +	ldd	[%sp+0x48],%f4
   1.693 +	fsrc1	%f4,%f0
   1.694 +	.end
   1.695 +!
   1.696 +! float vis_fsrcs(float /*frs1*/);
   1.697 +! Template: the float goes %o0 -> [%sp+0x48] -> %f4; implemented with fsrc1s -> %f0.
   1.698 +	.inline vis_fsrcs,4
   1.699 +	st	%o0,[%sp+0x48]
   1.700 +	ld	[%sp+0x48],%f4
   1.701 +	fsrc1s	%f4,%f0
   1.702 +	.end
   1.703 +!
   1.704 +! double vis_fornot(double /*frs1*/, double /*frs2*/);
   1.705 +! Template: each double goes %o-pair -> [%sp+0x48] -> %f4/%f10; implemented with fornot1 -> %f0.
   1.706 +	.inline vis_fornot,16
   1.707 +	std	%o0,[%sp+0x48]
   1.708 +	ldd	[%sp+0x48],%f4
   1.709 +	std	%o2,[%sp+0x48]
   1.710 +	ldd	[%sp+0x48],%f10
   1.711 +	fornot1	%f4,%f10,%f0
   1.712 +	.end
   1.713 +!
   1.714 +! float vis_fornots(float /*frs1*/, float /*frs2*/);
   1.715 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> %f4/%f10; implemented with fornot1s -> %f0.
   1.716 +	.inline vis_fornots,8
   1.717 +	st	%o0,[%sp+0x48]
   1.718 +	ld	[%sp+0x48],%f4
   1.719 +	st	%o1,[%sp+0x48]
   1.720 +	ld	[%sp+0x48],%f10
   1.721 +	fornot1s	%f4,%f10,%f0
   1.722 +	.end
   1.723 +!
   1.724 +! double vis_for(double /*frs1*/, double /*frs2*/);
   1.725 +! Template: each double goes %o-pair -> [%sp+0x48] -> FP reg (%f4, %f10); for result in %f0.
   1.726 +	.inline vis_for,16
   1.727 +	std	%o0,[%sp+0x48]
   1.728 +	ldd	[%sp+0x48],%f4
   1.729 +	std	%o2,[%sp+0x48]
   1.730 +	ldd	[%sp+0x48],%f10
   1.731 +	for	%f4,%f10,%f0
   1.732 +	.end
   1.733 +!
   1.734 +! float vis_fors(float /*frs1*/, float /*frs2*/);
   1.735 +! Template: each float goes %o0/%o1 -> [%sp+0x48] -> FP reg (%f4, %f10); fors result in %f0.
   1.736 +	.inline vis_fors,8
   1.737 +	st	%o0,[%sp+0x48]
   1.738 +	ld	[%sp+0x48],%f4
   1.739 +	st	%o1,[%sp+0x48]
   1.740 +	ld	[%sp+0x48],%f10
   1.741 +	fors	%f4,%f10,%f0
   1.742 +	.end
   1.743 +!
   1.744 +! double vis_fzero(/* void */)
   1.745 +! Inline template: takes no arguments; a single fzero writes %f0.
   1.746 +	.inline	vis_fzero,0
   1.747 +	fzero	%f0
   1.748 +	.end
   1.749 +!
   1.750 +! float vis_fzeros(/* void */)
   1.751 +! Inline template: takes no arguments; a single fzeros writes %f0.
   1.752 +	.inline	vis_fzeros,0
   1.753 +	fzeros	%f0
   1.754 +	.end
   1.755 +!
   1.756 +! double vis_fone(/* void */)
   1.757 +! Inline template: takes no arguments; a single fone writes %f0.
   1.758 +	.inline	vis_fone,0
   1.759 +	fone	%f0
   1.760 +	.end
   1.761 +!
   1.762 +! float vis_fones(/* void */)
   1.763 +! Inline template: takes no arguments; a single fones writes %f0.
   1.764 +	.inline	vis_fones,0
   1.765 +	fones	%f0
   1.766 +	.end
   1.767 +
   1.768 +!--------------------------------------------------------------------
   1.769 +! Partial store instructions
   1.770 +!
   1.771 +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
   1.772 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc0.
   1.773 +	.inline vis_stdfa_ASI_PST8P,16
   1.774 +	std	%o0,[%sp+0x48]
   1.775 +	ldd	[%sp+0x48],%f4
   1.776 +	stda	%f4,[%o2]%o3,0xc0	! ASI_PST8_P
   1.777 +	.end
   1.778 +!
   1.779 +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
   1.780 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc8.
   1.781 +	.inline vis_stdfa_ASI_PST8PL,16
   1.782 +	std	%o0,[%sp+0x48]
   1.783 +	ldd	[%sp+0x48],%f4
   1.784 +	stda	%f4,[%o2]%o3,0xc8	! ASI_PST8_PL
   1.785 +	.end
   1.786 +!
   1.787 +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
   1.788 +! Template: loads one word from [%o0] and one from [%o1] into %f4:%f5; stda to [%o2] with %o3, ASI 0xc0.
   1.789 +	.inline vis_stdfa_ASI_PST8P_int_pair,16
   1.790 +        ld	[%o0],%f4	! first word of the pair, read through rs1
   1.791 +        ld	[%o1],%f5	! second word of the pair, read through rs2
   1.792 +	stda	%f4,[%o2]%o3,0xc0	! ASI_PST8_P
   1.793 +	.end
   1.794 +!
   1.795 +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
   1.796 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc1.
   1.797 +	.inline vis_stdfa_ASI_PST8S,16
   1.798 +	std	%o0,[%sp+0x48]
   1.799 +	ldd	[%sp+0x48],%f4
   1.800 +	stda	%f4,[%o2]%o3,0xc1	! ASI_PST8_S
   1.801 +	.end
   1.802 +!
   1.803 +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
   1.804 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc2.
   1.805 +	.inline vis_stdfa_ASI_PST16P,16
   1.806 +	std	%o0,[%sp+0x48]
   1.807 +	ldd	[%sp+0x48],%f4
   1.808 +	stda	%f4,[%o2]%o3,0xc2	! ASI_PST16_P
   1.809 +	.end
   1.810 +!
   1.811 +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
   1.812 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc3.
   1.813 +	.inline vis_stdfa_ASI_PST16S,16
   1.814 +	std	%o0,[%sp+0x48]
   1.815 +	ldd	[%sp+0x48],%f4
   1.816 +	stda	%f4,[%o2]%o3,0xc3	! ASI_PST16_S
   1.817 +	.end
   1.818 +!
   1.819 +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
   1.820 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc4.
   1.821 +	.inline vis_stdfa_ASI_PST32P,16
   1.822 +	std	%o0,[%sp+0x48]
   1.823 +	ldd	[%sp+0x48],%f4
   1.824 +	stda	%f4,[%o2]%o3,0xc4	! ASI_PST32_P
   1.825 +	.end
   1.826 +!
   1.827 +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
   1.828 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda to [%o2] (rs1) with %o3 (rmask), ASI 0xc5.
   1.829 +	.inline vis_stdfa_ASI_PST32S,16
   1.830 +	std	%o0,[%sp+0x48]
   1.831 +	ldd	[%sp+0x48],%f4
   1.832 +	stda	%f4,[%o2]%o3,0xc5	! ASI_PST32_S
   1.833 +	.end
   1.834 +
   1.835 +!--------------------------------------------------------------------
   1.836 +! Short store instructions
   1.837 +!
   1.838 +! vis_stdfa_ASI_FL8P(double frd, void *rs1)
   1.839 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xd0.
   1.840 +	.inline vis_stdfa_ASI_FL8P,12
   1.841 +	std	%o0,[%sp+0x48]
   1.842 +	ldd	[%sp+0x48],%f4
   1.843 +	stda	%f4,[%o2]0xd0	! ASI_FL8_P
   1.844 +	.end
   1.845 +!
   1.846 +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
   1.847 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2+%o3] (rs1+index), ASI 0xd0.
   1.848 +	.inline vis_stdfa_ASI_FL8P_index,16
   1.849 +	std	%o0,[%sp+0x48]
   1.850 +	ldd	[%sp+0x48],%f4
   1.851 +	stda	%f4,[%o2+%o3]0xd0 ! ASI_FL8_P
   1.852 +	.end
   1.853 +!
   1.854 +! vis_stdfa_ASI_FL8S(double frd, void *rs1)
   1.855 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xd1.
   1.856 +	.inline vis_stdfa_ASI_FL8S,12
   1.857 +	std	%o0,[%sp+0x48]
   1.858 +	ldd	[%sp+0x48],%f4
   1.859 +	stda	%f4,[%o2]0xd1	! ASI_FL8_S
   1.860 +	.end
   1.861 +!
   1.862 +! vis_stdfa_ASI_FL16P(double frd, void *rs1)
   1.863 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xd2.
   1.864 +	.inline vis_stdfa_ASI_FL16P,12
   1.865 +	std	%o0,[%sp+0x48]
   1.866 +	ldd	[%sp+0x48],%f4
   1.867 +	stda	%f4,[%o2]0xd2	! ASI_FL16_P
   1.868 +	.end
   1.869 +!
   1.870 +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
   1.871 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2+%o3] (rs1+index), ASI 0xd2.
   1.872 +	.inline vis_stdfa_ASI_FL16P_index,16
   1.873 +	std	%o0,[%sp+0x48]
   1.874 +	ldd	[%sp+0x48],%f4
   1.875 +	stda	%f4,[%o2+%o3]0xd2 ! ASI_FL16_P
   1.876 +	.end
   1.877 +!
   1.878 +! vis_stdfa_ASI_FL16S(double frd, void *rs1)
   1.879 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xd3.
   1.880 +	.inline vis_stdfa_ASI_FL16S,12
   1.881 +	std	%o0,[%sp+0x48]
   1.882 +	ldd	[%sp+0x48],%f4
   1.883 +	stda	%f4,[%o2]0xd3	! ASI_FL16_S
   1.884 +	.end
   1.885 +!
   1.886 +! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
   1.887 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xd8.
   1.888 +	.inline vis_stdfa_ASI_FL8PL,12
   1.889 +	std	%o0,[%sp+0x48]
   1.890 +	ldd	[%sp+0x48],%f4
   1.891 +	stda	%f4,[%o2]0xd8	! ASI_FL8_PL
   1.892 +	.end
   1.893 +!
   1.894 +! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
   1.895 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xd9.
   1.896 +	.inline vis_stdfa_ASI_FL8SL,12
   1.897 +	std	%o0,[%sp+0x48]
   1.898 +	ldd	[%sp+0x48],%f4
   1.899 +	stda	%f4,[%o2]0xd9	! ASI_FL8_SL
   1.900 +	.end
   1.901 +!
   1.902 +! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
   1.903 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xda.
   1.904 +	.inline vis_stdfa_ASI_FL16PL,12
   1.905 +	std	%o0,[%sp+0x48]
   1.906 +	ldd	[%sp+0x48],%f4
   1.907 +	stda	%f4,[%o2]0xda	! ASI_FL16_PL
   1.908 +	.end
   1.909 +!
   1.910 +! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
   1.911 +! Template: frd goes %o0:%o1 -> [%sp+0x48] -> %f4; stda of %f4 to [%o2] (rs1) with ASI 0xdb.
   1.912 +	.inline vis_stdfa_ASI_FL16SL,12
   1.913 +	std	%o0,[%sp+0x48]
   1.914 +	ldd	[%sp+0x48],%f4
   1.915 +	stda	%f4,[%o2]0xdb	! ASI_FL16_SL
   1.916 +	.end
   1.917 +
   1.918 +!--------------------------------------------------------------------
   1.919 +! Short load instructions
   1.920 +!
   1.921 +! double vis_lddfa_ASI_FL8P(void *rs1)
   1.922 +! Template: ldda from [%o0] (rs1) with ASI 0xd0 into %f4, then fmovd copies %f4 to %f0.
   1.923 +	.inline vis_lddfa_ASI_FL8P,4
   1.924 +	ldda	[%o0]0xd0,%f4	! ASI_FL8_P
   1.925 +	fmovd	%f4,%f0	        ! Compiler can clean this up
   1.926 +	.end
   1.927 +!
   1.928 +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
   1.929 +! Template: ldda from [%o0+%o1] (rs1+index) with ASI 0xd0 into %f4, then fmovd copies %f4 to %f0.
   1.930 +	.inline vis_lddfa_ASI_FL8P_index,8
   1.931 +	ldda	[%o0+%o1]0xd0,%f4
   1.932 +	fmovd	%f4,%f0
   1.933 +	.end
   1.934 +!
   1.935 +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
   1.936 +! Template: offset = index >> 16 (arithmetic), then ldda from [%o0+offset] with ASI 0xd0 -> %f0.
   1.937 +	.inline vis_lddfa_ASI_FL8P_hi,8
   1.938 +	sra     %o1,16,%o1	! %o1 = upper 16 bits of index; sra copies the sign bit down
   1.939 +	ldda	[%o0+%o1]0xd0,%f4
   1.940 +	fmovd	%f4,%f0
   1.941 +	.end
   1.942 +!
   1.943 +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
   1.944 +! Template: offset = low 16 bits of index, sign-extended; ldda from [%o0+offset], ASI 0xd0 -> %f0.
   1.945 +	.inline vis_lddfa_ASI_FL8P_lo,8
   1.946 +	sll     %o1,16,%o1	! move the low 16 bits to the top of the word
   1.947 +	sra     %o1,16,%o1	! shift back arithmetically: low 16 bits, sign-extended
   1.948 +	ldda	[%o0+%o1]0xd0,%f4
   1.949 +	fmovd	%f4,%f0
   1.950 +	.end
   1.951 +!
   1.952 +! double vis_lddfa_ASI_FL8S(void *rs1)
   1.953 +! Template: ldda from [%o0] (rs1) with ASI 0xd1 into %f4, then fmovd copies %f4 to %f0.
   1.954 +	.inline vis_lddfa_ASI_FL8S,4
   1.955 +	ldda	[%o0]0xd1,%f4	! ASI_FL8_S
   1.956 +	fmovd	%f4,%f0
   1.957 +	.end
   1.958 +!
   1.959 +! double vis_lddfa_ASI_FL16P(void *rs1)
   1.960 +! Template: ldda from [%o0] (rs1) with ASI 0xd2 into %f4, then fmovd copies %f4 to %f0.
   1.961 +	.inline vis_lddfa_ASI_FL16P,4
   1.962 +	ldda	[%o0]0xd2,%f4	! ASI_FL16_P
   1.963 +	fmovd	%f4,%f0
   1.964 +	.end
   1.965 +!
   1.966 +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
   1.967 +! Template: ldda from [%o0+%o1] (rs1+index) with ASI 0xd2 into %f4, then fmovd copies %f4 to %f0.
   1.968 +	.inline vis_lddfa_ASI_FL16P_index,8
   1.969 +	ldda	[%o0+%o1]0xd2,%f4 ! ASI_FL16_P
   1.970 +	fmovd	%f4,%f0
   1.971 +	.end
   1.972 +!
   1.973 +! double vis_lddfa_ASI_FL16S(void *rs1)
   1.974 +! Template: ldda from [%o0] (rs1) with ASI 0xd3 into %f4, then fmovd copies %f4 to %f0.
   1.975 +	.inline vis_lddfa_ASI_FL16S,4
   1.976 +	ldda	[%o0]0xd3,%f4	! ASI_FL16_S
   1.977 +	fmovd	%f4,%f0
   1.978 +	.end
   1.979 +!
   1.980 +! double vis_lddfa_ASI_FL8PL(void *rs1)
   1.981 +! Template: ldda from [%o0] (rs1) with ASI 0xd8 into %f4, then fmovd copies %f4 to %f0.
   1.982 +	.inline vis_lddfa_ASI_FL8PL,4
   1.983 +	ldda	[%o0]0xd8,%f4	! ASI_FL8_PL
   1.984 +	fmovd	%f4,%f0
   1.985 +	.end
   1.986 +!
   1.987 +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
   1.988 +! Template: ldda from [%o0+%o1] (rs1+index) with ASI 0xd8 into %f4, then fmovd copies %f4 to %f0.
   1.989 +	.inline vis_lddfa_ASI_FL8PL_index,8
   1.990 +	ldda	[%o0+%o1]0xd8,%f4	! ASI_FL8_PL
   1.991 +	fmovd	%f4,%f0
   1.992 +	.end
   1.993 +!
   1.994 +! double vis_lddfa_ASI_FL8SL(void *rs1)
   1.995 +! Template: ldda from [%o0] (rs1) with ASI 0xd9 into %f4, then fmovd copies %f4 to %f0.
   1.996 +	.inline vis_lddfa_ASI_FL8SL,4
   1.997 +	ldda	[%o0]0xd9,%f4	! ASI_FL8_SL
   1.998 +	fmovd	%f4,%f0
   1.999 +	.end
  1.1000 +!
  1.1001 +! double vis_lddfa_ASI_FL16PL(void *rs1)
  1.1002 +! Template: ldda from [%o0] (rs1) with ASI 0xda into %f4, then fmovd copies %f4 to %f0.
  1.1003 +	.inline vis_lddfa_ASI_FL16PL,4
  1.1004 +	ldda	[%o0]0xda,%f4	! ASI_FL16_PL
  1.1005 +	fmovd	%f4,%f0
  1.1006 +	.end
  1.1007 +!
  1.1008 +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
  1.1009 +! Template: ldda from [%o0+%o1] (rs1+index) with ASI 0xda into %f4, then fmovd copies %f4 to %f0.
  1.1010 +	.inline vis_lddfa_ASI_FL16PL_index,8
  1.1011 +	ldda	[%o0+%o1]0xda,%f4	! ASI_FL16_PL
  1.1012 +	fmovd	%f4,%f0
  1.1013 +	.end
  1.1014 +!
  1.1015 +! double vis_lddfa_ASI_FL16SL(void *rs1)
  1.1016 +! Template: ldda from [%o0] (rs1) with ASI 0xdb into %f4, then fmovd copies %f4 to %f0.
  1.1017 +	.inline vis_lddfa_ASI_FL16SL,4
  1.1018 +	ldda	[%o0]0xdb,%f4	! ASI_FL16_SL
  1.1019 +	fmovd	%f4,%f0
  1.1020 +	.end
  1.1021 +
  1.1022 +!--------------------------------------------------------------------
  1.1023 +! Graphics status register
  1.1024 +!
  1.1025 +! unsigned int vis_read_gsr(void)
  1.1026 +! Inline template: takes no arguments; reads %gsr into %o0.
  1.1027 +	.inline vis_read_gsr,0
  1.1028 +	rd	%gsr,%o0
  1.1029 +	.end
  1.1030 +!
  1.1031 +! void vis_write_gsr(unsigned int /* GSR */)
  1.1032 +! Inline template: writes the argument in %o0 to %gsr; %g0 is the other wr source operand.
  1.1033 +	.inline vis_write_gsr,4
  1.1034 +	wr	%g0,%o0,%gsr
  1.1035 +	.end
  1.1036 +
  1.1037 +!--------------------------------------------------------------------
  1.1038 +! Voxel texture mapping
  1.1039 +!
  1.1040 +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
  1.1041 +! Template: rebuilds the 64-bit rs1 from %o0 (upper half) and %o1 (lower half); array8 with %o2 -> %o0.
  1.1042 +	.inline	vis_array8,12
  1.1043 +	sllx	%o0,32,%o0
  1.1044 +	srl	%o1,0,%o1	! clear the most significant 32 bits of %o1
  1.1045 +	or	%o0,%o1,%o3	! join %o0 and %o1 into %o3
  1.1046 +	array8	%o3,%o2,%o0
  1.1047 +	.end
  1.1048 +!
  1.1049 +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
  1.1050 +! Template: rebuilds the 64-bit rs1 from %o0 (upper half) and %o1 (lower half); array16 with %o2 -> %o0.
  1.1051 +	.inline	vis_array16,12
  1.1052 +	sllx	%o0,32,%o0
  1.1053 +	srl	%o1,0,%o1	! clear the most significant 32 bits of %o1
  1.1054 +	or	%o0,%o1,%o3	! join %o0 and %o1 into %o3
  1.1055 +	array16	%o3,%o2,%o0
  1.1056 +	.end
  1.1057 +!
  1.1058 +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
  1.1059 +! Template: rebuilds the 64-bit rs1 from %o0 (upper half) and %o1 (lower half); array32 with %o2 -> %o0.
  1.1060 +	.inline	vis_array32,12
  1.1061 +	sllx	%o0,32,%o0
  1.1062 +	srl	%o1,0,%o1	! clear the most significant 32 bits of %o1
  1.1063 +	or	%o0,%o1,%o3	! join %o0 and %o1 into %o3
  1.1064 +	array32	%o3,%o2,%o0
  1.1065 +	.end
  1.1066 +
  1.1067 +!--------------------------------------------------------------------
  1.1068 +! Register aliasing and type casts
  1.1069 +! Return the high 32-bit word of a double, viewed as a float.
  1.1070 +! float vis_read_hi(double /* frs1 */);
  1.1071 +! In: %o0:%o1 = frs1, moved to the FP registers through the stack slot at %sp+0x48.  Out: %f0.
  1.1072 +	.inline vis_read_hi,8
  1.1073 +	std	%o0,[%sp+0x48]	! store double frs1 (%o0:%o1) to the stack slot
  1.1074 +	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1; return %f0 (the high word);
  1.1075 +	.end
  1.1076 +! Return the low 32-bit word of a double, viewed as a float.
  1.1077 +! float vis_read_lo(double /* frs1 */);
  1.1078 +! In: %o0:%o1 = frs1, moved to the FP registers through the stack slot at %sp+0x48.  Out: %f0.
  1.1079 +	.inline vis_read_lo,8
  1.1080 +	std	%o0,[%sp+0x48]	! store double frs1 (%o0:%o1) to the stack slot
  1.1081 +	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1;
  1.1082 +	fmovs	%f1,%f0		! %f0 = low word (frs1); return %f0;
  1.1083 +	.end
  1.1084 +! Return frs1 with its high 32-bit word replaced by the bits of frs2.
  1.1085 +! double vis_write_hi(double /* frs1 */, float /* frs2 */);
  1.1086 +! In: %o0:%o1 = frs1, %o2 = frs2.  Out: %f0:%f1.  Scratch: %f2, stack at %sp+0x44 and %sp+0x48.
  1.1087 +	.inline vis_write_hi,12
  1.1088 +	std	%o0,[%sp+0x48]	! store double frs1;
  1.1089 +	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1;
  1.1090 +	st	%o2,[%sp+0x44]	! store float frs2;
  1.1091 +	ld	[%sp+0x44],%f2	! %f2 = float frs2;
  1.1092 +	fmovs	%f2,%f0		! %f0 = float frs2; return %f0:%f1;
  1.1093 +	.end
  1.1094 +! Return frs1 with its low 32-bit word replaced by the bits of frs2.
  1.1095 +! double vis_write_lo(double /* frs1 */, float /* frs2 */);
  1.1096 +! In: %o0:%o1 = frs1, %o2 = frs2.  Out: %f0:%f1.  Scratch: %f2, stack at %sp+0x44 and %sp+0x48.
  1.1097 +	.inline vis_write_lo,12
  1.1098 +	std	%o0,[%sp+0x48]	! store double frs1;
  1.1099 +	ldd	[%sp+0x48],%f0	! %f0:%f1 = double frs1;
  1.1100 +	st	%o2,[%sp+0x44]	! store float frs2;
  1.1101 +	ld	[%sp+0x44],%f2	! %f2 = float frs2;
  1.1102 +	fmovs	%f2,%f1		! %f1 = float frs2; return %f0:%f1;
  1.1103 +	.end
  1.1104 +! Build a double whose high word is frs1 and whose low word is frs2.
  1.1105 +! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
  1.1106 +! In: %o0 = frs1, %o1 = frs2.  Out: %f0:%f1.  The stack slot at %sp+0x48 is reused for both moves.
  1.1107 +	.inline vis_freg_pair,8
  1.1108 +	st	%o0,[%sp+0x48]	! store float frs1
  1.1109 +	ld	[%sp+0x48],%f0	! %f0 = frs1 (high word of the result)
  1.1110 +	st	%o1,[%sp+0x48]	! store float frs2
  1.1111 +	ld	[%sp+0x48],%f1	! %f1 = frs2 (low word of the result)
  1.1112 +	.end
  1.1113 +! Move a 32-bit integer bit pattern into an FP register unchanged (no numeric conversion).
  1.1114 +! float vis_to_float(unsigned int /*value*/);
  1.1115 +! In: %o0 = value.  Out: %f0.  Goes through the stack slot at %sp+0x48.
  1.1116 +	.inline vis_to_float,4
  1.1117 +	st	%o0,[%sp+0x48]	! spill the integer bits to the stack slot
  1.1118 +	ld	[%sp+0x48],%f0	! %f0 = the same 32 bits
  1.1119 +	.end
  1.1120 +! Build a double from two 32-bit bit patterns: value1 is the high word, value2 the low word.
  1.1121 +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
  1.1122 +! In: %o0 = value1, %o1 = value2.  Out: %f0:%f1.  Goes through the stack slot at %sp+0x48.
  1.1123 +	.inline vis_to_double,8
  1.1124 +	std	%o0,[%sp+0x48]	! spill the %o0:%o1 pair as one doubleword
  1.1125 +	ldd	[%sp+0x48],%f0	! %f0 = value1, %f1 = value2
  1.1126 +	.end
  1.1127 +! Build a double whose high and low words both hold the same 32-bit bit pattern.
  1.1128 +! double vis_to_double_dup(unsigned int /*value*/);
  1.1129 +! In: %o0 = value.  Out: %f0:%f1.  Goes through the stack slot at %sp+0x48.
  1.1130 +	.inline vis_to_double_dup,4
  1.1131 +	st	%o0,[%sp+0x48]	! spill the integer bits to the stack slot
  1.1132 +	ld	[%sp+0x48],%f1	! %f1 = value (low word of the result)
  1.1133 +	fmovs	%f1,%f0		! duplicate value into %f0 (high word of the result)
  1.1134 +	.end
  1.1135 +! Move a 64-bit integer bit pattern into an FP register pair unchanged (no numeric conversion).
  1.1136 +! double vis_ll_to_double(unsigned long long /*value*/);
  1.1137 +! In: %o0:%o1 = value.  Out: %f0:%f1.  Goes through the stack slot at %sp+0x48.
  1.1138 +	.inline vis_ll_to_double,8
  1.1139 +	std     %o0,[%sp+0x48]	! spill the %o0:%o1 pair as one doubleword
  1.1140 +	ldd     [%sp+0x48],%f0	! %f0:%f1 = the same 64 bits
  1.1141 +	.end
  1.1142 +
  1.1143 +!--------------------------------------------------------------------
  1.1144 +! Address space identifier (ASI) register
  1.1145 +! Read the current contents of the address space identifier register (%asi).
  1.1146 +! unsigned int vis_read_asi(void)
  1.1147 +! Takes no arguments; the value read is left in %o0.
  1.1148 +	.inline vis_read_asi,0
  1.1149 +	rd	%asi,%o0	! %o0 = %asi
  1.1150 +	.end
  1.1151 +! Write a new value into the address space identifier register (%asi).
  1.1152 +! void vis_write_asi(unsigned int /* ASI */)
  1.1153 +! In: %o0 = value to write.  No result register is set.
  1.1154 +	.inline vis_write_asi,4
  1.1155 +	wr	%g0,%o0,%asi	! wr stores rs1 xor rs2; with %g0 as rs1 this is %asi = %o0
  1.1156 +	.end
  1.1157 +
  1.1158 +!--------------------------------------------------------------------
  1.1159 +! Load/store from/into alternate space
  1.1160 +! Load a float from address rs1 using the ASI currently held in the %asi register.
  1.1161 +! float vis_ldfa_ASI_REG(void *rs1)
  1.1162 +! In: %o0 = rs1.  Out: %f0 (copied from scratch %f4).
  1.1163 +	.inline vis_ldfa_ASI_REG,4
  1.1164 +	lda	[%o0+0]%asi,%f4	! %f4 = 32-bit load from %o0 through the ASI in %asi
  1.1165 +	fmovs	%f4,%f0	        ! %f0 = %f4; Compiler can clean this up
  1.1166 +	.end
  1.1167 +! Load a float from address rs1 through the fixed ASI 0x80 (ASI_P).
  1.1168 +! float vis_ldfa_ASI_P(void *rs1)
  1.1169 +! In: %o0 = rs1.  Out: %f0 (copied from scratch %f4).
  1.1170 +	.inline vis_ldfa_ASI_P,4
  1.1171 +	lda	[%o0]0x80,%f4	! ASI_P: %f4 = 32-bit load from %o0 via ASI 0x80
  1.1172 +	fmovs	%f4,%f0	        ! %f0 = %f4; Compiler can clean this up
  1.1173 +	.end
  1.1174 +! Load a float from address rs1 through the fixed ASI 0x88 (ASI_PL).
  1.1175 +! float vis_ldfa_ASI_PL(void *rs1)
  1.1176 +! In: %o0 = rs1.  Out: %f0 (copied from scratch %f4).
  1.1177 +	.inline vis_ldfa_ASI_PL,4
  1.1178 +	lda	[%o0]0x88,%f4	! ASI_PL: %f4 = 32-bit load from %o0 via ASI 0x88
  1.1179 +	fmovs	%f4,%f0	        ! %f0 = %f4; Compiler can clean this up
  1.1180 +	.end
  1.1181 +! Load a double from address rs1 using the ASI currently held in the %asi register.
  1.1182 +! double vis_lddfa_ASI_REG(void *rs1)
  1.1183 +! In: %o0 = rs1.  Out: %f0 (copied from scratch %f4).
  1.1184 +	.inline vis_lddfa_ASI_REG,4
  1.1185 +	ldda	[%o0+0]%asi,%f4	! %f4 = 64-bit load from %o0 through the ASI in %asi
  1.1186 +	fmovd	%f4,%f0	        ! %f0 = %f4; Compiler can clean this up
  1.1187 +	.end
  1.1188 +! Load a double from address rs1 through the fixed ASI 0x80 (ASI_P).
  1.1189 +! double vis_lddfa_ASI_P(void *rs1)
  1.1190 +! In: %o0 = rs1.  Out: %f0 (copied from scratch %f4).
  1.1191 +	.inline vis_lddfa_ASI_P,4
  1.1192 +	ldda	[%o0]0x80,%f4	! ASI_P: %f4 = 64-bit load from %o0 via ASI 0x80
  1.1193 +	fmovd	%f4,%f0	        ! %f0 = %f4; Compiler can clean this up
  1.1194 +	.end
  1.1195 +! Load a double from address rs1 through the fixed ASI 0x88 (ASI_PL).
  1.1196 +! double vis_lddfa_ASI_PL(void *rs1)
  1.1197 +! In: %o0 = rs1.  Out: %f0 (copied from scratch %f4).
  1.1198 +	.inline vis_lddfa_ASI_PL,4
  1.1199 +	ldda	[%o0]0x88,%f4	! ASI_PL: %f4 = 64-bit load from %o0 via ASI 0x88
  1.1200 +	fmovd	%f4,%f0	        ! %f0 = %f4; Compiler can clean this up
  1.1201 +	.end
  1.1202 +! Store a float to address rs1 using the ASI currently held in the %asi register.
  1.1203 +! vis_stfa_ASI_REG(float frs, void *rs1)
  1.1204 +! In: %o0 = frs, %o1 = rs1.  Scratch: %f4 and the stack slot at %sp+0x48.
  1.1205 +	.inline vis_stfa_ASI_REG,8
  1.1206 +	st	%o0,[%sp+0x48]	! spill frs from the integer register to the stack slot
  1.1207 +	ld	[%sp+0x48],%f4	! %f4 = frs
  1.1208 +	sta	%f4,[%o1+0]%asi	! 32-bit store of %f4 to %o1 through the ASI in %asi
  1.1209 +	.end
  1.1210 +! Store a float to address rs1 through the fixed ASI 0x80 (ASI_P).
  1.1211 +! vis_stfa_ASI_P(float frs, void *rs1)
  1.1212 +! In: %o0 = frs, %o1 = rs1.  Scratch: %f4 and the stack slot at %sp+0x48.
  1.1213 +	.inline vis_stfa_ASI_P,8
  1.1214 +	st	%o0,[%sp+0x48]	! spill frs from the integer register to the stack slot
  1.1215 +	ld	[%sp+0x48],%f4	! %f4 = frs
  1.1216 +	sta	%f4,[%o1]0x80	! ASI_P: 32-bit store of %f4 to %o1 via ASI 0x80
  1.1217 +	.end
  1.1218 +! Store a float to address rs1 through the fixed ASI 0x88 (ASI_PL).
  1.1219 +! vis_stfa_ASI_PL(float frs, void *rs1)
  1.1220 +! In: %o0 = frs, %o1 = rs1.  Scratch: %f4 and the stack slot at %sp+0x48.
  1.1221 +	.inline vis_stfa_ASI_PL,8
  1.1222 +	st	%o0,[%sp+0x48]	! spill frs from the integer register to the stack slot
  1.1223 +	ld	[%sp+0x48],%f4	! %f4 = frs
  1.1224 +	sta	%f4,[%o1]0x88	! ASI_PL: 32-bit store of %f4 to %o1 via ASI 0x88
  1.1225 +	.end
  1.1226 +! Store a double to address rs1 using the ASI currently held in the %asi register.
  1.1227 +! vis_stdfa_ASI_REG(double frd, void *rs1)
  1.1228 +! In: %o0:%o1 = frd, %o2 = rs1.  Scratch: %f4 and the stack slot at %sp+0x48.
  1.1229 +	.inline vis_stdfa_ASI_REG,12
  1.1230 +	std	%o0,[%sp+0x48]	! spill frd (%o0:%o1) to the stack slot
  1.1231 +	ldd	[%sp+0x48],%f4	! %f4 = frd
  1.1232 +	stda	%f4,[%o2+0]%asi	! 64-bit store of %f4 to %o2 through the ASI in %asi
  1.1233 +	.end
  1.1234 +! Store a double to address rs1 through the fixed ASI 0x80 (ASI_P).
  1.1235 +! vis_stdfa_ASI_P(double frd, void *rs1)
  1.1236 +! In: %o0:%o1 = frd, %o2 = rs1.  Scratch: %f4 and the stack slot at %sp+0x48.
  1.1237 +	.inline vis_stdfa_ASI_P,12
  1.1238 +	std	%o0,[%sp+0x48]	! spill frd (%o0:%o1) to the stack slot
  1.1239 +	ldd	[%sp+0x48],%f4	! %f4 = frd
  1.1240 +	stda	%f4,[%o2]0x80	! ASI_P: 64-bit store of %f4 to %o2 via ASI 0x80
  1.1241 +	.end
  1.1242 +! Store a double to address rs1 through the fixed ASI 0x88 (ASI_PL).
  1.1243 +! vis_stdfa_ASI_PL(double frd, void *rs1)
  1.1244 +! In: %o0:%o1 = frd, %o2 = rs1.  Scratch: %f4 and the stack slot at %sp+0x48.
  1.1245 +	.inline vis_stdfa_ASI_PL,12
  1.1246 +	std	%o0,[%sp+0x48]	! spill frd (%o0:%o1) to the stack slot
  1.1247 +	ldd	[%sp+0x48],%f4	! %f4 = frd
  1.1248 +	stda	%f4,[%o2]0x88	! ASI_PL: 64-bit store of %f4 to %o2 via ASI 0x88
  1.1249 +	.end
  1.1250 +! Load an unsigned halfword from address rs1 using the ASI currently held in %asi.
  1.1251 +! unsigned short vis_lduha_ASI_REG(void *rs1)
  1.1252 +! In: %o0 = rs1.  Out: %o0 (the address register is overwritten with the result).
  1.1253 +	.inline vis_lduha_ASI_REG,4
  1.1254 +	lduha	[%o0+0]%asi,%o0	! %o0 = unsigned halfword at %o0 through the ASI in %asi
  1.1255 +	.end
  1.1256 +! Load an unsigned halfword from address rs1 through the fixed ASI 0x80 (ASI_P).
  1.1257 +! unsigned short vis_lduha_ASI_P(void *rs1)
  1.1258 +! In: %o0 = rs1.  Out: %o0 (the address register is overwritten with the result).
  1.1259 +	.inline vis_lduha_ASI_P,4
  1.1260 +	lduha	[%o0]0x80,%o0	! ASI_P: %o0 = unsigned halfword at %o0 via ASI 0x80
  1.1261 +	.end
  1.1262 +! Load an unsigned halfword from address rs1 through the fixed ASI 0x88 (ASI_PL).
  1.1263 +! unsigned short vis_lduha_ASI_PL(void *rs1)
  1.1264 +! In: %o0 = rs1.  Out: %o0 (the address register is overwritten with the result).
  1.1265 +	.inline vis_lduha_ASI_PL,4
  1.1266 +	lduha	[%o0]0x88,%o0	! ASI_PL: %o0 = unsigned halfword at %o0 via ASI 0x88
  1.1267 +	.end
  1.1268 +! Indexed unsigned-halfword load through ASI 0x80 (ASI_P): the address is rs1 + index.
  1.1269 +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
  1.1270 +! In: %o0 = rs1, %o1 = index.  Out: %o0.
  1.1271 +	.inline vis_lduha_ASI_P_index,8
  1.1272 +	lduha	[%o0+%o1]0x80,%o0	! ASI_P: %o0 = unsigned halfword at %o0+%o1 via ASI 0x80
  1.1273 +	.end
  1.1274 +! Indexed unsigned-halfword load through ASI 0x88 (ASI_PL): the address is rs1 + index.
  1.1275 +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
  1.1276 +! In: %o0 = rs1, %o1 = index.  Out: %o0.
  1.1277 +	.inline vis_lduha_ASI_PL_index,8
  1.1278 +	lduha	[%o0+%o1]0x88,%o0	! ASI_PL: %o0 = unsigned halfword at %o0+%o1 via ASI 0x88
  1.1279 +	.end
  1.1280 +
  1.1281 +!--------------------------------------------------------------------
  1.1282 +! Prefetch
  1.1283 +! Issue a prefetch hint for the given address using prefetch function code 0.
  1.1284 +! void vis_prefetch_read(void * /*address*/);
  1.1285 +! In: %o0 = address.  No result register is set.
  1.1286 +	.inline vis_prefetch_read,4
  1.1287 +	prefetch	[%o0+0],0	! fcn 0 (SPARC V9: prefetch for several reads)
  1.1288 +	.end
  1.1289 +! Issue a prefetch hint for the given address using prefetch function code 2.
  1.1290 +! void vis_prefetch_write(void * /*address*/);
  1.1291 +! In: %o0 = address.  No result register is set.
  1.1292 +	.inline vis_prefetch_write,4
  1.1293 +	prefetch	[%o0+0],2	! fcn 2 (SPARC V9: prefetch for several writes)
  1.1294 +	.end

mercurial