1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/freebl/mpi/vis_64.il Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,997 @@ 1.4 +! 1.5 +! This Source Code Form is subject to the terms of the Mozilla Public 1.6 +! License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 +! file, You can obtain one at http://mozilla.org/MPL/2.0/. 1.8 + 1.9 +! This file is to be used in place of vis.il in 64-bit builds. 1.10 + 1.11 +!-------------------------------------------------------------------- 1.12 +! Pure edge handling instructions 1.13 +! 1.14 +! int vis_edge8(void */*frs1*/, void */*frs2*/); 1.15 +! 1.16 + .inline vis_edge8,16 1.17 + edge8 %o0,%o1,%o0 /* operands are read from %o0 and %o1; the result replaces %o0 */ 1.18 + .end 1.19 +! 1.20 +! int vis_edge8l(void */*frs1*/, void */*frs2*/); 1.21 +! 1.22 + .inline vis_edge8l,16 1.23 + edge8l %o0,%o1,%o0 1.24 + .end 1.25 +! 1.26 +! int vis_edge16(void */*frs1*/, void */*frs2*/); 1.27 +! 1.28 + .inline vis_edge16,16 1.29 + edge16 %o0,%o1,%o0 1.30 + .end 1.31 +! 1.32 +! int vis_edge16l(void */*frs1*/, void */*frs2*/); 1.33 +! 1.34 + .inline vis_edge16l,16 1.35 + edge16l %o0,%o1,%o0 1.36 + .end 1.37 +! 1.38 +! int vis_edge32(void */*frs1*/, void */*frs2*/); 1.39 +! 1.40 + .inline vis_edge32,16 1.41 + edge32 %o0,%o1,%o0 1.42 + .end 1.43 +! 1.44 +! int vis_edge32l(void */*frs1*/, void */*frs2*/); 1.45 +! 1.46 + .inline vis_edge32l,16 1.47 + edge32l %o0,%o1,%o0 1.48 + .end 1.49 + 1.50 +!-------------------------------------------------------------------- 1.51 +! Edge handling instructions with negative return values if cc set 1.52 +! 1.53 +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); 1.54 +! 1.55 + .inline vis_edge8cc,16 1.56 + edge8 %o0,%o1,%o0 1.57 + mov 0,%o1 /* %o1 = 0 by default; its argument value was already consumed above */ 1.58 + movgu %xcc,-1024,%o1 /* %o1 = -1024 when %xcc tests greater-unsigned */ 1.59 + or %o1,%o0,%o0 /* merge the flag into %o0 so the returned int is negative */ 1.60 + .end 1.61 +! 1.62 +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); 1.63 +! 1.64 + .inline vis_edge8lcc,16 1.65 + edge8l %o0,%o1,%o0 1.66 + mov 0,%o1 1.67 + movgu %xcc,-1024,%o1 /* same flagging sequence as vis_edge8cc */ 1.68 + or %o1,%o0,%o0 1.69 + .end 1.70 +! 1.71 +! 
int vis_edge16cc(void */*frs1*/, void */*frs2*/); 1.72 +! 1.73 + .inline vis_edge16cc,16 1.74 + edge16 %o0,%o1,%o0 1.75 + mov 0,%o1 /* %o1 = 0 by default; its argument value was already consumed above */ 1.76 + movgu %xcc,-1024,%o1 /* %o1 = -1024 when %xcc tests greater-unsigned */ 1.77 + or %o1,%o0,%o0 /* merge the flag into %o0 so the returned int is negative */ 1.78 + .end 1.79 +! 1.80 +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); 1.81 +! 1.82 + .inline vis_edge16lcc,16 1.83 + edge16l %o0,%o1,%o0 1.84 + mov 0,%o1 1.85 + movgu %xcc,-1024,%o1 /* same flagging sequence as vis_edge16cc */ 1.86 + or %o1,%o0,%o0 1.87 + .end 1.88 +! 1.89 +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); 1.90 +! 1.91 + .inline vis_edge32cc,16 1.92 + edge32 %o0,%o1,%o0 1.93 + mov 0,%o1 1.94 + movgu %xcc,-1024,%o1 /* same flagging sequence as vis_edge16cc */ 1.95 + or %o1,%o0,%o0 1.96 + .end 1.97 +! 1.98 +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); 1.99 +! 1.100 + .inline vis_edge32lcc,16 1.101 + edge32l %o0,%o1,%o0 1.102 + mov 0,%o1 1.103 + movgu %xcc,-1024,%o1 /* same flagging sequence as vis_edge16cc */ 1.104 + or %o1,%o0,%o0 1.105 + .end 1.106 + 1.107 +!-------------------------------------------------------------------- 1.108 +! Alignment instructions 1.109 +! 1.110 +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); 1.111 +! 1.112 + .inline vis_alignaddr,12 1.113 + alignaddr %o0,%o1,%o0 /* rs1 in %o0, rs2 in %o1; the returned pointer replaces %o0 */ 1.114 + .end 1.115 +! 1.116 +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); 1.117 +! 1.118 + .inline vis_alignaddrl,12 1.119 + alignaddrl %o0,%o1,%o0 1.120 + .end 1.121 +! 1.122 +! double vis_faligndata(double /*frs1*/, double /*frs2*/); 1.123 +! 1.124 + .inline vis_faligndata,16 1.125 + faligndata %f0,%f2,%f0 /* double args are read from %f0 and %f2; result returned in %f0 */ 1.126 + .end 1.127 + 1.128 +!-------------------------------------------------------------------- 1.129 +! Partitioned comparison instructions 1.130 +! 1.131 +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); 1.132 +! 1.133 + .inline vis_fcmple16,16 1.134 + fcmple16 %f0,%f2,%o0 /* FP operands in, but the int result is written to integer register %o0 */ 1.135 + .end 1.136 +! 1.137 +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); 1.138 +! 1.139 + .inline vis_fcmpne16,16 1.140 + fcmpne16 %f0,%f2,%o0 1.141 + .end 1.142 +! 1.143 +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); 1.144 +! 
1.145 + .inline vis_fcmple32,16 1.146 + fcmple32 %f0,%f2,%o0 1.147 + .end 1.148 +! 1.149 +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); 1.150 +! 1.151 + .inline vis_fcmpne32,16 1.152 + fcmpne32 %f0,%f2,%o0 /* FP operands in, but the int result is written to integer register %o0 */ 1.153 + .end 1.154 +! 1.155 +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); 1.156 +! 1.157 + .inline vis_fcmpgt16,16 1.158 + fcmpgt16 %f0,%f2,%o0 1.159 + .end 1.160 +! 1.161 +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); 1.162 +! 1.163 + .inline vis_fcmpeq16,16 1.164 + fcmpeq16 %f0,%f2,%o0 1.165 + .end 1.166 +! 1.167 +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); 1.168 +! 1.169 + .inline vis_fcmpgt32,16 1.170 + fcmpgt32 %f0,%f2,%o0 1.171 + .end 1.172 +! 1.173 +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); 1.174 +! 1.175 + .inline vis_fcmpeq32,16 1.176 + fcmpeq32 %f0,%f2,%o0 1.177 + .end 1.178 + 1.179 +!-------------------------------------------------------------------- 1.180 +! Partitioned arithmetic 1.181 +! 1.182 +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); 1.183 +! 1.184 + .inline vis_fmul8x16,12 1.185 + fmul8x16 %f1,%f2,%f0 /* float frs1 is read from %f1, double frs2 from %f2 */ 1.186 + .end 1.187 +! 1.188 +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); 1.189 +! 1.190 + .inline vis_fmul8x16_dummy,16 1.191 + fmul8x16 %f1,%f4,%f0 /* the int dummy is the second argument, so the double is read from %f4 here */ 1.192 + .end 1.193 +! 1.194 +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); 1.195 +! 1.196 + .inline vis_fmul8x16au,8 1.197 + fmul8x16au %f1,%f3,%f0 /* both float args are read from odd registers %f1 and %f3 */ 1.198 + .end 1.199 +! 1.200 +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); 1.201 +! 1.202 + .inline vis_fmul8x16al,8 1.203 + fmul8x16al %f1,%f3,%f0 1.204 + .end 1.205 +! 1.206 +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); 1.207 +! 1.208 + .inline vis_fmul8sux16,16 1.209 + fmul8sux16 %f0,%f2,%f0 1.210 + .end 1.211 +! 1.212 +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); 1.213 +! 1.214 + .inline vis_fmul8ulx16,16 1.215 + fmul8ulx16 %f0,%f2,%f0 1.216 + .end 1.217 +! 1.218 +! 
double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); 1.219 +! 1.220 + .inline vis_fmuld8sux16,8 1.221 + fmuld8sux16 %f1,%f3,%f0 /* float args are read from %f1 and %f3; double result returned in %f0 */ 1.222 + .end 1.223 +! 1.224 +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); 1.225 +! 1.226 + .inline vis_fmuld8ulx16,8 1.227 + fmuld8ulx16 %f1,%f3,%f0 1.228 + .end 1.229 +! 1.230 +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); 1.231 +! 1.232 + .inline vis_fpadd16,16 1.233 + fpadd16 %f0,%f2,%f0 /* double args are read from %f0 and %f2 */ 1.234 + .end 1.235 +! 1.236 +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); 1.237 +! 1.238 + .inline vis_fpadd16s,8 1.239 + fpadd16s %f1,%f3,%f0 /* "s" variant: float args are read from %f1 and %f3 */ 1.240 + .end 1.241 +! 1.242 +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); 1.243 +! 1.244 + .inline vis_fpadd32,16 1.245 + fpadd32 %f0,%f2,%f0 1.246 + .end 1.247 +! 1.248 +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); 1.249 +! 1.250 + .inline vis_fpadd32s,8 1.251 + fpadd32s %f1,%f3,%f0 1.252 + .end 1.253 +! 1.254 +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); 1.255 +! 1.256 + .inline vis_fpsub16,16 1.257 + fpsub16 %f0,%f2,%f0 1.258 + .end 1.259 +! 1.260 +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/); 1.261 +! 1.262 + .inline vis_fpsub16s,8 1.263 + fpsub16s %f1,%f3,%f0 1.264 + .end 1.265 +! 1.266 +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); 1.267 +! 1.268 + .inline vis_fpsub32,16 1.269 + fpsub32 %f0,%f2,%f0 1.270 + .end 1.271 +! 1.272 +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); 1.273 +! 1.274 + .inline vis_fpsub32s,8 1.275 + fpsub32s %f1,%f3,%f0 1.276 + .end 1.277 + 1.278 +!-------------------------------------------------------------------- 1.279 +! Pixel packing 1.280 +! 1.281 +! float vis_fpack16(double /*frs2*/); 1.282 +! 1.283 + .inline vis_fpack16,8 1.284 + fpack16 %f0,%f0 1.285 + .end 1.286 +! 1.287 +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); 1.288 +! 1.289 + .inline vis_fpack16_pair,16 1.290 + fpack16 %f0,%f0 /* first argument packed into %f0 */ 1.291 + fpack16 %f2,%f1 /* second argument packed into %f1, completing the %f0:%f1 result pair */ 1.292 + .end 1.293 +! 1.294 +! 
void vis_st2_fpack16(double, double, double *) 1.295 +! 1.296 + .inline vis_st2_fpack16,24 1.297 + fpack16 %f0,%f0 1.298 + fpack16 %f2,%f1 1.299 + st %f0,[%o2+0] /* first packed word stored at offset 0 of the pointer in %o2 */ 1.300 + st %f1,[%o2+4] /* second packed word stored at offset 4 */ 1.301 + .end 1.302 +! 1.303 +! void vis_std_fpack16(double, double, double *) 1.304 +! 1.305 + .inline vis_std_fpack16,24 1.306 + fpack16 %f0,%f0 1.307 + fpack16 %f2,%f1 1.308 + std %f0,[%o2] /* unlike vis_st2_fpack16, both words go out in a single std */ 1.309 + .end 1.310 +! 1.311 +! void vis_st2_fpackfix(double, double, double *) 1.312 +! 1.313 + .inline vis_st2_fpackfix,24 1.314 + fpackfix %f0,%f0 1.315 + fpackfix %f2,%f1 1.316 + st %f0,[%o2+0] /* first packed word stored at offset 0 of the pointer in %o2 */ 1.317 + st %f1,[%o2+4] /* second packed word stored at offset 4 */ 1.318 + .end 1.319 +! 1.320 +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); 1.321 +! 1.322 + .inline vis_fpack16_to_hi,16 1.323 + fpack16 %f2,%f0 /* packs frs2 (%f2) into %f0 only; %f1 of frs1 is not written */ 1.324 + .end 1.325 + 1.326 +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); 1.327 +! 1.328 + .inline vis_fpack16_to_lo,16 1.329 + fpack16 %f2,%f3 /* pack frs2 into %f3 first, then copy into %f1 below */ 1.330 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ 1.331 + .end 1.332 + 1.333 +! 1.334 +! double vis_fpack32(double /*frs1*/, double /*frs2*/); 1.335 +! 1.336 + .inline vis_fpack32,16 1.337 + fpack32 %f0,%f2,%f0 1.338 + .end 1.339 +! 1.340 +! float vis_fpackfix(double /*frs2*/); 1.341 +! 1.342 + .inline vis_fpackfix,8 1.343 + fpackfix %f0,%f0 1.344 + .end 1.345 +! 1.346 +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); 1.347 +! 1.348 + .inline vis_fpackfix_pair,16 1.349 + fpackfix %f0,%f0 /* first argument packed into %f0 */ 1.350 + fpackfix %f2,%f1 /* second argument packed into %f1, completing the %f0:%f1 result pair */ 1.351 + .end 1.352 + 1.353 +!-------------------------------------------------------------------- 1.354 +! Motion estimation 1.355 +! 1.356 +! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/, 1.357 +! double pxls2 /*frs2*/); 1.358 +! 1.359 + .inline vis_pxldist64,24 1.360 + pdist %f2,%f4,%f0 /* accum is the first argument (%f0), which is also the destination register */ 1.361 + .end 1.362 + 1.363 +!-------------------------------------------------------------------- 1.364 +! Channel merging 1.365 +! 1.366 +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); 1.367 +! 
1.368 + .inline vis_fpmerge,8 1.369 + fpmerge %f1,%f3,%f0 1.370 + .end 1.371 + 1.372 +!-------------------------------------------------------------------- 1.373 +! Pixel expansion 1.374 +! 1.375 +! double vis_fexpand(float /*frs2*/); 1.376 +! 1.377 + .inline vis_fexpand,4 1.378 + fexpand %f1,%f0 /* float argument is read from %f1; double result returned in %f0 */ 1.379 + .end 1.380 + 1.381 +! double vis_fexpand_hi(double /*frs2*/); 1.382 +! 1.383 + .inline vis_fexpand_hi,8 1.384 + fexpand %f0,%f0 /* source is %f0, the first register of the double argument */ 1.385 + .end 1.386 + 1.387 +! double vis_fexpand_lo(double /*frs2*/); 1.388 +! 1.389 + .inline vis_fexpand_lo,8 1.390 + fexpand %f1,%f0 /* source is %f1, the second register of the double argument */ 1.391 + .end 1.392 + 1.393 +!-------------------------------------------------------------------- 1.394 +! Bitwise logical operations 1.395 +! 1.396 +! double vis_fnor(double /*frs1*/, double /*frs2*/); 1.397 +! 1.398 + .inline vis_fnor,16 1.399 + fnor %f0,%f2,%f0 /* double form: args in %f0 and %f2 */ 1.400 + .end 1.401 +! 1.402 +! float vis_fnors(float /*frs1*/, float /*frs2*/); 1.403 +! 1.404 + .inline vis_fnors,8 1.405 + fnors %f1,%f3,%f0 /* "s" form: float args in %f1 and %f3 */ 1.406 + .end 1.407 +! 1.408 +! double vis_fandnot(double /*frs1*/, double /*frs2*/); 1.409 +! 1.410 + .inline vis_fandnot,16 1.411 + fandnot1 %f0,%f2,%f0 /* vis_fandnot is implemented with the fandnot1 form of the instruction */ 1.412 + .end 1.413 +! 1.414 +! float vis_fandnots(float /*frs1*/, float /*frs2*/); 1.415 +! 1.416 + .inline vis_fandnots,8 1.417 + fandnot1s %f1,%f3,%f0 1.418 + .end 1.419 +! 1.420 +! double vis_fnot(double /*frs1*/); 1.421 +! 1.422 + .inline vis_fnot,8 1.423 + fnot1 %f0,%f0 /* vis_fnot is implemented with the fnot1 form of the instruction */ 1.424 + .end 1.425 +! 1.426 +! float vis_fnots(float /*frs1*/); 1.427 +! 1.428 + .inline vis_fnots,4 1.429 + fnot1s %f1,%f0 1.430 + .end 1.431 +! 1.432 +! double vis_fxor(double /*frs1*/, double /*frs2*/); 1.433 +! 1.434 + .inline vis_fxor,16 1.435 + fxor %f0,%f2,%f0 1.436 + .end 1.437 +! 1.438 +! float vis_fxors(float /*frs1*/, float /*frs2*/); 1.439 +! 1.440 + .inline vis_fxors,8 1.441 + fxors %f1,%f3,%f0 1.442 + .end 1.443 +! 1.444 +! double vis_fnand(double /*frs1*/, double /*frs2*/); 1.445 +! 1.446 + .inline vis_fnand,16 1.447 + fnand %f0,%f2,%f0 1.448 + .end 1.449 +! 1.450 +! 
float vis_fnands(float /*frs1*/, float /*frs2*/); 1.451 +! 1.452 + .inline vis_fnands,8 1.453 + fnands %f1,%f3,%f0 /* "s" form: float args in %f1 and %f3 */ 1.454 + .end 1.455 +! 1.456 +! double vis_fand(double /*frs1*/, double /*frs2*/); 1.457 +! 1.458 + .inline vis_fand,16 1.459 + fand %f0,%f2,%f0 /* double form: args in %f0 and %f2 */ 1.460 + .end 1.461 +! 1.462 +! float vis_fands(float /*frs1*/, float /*frs2*/); 1.463 +! 1.464 + .inline vis_fands,8 1.465 + fands %f1,%f3,%f0 1.466 + .end 1.467 +! 1.468 +! double vis_fxnor(double /*frs1*/, double /*frs2*/); 1.469 +! 1.470 + .inline vis_fxnor,16 1.471 + fxnor %f0,%f2,%f0 1.472 + .end 1.473 +! 1.474 +! float vis_fxnors(float /*frs1*/, float /*frs2*/); 1.475 +! 1.476 + .inline vis_fxnors,8 1.477 + fxnors %f1,%f3,%f0 1.478 + .end 1.479 +! 1.480 +! double vis_fsrc(double /*frs1*/); 1.481 +! 1.482 + .inline vis_fsrc,8 1.483 + fsrc1 %f0,%f0 /* source and destination are both %f0 */ 1.484 + .end 1.485 +! 1.486 +! float vis_fsrcs(float /*frs1*/); 1.487 +! 1.488 + .inline vis_fsrcs,4 1.489 + fsrc1s %f1,%f0 /* copies the float argument from %f1 into return register %f0 */ 1.490 + .end 1.491 +! 1.492 +! double vis_fornot(double /*frs1*/, double /*frs2*/); 1.493 +! 1.494 + .inline vis_fornot,16 1.495 + fornot1 %f0,%f2,%f0 /* vis_fornot is implemented with the fornot1 form of the instruction */ 1.496 + .end 1.497 +! 1.498 +! float vis_fornots(float /*frs1*/, float /*frs2*/); 1.499 +! 1.500 + .inline vis_fornots,8 1.501 + fornot1s %f1,%f3,%f0 1.502 + .end 1.503 +! 1.504 +! double vis_for(double /*frs1*/, double /*frs2*/); 1.505 +! 1.506 + .inline vis_for,16 1.507 + for %f0,%f2,%f0 1.508 + .end 1.509 +! 1.510 +! float vis_fors(float /*frs1*/, float /*frs2*/); 1.511 +! 1.512 + .inline vis_fors,8 1.513 + fors %f1,%f3,%f0 1.514 + .end 1.515 +! 1.516 +! double vis_fzero(/* void */) 1.517 +! 1.518 + .inline vis_fzero,0 1.519 + fzero %f0 /* takes no arguments; the result is produced directly in %f0 */ 1.520 + .end 1.521 +! 1.522 +! float vis_fzeros(/* void */) 1.523 +! 1.524 + .inline vis_fzeros,0 1.525 + fzeros %f0 1.526 + .end 1.527 +! 1.528 +! double vis_fone(/* void */) 1.529 +! 1.530 + .inline vis_fone,0 1.531 + fone %f0 /* takes no arguments; the result is produced directly in %f0 */ 1.532 + .end 1.533 +! 1.534 +! float vis_fones(/* void */) 1.535 +! 
1.536 + .inline vis_fones,0 1.537 + fones %f0 1.538 + .end 1.539 + 1.540 +!-------------------------------------------------------------------- 1.541 +! Partial store instructions 1.542 +! 1.543 +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) 1.544 +! 1.545 + .inline vis_stdfa_ASI_PST8P,20 1.546 + stda %f0,[%o1]%o2,0xc0 /* data frd in %f0, address rs1 in %o1, rmask in %o2 */ ! ASI_PST8_P 1.547 + .end 1.548 +! 1.549 +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) 1.550 +! 1.551 + .inline vis_stdfa_ASI_PST8PL,20 1.552 + stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL 1.553 + .end 1.554 +! 1.555 +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); 1.556 +! 1.557 + .inline vis_stdfa_ASI_PST8P_int_pair,28 1.558 + ld [%o0],%f4 /* word at rs1 loaded into %f4 */ 1.559 + ld [%o1],%f5 /* word at rs2 loaded into %f5 */ 1.560 + stda %f4,[%o2]%o3,0xc0 /* store from %f4 to address rs3 (%o2) with rmask in %o3 */ ! ASI_PST8_P 1.561 + .end 1.562 +! 1.563 +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) 1.564 +! 1.565 + .inline vis_stdfa_ASI_PST8S,20 1.566 + stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S 1.567 + .end 1.568 +! 1.569 +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) 1.570 +! 1.571 + .inline vis_stdfa_ASI_PST16P,20 1.572 + stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P 1.573 + .end 1.574 +! 1.575 +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) 1.576 +! 1.577 + .inline vis_stdfa_ASI_PST16S,20 1.578 + stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S 1.579 + .end 1.580 +! 1.581 +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) 1.582 +! 1.583 + .inline vis_stdfa_ASI_PST32P,20 1.584 + stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P 1.585 + .end 1.586 +! 1.587 +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) 1.588 +! 1.589 + .inline vis_stdfa_ASI_PST32S,20 1.590 + stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S 1.591 + .end 1.592 + 1.593 +!-------------------------------------------------------------------- 1.594 +! Short store instructions 1.595 +! 1.596 +! vis_stdfa_ASI_FL8P(double frd, void *rs1) 1.597 +! 1.598 + .inline vis_stdfa_ASI_FL8P,16 1.599 + stda %f0,[%o1]0xd0 /* data frd in %f0, address rs1 in %o1; ASI given as an immediate */ ! ASI_FL8_P 1.600 + .end 1.601 +! 1.602 +! 
vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) 1.603 +! 1.604 + .inline vis_stdfa_ASI_FL8P_index,24 1.605 + stda %f0,[%o1+%o2]0xd0 /* effective address is rs1 (%o1) plus index (%o2) */ ! ASI_FL8_P 1.606 + .end 1.607 +! 1.608 +! vis_stdfa_ASI_FL8S(double frd, void *rs1) 1.609 +! 1.610 + .inline vis_stdfa_ASI_FL8S,16 1.611 + stda %f0,[%o1]0xd1 ! ASI_FL8_S 1.612 + .end 1.613 +! 1.614 +! vis_stdfa_ASI_FL16P(double frd, void *rs1) 1.615 +! 1.616 + .inline vis_stdfa_ASI_FL16P,16 1.617 + stda %f0,[%o1]0xd2 ! ASI_FL16_P 1.618 + .end 1.619 +! 1.620 +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) 1.621 +! 1.622 + .inline vis_stdfa_ASI_FL16P_index,24 1.623 + stda %f0,[%o1+%o2]0xd2 /* effective address is rs1 (%o1) plus index (%o2) */ ! ASI_FL16_P 1.624 + .end 1.625 +! 1.626 +! vis_stdfa_ASI_FL16S(double frd, void *rs1) 1.627 +! 1.628 + .inline vis_stdfa_ASI_FL16S,16 1.629 + stda %f0,[%o1]0xd3 ! ASI_FL16_S 1.630 + .end 1.631 +! 1.632 +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) 1.633 +! 1.634 + .inline vis_stdfa_ASI_FL8PL,16 1.635 + stda %f0,[%o1]0xd8 ! ASI_FL8_PL 1.636 + .end 1.637 +! 1.638 +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) 1.639 +! 1.640 + .inline vis_stdfa_ASI_FL8SL,16 1.641 + stda %f0,[%o1]0xd9 ! ASI_FL8_SL 1.642 + .end 1.643 +! 1.644 +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) 1.645 +! 1.646 + .inline vis_stdfa_ASI_FL16PL,16 1.647 + stda %f0,[%o1]0xda ! ASI_FL16_PL 1.648 + .end 1.649 +! 1.650 +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) 1.651 +! 1.652 + .inline vis_stdfa_ASI_FL16SL,16 1.653 + stda %f0,[%o1]0xdb ! ASI_FL16_SL 1.654 + .end 1.655 + 1.656 +!-------------------------------------------------------------------- 1.657 +! Short load instructions 1.658 +! 1.659 +! double vis_lddfa_ASI_FL8P(void *rs1) 1.660 +! 1.661 + .inline vis_lddfa_ASI_FL8P,8 1.662 + ldda [%o0]0xd0,%f4 /* loads into %f4 rather than directly into the return register */ ! ASI_FL8_P 1.663 + fmovd %f4,%f0 /* copy the loaded value to return register %f0 */ ! Compiler can clean this up 1.664 + .end 1.665 +! 1.666 +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index) 1.667 +! 
1.668 + .inline vis_lddfa_ASI_FL8P_index,16 1.669 + ldda [%o0+%o1]0xd0,%f4 1.670 + fmovd %f4,%f0 1.671 + .end 1.672 +! 1.673 +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) 1.674 +! 1.675 + .inline vis_lddfa_ASI_FL8P_hi,12 1.676 + sra %o1,16,%o1 /* offset = index shifted right arithmetically by 16, i.e. its upper halfword */ 1.677 + ldda [%o0+%o1]0xd0,%f4 /* 0xd0 is labelled ASI_FL8_P elsewhere in this file */ 1.678 + fmovd %f4,%f0 1.679 + .end 1.680 +! 1.681 +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) 1.682 +! 1.683 + .inline vis_lddfa_ASI_FL8P_lo,12 1.684 + sll %o1,16,%o1 /* shift left 16, then ... */ 1.685 + sra %o1,16,%o1 /* ... arithmetic shift right 16: offset = lower halfword of index, sign-extended */ 1.686 + ldda [%o0+%o1]0xd0,%f4 /* 0xd0 is labelled ASI_FL8_P elsewhere in this file */ 1.687 + fmovd %f4,%f0 1.688 + .end 1.689 +! 1.690 +! double vis_lddfa_ASI_FL8S(void *rs1) 1.691 +! 1.692 + .inline vis_lddfa_ASI_FL8S,8 1.693 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S 1.694 + fmovd %f4,%f0 /* copy the loaded value from %f4 to return register %f0 */ 1.695 + .end 1.696 +! 1.697 +! double vis_lddfa_ASI_FL16P(void *rs1) 1.698 +! 1.699 + .inline vis_lddfa_ASI_FL16P,8 1.700 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P 1.701 + fmovd %f4,%f0 1.702 + .end 1.703 +! 1.704 +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) 1.705 +! 1.706 + .inline vis_lddfa_ASI_FL16P_index,16 1.707 + ldda [%o0+%o1]0xd2,%f4 /* effective address is rs1 (%o0) plus index (%o1) */ ! ASI_FL16_P 1.708 + fmovd %f4,%f0 1.709 + .end 1.710 +! 1.711 +! double vis_lddfa_ASI_FL16S(void *rs1) 1.712 +! 1.713 + .inline vis_lddfa_ASI_FL16S,8 1.714 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S 1.715 + fmovd %f4,%f0 1.716 + .end 1.717 +! 1.718 +! double vis_lddfa_ASI_FL8PL(void *rs1) 1.719 +! 1.720 + .inline vis_lddfa_ASI_FL8PL,8 1.721 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL 1.722 + fmovd %f4,%f0 1.723 + .end 1.724 +! 1.725 +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) 1.726 +! 1.727 + .inline vis_lddfa_ASI_FL8PL_index,16 1.728 + ldda [%o0+%o1]0xd8,%f4 /* effective address is rs1 (%o0) plus index (%o1) */ ! ASI_FL8_PL 1.729 + fmovd %f4,%f0 1.730 + .end 1.731 +! 1.732 +! double vis_lddfa_ASI_FL8SL(void *rs1) 1.733 +! 1.734 + .inline vis_lddfa_ASI_FL8SL,8 1.735 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL 1.736 + fmovd %f4,%f0 1.737 + .end 1.738 +! 1.739 +! double vis_lddfa_ASI_FL16PL(void *rs1) 1.740 +! 
1.741 + .inline vis_lddfa_ASI_FL16PL,8 1.742 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL 1.743 + fmovd %f4,%f0 /* copy the loaded value from %f4 to return register %f0 */ 1.744 + .end 1.745 +! 1.746 +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) 1.747 +! 1.748 + .inline vis_lddfa_ASI_FL16PL_index,16 1.749 + ldda [%o0+%o1]0xda,%f4 /* effective address is rs1 (%o0) plus index (%o1) */ ! ASI_FL16_PL 1.750 + fmovd %f4,%f0 1.751 + .end 1.752 +! 1.753 +! double vis_lddfa_ASI_FL16SL(void *rs1) 1.754 +! 1.755 + .inline vis_lddfa_ASI_FL16SL,8 1.756 + ldda [%o0]0xdb,%f4 ! ASI_FL16_SL 1.757 + fmovd %f4,%f0 1.758 + .end 1.759 + 1.760 +!-------------------------------------------------------------------- 1.761 +! Graphics status register 1.762 +! 1.763 +! unsigned int vis_read_gsr(void) 1.764 +! 1.765 + .inline vis_read_gsr,0 1.766 + rd %gsr,%o0 /* GSR contents are returned in %o0 */ 1.767 + .end 1.768 +! 1.769 +! void vis_write_gsr(unsigned int /* GSR */) 1.770 +! 1.771 + .inline vis_write_gsr,4 1.772 + wr %g0,%o0,%gsr /* %g0 as the first operand: GSR is set from the argument in %o0 */ 1.773 + .end 1.774 + 1.775 +!-------------------------------------------------------------------- 1.776 +! Voxel texture mapping 1.777 +! 1.778 +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) 1.779 +! 1.780 + .inline vis_array8,12 1.781 + array8 %o0,%o1,%o0 /* rs1 in %o0, rs2 in %o1; the result replaces %o0 */ 1.782 + .end 1.783 +! 1.784 +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) 1.785 +! 1.786 + .inline vis_array16,12 1.787 + array16 %o0,%o1,%o0 1.788 + .end 1.789 +! 1.790 +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) 1.791 +! 1.792 + .inline vis_array32,12 1.793 + array32 %o0,%o1,%o0 1.794 + .end 1.795 + 1.796 +!-------------------------------------------------------------------- 1.797 +! Register aliasing and type casts 1.798 +! 1.799 +! float vis_read_hi(double /* frs1 */); 1.800 +! 1.801 + .inline vis_read_hi,8 1.802 + fmovs %f0,%f0 /* self-move: the value to return is already in %f0 */ 1.803 + .end 1.804 +! 1.805 +! float vis_read_lo(double /* frs1 */); 1.806 +! 1.807 + .inline vis_read_lo,8 1.808 + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; 1.809 + .end 1.810 +! 1.811 +! 
double vis_write_hi(double /* frs1 */, float /* frs2 */); 1.812 +! 1.813 + .inline vis_write_hi,12 1.814 + fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1; 1.815 + .end 1.816 +! 1.817 +! double vis_write_lo(double /* frs1 */, float /* frs2 */); 1.818 +! 1.819 + .inline vis_write_lo,12 1.820 + fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1; 1.821 + .end 1.822 +! 1.823 +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); 1.824 +! 1.825 + .inline vis_freg_pair,8 1.826 + fmovs %f1,%f0 ! %f1 = float frs1; put in hi; 1.827 + fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1; 1.828 + .end 1.829 +! 1.830 +! float vis_to_float(unsigned int /*value*/); 1.831 +! 1.832 + .inline vis_to_float,4 1.833 + st %o0,[%sp+2183] /* write the integer to a stack slot; NOTE(review): 2183 is presumably the 64-bit stack bias 2047 + 136 - confirm */ 1.834 + ld [%sp+2183],%f0 /* reload the same word into FP register %f0 */ 1.835 + .end 1.836 +! 1.837 +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); 1.838 +! 1.839 + .inline vis_to_double,8 1.840 + st %o0,[%sp+2183] /* value1 goes through the stack slot into %f0 */ 1.841 + ld [%sp+2183],%f0 1.842 + st %o1,[%sp+2183] /* the same stack slot is reused to move value2 into %f1 */ 1.843 + ld [%sp+2183],%f1 1.844 + .end 1.845 +! 1.846 +! double vis_to_double_dup(unsigned int /*value*/); 1.847 +! 1.848 + .inline vis_to_double_dup,4 1.849 + st %o0,[%sp+2183] 1.850 + ld [%sp+2183],%f1 /* value is loaded into %f1 first, then copied to %f0 below */ 1.851 + fmovs %f1,%f0 ! duplicate value 1.852 + .end 1.853 +! 1.854 +! double vis_ll_to_double(unsigned long long /*value*/); 1.855 +! 1.856 + .inline vis_ll_to_double,8 1.857 + stx %o0,[%sp+2183] /* store the whole 64-bit value to the stack slot */ 1.858 + ldd [%sp+2183],%f0 /* reload it as a double into %f0 */ 1.859 + .end 1.860 + 1.861 +!-------------------------------------------------------------------- 1.862 +! Address space identifier (ASI) register 1.863 +! 1.864 +! unsigned int vis_read_asi(void) 1.865 +! 1.866 + .inline vis_read_asi,0 1.867 + rd %asi,%o0 /* ASI register contents are returned in %o0 */ 1.868 + .end 1.869 +! 1.870 +! void vis_write_asi(unsigned int /* ASI */) 1.871 +! 1.872 + .inline vis_write_asi,4 1.873 + wr %g0,%o0,%asi /* %g0 as the first operand: the ASI register is set from the argument in %o0 */ 1.874 + .end 1.875 + 1.876 +!-------------------------------------------------------------------- 1.877 +! Load/store from/into alternate space 1.878 +! 1.879 +! 
float vis_ldfa_ASI_REG(void *rs1) 1.880 +! 1.881 + .inline vis_ldfa_ASI_REG,8 1.882 + lda [%o0+0]%asi,%f4 /* _REG variants take the ASI from the %asi register instead of an immediate */ 1.883 + fmovs %f4,%f0 ! Compiler can clean this up 1.884 + .end 1.885 +! 1.886 +! float vis_ldfa_ASI_P(void *rs1) 1.887 +! 1.888 + .inline vis_ldfa_ASI_P,8 1.889 + lda [%o0]0x80,%f4 ! ASI_P 1.890 + fmovs %f4,%f0 ! Compiler can clean this up 1.891 + .end 1.892 +! 1.893 +! float vis_ldfa_ASI_PL(void *rs1) 1.894 +! 1.895 + .inline vis_ldfa_ASI_PL,8 1.896 + lda [%o0]0x88,%f4 ! ASI_PL 1.897 + fmovs %f4,%f0 ! Compiler can clean this up 1.898 + .end 1.899 +! 1.900 +! double vis_lddfa_ASI_REG(void *rs1) 1.901 +! 1.902 + .inline vis_lddfa_ASI_REG,8 1.903 + ldda [%o0+0]%asi,%f4 /* ASI taken from the %asi register; value lands in %f4 first */ 1.904 + fmovd %f4,%f0 ! Compiler can clean this up 1.905 + .end 1.906 +! 1.907 +! double vis_lddfa_ASI_P(void *rs1) 1.908 +! 1.909 + .inline vis_lddfa_ASI_P,8 1.910 + ldda [%o0]0x80,%f4 ! ASI_P 1.911 + fmovd %f4,%f0 ! Compiler can clean this up 1.912 + .end 1.913 +! 1.914 +! double vis_lddfa_ASI_PL(void *rs1) 1.915 +! 1.916 + .inline vis_lddfa_ASI_PL,8 1.917 + ldda [%o0]0x88,%f4 ! ASI_PL 1.918 + fmovd %f4,%f0 ! Compiler can clean this up 1.919 + .end 1.920 +! 1.921 +! vis_stfa_ASI_REG(float frs, void *rs1) 1.922 +! 1.923 + .inline vis_stfa_ASI_REG,12 1.924 + sta %f1,[%o1+0]%asi /* float frs is read from %f1; address rs1 is the second argument, in %o1 */ 1.925 + .end 1.926 +! 1.927 +! vis_stfa_ASI_P(float frs, void *rs1) 1.928 +! 1.929 + .inline vis_stfa_ASI_P,12 1.930 + sta %f1,[%o1]0x80 ! ASI_P 1.931 + .end 1.932 +! 1.933 +! vis_stfa_ASI_PL(float frs, void *rs1) 1.934 +! 1.935 + .inline vis_stfa_ASI_PL,12 1.936 + sta %f1,[%o1]0x88 ! ASI_PL 1.937 + .end 1.938 +! 1.939 +! vis_stdfa_ASI_REG(double frd, void *rs1) 1.940 +! 1.941 + .inline vis_stdfa_ASI_REG,16 1.942 + stda %f0,[%o1+0]%asi /* double frd is read from %f0; address rs1 is the second argument, in %o1 */ 1.943 + .end 1.944 +! 1.945 +! vis_stdfa_ASI_P(double frd, void *rs1) 1.946 +! 1.947 + .inline vis_stdfa_ASI_P,16 1.948 + stda %f0,[%o1]0x80 ! ASI_P 1.949 + .end 1.950 +! 1.951 +! vis_stdfa_ASI_PL(double frd, void *rs1) 1.952 +! 1.953 + .inline vis_stdfa_ASI_PL,16 1.954 + stda %f0,[%o1]0x88 ! 
ASI_PL 1.955 + .end 1.956 +! 1.957 +! unsigned short vis_lduha_ASI_REG(void *rs1) 1.958 +! 1.959 + .inline vis_lduha_ASI_REG,8 1.960 + lduha [%o0+0]%asi,%o0 /* ASI taken from the %asi register; the loaded value replaces the address in %o0 */ 1.961 + .end 1.962 +! 1.963 +! unsigned short vis_lduha_ASI_P(void *rs1) 1.964 +! 1.965 + .inline vis_lduha_ASI_P,8 1.966 + lduha [%o0]0x80,%o0 ! ASI_P 1.967 + .end 1.968 +! 1.969 +! unsigned short vis_lduha_ASI_PL(void *rs1) 1.970 +! 1.971 + .inline vis_lduha_ASI_PL,8 1.972 + lduha [%o0]0x88,%o0 ! ASI_PL 1.973 + .end 1.974 +! 1.975 +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) 1.976 +! 1.977 + .inline vis_lduha_ASI_P_index,16 1.978 + lduha [%o0+%o1]0x80,%o0 /* effective address is rs1 (%o0) plus index (%o1) */ ! ASI_P 1.979 + .end 1.980 +! 1.981 +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) 1.982 +! 1.983 + .inline vis_lduha_ASI_PL_index,16 1.984 + lduha [%o0+%o1]0x88,%o0 /* effective address is rs1 (%o0) plus index (%o1) */ ! ASI_PL 1.985 + .end 1.986 + 1.987 +!-------------------------------------------------------------------- 1.988 +! Prefetch 1.989 +! 1.990 +! void vis_prefetch_read(void * /*address*/); 1.991 +! 1.992 + .inline vis_prefetch_read,8 1.993 + prefetch [%o0+0],0 /* NOTE(review): second operand 0 is presumably the prefetch-for-reads function code - confirm */ 1.994 + .end 1.995 +! 1.996 +! void vis_prefetch_write(void * /*address*/); 1.997 +! 1.998 + .inline vis_prefetch_write,8 1.999 + prefetch [%o0+0],2 /* NOTE(review): second operand 2 is presumably the prefetch-for-writes function code - confirm */ 1.1000 + .end