security/nss/lib/freebl/mpi/vis_32.il

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:9d6d749bf7f7
1 !
2 ! This Source Code Form is subject to the terms of the Mozilla Public
3 ! License, v. 2.0. If a copy of the MPL was not distributed with this
4 ! file, You can obtain one at http://mozilla.org/MPL/2.0/.
5
6 ! The interface to the VIS instructions as declared below (and in the VIS
7 ! User's Manual) will not change, but the macro implementation might change
8 ! in the future.
9
10 !--------------------------------------------------------------------
11 ! Pure edge handling instructions
12 !
13 ! int vis_edge8(void */*frs1*/, void */*frs2*/);
14 ! Wraps edge8: the two pointers arrive in %o0/%o1; the edge mask is returned in %o0.
15 .inline vis_edge8,8
16 edge8 %o0,%o1,%o0 ! %o0 = edge8(frs1, frs2)
17 .end
18 !
19 ! int vis_edge8l(void */*frs1*/, void */*frs2*/);
20 ! Wraps edge8l (little-endian form of edge8): pointers in %o0/%o1, mask returned in %o0.
21 .inline vis_edge8l,8
22 edge8l %o0,%o1,%o0 ! %o0 = edge8l(frs1, frs2)
23 .end
24 !
25 ! int vis_edge16(void */*frs1*/, void */*frs2*/);
26 ! Wraps edge16: the two pointers arrive in %o0/%o1; the edge mask is returned in %o0.
27 .inline vis_edge16,8
28 edge16 %o0,%o1,%o0 ! %o0 = edge16(frs1, frs2)
29 .end
30 !
31 ! int vis_edge16l(void */*frs1*/, void */*frs2*/);
32 ! Wraps edge16l (little-endian form of edge16): pointers in %o0/%o1, mask returned in %o0.
33 .inline vis_edge16l,8
34 edge16l %o0,%o1,%o0 ! %o0 = edge16l(frs1, frs2)
35 .end
36 !
37 ! int vis_edge32(void */*frs1*/, void */*frs2*/);
38 ! Wraps edge32: the two pointers arrive in %o0/%o1; the edge mask is returned in %o0.
39 .inline vis_edge32,8
40 edge32 %o0,%o1,%o0 ! %o0 = edge32(frs1, frs2)
41 .end
42 !
43 ! int vis_edge32l(void */*frs1*/, void */*frs2*/);
44 ! Wraps edge32l (little-endian form of edge32): pointers in %o0/%o1, mask returned in %o0.
45 .inline vis_edge32l,8
46 edge32l %o0,%o1,%o0 ! %o0 = edge32l(frs1, frs2)
47 .end
48
49 !--------------------------------------------------------------------
50 ! Edge handling instructions with negative return values if cc set
51 !
52 ! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
53 ! edge8 mask in %o0, with -1024 ORed in (negative result) when %icc is "gu" after edge8.
54 .inline vis_edge8cc,8
55 edge8 %o0,%o1,%o0 ! %o0 = edge mask; movgu below tests the %icc left by this instruction
56 mov 0,%o1 ! %o1 = 0 (frs2 is no longer needed)
57 movgu %icc,-1024,%o1 ! %o1 = -1024 if unsigned-greater, else stays 0
58 or %o1,%o0,%o0 ! fold the flag into the returned mask
59 .end
60 !
61 ! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
62 ! edge8l mask in %o0, with -1024 ORed in (negative result) when %icc is "gu" after edge8l.
63 .inline vis_edge8lcc,8
64 edge8l %o0,%o1,%o0 ! %o0 = edge mask; movgu below tests the %icc left by this instruction
65 mov 0,%o1 ! %o1 = 0 (frs2 is no longer needed)
66 movgu %icc,-1024,%o1 ! %o1 = -1024 if unsigned-greater, else stays 0
67 or %o1,%o0,%o0 ! fold the flag into the returned mask
68 .end
69 !
70 ! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
71 ! edge16 mask in %o0, with -1024 ORed in (negative result) when %icc is "gu" after edge16.
72 .inline vis_edge16cc,8
73 edge16 %o0,%o1,%o0 ! %o0 = edge mask; movgu below tests the %icc left by this instruction
74 mov 0,%o1 ! %o1 = 0 (frs2 is no longer needed)
75 movgu %icc,-1024,%o1 ! %o1 = -1024 if unsigned-greater, else stays 0
76 or %o1,%o0,%o0 ! fold the flag into the returned mask
77 .end
78 !
79 ! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
80 ! edge16l mask in %o0, with -1024 ORed in (negative result) when %icc is "gu" after edge16l.
81 .inline vis_edge16lcc,8
82 edge16l %o0,%o1,%o0 ! %o0 = edge mask; movgu below tests the %icc left by this instruction
83 mov 0,%o1 ! %o1 = 0 (frs2 is no longer needed)
84 movgu %icc,-1024,%o1 ! %o1 = -1024 if unsigned-greater, else stays 0
85 or %o1,%o0,%o0 ! fold the flag into the returned mask
86 .end
87 !
88 ! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
89 ! edge32 mask in %o0, with -1024 ORed in (negative result) when %icc is "gu" after edge32.
90 .inline vis_edge32cc,8
91 edge32 %o0,%o1,%o0 ! %o0 = edge mask; movgu below tests the %icc left by this instruction
92 mov 0,%o1 ! %o1 = 0 (frs2 is no longer needed)
93 movgu %icc,-1024,%o1 ! %o1 = -1024 if unsigned-greater, else stays 0
94 or %o1,%o0,%o0 ! fold the flag into the returned mask
95 .end
96 !
97 ! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
98 ! edge32l mask in %o0, with -1024 ORed in (negative result) when %icc is "gu" after edge32l.
99 .inline vis_edge32lcc,8
100 edge32l %o0,%o1,%o0 ! %o0 = edge mask; movgu below tests the %icc left by this instruction
101 mov 0,%o1 ! %o1 = 0 (frs2 is no longer needed)
102 movgu %icc,-1024,%o1 ! %o1 = -1024 if unsigned-greater, else stays 0
103 or %o1,%o0,%o0 ! fold the flag into the returned mask
104 .end
105
106 !--------------------------------------------------------------------
107 ! Alignment instructions
108 !
109 ! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
110 ! Wraps alignaddr: rs1 in %o0, rs2 in %o1; the resulting address is returned in %o0.
111 .inline vis_alignaddr,8
112 alignaddr %o0,%o1,%o0 ! %o0 = alignaddr(rs1, rs2); per the VIS manual this also sets the GSR alignment offset
113 .end
114 !
115 ! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
116 ! Wraps alignaddrl (little-endian form of alignaddr): rs1 in %o0, rs2 in %o1; result in %o0.
117 .inline vis_alignaddrl,8
118 alignaddrl %o0,%o1,%o0 ! %o0 = alignaddrl(rs1, rs2)
119 .end
120 !
121 ! double vis_faligndata(double /*frs1*/, double /*frs2*/);
122 ! Wraps faligndata: both doubles arrive in %o registers and are bounced through stack scratch; result in %f0:%f1.
123 .inline vis_faligndata,16
124 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
125 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
126 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
127 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
128 faligndata %f4,%f10,%f0 ! %f0:%f1 = faligndata(frs1, frs2)
129 .end
130
131 !--------------------------------------------------------------------
132 ! Partitioned comparison instructions
133 !
134 ! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
135 ! Wraps fcmple16: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
136 .inline vis_fcmple16,16
137 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
138 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
139 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
140 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
141 fcmple16 %f4,%f10,%o0 ! %o0 = fcmple16(frs1, frs2), written straight to the integer return register
142 .end
143 !
144 ! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
145 ! Wraps fcmpne16: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
146 .inline vis_fcmpne16,16
147 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
148 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
149 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
150 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
151 fcmpne16 %f4,%f10,%o0 ! %o0 = fcmpne16(frs1, frs2), written straight to the integer return register
152 .end
153 !
154 ! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
155 ! Wraps fcmple32: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
156 .inline vis_fcmple32,16
157 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
158 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
159 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
160 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
161 fcmple32 %f4,%f10,%o0 ! %o0 = fcmple32(frs1, frs2), written straight to the integer return register
162 .end
163 !
164 ! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
165 ! Wraps fcmpne32: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
166 .inline vis_fcmpne32,16
167 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
168 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
169 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
170 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
171 fcmpne32 %f4,%f10,%o0 ! %o0 = fcmpne32(frs1, frs2), written straight to the integer return register
172 .end
173 !
174 ! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
175 ! Wraps fcmpgt16: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
176 .inline vis_fcmpgt16,16
177 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
178 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
179 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
180 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
181 fcmpgt16 %f4,%f10,%o0 ! %o0 = fcmpgt16(frs1, frs2), written straight to the integer return register
182 .end
183 !
184 ! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
185 ! Wraps fcmpeq16: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
186 .inline vis_fcmpeq16,16
187 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
188 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
189 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
190 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
191 fcmpeq16 %f4,%f10,%o0 ! %o0 = fcmpeq16(frs1, frs2), written straight to the integer return register
192 .end
193 !
194 ! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
195 ! Wraps fcmpgt32: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
196 .inline vis_fcmpgt32,16
197 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
198 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
199 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
200 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
201 fcmpgt32 %f4,%f10,%o0 ! %o0 = fcmpgt32(frs1, frs2), written straight to the integer return register
202 .end
203 !
204 ! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
205 ! Wraps fcmpeq32: doubles are bounced through stack scratch into FP registers; the integer result is returned in %o0.
206 .inline vis_fcmpeq32,16
207 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
208 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
209 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
210 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
211 fcmpeq32 %f4,%f10,%o0 ! %o0 = fcmpeq32(frs1, frs2), written straight to the integer return register
212 .end
213
214 !--------------------------------------------------------------------
215 ! Partitioned arithmetic
216 !
217 ! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
218 ! Wraps fmul8x16: float frs1 arrives in %o0, double frs2 in %o1:%o2; the double result is returned in %f0:%f1.
219 .inline vis_fmul8x16,12
220 st %o0,[%sp+0x44] ! frs1 (%o0) -> stack scratch
221 ld [%sp+0x44],%f4 ! %f4 = frs1
222 st %o1,[%sp+0x48] ! first word of frs2; two st's are used because %o1 is odd-numbered and std needs an even register
223 st %o2,[%sp+0x4c] ! second word of frs2
224 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
225 fmul8x16 %f4,%f10,%f0 ! %f0:%f1 = fmul8x16(frs1, frs2)
226 .end
227 !
228 ! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
229 ! Same operation as vis_fmul8x16; the int dummy (in %o1) is ignored, so frs2 arrives in %o2:%o3 and one std suffices.
230 .inline vis_fmul8x16_dummy,16
231 st %o0,[%sp+0x44] ! frs1 (%o0) -> stack scratch
232 ld [%sp+0x44],%f4 ! %f4 = frs1
233 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> stack scratch
234 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
235 fmul8x16 %f4,%f10,%f0 ! %f0:%f1 = fmul8x16(frs1, frs2)
236 .end
237 !
238 ! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
239 ! Wraps fmul8x16au: both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0:%f1.
240 .inline vis_fmul8x16au,8
241 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
242 ld [%sp+0x48],%f4 ! %f4 = frs1
243 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
244 ld [%sp+0x48],%f10 ! %f10 = frs2
245 fmul8x16au %f4,%f10,%f0 ! %f0:%f1 = fmul8x16au(frs1, frs2)
246 .end
247 !
248 ! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
249 ! Wraps fmul8x16al: both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0:%f1.
250 .inline vis_fmul8x16al,8
251 st %o0,[%sp+0x44] ! frs1 (%o0) -> stack scratch (this template uses separate slots for the two operands)
252 ld [%sp+0x44],%f4 ! %f4 = frs1
253 st %o1,[%sp+0x48] ! frs2 (%o1) -> stack scratch
254 ld [%sp+0x48],%f10 ! %f10 = frs2
255 fmul8x16al %f4,%f10,%f0 ! %f0:%f1 = fmul8x16al(frs1, frs2)
256 .end
257 !
258 ! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
259 ! Wraps fmul8sux16: both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
260 .inline vis_fmul8sux16,16
261 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
262 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
263 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
264 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
265 fmul8sux16 %f4,%f10,%f0 ! %f0:%f1 = fmul8sux16(frs1, frs2)
266 .end
267 !
268 ! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
269 ! Wraps fmul8ulx16: both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
270 .inline vis_fmul8ulx16,16
271 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
272 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
273 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
274 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
275 fmul8ulx16 %f4,%f10,%f0 ! %f0:%f1 = fmul8ulx16(frs1, frs2)
276 .end
277 !
278 ! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
279 ! Wraps fmuld8sux16: both floats arrive in %o0/%o1 and are bounced through stack scratch; double result in %f0:%f1.
280 .inline vis_fmuld8sux16,8
281 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
282 ld [%sp+0x48],%f4 ! %f4 = frs1
283 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
284 ld [%sp+0x48],%f10 ! %f10 = frs2
285 fmuld8sux16 %f4,%f10,%f0 ! %f0:%f1 = fmuld8sux16(frs1, frs2)
286 .end
287 !
288 ! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
289 ! Wraps fmuld8ulx16: both floats arrive in %o0/%o1 and are bounced through stack scratch; double result in %f0:%f1.
290 .inline vis_fmuld8ulx16,8
291 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
292 ld [%sp+0x48],%f4 ! %f4 = frs1
293 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
294 ld [%sp+0x48],%f10 ! %f10 = frs2
295 fmuld8ulx16 %f4,%f10,%f0 ! %f0:%f1 = fmuld8ulx16(frs1, frs2)
296 .end
297 !
298 ! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
299 ! Wraps fpadd16: both doubles are bounced through the [%sp+0x48] scratch slot into FP registers; result in %f0:%f1.
300 .inline vis_fpadd16,16
301 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch; 0x48 like every sibling, keeping clear of the word at %sp+0x40
302 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1 (reloaded before the slot is reused)
303 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
304 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
305 fpadd16 %f4,%f10,%f0 ! %f0:%f1 = fpadd16(frs1, frs2)
306 .end
307 !
308 ! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
309 ! Wraps fpadd16s: both floats arrive in %o0/%o1 and are bounced through stack scratch; float result in %f0.
310 .inline vis_fpadd16s,8
311 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
312 ld [%sp+0x48],%f4 ! %f4 = frs1
313 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
314 ld [%sp+0x48],%f10 ! %f10 = frs2
315 fpadd16s %f4,%f10,%f0 ! %f0 = fpadd16s(frs1, frs2)
316 .end
317 !
318 ! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
319 ! Wraps fpadd32: both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
320 .inline vis_fpadd32,16
321 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
322 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
323 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
324 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
325 fpadd32 %f4,%f10,%f0 ! %f0:%f1 = fpadd32(frs1, frs2)
326 .end
327 !
328 ! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
329 ! Wraps fpadd32s: both floats arrive in %o0/%o1 and are bounced through stack scratch; float result in %f0.
330 .inline vis_fpadd32s,8
331 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
332 ld [%sp+0x48],%f4 ! %f4 = frs1
333 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
334 ld [%sp+0x48],%f10 ! %f10 = frs2
335 fpadd32s %f4,%f10,%f0 ! %f0 = fpadd32s(frs1, frs2)
336 .end
337 !
338 ! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
339 ! Wraps fpsub16: both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
340 .inline vis_fpsub16,16
341 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
342 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
343 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
344 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
345 fpsub16 %f4,%f10,%f0 ! %f0:%f1 = fpsub16(frs1, frs2)
346 .end
347 !
348 ! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
349 ! Wraps fpsub16s: both floats arrive in %o0/%o1 and are bounced through stack scratch; float result in %f0.
350 .inline vis_fpsub16s,8
351 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
352 ld [%sp+0x48],%f4 ! %f4 = frs1
353 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
354 ld [%sp+0x48],%f10 ! %f10 = frs2
355 fpsub16s %f4,%f10,%f0 ! %f0 = fpsub16s(frs1, frs2)
356 .end
357 !
358 ! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
359 ! Wraps fpsub32: both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
360 .inline vis_fpsub32,16
361 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
362 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
363 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
364 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
365 fpsub32 %f4,%f10,%f0 ! %f0:%f1 = fpsub32(frs1, frs2)
366 .end
367 !
368 ! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
369 ! Wraps fpsub32s: both floats arrive in %o0/%o1 and are bounced through stack scratch; float result in %f0.
370 .inline vis_fpsub32s,8
371 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
372 ld [%sp+0x48],%f4 ! %f4 = frs1
373 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
374 ld [%sp+0x48],%f10 ! %f10 = frs2
375 fpsub32s %f4,%f10,%f0 ! %f0 = fpsub32s(frs1, frs2)
376 .end
377
378 !--------------------------------------------------------------------
379 ! Pixel packing
380 !
381 ! float vis_fpack16(double /*frs2*/);
382 ! Wraps fpack16: the double arrives in %o0:%o1 and is bounced through stack scratch; the packed float is returned in %f0.
383 .inline vis_fpack16,8
384 std %o0,[%sp+0x48] ! frs2 (%o0:%o1) -> stack scratch
385 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs2
386 fpack16 %f4,%f0 ! %f0 = fpack16(frs2)
387 .end
388
389 !
390 ! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
391 ! Packs two doubles with fpack16 and returns them as one double: first result in %f0, second in %f1.
392 .inline vis_fpack16_pair,16
393 std %o0,[%sp+0x48] ! first double (%o0:%o1) -> stack scratch
394 ldd [%sp+0x48],%f4 ! %f4:%f5 = first double
395 std %o2,[%sp+0x48] ! second double (%o2:%o3) -> same scratch slot
396 ldd [%sp+0x48],%f10 ! %f10:%f11 = second double
397 fpack16 %f4,%f0 ! %f0 = packed first double (upper half of the return value)
398 fpack16 %f10,%f1 ! %f1 = packed second double (lower half of the return value)
399 .end
400 !
401 ! void vis_st2_fpack16(double, double, double *)
402 ! Packs two doubles with fpack16 and writes the two 32-bit results to the pointer in %o4 with two single-word stores.
403 .inline vis_st2_fpack16,20
404 std %o0,[%sp+0x48] ! first double (%o0:%o1) -> stack scratch
405 ldd [%sp+0x48],%f4 ! %f4:%f5 = first double
406 std %o2,[%sp+0x48] ! second double (%o2:%o3) -> same scratch slot
407 ldd [%sp+0x48],%f10 ! %f10:%f11 = second double
408 fpack16 %f4,%f0 ! %f0 = packed first double
409 fpack16 %f10,%f1 ! %f1 = packed second double
410 st %f0,[%o4+0] ! destination word 0 = first packed result
411 st %f1,[%o4+4] ! destination word 1 = second packed result
412 .end
413 !
414 ! void vis_std_fpack16(double, double, double *)
415 ! Packs two doubles with fpack16 and writes both 32-bit results to the pointer in %o4 with one doubleword store.
416 .inline vis_std_fpack16,20
417 std %o0,[%sp+0x48] ! first double (%o0:%o1) -> stack scratch
418 ldd [%sp+0x48],%f4 ! %f4:%f5 = first double
419 std %o2,[%sp+0x48] ! second double (%o2:%o3) -> same scratch slot
420 ldd [%sp+0x48],%f10 ! %f10:%f11 = second double
421 fpack16 %f4,%f0 ! %f0 = packed first double
422 fpack16 %f10,%f1 ! %f1 = packed second double
423 std %f0,[%o4] ! store %f0:%f1 as one doubleword at the destination
424 .end
425 !
426 ! void vis_st2_fpackfix(double, double, double *)
427 ! Packs two doubles with fpackfix and writes the two 32-bit results to the pointer in %o4 with two single-word stores.
428 .inline vis_st2_fpackfix,20
429 std %o0,[%sp+0x48] ! first double (%o0:%o1) -> stack scratch
430 ldd [%sp+0x48],%f4 ! %f4:%f5 = first double
431 std %o2,[%sp+0x48] ! second double (%o2:%o3) -> same scratch slot
432 ldd [%sp+0x48],%f10 ! %f10:%f11 = second double
433 fpackfix %f4,%f0 ! %f0 = packed first double
434 fpackfix %f10,%f1 ! %f1 = packed second double
435 st %f0,[%o4+0] ! destination word 0 = first packed result
436 st %f1,[%o4+4] ! destination word 1 = second packed result
437 .end
438 !
439 ! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
440 ! Returns frs1 with its upper 32 bits (%f0) replaced by fpack16(frs2); the lower 32 bits (%f1) are kept.
441 .inline vis_fpack16_to_hi,16
442 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
443 ldd [%sp+0x48],%f0 ! %f0:%f1 = frs1, loaded directly into the return registers
444 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
445 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs2
446 fpack16 %f4,%f0 ! overwrite only %f0 with the packed frs2
447 .end
448
449 ! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
450 ! Returns frs1 with its lower 32 bits (%f1) replaced by fpack16(frs2); the upper 32 bits (%f0) are kept.
451 .inline vis_fpack16_to_lo,16
452 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
453 ldd [%sp+0x48],%f0 ! %f0:%f1 = frs1, loaded directly into the return registers
454 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
455 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs2
456 fpack16 %f4,%f3 ! pack into temporary %f3 rather than straight into %f1
457 fmovs %f3,%f1 /* without this, optimizer goes wrong */
458 .end
459
460 !
461 ! double vis_fpack32(double /*frs1*/, double /*frs2*/);
462 ! Wraps fpack32: both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
463 .inline vis_fpack32,16
464 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
465 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
466 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
467 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
468 fpack32 %f4,%f10,%f0 ! %f0:%f1 = fpack32(frs1, frs2)
469 .end
470 !
471 ! float vis_fpackfix(double /*frs2*/);
472 ! Wraps fpackfix: the double arrives in %o0:%o1 and is bounced through stack scratch; the packed float is returned in %f0.
473 .inline vis_fpackfix,8
474 std %o0,[%sp+0x48] ! frs2 (%o0:%o1) -> stack scratch
475 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs2
476 fpackfix %f4,%f0 ! %f0 = fpackfix(frs2)
477 .end
478 !
479 ! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
480 ! Packs two doubles with fpackfix and returns them as one double: first result in %f0, second in %f1.
481 .inline vis_fpackfix_pair,16
482 std %o0,[%sp+0x48] ! first double (%o0:%o1) -> stack scratch
483 ldd [%sp+0x48],%f4 ! %f4:%f5 = first double
484 std %o2,[%sp+0x48] ! second double (%o2:%o3) -> same scratch slot
485 ldd [%sp+0x48],%f6 ! %f6:%f7 = second double (this template uses %f6 where siblings use %f10)
486 fpackfix %f4,%f0 ! %f0 = packed first double
487 fpackfix %f6,%f1 ! %f1 = packed second double
488 .end
489
490 !--------------------------------------------------------------------
491 ! Motion estimation
492 !
493 ! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
494 ! Wraps pdist: frd (third argument, %o4:%o5) is preloaded into %f0:%f1, which pdist uses as its destination operand.
495 .inline vis_pdist,24
496 std %o4,[%sp+0x48] ! frd (%o4:%o5) -> stack scratch
497 ldd [%sp+0x48],%f0 ! %f0:%f1 = frd, the incoming value of the destination
498 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> same scratch slot
499 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
500 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
501 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
502 pdist %f4,%f10,%f0 ! %f0:%f1 = pdist(frs1, frs2) applied to the preloaded frd
503 .end
504
505 !--------------------------------------------------------------------
506 ! Channel merging
507 !
508 ! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
509 ! Wraps fpmerge: both floats arrive in %o0/%o1 and are bounced through stack scratch; double result in %f0:%f1.
510 .inline vis_fpmerge,8
511 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
512 ld [%sp+0x48],%f4 ! %f4 = frs1
513 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
514 ld [%sp+0x48],%f10 ! %f10 = frs2
515 fpmerge %f4,%f10,%f0 ! %f0:%f1 = fpmerge(frs1, frs2)
516 .end
517
518 !--------------------------------------------------------------------
519 ! Pixel expansion
520 !
521 ! double vis_fexpand(float /*frs2*/);
522 ! Wraps fexpand: the float arrives in %o0 and is bounced through stack scratch; the double result is returned in %f0:%f1.
523 .inline vis_fexpand,4
524 st %o0,[%sp+0x48] ! frs2 (%o0) -> stack scratch
525 ld [%sp+0x48],%f4 ! %f4 = frs2
526 fexpand %f4,%f0 ! %f0:%f1 = fexpand(frs2)
527 .end
528
529 ! double vis_fexpand_hi(double /*frs2*/);
530 ! fexpand of the upper 32 bits of the double argument; result in %f0:%f1.
531 .inline vis_fexpand_hi,8
532 std %o0,[%sp+0x48] ! frs2 (%o0:%o1) -> stack scratch
533 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs2
534 fexpand %f4,%f0 ! source is %f4, the upper word of the loaded pair
535 .end
536
537 ! double vis_fexpand_lo(double /*frs2*/);
538 ! fexpand of the lower 32 bits of the double argument; result in %f0:%f1.
539 .inline vis_fexpand_lo,8
540 std %o0,[%sp+0x48] ! frs2 (%o0:%o1) -> stack scratch
541 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs2
542 fmovs %f5, %f2 ! copy the lower word (%f5) into %f2
543 fexpand %f2,%f0 ! %f0:%f1 = fexpand(lower word)
544 .end
545
546 !--------------------------------------------------------------------
547 ! Bitwise logical operations
548 !
549 ! double vis_fnor(double /*frs1*/, double /*frs2*/);
550 ! Wraps fnor (64-bit): both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
551 .inline vis_fnor,16
552 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
553 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
554 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
555 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
556 fnor %f4,%f10,%f0 ! %f0:%f1 = fnor(frs1, frs2)
557 .end
558 !
559 ! float vis_fnors(float /*frs1*/, float /*frs2*/);
560 ! Wraps fnors (32-bit): both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0.
561 .inline vis_fnors,8
562 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
563 ld [%sp+0x48],%f4 ! %f4 = frs1
564 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
565 ld [%sp+0x48],%f10 ! %f10 = frs2
566 fnors %f4,%f10,%f0 ! %f0 = fnors(frs1, frs2)
567 .end
568 !
569 ! double vis_fandnot(double /*frs1*/, double /*frs2*/);
570 ! Wraps fandnot1 (64-bit; per the VIS manual the first operand is the negated one); result in %f0:%f1.
571 .inline vis_fandnot,16
572 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
573 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
574 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
575 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
576 fandnot1 %f4,%f10,%f0 ! %f0:%f1 = fandnot1(frs1, frs2)
577 .end
578 !
579 ! float vis_fandnots(float /*frs1*/, float /*frs2*/);
580 ! Wraps fandnot1s (32-bit; per the VIS manual the first operand is the negated one); result in %f0.
581 .inline vis_fandnots,8
582 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
583 ld [%sp+0x48],%f4 ! %f4 = frs1
584 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
585 ld [%sp+0x48],%f10 ! %f10 = frs2
586 fandnot1s %f4,%f10,%f0 ! %f0 = fandnot1s(frs1, frs2)
587 .end
588 !
589 ! double vis_fnot(double /*frs1*/);
590 ! Wraps fnot1 (64-bit): the double arrives in %o0:%o1 and is bounced through stack scratch; result in %f0:%f1.
591 .inline vis_fnot,8
592 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
593 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
594 fnot1 %f4,%f0 ! %f0:%f1 = fnot1(frs1)
595 .end
596 !
597 ! float vis_fnots(float /*frs1*/);
598 ! Wraps fnot1s (32-bit): the float arrives in %o0 and is bounced through stack scratch; result in %f0.
599 .inline vis_fnots,4
600 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
601 ld [%sp+0x48],%f4 ! %f4 = frs1
602 fnot1s %f4,%f0 ! %f0 = fnot1s(frs1)
603 .end
604 !
605 ! double vis_fxor(double /*frs1*/, double /*frs2*/);
606 ! Wraps fxor (64-bit): both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
607 .inline vis_fxor,16
608 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
609 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
610 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
611 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
612 fxor %f4,%f10,%f0 ! %f0:%f1 = fxor(frs1, frs2)
613 .end
614 !
615 ! float vis_fxors(float /*frs1*/, float /*frs2*/);
616 ! Wraps fxors (32-bit): both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0.
617 .inline vis_fxors,8
618 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
619 ld [%sp+0x48],%f4 ! %f4 = frs1
620 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
621 ld [%sp+0x48],%f10 ! %f10 = frs2
622 fxors %f4,%f10,%f0 ! %f0 = fxors(frs1, frs2)
623 .end
624 !
625 ! double vis_fnand(double /*frs1*/, double /*frs2*/);
626 ! Wraps fnand (64-bit): both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
627 .inline vis_fnand,16
628 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
629 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
630 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
631 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
632 fnand %f4,%f10,%f0 ! %f0:%f1 = fnand(frs1, frs2)
633 .end
634 !
635 ! float vis_fnands(float /*frs1*/, float /*frs2*/);
636 ! Wraps fnands (32-bit): both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0.
637 .inline vis_fnands,8
638 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
639 ld [%sp+0x48],%f4 ! %f4 = frs1
640 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
641 ld [%sp+0x48],%f10 ! %f10 = frs2
642 fnands %f4,%f10,%f0 ! %f0 = fnands(frs1, frs2)
643 .end
644 !
645 ! double vis_fand(double /*frs1*/, double /*frs2*/);
646 ! Wraps fand (64-bit): both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
647 .inline vis_fand,16
648 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
649 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
650 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
651 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
652 fand %f4,%f10,%f0 ! %f0:%f1 = fand(frs1, frs2)
653 .end
654 !
655 ! float vis_fands(float /*frs1*/, float /*frs2*/);
656 ! Wraps fands (32-bit): both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0.
657 .inline vis_fands,8
658 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
659 ld [%sp+0x48],%f4 ! %f4 = frs1
660 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
661 ld [%sp+0x48],%f10 ! %f10 = frs2
662 fands %f4,%f10,%f0 ! %f0 = fands(frs1, frs2)
663 .end
664 !
665 ! double vis_fxnor(double /*frs1*/, double /*frs2*/);
666 ! Wraps fxnor (64-bit): both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
667 .inline vis_fxnor,16
668 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
669 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
670 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
671 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
672 fxnor %f4,%f10,%f0 ! %f0:%f1 = fxnor(frs1, frs2)
673 .end
674 !
675 ! float vis_fxnors(float /*frs1*/, float /*frs2*/);
676 ! Wraps fxnors (32-bit): both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0.
677 .inline vis_fxnors,8
678 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
679 ld [%sp+0x48],%f4 ! %f4 = frs1
680 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
681 ld [%sp+0x48],%f10 ! %f10 = frs2
682 fxnors %f4,%f10,%f0 ! %f0 = fxnors(frs1, frs2)
683 .end
684 !
685 ! double vis_fsrc(double /*frs1*/);
686 ! Wraps fsrc1 (64-bit): the double arrives in %o0:%o1, is bounced through stack scratch, and is copied to %f0:%f1.
687 .inline vis_fsrc,8
688 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
689 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
690 fsrc1 %f4,%f0 ! %f0:%f1 = fsrc1(frs1)
691 .end
692 !
693 ! float vis_fsrcs(float /*frs1*/);
694 ! Wraps fsrc1s (32-bit): the float arrives in %o0, is bounced through stack scratch, and is copied to %f0.
695 .inline vis_fsrcs,4
696 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
697 ld [%sp+0x48],%f4 ! %f4 = frs1
698 fsrc1s %f4,%f0 ! %f0 = fsrc1s(frs1)
699 .end
700 !
701 ! double vis_fornot(double /*frs1*/, double /*frs2*/);
702 ! Wraps fornot1 (64-bit; per the VIS manual the first operand is the negated one); result in %f0:%f1.
703 .inline vis_fornot,16
704 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
705 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
706 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
707 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
708 fornot1 %f4,%f10,%f0 ! %f0:%f1 = fornot1(frs1, frs2)
709 .end
710 !
711 ! float vis_fornots(float /*frs1*/, float /*frs2*/);
712 ! Wraps fornot1s (32-bit; per the VIS manual the first operand is the negated one); result in %f0.
713 .inline vis_fornots,8
714 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
715 ld [%sp+0x48],%f4 ! %f4 = frs1
716 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
717 ld [%sp+0x48],%f10 ! %f10 = frs2
718 fornot1s %f4,%f10,%f0 ! %f0 = fornot1s(frs1, frs2)
719 .end
720 !
721 ! double vis_for(double /*frs1*/, double /*frs2*/);
722 ! Wraps for (64-bit logical OR): both doubles are bounced through stack scratch into FP registers; result in %f0:%f1.
723 .inline vis_for,16
724 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
725 ldd [%sp+0x48],%f4 ! %f4:%f5 = frs1
726 std %o2,[%sp+0x48] ! frs2 (%o2:%o3) -> same scratch slot
727 ldd [%sp+0x48],%f10 ! %f10:%f11 = frs2
728 for %f4,%f10,%f0 ! %f0:%f1 = for(frs1, frs2)
729 .end
730 !
731 ! float vis_fors(float /*frs1*/, float /*frs2*/);
732 ! Wraps fors (32-bit logical OR): both floats arrive in %o0/%o1 and are bounced through stack scratch; result in %f0.
733 .inline vis_fors,8
734 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
735 ld [%sp+0x48],%f4 ! %f4 = frs1
736 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
737 ld [%sp+0x48],%f10 ! %f10 = frs2
738 fors %f4,%f10,%f0 ! %f0 = fors(frs1, frs2)
739 .end
740 !
741 ! double vis_fzero(/* void */)
742 ! Wraps fzero: no arguments; the double return register %f0:%f1 is filled by fzero.
743 .inline vis_fzero,0
744 fzero %f0 ! %f0:%f1 = all-zero bit pattern
745 .end
746 !
747 ! float vis_fzeros(/* void */)
748 ! Wraps fzeros: no arguments; the float return register %f0 is filled by fzeros.
749 .inline vis_fzeros,0
750 fzeros %f0 ! %f0 = all-zero bit pattern
751 .end
752 !
753 ! double vis_fone(/* void */)
754 ! Wraps fone: no arguments; the double return register %f0:%f1 is filled by fone.
755 .inline vis_fone,0
756 fone %f0 ! %f0:%f1 = all-ones bit pattern
757 .end
758 !
759 ! float vis_fones(/* void */)
760 ! Wraps fones: no arguments; the float return register %f0 is filled by fones.
761 .inline vis_fones,0
762 fones %f0 ! %f0 = all-ones bit pattern
763 .end
764
765 !--------------------------------------------------------------------
766 ! Partial store instructions
767 !
768 ! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
769 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc0; no return value.
770 .inline vis_stdfa_ASI_PST8P,16
771 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
772 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
773 stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P: address = rs1 (%o2), mask = rmask (%o3)
774 .end
775 !
776 ! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
777 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc8; no return value.
778 .inline vis_stdfa_ASI_PST8PL,16
779 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
780 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
781 stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL: address = rs1 (%o2), mask = rmask (%o3)
782 .end
783 !
784 ! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
785 ! Loads one word each from rs1 and rs2 into %f4:%f5, then partial-stores that pair to rs3 under rmask (ASI 0xc0).
786 .inline vis_stdfa_ASI_PST8P_int_pair,16
787 ld [%o0],%f4 ! %f4 = word at rs1 (upper half of the pair)
788 ld [%o1],%f5 ! %f5 = word at rs2 (lower half of the pair)
789 stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P: address = rs3 (%o2), mask = rmask (%o3)
790 .end
791 !
792 ! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
793 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc1; no return value.
794 .inline vis_stdfa_ASI_PST8S,16
795 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
796 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
797 stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S: address = rs1 (%o2), mask = rmask (%o3)
798 .end
799 !
800 ! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
801 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc2; no return value.
802 .inline vis_stdfa_ASI_PST16P,16
803 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
804 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
805 stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P: address = rs1 (%o2), mask = rmask (%o3)
806 .end
807 !
808 ! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
809 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc3; no return value.
810 .inline vis_stdfa_ASI_PST16S,16
811 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
812 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
813 stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S: address = rs1 (%o2), mask = rmask (%o3)
814 .end
815 !
816 ! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
817 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc4; no return value.
818 .inline vis_stdfa_ASI_PST32P,16
819 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
820 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
821 stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P: address = rs1 (%o2), mask = rmask (%o3)
822 .end
823 !
824 ! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
825 ! Partial store of frd to the address in %o2 under the mask in %o3, using ASI 0xc5; no return value.
826 .inline vis_stdfa_ASI_PST32S,16
827 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
828 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
829 stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S: address = rs1 (%o2), mask = rmask (%o3)
830 .end
831
832 !--------------------------------------------------------------------
833 ! Short store instructions
834 !
835 ! vis_stdfa_ASI_FL8P(double frd, void *rs1)
836 ! Short store from frd to the address in %o2 using ASI 0xd0; no return value.
837 .inline vis_stdfa_ASI_FL8P,12
838 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
839 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
840 stda %f4,[%o2]0xd0 ! ASI_FL8_P store to rs1 (%o2)
841 .end
842 !
843 ! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
844 ! Short store from frd to the address rs1 + index (%o2 + %o3) using ASI 0xd0; no return value.
845 .inline vis_stdfa_ASI_FL8P_index,16
846 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
847 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
848 stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P store to rs1 + index
849 .end
850 !
851 ! vis_stdfa_ASI_FL8S(double frd, void *rs1)
852 ! Short store from frd to the address in %o2 using ASI 0xd1; no return value.
853 .inline vis_stdfa_ASI_FL8S,12
854 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
855 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
856 stda %f4,[%o2]0xd1 ! ASI_FL8_S store to rs1 (%o2)
857 .end
858 !
859 ! vis_stdfa_ASI_FL16P(double frd, void *rs1)
860 ! Short store from frd to the address in %o2 using ASI 0xd2; no return value.
861 .inline vis_stdfa_ASI_FL16P,12
862 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
863 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
864 stda %f4,[%o2]0xd2 ! ASI_FL16_P store to rs1 (%o2)
865 .end
866 !
867 ! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
868 ! Short store from frd to the address rs1 + index (%o2 + %o3) using ASI 0xd2; no return value.
869 .inline vis_stdfa_ASI_FL16P_index,16
870 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
871 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
872 stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P store to rs1 + index
873 .end
874 !
875 ! vis_stdfa_ASI_FL16S(double frd, void *rs1)
876 ! Short store from frd to the address in %o2 using ASI 0xd3; no return value.
877 .inline vis_stdfa_ASI_FL16S,12
878 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
879 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
880 stda %f4,[%o2]0xd3 ! ASI_FL16_S store to rs1 (%o2)
881 .end
882 !
883 ! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
884 ! Short store from frd to the address in %o2 using ASI 0xd8; no return value.
885 .inline vis_stdfa_ASI_FL8PL,12
886 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
887 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
888 stda %f4,[%o2]0xd8 ! ASI_FL8_PL store to rs1 (%o2)
889 .end
890 !
891 ! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
892 ! Short store from frd to the address in %o2 using ASI 0xd9; no return value.
893 .inline vis_stdfa_ASI_FL8SL,12
894 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
895 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
896 stda %f4,[%o2]0xd9 ! ASI_FL8_SL store to rs1 (%o2)
897 .end
898 !
899 ! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
900 ! Short store from frd to the address in %o2 using ASI 0xda; no return value.
901 .inline vis_stdfa_ASI_FL16PL,12
902 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
903 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
904 stda %f4,[%o2]0xda ! ASI_FL16_PL store to rs1 (%o2)
905 .end
906 !
907 ! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
908 ! Short store from frd to the address in %o2 using ASI 0xdb; no return value.
909 .inline vis_stdfa_ASI_FL16SL,12
910 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
911 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
912 stda %f4,[%o2]0xdb ! ASI_FL16_SL store to rs1 (%o2)
913 .end
914
915 !--------------------------------------------------------------------
916 ! Short load instructions
917 !
918 ! double vis_lddfa_ASI_FL8P(void *rs1)
919 ! Short load from the address in %o0 using ASI 0xd0; the double result is returned in %f0:%f1.
920 .inline vis_lddfa_ASI_FL8P,4
921 ldda [%o0]0xd0,%f4 ! ASI_FL8_P load into %f4:%f5
922 fmovd %f4,%f0 ! copy to the return register; compiler can clean this up
923 .end
924 !
925 ! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
926 ! Short load from rs1 + index (%o0 + %o1) using ASI 0xd0; the double result is returned in %f0:%f1.
927 .inline vis_lddfa_ASI_FL8P_index,8
928 ldda [%o0+%o1]0xd0,%f4 ! ASI_FL8_P load into %f4:%f5
929 fmovd %f4,%f0 ! copy to the return register
930 .end
931 !
932 ! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
933 ! Short load (ASI 0xd0) from rs1 plus the upper 16 bits of index; result in %f0:%f1.
934 .inline vis_lddfa_ASI_FL8P_hi,8
935 sra %o1,16,%o1 ! offset = index >> 16; sra is arithmetic, so the offset is sign-extended although the prototype says unsigned
936 ldda [%o0+%o1]0xd0,%f4 ! ASI_FL8_P load into %f4:%f5
937 fmovd %f4,%f0 ! copy to the return register
938 .end
939 !
940 ! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
941 ! Short load (ASI 0xd0) from rs1 plus the lower 16 bits of index; result in %f0:%f1.
942 .inline vis_lddfa_ASI_FL8P_lo,8
943 sll %o1,16,%o1 ! discard the upper 16 bits of index
944 sra %o1,16,%o1 ! shift back arithmetically: offset = sign-extended lower 16 bits
945 ldda [%o0+%o1]0xd0,%f4 ! ASI_FL8_P load into %f4:%f5
946 fmovd %f4,%f0 ! copy to the return register
947 .end
948 !
949 ! double vis_lddfa_ASI_FL8S(void *rs1)
950 ! Short load from the address in %o0 using ASI 0xd1; the double result is returned in %f0:%f1.
951 .inline vis_lddfa_ASI_FL8S,4
952 ldda [%o0]0xd1,%f4 ! ASI_FL8_S load into %f4:%f5
953 fmovd %f4,%f0 ! copy to the return register
954 .end
955 !
956 ! double vis_lddfa_ASI_FL16P(void *rs1)
957 ! Short load from the address in %o0 using ASI 0xd2; the double result is returned in %f0:%f1.
958 .inline vis_lddfa_ASI_FL16P,4
959 ldda [%o0]0xd2,%f4 ! ASI_FL16_P load into %f4:%f5
960 fmovd %f4,%f0 ! copy to the return register
961 .end
962 !
963 ! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
964 ! Short load from rs1 + index (%o0 + %o1) using ASI 0xd2; the double result is returned in %f0:%f1.
965 .inline vis_lddfa_ASI_FL16P_index,8
966 ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P load into %f4:%f5
967 fmovd %f4,%f0 ! copy to the return register
968 .end
969 !
970 ! double vis_lddfa_ASI_FL16S(void *rs1)
971 ! Short load from the address in %o0 using ASI 0xd3; the double result is returned in %f0:%f1.
972 .inline vis_lddfa_ASI_FL16S,4
973 ldda [%o0]0xd3,%f4 ! ASI_FL16_S load into %f4:%f5
974 fmovd %f4,%f0 ! copy to the return register
975 .end
976 !
977 ! double vis_lddfa_ASI_FL8PL(void *rs1)
978 ! Short load from the address in %o0 using ASI 0xd8; the double result is returned in %f0:%f1.
979 .inline vis_lddfa_ASI_FL8PL,4
980 ldda [%o0]0xd8,%f4 ! ASI_FL8_PL load into %f4:%f5
981 fmovd %f4,%f0 ! copy to the return register
982 .end
983 !
984 ! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
985 ! Short load from rs1 + index (%o0 + %o1) using ASI 0xd8; the double result is returned in %f0:%f1.
986 .inline vis_lddfa_ASI_FL8PL_index,8
987 ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL load into %f4:%f5
988 fmovd %f4,%f0 ! copy to the return register
989 .end
990 !
991 ! double vis_lddfa_ASI_FL8SL(void *rs1)
992 ! Short load from the address in %o0 using ASI 0xd9; the double result is returned in %f0:%f1.
993 .inline vis_lddfa_ASI_FL8SL,4
994 ldda [%o0]0xd9,%f4 ! ASI_FL8_SL load into %f4:%f5
995 fmovd %f4,%f0 ! copy to the return register
996 .end
997 !
998 ! double vis_lddfa_ASI_FL16PL(void *rs1)
999 ! Short load from the address in %o0 using ASI 0xda; the double result is returned in %f0:%f1.
1000 .inline vis_lddfa_ASI_FL16PL,4
1001 ldda [%o0]0xda,%f4 ! ASI_FL16_PL load into %f4:%f5
1002 fmovd %f4,%f0 ! copy to the return register
1003 .end
1004 !
1005 ! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
1006 ! Short load from rs1 + index (%o0 + %o1) using ASI 0xda; the double result is returned in %f0:%f1.
1007 .inline vis_lddfa_ASI_FL16PL_index,8
1008 ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL load into %f4:%f5
1009 fmovd %f4,%f0 ! copy to the return register
1010 .end
1011 !
1012 ! double vis_lddfa_ASI_FL16SL(void *rs1)
1013 ! Short load from the address in %o0 using ASI 0xdb; the double result is returned in %f0:%f1.
1014 .inline vis_lddfa_ASI_FL16SL,4
1015 ldda [%o0]0xdb,%f4 ! ASI_FL16_SL load into %f4:%f5
1016 fmovd %f4,%f0 ! copy to the return register
1017 .end
1018
1019 !--------------------------------------------------------------------
1020 ! Graphics status register
1021 !
1022 ! unsigned int vis_read_gsr(void)
1023 ! Reads the graphics status register; the value is returned in %o0.
1024 .inline vis_read_gsr,0
1025 rd %gsr,%o0 ! %o0 = %gsr
1026 .end
1027 !
1028 ! void vis_write_gsr(unsigned int /* GSR */)
1029 ! Writes the argument in %o0 to the graphics status register; no return value.
1030 .inline vis_write_gsr,4
1031 wr %g0,%o0,%gsr ! %gsr = %g0 xor %o0, i.e. the argument unchanged
1032 .end
1033
1034 !--------------------------------------------------------------------
1035 ! Voxel texture mapping
1036 !
1037 ! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
1038 ! Wraps array8: the 64-bit rs1 arrives split across %o0 (upper) and %o1 (lower), rs2 in %o2; result in %o0.
1039 .inline vis_array8,12
1040 sllx %o0,32,%o0 ! move the upper half of rs1 into bits 63..32
1041 srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
1042 or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
1043 array8 %o3,%o2,%o0 ! %o0 = array8(rs1, rs2)
1044 .end
1045 !
1046 ! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
1047 ! Wraps array16: the 64-bit rs1 arrives split across %o0 (upper) and %o1 (lower), rs2 in %o2; result in %o0.
1048 .inline vis_array16,12
1049 sllx %o0,32,%o0 ! move the upper half of rs1 into bits 63..32
1050 srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
1051 or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
1052 array16 %o3,%o2,%o0 ! %o0 = array16(rs1, rs2)
1053 .end
1054 !
1055 ! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
1056 ! Wraps array32: the 64-bit rs1 arrives split across %o0 (upper) and %o1 (lower), rs2 in %o2; result in %o0.
1057 .inline vis_array32,12
1058 sllx %o0,32,%o0 ! move the upper half of rs1 into bits 63..32
1059 srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
1060 or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
1061 array32 %o3,%o2,%o0 ! %o0 = array32(rs1, rs2)
1062 .end
1063
1064 !--------------------------------------------------------------------
1065 ! Register aliasing and type casts
1066 !
1067 ! float vis_read_hi(double /* frs1 */);
1068 ! Returns the upper 32 bits of the double as a float: loading the pair into %f0:%f1 leaves the upper word in %f0.
1069 .inline vis_read_hi,8
1070 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
1071 ldd [%sp+0x48],%f0 ! %f0:%f1 = frs1; the float return value is %f0
1072 .end
1073 !
1074 ! float vis_read_lo(double /* frs1 */);
1075 ! Returns the lower 32 bits of the double as a float in %f0.
1076 .inline vis_read_lo,8
1077 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
1078 ldd [%sp+0x48],%f0 ! %f0:%f1 = frs1
1079 fmovs %f1,%f0 ! %f0 = lower word of frs1, the float return value
1080 .end
1081 !
1082 ! double vis_write_hi(double /* frs1 */, float /* frs2 */);
1083 ! Returns frs1 with its upper 32 bits replaced by the float frs2; result in %f0:%f1.
1084 .inline vis_write_hi,12
1085 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
1086 ldd [%sp+0x48],%f0 ! %f0:%f1 = frs1
1087 st %o2,[%sp+0x44] ! frs2 (%o2) -> separate scratch word
1088 ld [%sp+0x44],%f2 ! %f2 = frs2
1089 fmovs %f2,%f0 ! upper word = frs2; %f1 still holds the lower word of frs1
1090 .end
1091 !
1092 ! double vis_write_lo(double /* frs1 */, float /* frs2 */);
1093 ! Returns frs1 with its lower 32 bits replaced by the float frs2; result in %f0:%f1.
1094 .inline vis_write_lo,12
1095 std %o0,[%sp+0x48] ! frs1 (%o0:%o1) -> stack scratch
1096 ldd [%sp+0x48],%f0 ! %f0:%f1 = frs1
1097 st %o2,[%sp+0x44] ! frs2 (%o2) -> separate scratch word
1098 ld [%sp+0x44],%f2 ! %f2 = frs2
1099 fmovs %f2,%f1 ! lower word = frs2; %f0 still holds the upper word of frs1
1100 .end
1101 !
1102 ! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
1103 ! Builds a double from two floats: frs1 becomes the upper word (%f0) and frs2 the lower word (%f1).
1104 .inline vis_freg_pair,8
1105 st %o0,[%sp+0x48] ! frs1 (%o0) -> stack scratch
1106 ld [%sp+0x48],%f0 ! %f0 = frs1
1107 st %o1,[%sp+0x48] ! frs2 (%o1) -> same scratch slot
1108 ld [%sp+0x48],%f1 ! %f1 = frs2
1109 .end
1110 !
1111 ! float vis_to_float(unsigned int /*value*/);
1112 ! Reinterprets the 32-bit integer bit pattern in %o0 as a float: store it, then reload the same word into %f0.
1113 .inline vis_to_float,4
1114 st %o0,[%sp+0x48] ! value (%o0) -> stack scratch
1115 ld [%sp+0x48],%f0 ! %f0 = the same 32 bits; no numeric conversion
1116 .end
1117 !
1118 ! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
1119 ! Reinterprets two 32-bit integers as one double: value1 (%o0) is the upper word, value2 (%o1) the lower word.
1120 .inline vis_to_double,8
1121 std %o0,[%sp+0x48] ! value1:value2 (%o0:%o1) -> stack scratch
1122 ldd [%sp+0x48],%f0 ! %f0:%f1 = the same 64 bits; no numeric conversion
1123 .end
1124 !
1125 ! double vis_to_double_dup(unsigned int /*value*/);
1126 ! Builds a double whose upper and lower words both hold the 32-bit bit pattern in %o0; result in %f0:%f1.
1127 .inline vis_to_double_dup,4
1128 st %o0,[%sp+0x48] ! value (%o0) -> stack scratch
1129 ld [%sp+0x48],%f1 ! %f1 = value (lower word)
1130 fmovs %f1,%f0 ! duplicate value into the upper word
1131 .end
1132 !
1133 ! double vis_ll_to_double(unsigned long long /*value*/);
1134 ! Reinterprets the 64-bit integer in %o0:%o1 as a double: store the pair, then reload it into %f0:%f1.
1135 .inline vis_ll_to_double,8
1136 std %o0,[%sp+0x48] ! value (%o0:%o1) -> stack scratch
1137 ldd [%sp+0x48],%f0 ! %f0:%f1 = the same 64 bits; no numeric conversion
1138 .end
1139
1140 !--------------------------------------------------------------------
1141 ! Address space identifier (ASI) register
1142 !
1143 ! unsigned int vis_read_asi(void)
1144 ! Reads the ASI register; the value is returned in %o0.
1145 .inline vis_read_asi,0
1146 rd %asi,%o0 ! %o0 = %asi
1147 .end
1148 !
1149 ! void vis_write_asi(unsigned int /* ASI */)
1150 ! Writes the argument in %o0 to the ASI register; no return value.
1151 .inline vis_write_asi,4
1152 wr %g0,%o0,%asi ! %asi = %g0 xor %o0, i.e. the argument unchanged
1153 .end
1154
1155 !--------------------------------------------------------------------
1156 ! Load/store from/into alternate space
1157 !
1158 ! float vis_ldfa_ASI_REG(void *rs1)
1159 ! Loads a float from the address in %o0 using the ASI currently held in the %asi register; result in %f0.
1160 .inline vis_ldfa_ASI_REG,4
1161 lda [%o0+0]%asi,%f4 ! %f4 = word at rs1, address space taken from %asi
1162 fmovs %f4,%f0 ! copy to the return register; compiler can clean this up
1163 .end
1164 !
1165 ! float vis_ldfa_ASI_P(void *rs1)
1166 ! Loads a float from the address in %o0 using ASI 0x80; result in %f0.
1167 .inline vis_ldfa_ASI_P,4
1168 lda [%o0]0x80,%f4 ! ASI_P load into %f4
1169 fmovs %f4,%f0 ! copy to the return register; compiler can clean this up
1170 .end
1171 !
1172 ! float vis_ldfa_ASI_PL(void *rs1)
1173 ! Loads a float from the address in %o0 using ASI 0x88; result in %f0.
1174 .inline vis_ldfa_ASI_PL,4
1175 lda [%o0]0x88,%f4 ! ASI_PL load into %f4
1176 fmovs %f4,%f0 ! copy to the return register; compiler can clean this up
1177 .end
1178 !
1179 ! double vis_lddfa_ASI_REG(void *rs1)
1180 ! Loads a double from the address in %o0 using the ASI currently held in the %asi register; result in %f0:%f1.
1181 .inline vis_lddfa_ASI_REG,4
1182 ldda [%o0+0]%asi,%f4 ! %f4:%f5 = doubleword at rs1, address space taken from %asi
1183 fmovd %f4,%f0 ! copy to the return register; compiler can clean this up
1184 .end
1185 !
1186 ! double vis_lddfa_ASI_P(void *rs1)
1187 ! Loads a double from the address in %o0 using ASI 0x80; result in %f0:%f1.
1188 .inline vis_lddfa_ASI_P,4
1189 ldda [%o0]0x80,%f4 ! ASI_P load into %f4:%f5
1190 fmovd %f4,%f0 ! copy to the return register; compiler can clean this up
1191 .end
1192 !
1193 ! double vis_lddfa_ASI_PL(void *rs1)
1194 ! Loads a double from the address in %o0 using ASI 0x88; result in %f0:%f1.
1195 .inline vis_lddfa_ASI_PL,4
1196 ldda [%o0]0x88,%f4 ! ASI_PL load into %f4:%f5
1197 fmovd %f4,%f0 ! copy to the return register; compiler can clean this up
1198 .end
1199 !
1200 ! vis_stfa_ASI_REG(float frs, void *rs1)
1201 ! Stores the float frs to the address in %o1 using the ASI currently held in the %asi register; no return value.
1202 .inline vis_stfa_ASI_REG,8
1203 st %o0,[%sp+0x48] ! frs (%o0) -> stack scratch
1204 ld [%sp+0x48],%f4 ! %f4 = frs
1205 sta %f4,[%o1+0]%asi ! store to rs1, address space taken from %asi
1206 .end
1207 !
1208 ! vis_stfa_ASI_P(float frs, void *rs1)
1209 ! Stores the float frs to the address in %o1 using ASI 0x80; no return value.
1210 .inline vis_stfa_ASI_P,8
1211 st %o0,[%sp+0x48] ! frs (%o0) -> stack scratch
1212 ld [%sp+0x48],%f4 ! %f4 = frs
1213 sta %f4,[%o1]0x80 ! ASI_P store to rs1 (%o1)
1214 .end
1215 !
1216 ! vis_stfa_ASI_PL(float frs, void *rs1)
1217 ! Stores the float frs to the address in %o1 using ASI 0x88; no return value.
1218 .inline vis_stfa_ASI_PL,8
1219 st %o0,[%sp+0x48] ! frs (%o0) -> stack scratch
1220 ld [%sp+0x48],%f4 ! %f4 = frs
1221 sta %f4,[%o1]0x88 ! ASI_PL store to rs1 (%o1)
1222 .end
1223 !
1224 ! vis_stdfa_ASI_REG(double frd, void *rs1)
1225 ! Stores the double frd to the address in %o2 using the ASI currently held in the %asi register; no return value.
1226 .inline vis_stdfa_ASI_REG,12
1227 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
1228 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
1229 stda %f4,[%o2+0]%asi ! store to rs1, address space taken from %asi
1230 .end
1231 !
1232 ! vis_stdfa_ASI_P(double frd, void *rs1)
1233 ! Stores the double frd to the address in %o2 using ASI 0x80; no return value.
1234 .inline vis_stdfa_ASI_P,12
1235 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
1236 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
1237 stda %f4,[%o2]0x80 ! ASI_P store to rs1 (%o2)
1238 .end
1239 !
1240 ! vis_stdfa_ASI_PL(double frd, void *rs1)
1241 ! Stores the double frd to the address in %o2 using ASI 0x88; no return value.
1242 .inline vis_stdfa_ASI_PL,12
1243 std %o0,[%sp+0x48] ! frd (%o0:%o1) -> stack scratch
1244 ldd [%sp+0x48],%f4 ! %f4:%f5 = frd
1245 stda %f4,[%o2]0x88 ! ASI_PL store to rs1 (%o2)
1246 .end
1247 !
1248 ! unsigned short vis_lduha_ASI_REG(void *rs1)
1249 ! Loads an unsigned halfword from the address in %o0 using the ASI held in the %asi register; result in %o0.
1250 .inline vis_lduha_ASI_REG,4
1251 lduha [%o0+0]%asi,%o0 ! %o0 = zero-extended halfword at rs1
1252 .end
1253 !
1254 ! unsigned short vis_lduha_ASI_P(void *rs1)
1255 ! Loads an unsigned halfword from the address in %o0 using ASI 0x80; result in %o0.
1256 .inline vis_lduha_ASI_P,4
1257 lduha [%o0]0x80,%o0 ! ASI_P: %o0 = zero-extended halfword at rs1
1258 .end
1259 !
1260 ! unsigned short vis_lduha_ASI_PL(void *rs1)
1261 ! Loads an unsigned halfword from the address in %o0 using ASI 0x88; result in %o0.
1262 .inline vis_lduha_ASI_PL,4
1263 lduha [%o0]0x88,%o0 ! ASI_PL: %o0 = zero-extended halfword at rs1
1264 .end
1265 !
1266 ! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
1267 ! Loads an unsigned halfword from rs1 + index (%o0 + %o1) using ASI 0x80; result in %o0.
1268 .inline vis_lduha_ASI_P_index,8
1269 lduha [%o0+%o1]0x80,%o0 ! ASI_P: %o0 = zero-extended halfword at rs1 + index
1270 .end
1271 !
1272 ! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
1273 ! Loads an unsigned halfword from rs1 + index (%o0 + %o1) using ASI 0x88; result in %o0.
1274 .inline vis_lduha_ASI_PL_index,8
1275 lduha [%o0+%o1]0x88,%o0 ! ASI_PL: %o0 = zero-extended halfword at rs1 + index
1276 .end
1277
1278 !--------------------------------------------------------------------
1279 ! Prefetch
1280 !
1281 ! void vis_prefetch_read(void * /*address*/);
1282 ! Issues a prefetch with function code 0 for the address in %o0; no return value.
1283 .inline vis_prefetch_read,4
1284 prefetch [%o0+0],0 ! fcn 0 (SPARC V9: prefetch for several reads)
1285 .end
1286 !
1287 ! void vis_prefetch_write(void * /*address*/);
1288 ! Issues a prefetch with function code 2 for the address in %o0; no return value.
1289 .inline vis_prefetch_write,4
1290 prefetch [%o0+0],2 ! fcn 2 (SPARC V9: prefetch for several writes)
1291 .end

mercurial