;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; File: media/libvpx/vp9/encoder/x86/vp9_variance_impl_mmx.asm
;
; MMX helpers that compute sums of squares and sum / sum-of-squared-error
; pairs over small pixel blocks.
;
; Syntax: NASM/YASM (Intel operand order).
; The prolog/epilog macros (SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS, GET_GOT,
; RESTORE_GOT), sym(), arg() and the PTR/ptr size keywords are expected to
; come from the include below; their definitions are not visible in this file.
;
; NOTE(review): none of these routines executes emms; presumably the caller
; is responsible for clearing MMX state before using x87 code -- confirm.

%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
; Private helper macros
;
; Register roles shared by all row macros:
;   rax = current source row          rbx = current reference row
;   mm5 = 4 x int16 running sum of (src - ref)
;   mm6 = zero (used to widen bytes to words)
;   mm7 = 2 x int32 running sum of (src - ref)^2
;   mm0..mm3 are scratch.
;-----------------------------------------------------------------------

; Accumulate one 8-pixel row into mm5 (sum) and mm7 (sum of squares).
%macro VP9_MMX_VAR_ROW8 0
    movq        mm0,        [rax]           ; 8 source bytes
    movq        mm1,        [rbx]           ; 8 reference bytes
    movq        mm2,        mm0
    movq        mm3,        mm1

    punpcklbw   mm0,        mm6             ; low 4 bytes  -> 4 words
    punpcklbw   mm1,        mm6
    punpckhbw   mm2,        mm6             ; high 4 bytes -> 4 words
    punpckhbw   mm3,        mm6
    psubsw      mm0,        mm1             ; src - ref (low half)
    psubsw      mm2,        mm3             ; src - ref (high half)

    paddw       mm5,        mm0             ; sum += diff
    paddw       mm5,        mm2

    pmaddwd     mm0,        mm0             ; pairwise diff^2 -> 2 dwords
    pmaddwd     mm2,        mm2
    paddd       mm7,        mm0             ; sse += diff^2
    paddd       mm7,        mm2
%endmacro

; Accumulate one 4-pixel row into mm5 (sum) and mm7 (sum of squares).
%macro VP9_MMX_VAR_ROW4 0
    movd        mm0,        [rax]           ; 4 source bytes
    movd        mm1,        [rbx]           ; 4 reference bytes
    punpcklbw   mm0,        mm6             ; bytes -> words
    punpcklbw   mm1,        mm6
    psubsw      mm0,        mm1             ; src - ref
    paddw       mm5,        mm0             ; sum += diff
    pmaddwd     mm0,        mm0             ; pairwise diff^2 -> 2 dwords
    paddd       mm7,        mm0             ; sse += diff^2
%endmacro

; Accumulate one 4-pixel row into mm7 (sum of squares only).
%macro VP9_MMX_SSE_ROW4 0
    movd        mm0,        [rax]           ; 4 source bytes
    movd        mm1,        [rbx]           ; 4 reference bytes
    punpcklbw   mm0,        mm6             ; bytes -> words
    punpcklbw   mm1,        mm6
    psubsw      mm0,        mm1             ; src - ref
    pmaddwd     mm0,        mm0             ; pairwise diff^2 -> 2 dwords
    paddd       mm7,        mm0             ; sse += diff^2
%endmacro

; Step both row pointers to the next row (rcx/rdx hold the strides).
%macro VP9_MMX_NEXT_ROW 0
    add         rax,        rcx             ; src += source_stride
    add         rbx,        rdx             ; ref += recon_stride
%endmacro

; Reduce mm5/mm7 horizontally via a 16-byte scratch area at [rsp],
; store *SSE (arg 4) and *Sum (arg 5), and set the return value to 0.
; Clobbers rax, rbx, rcx, rdx, rsi, rdi.
%macro VP9_MMX_STORE_SSE_SUM 0
    movq        QWORD PTR [rsp+8], mm5      ; four word partial sums
    movq        QWORD PTR [rsp],   mm7      ; two dword partial SSEs

    movsx       rdx,        WORD PTR [rsp+8]
    movsx       rcx,        WORD PTR [rsp+10]
    movsx       rbx,        WORD PTR [rsp+12]
    movsx       rax,        WORD PTR [rsp+14]
    add         rdx,        rcx
    add         rbx,        rax
    add         rdx,        rbx             ; rdx = Sum

    movsxd      rax,        DWORD PTR [rsp]
    movsxd      rcx,        DWORD PTR [rsp+4]
    add         rax,        rcx             ; rax = SSE

    mov         rsi,        arg(4)          ; SSE
    mov         rdi,        arg(5)          ; Sum
    mov         dword ptr [rsi], eax
    mov         dword ptr [rdi], edx
    xor         rax,        rax             ; return 0
%endmacro


;-----------------------------------------------------------------------
;unsigned int vp9_get_mb_ss_mmx( short *src_ptr )
;
; Returns the sum of squares of the 256 consecutive 16-bit values at
; src_ptr (16 iterations x 32 bytes).
;
; In:    arg(0) = src_ptr
; Out:   rax    = sum of squares
; Uses:  rax, rcx, mm0-mm4, 8 bytes of stack scratch
;-----------------------------------------------------------------------
global sym(vp9_get_mb_ss_mmx) PRIVATE
sym(vp9_get_mb_ss_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 1                  ; one argument (was 7)
    GET_GOT     rbx
    push        rsi
    push        rdi
    sub         rsp,        8
    ; end prolog

    mov         rax,        arg(0)          ; src_ptr
    mov         rcx,        16              ; iterations remaining
    pxor        mm4,        mm4             ; 2 x int32 accumulator

.next_row:
    movq        mm0,        [rax]
    movq        mm1,        [rax+8]
    movq        mm2,        [rax+16]
    movq        mm3,        [rax+24]
    pmaddwd     mm0,        mm0             ; square and pairwise add
    pmaddwd     mm1,        mm1
    pmaddwd     mm2,        mm2
    pmaddwd     mm3,        mm3

    paddd       mm4,        mm0
    paddd       mm4,        mm1
    paddd       mm4,        mm2
    paddd       mm4,        mm3

    add         rax,        32
    dec         rcx
    ; dec leaves CF untouched, so the loop must test ZF only.  The previous
    ; "ja" also required CF == 0 and therefore depended on the carry left
    ; behind by the pointer add above.
    jnz         .next_row

    movq        QWORD PTR [rsp], mm4

    ;return sum[0]+sum[1];
    movsxd      rax,        dword ptr [rsp]
    movsxd      rcx,        dword ptr [rsp+4]
    add         rax,        rcx

    ; begin epilog
    add         rsp,        8
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int vp9_get8x8var_mmx
;(
;    unsigned char *src_ptr,
;    int  source_stride,
;    unsigned char *ref_ptr,
;    int  recon_stride,
;    unsigned int *SSE,
;    int *Sum
;)
;
; Walks 8 rows of 8 bytes from src_ptr and ref_ptr, writing the sum of
; differences to *Sum and the sum of squared differences to *SSE.
; Always returns 0.
;
; Each of the four word lanes of mm5 receives 16 differences in
; [-255, 255], so the 16-bit sum lanes cannot overflow.
;-----------------------------------------------------------------------
global sym(vp9_get8x8var_mmx) PRIVATE
sym(vp9_get8x8var_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    push        rbx
    sub         rsp,        16              ; scratch for the final reduction
    ; end prolog

    pxor        mm5,        mm5             ; sum accumulator
    pxor        mm6,        mm6             ; constant zero for unpacking
    pxor        mm7,        mm7             ; SSE accumulator

    mov         rax,        arg(0)          ; src_ptr
    mov         rbx,        arg(2)          ; ref_ptr
    movsxd      rcx,        dword ptr arg(1) ; source_stride
    movsxd      rdx,        dword ptr arg(3) ; recon_stride

    ; Rows 1-7: accumulate, then advance to the next row.
%rep 7
    VP9_MMX_VAR_ROW8
    VP9_MMX_NEXT_ROW
%endrep
    ; Row 8: no pointer advance is needed after the last row.
    VP9_MMX_VAR_ROW8

    ; Now accumulate the final results.
    VP9_MMX_STORE_SSE_SUM

    ; begin epilog
    add         rsp,        16
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int
;vp9_get4x4var_mmx
;(
;    unsigned char *src_ptr,
;    int  source_stride,
;    unsigned char *ref_ptr,
;    int  recon_stride,
;    unsigned int *SSE,
;    int *Sum
;)
;
; Walks 4 rows of 4 bytes from src_ptr and ref_ptr, writing the sum of
; differences to *Sum and the sum of squared differences to *SSE.
; Always returns 0.
;-----------------------------------------------------------------------
global sym(vp9_get4x4var_mmx) PRIVATE
sym(vp9_get4x4var_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 6
    push        rsi
    push        rdi
    push        rbx
    sub         rsp,        16              ; scratch for the final reduction
    ; end prolog

    pxor        mm5,        mm5             ; sum accumulator
    pxor        mm6,        mm6             ; constant zero for unpacking
    pxor        mm7,        mm7             ; SSE accumulator

    mov         rax,        arg(0)          ; src_ptr
    mov         rbx,        arg(2)          ; ref_ptr
    movsxd      rcx,        dword ptr arg(1) ; source_stride
    movsxd      rdx,        dword ptr arg(3) ; recon_stride

    ; Rows 1-3: accumulate, then advance to the next row.
%rep 3
    VP9_MMX_VAR_ROW4
    VP9_MMX_NEXT_ROW
%endrep
    ; Row 4
    VP9_MMX_VAR_ROW4

    ; Now accumulate the final results.
    VP9_MMX_STORE_SSE_SUM

    ; begin epilog
    add         rsp,        16
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------
;unsigned int
;vp9_get4x4sse_cs_mmx
;(
;    unsigned char *src_ptr,
;    int  source_stride,
;    unsigned char *ref_ptr,
;    int  recon_stride
;)
;
; Returns the sum of squared differences over 4 rows of 4 bytes.
; The result is the low 32 bits of rax; the routine does not clear the
; upper half of the 64-bit register.
;-----------------------------------------------------------------------
global sym(vp9_get4x4sse_cs_mmx) PRIVATE
sym(vp9_get4x4sse_cs_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi                         ; rsi/rdi are saved but not used here
    push        rdi
    push        rbx
    ; end prolog

    pxor        mm6,        mm6             ; constant zero for unpacking
    pxor        mm7,        mm7             ; SSE accumulator

    mov         rax,        arg(0)          ; src_ptr
    mov         rbx,        arg(2)          ; ref_ptr
    movsxd      rcx,        dword ptr arg(1) ; source_stride
    movsxd      rdx,        dword ptr arg(3) ; recon_stride

    ; Rows 1-3: accumulate, then advance to the next row.
%rep 3
    VP9_MMX_SSE_ROW4
    VP9_MMX_NEXT_ROW
%endrep
    ; Row 4
    VP9_MMX_SSE_ROW4

    ; Fold the two dword partial sums into the low dword.
    movq        mm0,        mm7
    psrlq       mm7,        32
    paddd       mm0,        mm7
    movq        rax,        mm0

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret