;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; File: media/libvpx/vp8/common/x86/recon_mmx.asm
;
; MMX block-copy routines for 8x8, 8x4 and 16x16 byte blocks.
;
; Conventions shared by every routine in this file:
;   * sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
;     defined here; they are expected to come from the include below.
;   * rsi walks the source, rdi walks the destination; both are pushed in
;     the prolog and popped in the epilog so the caller's values survive.
;   * Both strides are read as 32-bit values and sign-extended (movsxd)
;     into rax (source) and rcx (destination) before being used as offsets.
;   * Each movq transfers 8 bytes through an MMX register.
;   * No routine executes emms.
;     NOTE(review): presumably the caller is responsible for clearing the
;     MMX state before using x87 code -- confirm against the call sites.

%include "vpx_ports/x86_abi_support.asm"


;void vp8_copy_mem8x8_mmx(
;    unsigned char *src,
;    int            src_stride,
;    unsigned char *dst,
;    int            dst_stride
;    )
;
; Copies 8 rows of 8 bytes. Rows are moved in groups of 3, 3 and 2; inside
; a group all loads are issued before the matching stores.
; Uses rax, rcx and mm0-mm5.
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; fetch the arguments
    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; rows 0..2
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]
    movq        [rdi],       mm0
    movq        [rdi+rcx],   mm1
    movq        [rdi+rcx*2], mm2

    ; step both pointers down three rows (2*stride via lea, +1*stride via add)
    lea         rsi, [rsi+rax*2]
    add         rsi, rax
    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

    ; rows 3..5
    movq        mm3, [rsi]
    movq        mm4, [rsi+rax]
    movq        mm5, [rsi+rax*2]
    movq        [rdi],       mm3
    movq        [rdi+rcx],   mm4
    movq        [rdi+rcx*2], mm5

    ; step down two rows only: the last two rows are addressed at +1 and +2
    ; strides, so the pointers are left on row 5
    lea         rsi, [rsi+rax*2]
    lea         rdi, [rdi+rcx*2]

    ; rows 6..7
    movq        mm0, [rsi+rax]
    movq        mm1, [rsi+rax*2]
    movq        [rdi+rcx],   mm0
    movq        [rdi+rcx*2], mm1

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;void vp8_copy_mem8x4_mmx(
;    unsigned char *src,
;    int            src_stride,
;    unsigned char *dst,
;    int            dst_stride
;    )
;
; Copies 4 rows of 8 bytes: rows 0..2 as one group, then row 3.
; Uses rax, rcx and mm0-mm3.
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; fetch the arguments
    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; rows 0..2
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]
    movq        [rdi],       mm0
    movq        [rdi+rcx],   mm1
    movq        [rdi+rcx*2], mm2

    ; leave both pointers on row 2; row 3 is reached with a +1 stride offset
    lea         rsi, [rsi+rax*2]
    lea         rdi, [rdi+rcx*2]

    ; row 3
    movq        mm3, [rsi+rax]
    movq        [rdi+rcx], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;void vp8_copy_mem16x16_mmx(
;    unsigned char *src,
;    int            src_stride,
;    unsigned char *dst,
;    int            dst_stride
;    )
;
; Copies 16 rows of 16 bytes. Every row is moved as two quadwords (offsets
; 0 and 8). Fifteen rows are handled as five identical 3-row groups and the
; sixteenth row is copied on its own at the end.
; Uses rax, rcx and mm0-mm5.
global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; fetch the arguments
    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; %rep is expanded at assembly time into five back-to-back copies of the
    ; group below, i.e. straight-line code with no runtime loop or branch.
%rep 5
    ; load three rows: low halves in mm0/mm1/mm2, high halves in mm3/mm4/mm5
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]
    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    ; src += 3 * src_stride
    lea         rsi, [rsi+rax*2]
    add         rsi, rax

    ; store the same three rows
    movq        [rdi],         mm0
    movq        [rdi+8],       mm3
    movq        [rdi+rcx],     mm1
    movq        [rdi+rcx+8],   mm4
    movq        [rdi+rcx*2],   mm2
    movq        [rdi+rcx*2+8], mm5

    ; dst += 3 * dst_stride
    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx
%endrep

    ; row 15: both pointers already sit on it after the five groups
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]
    movq        [rdi],   mm0
    movq        [rdi+8], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret