; (recovered from diff: b/media/libjpeg/simd/jsimdext.inc, Wed Dec 31 06:09:35 2014 +0100, hunk @@ -0,0 +1,376 @@)
;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2010 D. R. Commander
;
; Based on
; x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
;    claim that you wrote the original software. If you use this software
;    in a product, an acknowledgment in the product documentation would be
;    appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
;    misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]

; ==========================================================================
; System-dependent configurations
;
; The first of WIN32 / WIN64 / OBJ32 / ELF / AOUT / MACHO that is defined on
; the assembler command line selects a branch; if none is defined, the
; fallback branch at the end is used.  Every branch defines
;   SEG_TEXT   - section name and attributes used for code
;   SEG_CONST  - section name and attributes used for constant data
; and some branches additionally define
;   EXTN(name) - decoration applied to external symbol names
;   GOT_SYMBOL - symbol used by the PIC support macros
;   PIC        - forced on for Mach-O only

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use32 class=CODE
%define SEG_CONST   .rdata align=16 public use32 class=CONST
%endif

%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
; * Microsoft Visual C++

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use64 class=CODE
%define SEG_CONST   .rdata align=16 public use64 class=CONST
%endif
%define EXTN(name)  name                ; foo() -> foo

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    .text align=16 public use32 class=CODE
%define SEG_CONST   .data align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; -- segment definition --
;
%ifdef __x86_64__
%define SEG_TEXT    .text   progbits align=16
%define SEG_CONST   .rodata progbits align=16
%else
%define SEG_TEXT    .text   progbits alloc exec nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
%endif

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_       ; ELF supports PIC
%define EXTN(name)  name                        ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_      ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif  ; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
; Common types
;
; Each type gets three names: the NASM size keyword itself, SIZEOF_<type>
; (bytes) and <type>_BIT (bits).  The SIZEOF_*/..._BIT values referenced on
; the right-hand side are plain %define aliases, so they are resolved when
; the alias is used, not when it is defined here.
;
%ifdef __x86_64__
%define POINTER         qword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_QWORD    ; sizeof(POINTER)
%define POINTER_BIT     QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%else
%define POINTER         dword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
%define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%endif

%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD          qword           ; int64  (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
; XMMWORD expands to nothing, so "XMMWORD [mem]" assembles as a bare memory
; operand with no explicit size keyword.
%define XMMWORD                         ; int128  (SSE register)
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
%define XMM_DWORD
%define XMM_MMWORD

%define SIZEOF_BYTE     1               ; sizeof(BYTE)
%define SIZEOF_WORD     2               ; sizeof(WORD)
%define SIZEOF_DWORD    4               ; sizeof(DWORD)
%define SIZEOF_QWORD    8               ; sizeof(QWORD)
%define SIZEOF_OWORD    16              ; sizeof(OWORD)

%define BYTE_BIT        8               ; CHAR_BIT in C
%define WORD_BIT        16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT       32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT       64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT       128             ; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
; External Symbol Name
;
; Default decoration, used when the platform section did not define EXTN:
; prepend one underscore.
;
%ifndef EXTN
%define EXTN(name)   _ %+ name          ; foo() -> _foo
%endif

; --------------------------------------------------------------------------
; Macros for position-independent code (PIC) support
;
; Provided in every configuration (as no-ops when PIC is off):
;   GOTOFF(got,sym) - address expression for `sym` given base register `got`
;   get_GOT reg     - load the PIC base into `reg`
;   pushpic reg     - push reg   (emitted only when PIC is on)
;   poppic  reg     - pop reg    (emitted only when PIC is on)
;   movpic  a,b     - mov a,b    (emitted only when PIC is on)
;
; PIC is silently disabled when the object format defined no GOT_SYMBOL.
;
%ifndef GOT_SYMBOL
%undef PIC
%endif

%ifdef PIC ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.

        SECTION SEG_CONST
const_base:

; With this scheme the "GOT" register holds the run-time address of
; const_base, so a symbol is addressed as base + (sym - const_base).
%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg  (Mach-O variant, 32-bit code)
;   Out:    reg = run-time address of const_base
;   Clobb:  ecx, flags (ebp is saved and restored)
;
; How it works: the call/ret pair fetches the address of the `add`
; instruction into ecx, and the `add` advances it to %%ref.  The two `db`
; bytes followed by the assembled `jmp near const_base` (opcode E9 + rel32,
; where rel32 = const_base - %%ref) are never executed as a jump; together
; they form the single instruction
;   lea reg32, [ecx+ebp*8+(const_base-%%ref)]
; and since ebp has just been zeroed, the result is the address of const_base.
%imacro get_GOT 1
        ; NOTE: this macro destroys ecx register.
        call    %%geteip
        add     ecx, byte (%%ref - $)
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else   ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx
%endif  ; (%1 == ebx)
        pop     ebp
%endmacro

%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg  (GOT-based variant)
;   Out:    reg = GOT_SYMBOL address computed via ..gotpc
;   Clobb:  flags
;
; The call/ret pair loads the address of the `add` instruction into reg;
; the `add` then applies the gotpc-relative offset.
%imacro get_GOT 1
        extern  GOT_SYMBOL
        call    %%geteip
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]
        ret
%%done:
%endmacro

%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------

; Save / restore / copy the PIC base register.
%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else   ; !PIC -----------------------------------------

; Without PIC a symbol is addressed directly and the helper macros expand
; to nothing, so call sites need no conditional assembly of their own.
%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif  ;  PIC -----------------------------------------

; --------------------------------------------------------------------------
; Align the next instruction on {2,4,8,16,..}-byte boundary.
; ".balign n,,m" in GNU as
;
; MSKLE(x,y): non-zero mask with all low bits set when x <= y, 0 otherwise
;             (both operands are truncated to 16 bits first).  ANDing it
;             into a `times` count turns the emission on or off.
; FILLB(b,n): number of bytes from position b to the next n-byte boundary,
;             measured from the section start ($$); n is a power of two.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

; alignx n [, m]
;   n - alignment in bytes
;   m - maximum padding allowed (default 0xFFFF); when the padding needed at
;       the macro's start exceeds m, every `times` count is 0 and nothing is
;       emitted.
;
; Padding of 16 bytes or more is filled entirely with one-byte nops.
; Smaller padding is built from a ladder of longer no-op encodings; each rung
; re-evaluates the bytes still missing at `$` after the previous rung:
;   >= 9 left : one 7-byte lea      >= 7 left : one 7-byte lea
;   >= 6 left : one 6-byte lea      >= 4 left : one 4-byte lea
;   >= 3 left : one 3-byte lea      >= 2 left : one 2-byte mov
;   >= 1 left : one nop
;
; NOTE(review): the filler bytes are 32-bit encodings.  Decoded in 64-bit
; mode they write ebx/ebp as 32-bit destinations, which zeroes the upper
; half of rbx/rbp - confirm how 64-bit callers use this macro on
; fall-through paths.
%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                                      ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00        ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00        ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00             ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                       ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                            ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                                 ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                                      ; nop
%endmacro

; Align the next data on {2,4,8,16,..}-byte boundary.
;
; alignz n: pad to an n-byte boundary using zero bytes (for data).
%imacro alignz 1.nolist
        align   %1, db 0                ; filling zeros
%endmacro

%ifdef __x86_64__

; collect_args / uncollect_args
;
; Both ABI variants leave the first six integer arguments in the same
; registers, so function bodies can be written once:
;
;   argument   1     2     3     4     5          6
;   register   r10   r11   r12   r13   r14        r15
;   WIN64 src  rcx   rdx   r8    r9    [rax+48]   [rax+56]
;   SysV src   rdi   rsi   rdx   rcx   r8         r9
;
; uncollect_args undoes the matching collect_args and must be reached with
; rsp at the value collect_args left it.

%ifdef WIN64

; WIN64 variant.
;   Saves:  r12-r15, rsi, rdi, xmm6, xmm7
;   Clobb:  r10, r11
;   Stack:  48 bytes of pushes + 2*SIZEOF_XMMWORD
; NOTE(review): arguments 5 and 6 are read through rax, so the caller must
; have loaded rax beforehand - presumably with rsp taken right after
; "push rbp", which makes [rax+48]/[rax+56] the two stack arguments; verify
; against the call sites.
; NOTE(review): movaps requires rsp to be 16-byte aligned at the two stores;
; that alignment is the caller's responsibility - confirm.
; NOTE(review): only xmm6/xmm7 are preserved here; code using xmm8-xmm15
; would have to save those itself.
%imacro collect_args 0
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rcx
        mov     r11, rdx
        mov     r12, r8
        mov     r13, r9
        mov     r14, [rax+48]
        mov     r15, [rax+56]
        push    rsi
        push    rdi
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm6
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm7
%endmacro

; Restore everything saved by the WIN64 collect_args, in reverse order.
%imacro uncollect_args 0
        movaps  xmm7, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        movaps  xmm6, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        pop     rdi
        pop     rsi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
%endmacro

%else

; Non-WIN64 variant: arguments arrive in rdi, rsi, rdx, rcx, r8, r9.
;   Saves:  r10-r15 (pushed before being overwritten)
;   Stack:  48 bytes of pushes
%imacro collect_args 0
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rdi
        mov     r11, rsi
        mov     r12, rdx
        mov     r13, rcx
        mov     r14, r8
        mov     r15, r9
%endmacro

; Restore r10-r15 saved by the non-WIN64 collect_args, in reverse order.
%imacro uncollect_args 0
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
%endmacro

%endif

%endif

; --------------------------------------------------------------------------
; Defines picked up from the C headers
;
%include "jsimdcfg.inc"

; --------------------------------------------------------------------------