media/libjpeg/simd/jsimdext.inc

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:6d7a024e9dbc
1 ;
2 ; jsimdext.inc - common declarations
3 ;
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 ; Copyright 2010 D. R. Commander
6 ;
7 ; Based on
8 ; x86 SIMD extension for IJG JPEG library - version 1.02
9 ;
10 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
11 ;
12 ; This software is provided 'as-is', without any express or implied
13 ; warranty. In no event will the authors be held liable for any damages
14 ; arising from the use of this software.
15 ;
16 ; Permission is granted to anyone to use this software for any purpose,
17 ; including commercial applications, and to alter it and redistribute it
18 ; freely, subject to the following restrictions:
19 ;
20 ; 1. The origin of this software must not be misrepresented; you must not
21 ; claim that you wrote the original software. If you use this software
22 ; in a product, an acknowledgment in the product documentation would be
23 ; appreciated but is not required.
24 ; 2. Altered source versions must be plainly marked as such, and must not be
25 ; misrepresented as being the original software.
26 ; 3. This notice may not be removed or altered from any source distribution.
27 ;
28 ; [TAB8]
29
30 ; ==========================================================================
31 ; System-dependent configurations
32
33 %ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)--------
34 ; * Microsoft Visual C++
35 ; * MinGW (Minimalist GNU for Windows)
36 ; * CygWin
37 ; * LCC-Win32
38 ; EXTN and GOT_SYMBOL are left undefined in this branch.
39 ; -- segment definition --
40 ; YASM is given only the alignment; NASM also gets public/use32/class.
41 %ifdef __YASM_VER__
42 %define SEG_TEXT .text align=16
43 %define SEG_CONST .rdata align=16
44 %else
45 %define SEG_TEXT .text align=16 public use32 class=CODE
46 %define SEG_CONST .rdata align=16 public use32 class=CONST
47 %endif
48
49 %elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)--------
50 ; * Microsoft Visual C++
51 ; GOT_SYMBOL is left undefined in this branch.
52 ; -- segment definition --
53 ; YASM is given only the alignment; NASM also gets public/use64/class.
54 %ifdef __YASM_VER__
55 %define SEG_TEXT .text align=16
56 %define SEG_CONST .rdata align=16
57 %else
58 %define SEG_TEXT .text align=16 public use64 class=CODE
59 %define SEG_CONST .rdata align=16 public use64 class=CONST
60 %endif
61 %define EXTN(name) name ; foo() -> foo
62
63 %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)----------
64 ; * Borland C++ (Win32)
65 ; EXTN and GOT_SYMBOL are left undefined in this branch.
66 ; -- segment definition --
67 ; Constants are placed in .data (class=DATA) for this format.
68 %define SEG_TEXT .text align=16 public use32 class=CODE
69 %define SEG_CONST .data align=16 public use32 class=DATA
70
71 %elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
72 ; * Linux
73 ; * *BSD family Unix using elf format
74 ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
75
76 ; mark stack as non-executable
77 section .note.GNU-stack noalloc noexec nowrite progbits
78
79 ; -- segment definition --
80 ; Without __x86_64__ the alloc/exec/nowrite attributes are spelled out.
81 %ifdef __x86_64__
82 %define SEG_TEXT .text progbits align=16
83 %define SEG_CONST .rodata progbits align=16
84 %else
85 %define SEG_TEXT .text progbits alloc exec nowrite align=16
86 %define SEG_CONST .rodata progbits alloc noexec nowrite align=16
87 %endif
88
89 ; To make the code position-independent, append -DPIC to the commandline
90 ; (PIC is kept later in this file only when GOT_SYMBOL is defined.)
91 %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC
92 %define EXTN(name) name ; foo() -> foo
93
94 %elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)----
95 ; * Older Linux using a.out format (nasm -f aout -DAOUT ...)
96 ; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...)
97 ; EXTN is left undefined in this branch.
98 ; -- segment definition --
99 ; No section attributes are given; constants go in .data.
100 %define SEG_TEXT .text
101 %define SEG_CONST .data
102
103 ; To make the code position-independent, append -DPIC to the commandline
104 ; (PIC is kept later in this file only when GOT_SYMBOL is defined.)
105 %define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC
106
107 %elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
108 ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
109 ; EXTN is left undefined in this branch.
110 ; -- segment definition --
111 ;
112 %define SEG_TEXT .text ;align=16 ; NASM rejects align=16 here (reason unknown).
113 %define SEG_CONST .rodata align=16
114
115 ; The generation of position-independent code (PIC) is the default on Darwin.
116 ; PIC is therefore defined here unconditionally, without requiring -DPIC.
117 %define PIC
118 %define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing
119
120 %else ; ----(Other case)----------------------
121 ; EXTN and GOT_SYMBOL are left undefined in this branch.
122 ; -- segment definition --
123 ; No section attributes are given; constants go in .data.
124 %define SEG_TEXT .text
125 %define SEG_CONST .data
126
127 %endif ; ----------------------------------------------
128
129 ; ==========================================================================
130
131 ; --------------------------------------------------------------------------
132 ; Common types
133 ;
134 %ifdef __x86_64__ ; pointer-sized names follow the target word size
135 %define POINTER qword ; general pointer type
136 %define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
137 %define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
138 %else
139 %define POINTER dword ; general pointer type
140 %define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
141 %define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
142 %endif
143
144 %define INT dword ; signed integer type
145 %define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT)
146 %define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT
147
148 %define FP32 dword ; IEEE754 single
149 %define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32)
150 %define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT
151
152 %define MMWORD qword ; int64 (MMX register)
153 %define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD)
154 %define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT
155
156 ; NASM is buggy and doesn't properly handle operand sizes for SSE
157 ; instructions, so for now we have to define XMMWORD as blank.
158 %define XMMWORD ; int128 (SSE register)
159 %define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD)
160 %define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT
161
162 ; Similar hacks for when we load a dword or MMWORD into an xmm# register
163 %define XMM_DWORD ; expands to nothing (no size qualifier emitted)
164 %define XMM_MMWORD ; expands to nothing (no size qualifier emitted)
165 ; The SIZEOF_* and *_BIT names used above are defined only here; %define expands its body at the point of use.
166 %define SIZEOF_BYTE 1 ; sizeof(BYTE)
167 %define SIZEOF_WORD 2 ; sizeof(WORD)
168 %define SIZEOF_DWORD 4 ; sizeof(DWORD)
169 %define SIZEOF_QWORD 8 ; sizeof(QWORD)
170 %define SIZEOF_OWORD 16 ; sizeof(OWORD)
171
172 %define BYTE_BIT 8 ; CHAR_BIT in C
173 %define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT
174 %define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT
175 %define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT
176 %define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT
177
178 ; --------------------------------------------------------------------------
179 ; External Symbol Name
180 ;
181 %ifndef EXTN ; fallback, used only when no platform branch above defined EXTN
182 %define EXTN(name) _ %+ name ; foo() -> _foo
183 %endif
184
185 ; --------------------------------------------------------------------------
186 ; Macros for position-independent code (PIC) support
187 ;
188 %ifndef GOT_SYMBOL ; no GOT_SYMBOL was defined by the platform section above
189 %undef PIC ; so drop PIC even if it was requested with -DPIC
190 %endif
191
192 %ifdef PIC ; -------------------------------------------
193
194 %ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
195
196 ; At present, nasm doesn't seem to support PIC generation for Mach-O.
197 ; The PIC support code below is a little tricky.
198 ; const_base marks the current position in SEG_CONST; GOTOFF offsets are taken relative to it.
199 SECTION SEG_CONST
200 const_base:
201
202 %define GOTOFF(got,sym) (got) + (sym) - const_base
203 ; get_GOT reg: leaves the run-time address of const_base in reg (see the lea notes below).
204 %imacro get_GOT 1
205 ; NOTE: this macro destroys the ecx register.
206 call %%geteip ; pushes the address of the add that follows
207 add ecx, byte (%%ref - $) ; ecx = run-time address of %%ref
208 jmp short %%adjust
209 %%geteip:
210 mov ecx, POINTER [esp] ; ecx = return address of the call above
211 ret
212 %%adjust:
213 push ebp ; ebp is used as a zero index below and restored by the final pop
214 xor ebp,ebp ; ebp = 0
215 %ifidni %1,ebx ; (%1 == ebx)
216 ; db 0x8D,0x9C + jmp near const_base =
217 ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
218 db 0x8D,0x9C ; 8D,9C
219 jmp near const_base ; E9,(const_base-%%ref)
220 %%ref:
221 %else ; (%1 != ebx)
222 ; db 0x8D,0x8C + jmp near const_base =
223 ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
224 db 0x8D,0x8C ; 8D,8C
225 jmp near const_base ; E9,(const_base-%%ref)
226 %%ref: mov %1, ecx
227 %endif ; (%1 == ebx)
228 pop ebp
229 %endmacro
230
231 %else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
232
233 %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
234 ; get_GOT reg: load reg with the return address, then add the ..gotpc displacement of GOT_SYMBOL.
235 %imacro get_GOT 1
236 extern GOT_SYMBOL
237 call %%geteip ; pushes the address of the add that follows
238 add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
239 jmp short %%done
240 %%geteip:
241 mov %1, POINTER [esp] ; %1 = return address of the call above
242 ret
243 %%done:
244 %endmacro
245
246 %endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
247 ; With PIC defined, pushpic/poppic/movpic emit a real push/pop/mov.
248 %imacro pushpic 1.nolist
249 push %1
250 %endmacro
251 %imacro poppic 1.nolist
252 pop %1
253 %endmacro
254 %imacro movpic 2.nolist
255 mov %1,%2
256 %endmacro
257
258 %else ; !PIC -----------------------------------------
259
260 %define GOTOFF(got,sym) (sym)
261 ; Without PIC, GOTOFF ignores its first argument and the four macros below expand to nothing.
262 %imacro get_GOT 1.nolist
263 %endmacro
264 %imacro pushpic 1.nolist
265 %endmacro
266 %imacro poppic 1.nolist
267 %endmacro
268 %imacro movpic 2.nolist
269 %endmacro
270
271 %endif ; PIC -----------------------------------------
272
273 ; --------------------------------------------------------------------------
274 ; Align the next instruction on {2,4,8,16,..}-byte boundary.
275 ; ".balign n,,m" in GNU as
276 ; alignx takes the boundary and an optional second argument that defaults to 0xFFFF.
277 %define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) ; 0 when x > y (compared as 16-bit values), otherwise a mask of ones
278 %define FILLB(b,n) (($$-(b)) & ((n)-1)) ; bytes from position b to the next n-byte boundary, counted from the section start ($$)
279 ; Every "times" count below becomes 0 when the padding needed at %%bs exceeds %2; gaps of 16 bytes or more are filled with nops only.
280 %imacro alignx 1-2.nolist 0xFFFF
281 %%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
282 db 0x90 ; nop
283 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
284 db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
285 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
286 db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
287 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
288 db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
289 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
290 db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00]
291 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
292 db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00]
293 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
294 db 0x8B,0xED ; mov ebp,ebp
295 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
296 db 0x90 ; nop
297 %endmacro
298
299 ; Align the next data on {2,4,8,16,..}-byte boundary.
300 ; Takes one argument, the boundary; the gap is filled with zero bytes.
301 %imacro alignz 1.nolist
302 align %1, db 0 ; filling zeros
303 %endmacro
304
305 %ifdef __x86_64__ ; collect_args / uncollect_args exist only for 64-bit builds
306 ; collect_args copies six incoming values into r10..r15; uncollect_args pops what collect_args pushed.
307 %ifdef WIN64
308 ; WIN64: r10..r13 <- rcx, rdx, r8, r9; r14/r15 are loaded from memory at [rax+48] and [rax+56].
309 %imacro collect_args 0
310 push r12
311 push r13
312 push r14
313 push r15
314 mov r10, rcx
315 mov r11, rdx
316 mov r12, r8
317 mov r13, r9
318 mov r14, [rax+48] ; rax is not set by this macro; presumably a frame address prepared by the caller so that +48/+56 reach the 5th/6th arguments - TODO confirm
319 mov r15, [rax+56]
320 push rsi
321 push rdi
322 sub rsp, SIZEOF_XMMWORD
323 movaps XMMWORD [rsp], xmm6 ; movaps needs a 16-byte aligned address; this macro does not align rsp itself
324 sub rsp, SIZEOF_XMMWORD
325 movaps XMMWORD [rsp], xmm7
326 %endmacro
327 ; Restores xmm7, xmm6, rdi, rsi, r15..r12 in the reverse of the order collect_args saved them.
328 %imacro uncollect_args 0
329 movaps xmm7, XMMWORD [rsp]
330 add rsp, SIZEOF_XMMWORD
331 movaps xmm6, XMMWORD [rsp]
332 add rsp, SIZEOF_XMMWORD
333 pop rdi
334 pop rsi
335 pop r15
336 pop r14
337 pop r13
338 pop r12
339 %endmacro
340
341 %else
342 ; Not WIN64: r10..r15 are saved on the stack, then loaded from rdi, rsi, rdx, rcx, r8, r9.
343 %imacro collect_args 0
344 push r10
345 push r11
346 push r12
347 push r13
348 push r14
349 push r15
350 mov r10, rdi
351 mov r11, rsi
352 mov r12, rdx
353 mov r13, rcx
354 mov r14, r8
355 mov r15, r9
356 %endmacro
357 ; Pops r15..r10, the reverse of the push order in collect_args.
358 %imacro uncollect_args 0
359 pop r15
360 pop r14
361 pop r13
362 pop r12
363 pop r11
364 pop r10
365 %endmacro
366
367 %endif
368
369 %endif
370
371 ; --------------------------------------------------------------------------
372 ; Defines picked up from the C headers
373 ;
374 %include "jsimdcfg.inc"
375
376 ; --------------------------------------------------------------------------

mercurial