michael@0: ; michael@0: ; jsimdext.inc - common declarations michael@0: ; michael@0: ; Copyright 2009 Pierre Ossman for Cendio AB michael@0: ; Copyright 2010 D. R. Commander michael@0: ; michael@0: ; Based on michael@0: ; x86 SIMD extension for IJG JPEG library - version 1.02 michael@0: ; michael@0: ; Copyright (C) 1999-2006, MIYASAKA Masaru. michael@0: ; michael@0: ; This software is provided 'as-is', without any express or implied michael@0: ; warranty. In no event will the authors be held liable for any damages michael@0: ; arising from the use of this software. michael@0: ; michael@0: ; Permission is granted to anyone to use this software for any purpose, michael@0: ; including commercial applications, and to alter it and redistribute it michael@0: ; freely, subject to the following restrictions: michael@0: ; michael@0: ; 1. The origin of this software must not be misrepresented; you must not michael@0: ; claim that you wrote the original software. If you use this software michael@0: ; in a product, an acknowledgment in the product documentation would be michael@0: ; appreciated but is not required. michael@0: ; 2. Altered source versions must be plainly marked as such, and must not be michael@0: ; misrepresented as being the original software. michael@0: ; 3. This notice may not be removed or altered from any source distribution. 
;
; [TAB8]

; ==========================================================================
;  System-dependent configurations
;
;  The first of WIN32, WIN64, OBJ32, ELF, AOUT or MACHO that is defined
;  (normally with -D on the assembler command line) selects one branch of
;  the chain below; when none is defined the generic fallback at the end
;  is used.  Every branch defines:
;
;    SEG_TEXT   - section name and attributes used for code
;    SEG_CONST  - section name and attributes used for constants
;
;  Some branches additionally define EXTN (spelling of external symbol
;  names), GOT_SYMBOL (required for the PIC macros later in this file to
;  stay enabled) and PIC.

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;   NASM gets the full attribute list, YASM only the alignment.
;
%ifndef __YASM_VER__
%define SEG_TEXT    .text align=16 public use32 class=CODE
%define SEG_CONST   .rdata align=16 public use32 class=CONST
%else
%define SEG_TEXT    .text align=16
%define SEG_CONST   .rdata align=16
%endif

%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
; * Microsoft Visual C++

; -- segment definition --
;   NASM gets the full attribute list, YASM only the alignment.
;
%ifndef __YASM_VER__
%define SEG_TEXT    .text align=16 public use64 class=CODE
%define SEG_CONST   .rdata align=16 public use64 class=CONST
%else
%define SEG_TEXT    .text align=16
%define SEG_CONST   .rdata align=16
%endif
%define EXTN(name)  name                    ; foo() -> foo

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    .text align=16 public use32 class=CODE
%define SEG_CONST   .data align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; -- segment definition --
;   The 32-bit variant spells out alloc/exec/write attributes explicitly.
;
%ifdef __x86_64__
%define SEG_TEXT    .text progbits align=16
%define SEG_CONST   .rodata progbits align=16
%else
%define SEG_TEXT    .text progbits alloc exec nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
%endif

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_   ; ELF supports PIC
%define EXTN(name)  name                    ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_  ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text ;align=16 ; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
; PIC is therefore forced on here instead of being left to the command line.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_     ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif  ; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
;  Common types
;
;  Each type is described by three macros: the operand-size keyword itself,
;  SIZEOF_<type> (bytes) and <type>_BIT (bits).  The SIZEOF_xWORD / xWORD_BIT
;  names used on the right-hand side are defined further down in this file;
;  that works because %define bodies are expanded when used, not when
;  defined.
;
%ifdef __x86_64__
%define POINTER         qword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_QWORD    ; sizeof(POINTER)
%define POINTER_BIT     QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%else
%define POINTER         dword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
%define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%endif

%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD          qword           ; int64  (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
%define XMMWORD                         ; int128  (SSE register); deliberately expands to nothing
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
; (both size keywords are also defined as blank).
%define XMM_DWORD
%define XMM_MMWORD

; Sizes in bytes of the basic operand types.
%define SIZEOF_BYTE     1               ; sizeof(BYTE)
%define SIZEOF_WORD     2               ; sizeof(WORD)
%define SIZEOF_DWORD    4               ; sizeof(DWORD)
%define SIZEOF_QWORD    8               ; sizeof(QWORD)
%define SIZEOF_OWORD    16              ; sizeof(OWORD)

; Sizes in bits of the basic operand types.
%define BYTE_BIT        8               ; CHAR_BIT in C
%define WORD_BIT        16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT       32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT       64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT       128             ; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
;  External Symbol Name
;
;  EXTN(name) gives the assembler-level spelling of the C symbol `name`.
;  If the platform section above did not define it, the default prepends
;  an underscore.
;
%ifndef EXTN
%define EXTN(name)      _ %+ name       ; foo() -> _foo
%endif

; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  Interface (identical in all three variants below):
;    GOTOFF(got,sym)  - address expression for `sym`, given the base
;                       register `got` that was loaded by get_GOT
;    get_GOT reg      - load the PIC base into `reg`
;    pushpic / poppic / movpic - push / pop / mov that are emitted only in
;                       PIC builds and expand to nothing otherwise
;
;  PIC is only honoured when the platform section defined GOT_SYMBOL.
;
%ifndef GOT_SYMBOL
%undef  PIC
%endif

%ifdef PIC      ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_   ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.
;
; Instead of a real GOT, a label (const_base) is dropped into the constant
; section at the point where this file is included, and every GOTOFF()
; address is expressed relative to that label.

        SECTION SEG_CONST
const_base:

%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg  (Mach-O variant)
;   Loads the run-time address of const_base into `reg`.
;   Uses the stack, preserves ebp (saved and restored around its use) and
;   overwrites ecx.
%imacro get_GOT 1
        ; NOTE: this macro destroys ecx register.
        call    %%geteip                ; ecx = address of the next instruction
        add     ecx, byte (%%ref - $)   ; ecx = run-time address of %%ref
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]      ; fetch the return address pushed by call
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
        ; The two db bytes below are the opcode and ModRM of a lea.  The
        ; following "jmp near" is never executed as a jump: its bytes are
        ; consumed as the rest of that lea (E9 as the SIB byte, the 32-bit
        ; jump displacement as the lea displacement).  That lets the
        ; assembler compute (const_base - %%ref) for us.  With ebp = 0 the
        ; index term contributes nothing, so the result is
        ; %%ref + (const_base - %%ref) = const_base.
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else   ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx                 ; hand the result to the requested register
%endif  ; (%1 == ebx)
        pop     ebp
%endmacro

%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg  (GOT-based variant)
;   Declares GOT_SYMBOL as external and loads its run-time address into
;   `reg`: the call/ret pair reads the current instruction pointer from the
;   stack, and the ..gotpc-relative addend turns it into the GOT address.
;   NOTE(review): 32-bit only as written (uses [esp]) - confirm that 64-bit
;   sources never build with PIC through this path.
%imacro get_GOT 1
        extern  GOT_SYMBOL
        call    %%geteip                ; %1 = address of the next instruction
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]       ; fetch the return address pushed by call
        ret
%%done:
%endmacro

%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------

; PIC builds: these are plain push / pop / mov of the given operands.
%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else   ; !PIC -----------------------------------------

; Non-PIC builds: symbols are addressed directly and all PIC helper macros
; expand to nothing.
%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif  ;  PIC -----------------------------------------

; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  Helper expressions:
;    MSKLE(x,y)  - 0 when x > y, otherwise a non-zero mask with (at least)
;                  its low 16 bits set; x and y are reduced to 16 bits first.
;    FILLB(b,n)  - number of bytes from position b up to the next n-byte
;                  boundary, measured from the start of the section ($$);
;                  n must be a power of two.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

; alignx n [,m]
;   n - required alignment (power of two)
;   m - maximum number of padding bytes to emit; default 0xFFFF.  Every
;       "times" count below is masked with MSKLE(FILLB(%%bs,n),m), so when
;       more than m bytes would be needed nothing is emitted at all.
;   The padding consists of hand-encoded instructions that leave register
;   contents unchanged.  If 16 or more bytes are still needed, the first
;   line fills the whole gap with one-byte nops; otherwise the following
;   lines use progressively shorter encodings, each count being
;   recomputed from the current position ($).
;   No comment may follow the trailing backslashes: they are line
;   continuations.
%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                                  ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00    ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00    ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00         ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                   ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                        ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                             ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                                  ; nop
%endmacro

; Align the next data on {2,4,8,16,..}-byte boundary.
;
; alignz n
;   Pads with zero bytes up to the next n-byte boundary (data alignment).
;
%imacro alignz 1.nolist
        align   %1, db 0                ; filling zeros
%endmacro

; --------------------------------------------------------------------------
;  Argument collection for 64-bit code
;
;  collect_args copies the first six integer arguments into r10..r15, in
;  argument order, so that function bodies can use the same registers
;  under both 64-bit calling conventions.  uncollect_args restores
;  everything collect_args saved, in reverse order.  The two macros must be
;  used as a pair with a balanced stack in between.
;
%ifdef __x86_64__

%ifndef WIN64

; Non-Windows variant: arguments arrive in rdi, rsi, rdx, rcx, r8, r9.
; The previous contents of r10..r15 are kept on the stack.
%imacro collect_args 0
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rdi                ; arg 1
        mov     r11, rsi                ; arg 2
        mov     r12, rdx                ; arg 3
        mov     r13, rcx                ; arg 4
        mov     r14, r8                 ; arg 5
        mov     r15, r9                 ; arg 6
%endmacro

%imacro uncollect_args 0
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
%endmacro

%else   ; WIN64

; Windows x64 variant: arguments 1-4 arrive in rcx, rdx, r8, r9; the
; next two are read from memory at [rax+48] and [rax+56].
; NOTE(review): this presumes the caller loaded rax with the stack pointer
; right after its own "push rbp" on entry, so that +48/+56 skip the saved
; rbp, the return address and the 32-byte shadow space - confirm against
; the functions that invoke this macro.
; r12..r15, rsi, rdi, xmm6 and xmm7 are saved on the stack.  The xmm saves
; use movaps, so rsp must be 16-byte aligned at that point.
%imacro collect_args 0
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rcx                ; arg 1
        mov     r11, rdx                ; arg 2
        mov     r12, r8                 ; arg 3
        mov     r13, r9                 ; arg 4
        mov     r14, [rax+48]           ; arg 5 (from the caller's stack)
        mov     r15, [rax+56]           ; arg 6 (from the caller's stack)
        push    rsi
        push    rdi
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm6
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm7
%endmacro

%imacro uncollect_args 0
        movaps  xmm7, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        movaps  xmm6, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        pop     rdi
        pop     rsi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
%endmacro

%endif  ; WIN64

%endif  ; __x86_64__

; --------------------------------------------------------------------------
;  Defines picked up from the C headers
;
%include "jsimdcfg.inc"

; --------------------------------------------------------------------------