media/libjpeg/simd/jsimdext.inc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 ;
     2 ; jsimdext.inc - common declarations
     3 ;
     4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
     5 ; Copyright 2010 D. R. Commander
     6 ;
     7 ; Based on
     8 ; x86 SIMD extension for IJG JPEG library - version 1.02
     9 ;
    10 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
    11 ;
    12 ; This software is provided 'as-is', without any express or implied
    13 ; warranty.  In no event will the authors be held liable for any damages
    14 ; arising from the use of this software.
    15 ;
    16 ; Permission is granted to anyone to use this software for any purpose,
    17 ; including commercial applications, and to alter it and redistribute it
    18 ; freely, subject to the following restrictions:
    19 ;
    20 ; 1. The origin of this software must not be misrepresented; you must not
    21 ;    claim that you wrote the original software. If you use this software
    22 ;    in a product, an acknowledgment in the product documentation would be
    23 ;    appreciated but is not required.
    24 ; 2. Altered source versions must be plainly marked as such, and must not be
    25 ;    misrepresented as being the original software.
    26 ; 3. This notice may not be removed or altered from any source distribution.
    27 ;
    28 ; [TAB8]
    30 ; ==========================================================================
    31 ;  System-dependent configurations
       ;
       ;  Exactly one branch of the %ifdef chain below is taken, chosen by the
       ;  -D<FORMAT> symbol given on the assembler command line.  Every branch
       ;  defines SEG_TEXT and SEG_CONST; only some define EXTN, GOT_SYMBOL or PIC.
    33 %ifdef WIN32	; ----(nasm -fwin32 -DWIN32 ...)--------
    34 ; * Microsoft Visual C++
    35 ; * MinGW (Minimalist GNU for Windows)
    36 ; * CygWin
    37 ; * LCC-Win32
    39 ; -- segment definition --
    40 ; (EXTN and GOT_SYMBOL are left undefined in this branch.)
    41 %ifdef __YASM_VER__
    42 %define SEG_TEXT    .text  align=16
    43 %define SEG_CONST   .rdata align=16
    44 %else
    45 %define SEG_TEXT    .text  align=16 public use32 class=CODE
    46 %define SEG_CONST   .rdata align=16 public use32 class=CONST
    47 %endif
    49 %elifdef WIN64	; ----(nasm -fwin64 -DWIN64 ...)--------
    50 ; * Microsoft Visual C++
    52 ; -- segment definition --
    53 ; (GOT_SYMBOL is left undefined in this branch.)
    54 %ifdef __YASM_VER__
    55 %define SEG_TEXT    .text  align=16
    56 %define SEG_CONST   .rdata align=16
    57 %else
    58 %define SEG_TEXT    .text  align=16 public use64 class=CODE
    59 %define SEG_CONST   .rdata align=16 public use64 class=CONST
    60 %endif
    61 %define EXTN(name)  name			; foo() -> foo
    63 %elifdef OBJ32	; ----(nasm -fobj -DOBJ32 ...)----------
    64 ; * Borland C++ (Win32)
    66 ; -- segment definition --
    67 ; (EXTN and GOT_SYMBOL are left undefined in this branch.)
    68 %define SEG_TEXT    .text  align=16 public use32 class=CODE
    69 %define SEG_CONST   .data  align=16 public use32 class=DATA
    71 %elifdef ELF	; ----(nasm -felf[64] -DELF ...)------------
    72 ; * Linux
    73 ; * *BSD family Unix using elf format
    74 ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
    76 ; mark stack as non-executable
    77 section .note.GNU-stack noalloc noexec nowrite progbits
    79 ; -- segment definition --
    80 ;
    81 %ifdef __x86_64__
    82 %define SEG_TEXT    .text   progbits align=16
    83 %define SEG_CONST   .rodata progbits align=16
    84 %else
    85 %define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
    86 %define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
    87 %endif
    89 ; To make the code position-independent, append -DPIC to the command line
    90 ;
    91 %define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_	; ELF supports PIC
    92 %define EXTN(name)  name			; foo() -> foo
    94 %elifdef AOUT	; ----(nasm -faoutb/aout -DAOUT ...)----
    95 ; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
    96 ; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)
    98 ; -- segment definition --
    99 ; (EXTN is left undefined in this branch.)
   100 %define SEG_TEXT    .text
   101 %define SEG_CONST   .data
   103 ; To make the code position-independent, append -DPIC to the command line
   104 ;
   105 %define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_	; BSD-style a.out supports PIC
   107 %elifdef MACHO	; ----(nasm -fmacho -DMACHO ...)--------
   108 ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
   110 ; -- segment definition --
   111 ; (EXTN is left undefined in this branch.)
   112 %define SEG_TEXT    .text  ;align=16	; nasm doesn't accept align=16. why?
   113 %define SEG_CONST   .rodata align=16
   115 ; The generation of position-independent code (PIC) is the default on Darwin.
   116 ;
   117 %define PIC
   118 %define GOT_SYMBOL  _MACHO_PIC_		; Mach-O style code-relative addressing
   120 %else		; ----(Other case)----------------------
   122 ; -- segment definition --
   123 ; (EXTN and GOT_SYMBOL are left undefined in this branch.)
   124 %define SEG_TEXT    .text
   125 %define SEG_CONST   .data
   127 %endif	; ----------------------------------------------
   129 ; ==========================================================================
   131 ; --------------------------------------------------------------------------
   132 ;  Common types
   133 ;
       ;  Each type gets an operand-size keyword, a byte size (SIZEOF_*) and a bit
       ;  width (*_BIT).  Several definitions refer to SIZEOF_* / *_BIT names that
       ;  are only defined further down; that is fine because the body of a
       ;  %define is expanded where the macro is used, not where it is defined.
   134 %ifdef __x86_64__
   135 %define POINTER                 qword           ; general pointer type
   136 %define SIZEOF_POINTER          SIZEOF_QWORD    ; sizeof(POINTER)
   137 %define POINTER_BIT             QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
   138 %else
   139 %define POINTER                 dword           ; general pointer type
   140 %define SIZEOF_POINTER          SIZEOF_DWORD    ; sizeof(POINTER)
   141 %define POINTER_BIT             DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
   142 %endif
   144 %define INT                     dword           ; signed integer type
   145 %define SIZEOF_INT              SIZEOF_DWORD    ; sizeof(INT)
   146 %define INT_BIT                 DWORD_BIT       ; sizeof(INT)*BYTE_BIT
   148 %define FP32                    dword           ; IEEE754 single
   149 %define SIZEOF_FP32             SIZEOF_DWORD    ; sizeof(FP32)
   150 %define FP32_BIT                DWORD_BIT       ; sizeof(FP32)*BYTE_BIT
   152 %define MMWORD                  qword           ; int64  (MMX register)
   153 %define SIZEOF_MMWORD           SIZEOF_QWORD    ; sizeof(MMWORD)
   154 %define MMWORD_BIT              QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT
   156 ; NASM is buggy and doesn't properly handle operand sizes for SSE
   157 ; instructions, so for now we have to define XMMWORD as blank.
   158 %define XMMWORD                                 ; int128 (SSE register)
   159 %define SIZEOF_XMMWORD          SIZEOF_OWORD    ; sizeof(XMMWORD)
   160 %define XMMWORD_BIT             OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT
   162 ; Similar hacks for when we load a dword or MMWORD into an xmm# register
   163 ; (both expand to nothing, so no operand-size keyword is emitted)
   163 %define XMM_DWORD
   164 %define XMM_MMWORD
   166 %define SIZEOF_BYTE             1               ; sizeof(BYTE)
   167 %define SIZEOF_WORD             2               ; sizeof(WORD)
   168 %define SIZEOF_DWORD            4               ; sizeof(DWORD)
   169 %define SIZEOF_QWORD            8               ; sizeof(QWORD)
   170 %define SIZEOF_OWORD            16              ; sizeof(OWORD)
   172 %define BYTE_BIT                8               ; CHAR_BIT in C
   173 %define WORD_BIT                16              ; sizeof(WORD)*BYTE_BIT
   174 %define DWORD_BIT               32              ; sizeof(DWORD)*BYTE_BIT
   175 %define QWORD_BIT               64              ; sizeof(QWORD)*BYTE_BIT
   176 %define OWORD_BIT               128             ; sizeof(OWORD)*BYTE_BIT
   178 ; --------------------------------------------------------------------------
   179 ;  External Symbol Name
   180 ;
       ;  Fallback used when the object-format section earlier in this file did
       ;  not define EXTN: the symbol name is prefixed with an underscore.
   181 %ifndef EXTN
   182 %define EXTN(name)   _ %+ name		; foo() -> _foo
   183 %endif
   185 ; --------------------------------------------------------------------------
   186 ;  Macros for position-independent code (PIC) support
   187 ;
       ;  A PIC request is cancelled here when GOT_SYMBOL is not defined, so the
       ;  %ifdef PIC test that follows only succeeds when both are defined.
   188 %ifndef GOT_SYMBOL
   189 %undef PIC
   190 %endif
   192 %ifdef PIC ; -------------------------------------------
   194 %ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
   196 ; At present, nasm doesn't seem to support PIC generation for Mach-O.
   197 ; The PIC support code below is a little tricky.
       ;
       ; get_GOT loads the run-time address of const_base into its register
       ; argument; GOTOFF(got,sym) then addresses sym relative to that register.
       ; The displacement (const_base - %%ref) is produced by letting the
       ; assembler encode a "jmp near const_base" whose opcode byte (E9) doubles
       ; as the SIB byte of a hand-encoded lea.
   199 	SECTION	SEG_CONST
   200 const_base:			; anchor label in SEG_CONST used by GOTOFF and get_GOT
   202 %define GOTOFF(got,sym) (got) + (sym) - const_base
   204 %imacro get_GOT	1
   205 	; NOTE: this macro destroys ecx register.
   206 	call	%%geteip		; ecx = return address = address of the add below
   207 	add	ecx, byte (%%ref - $)	; ecx = address of %%ref
   208 	jmp	short %%adjust		; step over the helper routine
   209 %%geteip:
   210 	mov	ecx, POINTER [esp]	; read the return address pushed by the call
   211 	ret
   212 %%adjust:
   213 	push	ebp			; ebp serves as a zero index below; restored by the pop
   214 	xor	ebp,ebp		; ebp = 0
   215 %ifidni %1,ebx	; (%1 == ebx)
   216 	; db 0x8D,0x9C + jmp near const_base =
   217 	;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
   218 	db	0x8D,0x9C		; 8D,9C
   219 	jmp	near const_base		; E9,(const_base-%%ref)
   220 %%ref:				; here ebx = ecx + 0 + (const_base-%%ref) = address of const_base
   221 %else  ; (%1 != ebx)
   222 	; db 0x8D,0x8C + jmp near const_base =
   223 	;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
   224 	db	0x8D,0x8C		; 8D,8C
   225 	jmp	near const_base		; E9,(const_base-%%ref)
   226 %%ref:	mov	%1, ecx		; copy the address of const_base into the requested register
   227 %endif ; (%1 == ebx)
   228 	pop	ebp
   229 %endmacro
   231 %else	; GOT_SYMBOL != _MACHO_PIC_ ----------------
   233 %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
       ; get_GOT reg: loads the address of GOT_SYMBOL into reg, using call/ret to
       ; obtain the current instruction address.  Only reg and the flags change.
   235 %imacro get_GOT	1
   236 	extern	GOT_SYMBOL
   237 	call	%%geteip		; %1 = return address = address of the add below
   238 	add	%1, GOT_SYMBOL + $$ - $ wrt ..gotpc	; turn that address into the GOT address (NASM ..gotpc idiom)
   239 	jmp	short %%done		; step over the helper routine
   240 %%geteip:
   241 	mov	%1, POINTER [esp]	; read the return address pushed by the call
   242 	ret
   243 %%done:
   244 %endmacro
   246 %endif	; GOT_SYMBOL == _MACHO_PIC_ ----------------
   248 %imacro pushpic	1.nolist
       	; PIC build: expands to a plain push of the operand.
   249 	push	%1
   250 %endmacro
   251 %imacro poppic	1.nolist
       	; PIC build: expands to a plain pop into the operand.
   252 	pop	%1
   253 %endmacro
   254 %imacro movpic	2.nolist
       	; PIC build: expands to a plain mov (destination, source).
   255 	mov	%1,%2
   256 %endmacro
   258 %else	; !PIC -----------------------------------------
   260 %define GOTOFF(got,sym) (sym)
       ; Non-PIC build: GOTOFF ignores its register argument and the four macros
       ; below accept the same arguments as their PIC versions but emit nothing.
   262 %imacro get_GOT	1.nolist
   263 %endmacro
   264 %imacro pushpic	1.nolist
   265 %endmacro
   266 %imacro poppic	1.nolist
   267 %endmacro
   268 %imacro movpic	2.nolist
   269 %endmacro
   271 %endif	;  PIC -----------------------------------------
   273 ; --------------------------------------------------------------------------
   274 ;  Align the next instruction on {2,4,8,16,..}-byte boundary.
   275 ;  ".balign n,,m" in GNU as
   276 ;
       ;  alignx n [,m]   n = alignment (the mask arithmetic needs a power of two),
       ;                  m = largest padding that may be emitted (default 0xFFFF).
       ;
       ;  MSKLE(x,y) is nonzero when (x & 0xFFFF) <= (y & 0xFFFF) and 0 otherwise;
       ;  it is ANDed into each "times" count to switch that line on or off.
       ;  FILLB(b,n) is the number of bytes from b up to the next multiple of n,
       ;  measured relative to the section start ($$).
       ;
       ;  Nothing is emitted when the padding needed at %%bs exceeds m.  Padding
       ;  of 16 bytes or more is emitted entirely as single-byte nops; shorter
       ;  padding is built from the multi-byte no-op encodings listed below.
   277 %define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
   278 %define FILLB(b,n)  (($$-(b)) & ((n)-1))
   280 %imacro alignx 1-2.nolist 0xFFFF
   281 %%bs:	times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
   282 	       db 0x90                               ; nop
   283 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
   284 	       db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
   285 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
   286 	       db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
   287 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
   288 	       db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
   289 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
   290 	       db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
   291 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
   292 	       db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
   293 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
   294 	       db 0x8B,0xED                          ; mov ebp,ebp
   295 	times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
   296 	       db 0x90                               ; nop
   297 %endmacro
   299 ; Align the next data on {2,4,8,16,..}-byte boundary.
   300 ; alignz n: pads with zero bytes up to the next n-byte boundary.
   301 %imacro alignz 1.nolist
   302 	align %1, db 0		; filling zeros
   303 %endmacro
   305 %ifdef __x86_64__
   307 %ifdef WIN64
   309 %imacro collect_args 0
       	; Win64 variant: copies the incoming arguments into r10-r15 and saves
       	; r12-r15, rsi, rdi, xmm6 and xmm7 on the stack; uncollect_args restores
       	; them in the reverse order.
       	; NOTE(review): rax is read below but never set by this macro; presumably
       	; the caller points it at its stack frame first -- confirm against callers.
   310 	push r12		; callee-saved in the Microsoft x64 convention
   311 	push r13
   312 	push r14
   313 	push r15
   314 	mov r10, rcx		; 1st argument
   315 	mov r11, rdx		; 2nd argument
   316 	mov r12, r8		; 3rd argument
   317 	mov r13, r9		; 4th argument
   318 	mov r14, [rax+48]	; presumably the 5th (stack) argument -- see NOTE above
   319 	mov r15, [rax+56]	; presumably the 6th (stack) argument
   320 	push rsi		; rsi/rdi are callee-saved in the Microsoft x64 convention
   321 	push rdi
   322 	sub     rsp, SIZEOF_XMMWORD
   323 	movaps  XMMWORD [rsp], xmm6	; movaps needs a 16-byte-aligned rsp here
   324 	sub     rsp, SIZEOF_XMMWORD
   325 	movaps  XMMWORD [rsp], xmm7
   326 %endmacro
   328 %imacro uncollect_args 0
       	; Win64 variant: restores xmm7, xmm6, rdi, rsi and r15-r12, i.e. exactly
       	; the reverse of the saves done by collect_args.  rsp must be at the
       	; value collect_args left it at when this macro is expanded.
   329 	movaps  xmm7, XMMWORD [rsp]
   330 	add     rsp, SIZEOF_XMMWORD
   331 	movaps  xmm6, XMMWORD [rsp]
   332 	add     rsp, SIZEOF_XMMWORD
   333 	pop rdi
   334 	pop rsi
   335 	pop r15
   336 	pop r14
   337 	pop r13
   338 	pop r12
   339 %endmacro
   341 %else
   343 %imacro collect_args 0
       	; Non-Win64 x86-64 variant: saves r10-r15 on the stack, then copies the six
       	; System V AMD64 integer argument registers into them, so r10-r15 hold
       	; arguments 1-6 just as in the Win64 variant.
   344 	push r10
   345 	push r11
   346 	push r12
   347 	push r13
   348 	push r14
   349 	push r15
   350 	mov r10, rdi		; 1st argument
   351 	mov r11, rsi		; 2nd argument
   352 	mov r12, rdx		; 3rd argument
   353 	mov r13, rcx		; 4th argument
   354 	mov r14, r8		; 5th argument
   355 	mov r15, r9		; 6th argument
   356 %endmacro
   358 %imacro uncollect_args 0
       	; Non-Win64 x86-64 variant: pops r15-r10, the reverse of the pushes done by
       	; collect_args.  rsp must be at the value collect_args left it at.
   359 	pop r15
   360 	pop r14
   361 	pop r13
   362 	pop r12
   363 	pop r11
   364 	pop r10
   365 %endmacro
   367 %endif
   369 %endif
   371 ; --------------------------------------------------------------------------
   372 ;  Defines picked up from the C headers
   373 ;
   374 %include "jsimdcfg.inc"
   376 ; --------------------------------------------------------------------------

mercurial