media/libjpeg/simd/jsimdext.inc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 ;
michael@0 2 ; jsimdext.inc - common declarations
michael@0 3 ;
michael@0 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
michael@0 5 ; Copyright 2010 D. R. Commander
michael@0 6 ;
michael@0 7 ; Based on
michael@0 8 ; x86 SIMD extension for IJG JPEG library - version 1.02
michael@0 9 ;
michael@0 10 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
michael@0 11 ;
michael@0 12 ; This software is provided 'as-is', without any express or implied
michael@0 13 ; warranty. In no event will the authors be held liable for any damages
michael@0 14 ; arising from the use of this software.
michael@0 15 ;
michael@0 16 ; Permission is granted to anyone to use this software for any purpose,
michael@0 17 ; including commercial applications, and to alter it and redistribute it
michael@0 18 ; freely, subject to the following restrictions:
michael@0 19 ;
michael@0 20 ; 1. The origin of this software must not be misrepresented; you must not
michael@0 21 ; claim that you wrote the original software. If you use this software
michael@0 22 ; in a product, an acknowledgment in the product documentation would be
michael@0 23 ; appreciated but is not required.
michael@0 24 ; 2. Altered source versions must be plainly marked as such, and must not be
michael@0 25 ; misrepresented as being the original software.
michael@0 26 ; 3. This notice may not be removed or altered from any source distribution.
michael@0 27 ;
michael@0 28 ; [TAB8]
michael@0 29
michael@0 30 ; ==========================================================================
michael@0 31 ; System-dependent configurations
michael@0 32
; Select the object-format-specific settings from a -D flag given on the
; assembler command line (WIN32, WIN64, OBJ32, ELF, AOUT or MACHO; anything
; else falls through to the final %else).
; Every branch defines SEG_TEXT and SEG_CONST, the section name plus
; attributes used for code and for constants. Some branches additionally
; define EXTN(name), GOT_SYMBOL or PIC. What a branch leaves undefined is
; handled later in this file: EXTN gets an underscore-prefixing default, and
; a missing GOT_SYMBOL causes PIC to be undefined.
michael@0 33 %ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)--------
michael@0 34 ; * Microsoft Visual C++
michael@0 35 ; * MinGW (Minimalist GNU for Windows)
michael@0 36 ; * CygWin
michael@0 37 ; * LCC-Win32
michael@0 38
michael@0 39 ; -- segment definition --
michael@0 40 ;
; yasm gets the short attribute list; the nasm form adds public/use32/class.
michael@0 41 %ifdef __YASM_VER__
michael@0 42 %define SEG_TEXT .text align=16
michael@0 43 %define SEG_CONST .rdata align=16
michael@0 44 %else
michael@0 45 %define SEG_TEXT .text align=16 public use32 class=CODE
michael@0 46 %define SEG_CONST .rdata align=16 public use32 class=CONST
michael@0 47 %endif
michael@0 48
michael@0 49 %elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)--------
michael@0 50 ; * Microsoft Visual C++
michael@0 51
michael@0 52 ; -- segment definition --
michael@0 53 ;
; Same yasm/nasm split as WIN32, with use64 in the nasm form.
michael@0 54 %ifdef __YASM_VER__
michael@0 55 %define SEG_TEXT .text align=16
michael@0 56 %define SEG_CONST .rdata align=16
michael@0 57 %else
michael@0 58 %define SEG_TEXT .text align=16 public use64 class=CODE
michael@0 59 %define SEG_CONST .rdata align=16 public use64 class=CONST
michael@0 60 %endif
michael@0 61 %define EXTN(name) name ; foo() -> foo
michael@0 62
michael@0 63 %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)----------
michael@0 64 ; * Borland C++ (Win32)
michael@0 65
michael@0 66 ; -- segment definition --
michael@0 67 ;
michael@0 68 %define SEG_TEXT .text align=16 public use32 class=CODE
michael@0 69 %define SEG_CONST .data align=16 public use32 class=DATA
michael@0 70
michael@0 71 %elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
michael@0 72 ; * Linux
michael@0 73 ; * *BSD family Unix using elf format
michael@0 74 ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
michael@0 75
michael@0 76 ; mark stack as non-executable
michael@0 77 section .note.GNU-stack noalloc noexec nowrite progbits
michael@0 78
michael@0 79 ; -- segment definition --
michael@0 80 ;
; The 64-bit form omits the explicit alloc/exec/nowrite flags that the
; 32-bit form spells out.
michael@0 81 %ifdef __x86_64__
michael@0 82 %define SEG_TEXT .text progbits align=16
michael@0 83 %define SEG_CONST .rodata progbits align=16
michael@0 84 %else
michael@0 85 %define SEG_TEXT .text progbits alloc exec nowrite align=16
michael@0 86 %define SEG_CONST .rodata progbits alloc noexec nowrite align=16
michael@0 87 %endif
michael@0 88
michael@0 89 ; To make the code position-independent, append -DPIC to the commandline
michael@0 90 ;
michael@0 91 %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC
michael@0 92 %define EXTN(name) name ; foo() -> foo
michael@0 93
michael@0 94 %elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)----
michael@0 95 ; * Older Linux using a.out format (nasm -f aout -DAOUT ...)
michael@0 96 ; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...)
michael@0 97
michael@0 98 ; -- segment definition --
michael@0 99 ;
michael@0 100 %define SEG_TEXT .text
michael@0 101 %define SEG_CONST .data
michael@0 102
michael@0 103 ; To make the code position-independent, append -DPIC to the commandline
michael@0 104 ;
michael@0 105 %define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC
michael@0 106
michael@0 107 %elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)--------
michael@0 108 ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
michael@0 109
michael@0 110 ; -- segment definition --
michael@0 111 ;
michael@0 112 %define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why?
michael@0 113 %define SEG_CONST .rodata align=16
michael@0 114
michael@0 115 ; The generation of position-independent code (PIC) is the default on Darwin.
michael@0 116 ;
; PIC is forced on here; _MACHO_PIC_ is a marker value that the PIC macro
; section compares GOT_SYMBOL against to pick the Mach-O code path.
michael@0 117 %define PIC
michael@0 118 %define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing
michael@0 119
michael@0 120 %else ; ----(Other case)----------------------
michael@0 121
michael@0 122 ; -- segment definition --
michael@0 123 ;
michael@0 124 %define SEG_TEXT .text
michael@0 125 %define SEG_CONST .data
michael@0 126
michael@0 127 %endif ; ----------------------------------------------
michael@0 128
michael@0 129 ; ==========================================================================
michael@0 130
michael@0 131 ; --------------------------------------------------------------------------
michael@0 132 ; Common types
michael@0 133 ;
; Each type name below (POINTER, INT, FP32, MMWORD, XMMWORD) expands to the
; operand-size keyword written in front of a memory operand, and comes with
; a SIZEOF_<type> byte count and a <type>_BIT bit count.
; The SIZEOF_xWORD and xWORD_BIT constants these refer to are only defined
; at the end of this block. That ordering works because a %define body is
; expanded when the macro is used, not when it is defined.
michael@0 134 %ifdef __x86_64__
michael@0 135 %define POINTER qword ; general pointer type
michael@0 136 %define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
michael@0 137 %define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
michael@0 138 %else
michael@0 139 %define POINTER dword ; general pointer type
michael@0 140 %define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
michael@0 141 %define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
michael@0 142 %endif
michael@0 143
michael@0 144 %define INT dword ; signed integer type
michael@0 145 %define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT)
michael@0 146 %define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT
michael@0 147
michael@0 148 %define FP32 dword ; IEEE754 single
michael@0 149 %define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32)
michael@0 150 %define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT
michael@0 151
michael@0 152 %define MMWORD qword ; int64 (MMX register)
michael@0 153 %define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD)
michael@0 154 %define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT
michael@0 155
michael@0 156 ; NASM is buggy and doesn't properly handle operand sizes for SSE
michael@0 157 ; instructions, so for now we have to define XMMWORD as blank.
; Consequence: "XMMWORD [mem]" assembles as a plain "[mem]" with no size
; keyword, while SIZEOF_XMMWORD and XMMWORD_BIT still carry the 16-byte /
; 128-bit values.
michael@0 158 %define XMMWORD ; int128 (SSE register)
michael@0 159 %define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD)
michael@0 160 %define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT
michael@0 161
michael@0 162 ; Similar hacks for when we load a dword or MMWORD into an xmm# register
; Both expand to nothing; they only mark the intended operand width.
michael@0 163 %define XMM_DWORD
michael@0 164 %define XMM_MMWORD
michael@0 165
; Byte sizes of the basic units.
michael@0 166 %define SIZEOF_BYTE 1 ; sizeof(BYTE)
michael@0 167 %define SIZEOF_WORD 2 ; sizeof(WORD)
michael@0 168 %define SIZEOF_DWORD 4 ; sizeof(DWORD)
michael@0 169 %define SIZEOF_QWORD 8 ; sizeof(QWORD)
michael@0 170 %define SIZEOF_OWORD 16 ; sizeof(OWORD)
michael@0 171
; Bit widths of the basic units.
michael@0 172 %define BYTE_BIT 8 ; CHAR_BIT in C
michael@0 173 %define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT
michael@0 174 %define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT
michael@0 175 %define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT
michael@0 176 %define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT
michael@0 177
michael@0 178 ; --------------------------------------------------------------------------
michael@0 179 ; External Symbol Name
michael@0 180 ;
; Default decoration for externally visible symbols: used only when the
; system-dependent section above did not define EXTN itself (the WIN64 and
; ELF branches define it as the undecorated name). "_ %+ name" pastes an
; underscore onto the front of the symbol.
michael@0 181 %ifndef EXTN
michael@0 182 %define EXTN(name) _ %+ name ; foo() -> _foo
michael@0 183 %endif
michael@0 184
michael@0 185 ; --------------------------------------------------------------------------
michael@0 186 ; Macros for position-independent code (PIC) support
michael@0 187 ;
; The PIC macros need GOT_SYMBOL. If the selected object format did not
; define one, discard any -DPIC request so that the empty (non-PIC) macro
; set is used instead.
michael@0 188 %ifndef GOT_SYMBOL
michael@0 189 %undef PIC
michael@0 190 %endif
michael@0 191
; Defines GOTOFF(got,sym), get_GOT, pushpic, poppic and movpic in one of
; three flavours:
;   * PIC with GOT_SYMBOL == _MACHO_PIC_ : addresses relative to const_base
;   * PIC otherwise                      : addresses via ..gotoff / ..gotpc
;   * no PIC                             : GOTOFF(got,sym) is just (sym) and
;                                          the four macros expand to nothing
michael@0 192 %ifdef PIC ; -------------------------------------------
michael@0 193
michael@0 194 %ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
michael@0 195
michael@0 196 ; At present, nasm doesn't seem to support PIC generation for Mach-O.
michael@0 197 ; The PIC support code below is a little tricky.
michael@0 198
; Including this file switches the current section to SEG_CONST and plants
; the label const_base at that point; GOTOFF offsets are taken from it.
michael@0 199 SECTION SEG_CONST
michael@0 200 const_base:
michael@0 201
michael@0 202 %define GOTOFF(got,sym) (got) + (sym) - const_base
michael@0 203
; get_GOT reg
;   %1 = register that receives the base value computed below (per the
;        inline comments, ecx + (const_base - %%ref)).
;   ebp is saved, zeroed for the duration of the lea, and restored.
michael@0 204 %imacro get_GOT 1
michael@0 205 ; NOTE: this macro destroys the ecx register.
; call/ret pair: %%geteip loads the return address (the address of the
; following add) into ecx and returns to it.
michael@0 206 call %%geteip
; $ is the address of this add, so afterwards ecx = run-time address of %%ref
michael@0 207 add ecx, byte (%%ref - $)
michael@0 208 jmp short %%adjust
michael@0 209 %%geteip:
michael@0 210 mov ecx, POINTER [esp]
michael@0 211 ret
michael@0 212 %%adjust:
michael@0 213 push ebp
; ebp must be 0 because the hand-built lea below has an ebp*8 index term.
michael@0 214 xor ebp,ebp ; ebp = 0
michael@0 215 %ifidni %1,ebx ; (%1 == ebx)
; The lea is assembled by hand: the two db bytes are its opcode and ModRM,
; and the assembler-generated "jmp near" contributes the E9 byte plus a
; 32-bit displacement relative to %%ref, giving the encoding shown below.
michael@0 216 ; db 0x8D,0x9C + jmp near const_base =
michael@0 217 ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
michael@0 218 db 0x8D,0x9C ; 8D,9C
michael@0 219 jmp near const_base ; E9,(const_base-%%ref)
michael@0 220 %%ref:
michael@0 221 %else ; (%1 != ebx)
; Same trick, but the lea targets ecx and the result is then copied to %1.
michael@0 222 ; db 0x8D,0x8C + jmp near const_base =
michael@0 223 ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
michael@0 224 db 0x8D,0x8C ; 8D,8C
michael@0 225 jmp near const_base ; E9,(const_base-%%ref)
michael@0 226 %%ref: mov %1, ecx
michael@0 227 %endif ; (%1 == ebx)
michael@0 228 pop ebp
michael@0 229 %endmacro
michael@0 230
michael@0 231 %else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
michael@0 232
michael@0 233 %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
michael@0 234
; get_GOT reg
;   %1 = register that receives the result. %%geteip loads the return
;        address (the address of the add) into %1 and returns; the add then
;        applies the GOT_SYMBOL ..gotpc adjustment and execution skips over
;        the helper to %%done.
;   Declares GOT_SYMBOL extern on every expansion.
michael@0 235 %imacro get_GOT 1
michael@0 236 extern GOT_SYMBOL
michael@0 237 call %%geteip
michael@0 238 add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
michael@0 239 jmp short %%done
michael@0 240 %%geteip:
michael@0 241 mov %1, POINTER [esp]
michael@0 242 ret
michael@0 243 %%done:
michael@0 244 %endmacro
michael@0 245
michael@0 246 %endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
michael@0 247
; Under PIC these are plain push / pop / mov. Code written with them keeps
; the instructions only in PIC builds, since the non-PIC versions are empty.
michael@0 248 %imacro pushpic 1.nolist
michael@0 249 push %1
michael@0 250 %endmacro
michael@0 251 %imacro poppic 1.nolist
michael@0 252 pop %1
michael@0 253 %endmacro
michael@0 254 %imacro movpic 2.nolist
michael@0 255 mov %1,%2
michael@0 256 %endmacro
michael@0 257
michael@0 258 %else ; !PIC -----------------------------------------
michael@0 259
; Non-PIC: symbols are addressed directly and every PIC helper is a no-op
; that still accepts the same number of arguments.
michael@0 260 %define GOTOFF(got,sym) (sym)
michael@0 261
michael@0 262 %imacro get_GOT 1.nolist
michael@0 263 %endmacro
michael@0 264 %imacro pushpic 1.nolist
michael@0 265 %endmacro
michael@0 266 %imacro poppic 1.nolist
michael@0 267 %endmacro
michael@0 268 %imacro movpic 2.nolist
michael@0 269 %endmacro
michael@0 270
michael@0 271 %endif ; PIC -----------------------------------------
michael@0 272
michael@0 273 ; --------------------------------------------------------------------------
michael@0 274 ; Align the next instruction on {2,4,8,16,..}-byte boundary.
michael@0 275 ; ".balign n,,m" in GNU as
michael@0 276 ;
; MSKLE(x,y): mask for "x <= y", for operands in 0..0xFFFF.
;   When x <= y the difference y-x is non-negative, its complement has every
;   bit from bit 16 upward set, and the shift leaves a value whose low bits
;   are all ones. When x > y the complement is below 0x10000 and the shift
;   yields 0. AND-ing a count with MSKLE(...) therefore keeps or zeroes it.
; FILLB(b,n): number of bytes from position b to the next n-byte boundary,
;   measured from the section start ($$); n must be a power of two. The
;   result is 0 when b is already aligned.
michael@0 277 %define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
michael@0 278 %define FILLB(b,n) (($$-(b)) & ((n)-1))
michael@0 279
; alignx ALIGN [, MAXPAD]
;   %1 = alignment in bytes
;   %2 = largest gap that will be padded (default 0xFFFF). If the gap
;        measured at %%bs exceeds %2, every count below is masked to 0 and
;        nothing is emitted.
; The first "times" fills a gap of 16 bytes or more entirely with one-byte
; nops. Each following "times" re-measures the remaining gap from $ and
; emits progressively shorter filler instructions (7, 7, 6, 4, 3, 2 and 1
; bytes, as identified by the comment next to each byte sequence).
michael@0 280 %imacro alignx 1-2.nolist 0xFFFF
michael@0 281 %%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
michael@0 282 db 0x90 ; nop
michael@0 283 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
michael@0 284 db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
michael@0 285 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
michael@0 286 db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
michael@0 287 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
michael@0 288 db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
michael@0 289 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
michael@0 290 db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00]
michael@0 291 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
michael@0 292 db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00]
michael@0 293 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
michael@0 294 db 0x8B,0xED ; mov ebp,ebp
michael@0 295 times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
michael@0 296 db 0x90 ; nop
michael@0 297 %endmacro
michael@0 298
michael@0 299 ; Align the next data on {2,4,8,16,..}-byte boundary.
michael@0 300 ;
; alignz ALIGN
;   %1 = alignment in bytes. Pads with zero bytes, unlike alignx, which
;   pads with executable filler instructions.
michael@0 301 %imacro alignz 1.nolist
michael@0 302 align %1, db 0 ; filling zeros
michael@0 303 %endmacro
michael@0 304
; collect_args / uncollect_args (x86-64 only)
; collect_args gathers up to six integer arguments into r10..r15 so that
; code written against these macros can use the same registers on either
; branch below. uncollect_args undoes every push/sub that collect_args
; performed, in reverse order, so rsp must be back at the value
; collect_args left it before uncollect_args is expanded.
michael@0 305 %ifdef __x86_64__
michael@0 306
michael@0 307 %ifdef WIN64
michael@0 308
; WIN64 variant.
;   Saved on the stack: r12, r13, r14, r15, rsi, rdi, xmm6, xmm7.
;   r10..r13 <- rcx, rdx, r8, r9
;   r14, r15 <- [rax+48], [rax+56]
; NOTE(review): rax is not initialised by this macro. The surrounding
; function presumably loads it with a frame base beforehand so that these
; two offsets reach the stack-passed arguments -- confirm at each call site.
; movaps faults on a misaligned memory operand, so rsp has to be 16-byte
; aligned once the six pushes are done (both stores use rsp directly).
michael@0 309 %imacro collect_args 0
michael@0 310 push r12
michael@0 311 push r13
michael@0 312 push r14
michael@0 313 push r15
michael@0 314 mov r10, rcx
michael@0 315 mov r11, rdx
michael@0 316 mov r12, r8
michael@0 317 mov r13, r9
michael@0 318 mov r14, [rax+48]
michael@0 319 mov r15, [rax+56]
michael@0 320 push rsi
michael@0 321 push rdi
michael@0 322 sub rsp, SIZEOF_XMMWORD
michael@0 323 movaps XMMWORD [rsp], xmm6
michael@0 324 sub rsp, SIZEOF_XMMWORD
michael@0 325 movaps XMMWORD [rsp], xmm7
michael@0 326 %endmacro
michael@0 327
; Restores xmm7, xmm6, rdi, rsi, r15, r14, r13, r12 -- the mirror image of
; the WIN64 collect_args above.
michael@0 328 %imacro uncollect_args 0
michael@0 329 movaps xmm7, XMMWORD [rsp]
michael@0 330 add rsp, SIZEOF_XMMWORD
michael@0 331 movaps xmm6, XMMWORD [rsp]
michael@0 332 add rsp, SIZEOF_XMMWORD
michael@0 333 pop rdi
michael@0 334 pop rsi
michael@0 335 pop r15
michael@0 336 pop r14
michael@0 337 pop r13
michael@0 338 pop r12
michael@0 339 %endmacro
michael@0 340
michael@0 341 %else
michael@0 342
; Non-Windows x86-64 variant.
;   Saved on the stack: r10, r11, r12, r13, r14, r15.
;   r10..r15 <- rdi, rsi, rdx, rcx, r8, r9 (the System V AMD64 integer
;   argument registers, in argument order).
michael@0 343 %imacro collect_args 0
michael@0 344 push r10
michael@0 345 push r11
michael@0 346 push r12
michael@0 347 push r13
michael@0 348 push r14
michael@0 349 push r15
michael@0 350 mov r10, rdi
michael@0 351 mov r11, rsi
michael@0 352 mov r12, rdx
michael@0 353 mov r13, rcx
michael@0 354 mov r14, r8
michael@0 355 mov r15, r9
michael@0 356 %endmacro
michael@0 357
; Pops r15..r10, reversing the six pushes of the collect_args above.
michael@0 358 %imacro uncollect_args 0
michael@0 359 pop r15
michael@0 360 pop r14
michael@0 361 pop r13
michael@0 362 pop r12
michael@0 363 pop r11
michael@0 364 pop r10
michael@0 365 %endmacro
michael@0 366
michael@0 367 %endif
michael@0 368
michael@0 369 %endif
michael@0 370
michael@0 371 ; --------------------------------------------------------------------------
michael@0 372 ; Defines picked up from the C headers
michael@0 373 ;
michael@0 374 %include "jsimdcfg.inc"
michael@0 375
michael@0 376 ; --------------------------------------------------------------------------

mercurial