Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | ; |
michael@0 | 2 | ; jsimdext.inc - common declarations |
michael@0 | 3 | ; |
michael@0 | 4 | ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
michael@0 | 5 | ; Copyright 2010 D. R. Commander |
michael@0 | 6 | ; |
michael@0 | 7 | ; Based on |
michael@0 | 8 | ; x86 SIMD extension for IJG JPEG library - version 1.02 |
michael@0 | 9 | ; |
michael@0 | 10 | ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
michael@0 | 11 | ; |
michael@0 | 12 | ; This software is provided 'as-is', without any express or implied |
michael@0 | 13 | ; warranty. In no event will the authors be held liable for any damages |
michael@0 | 14 | ; arising from the use of this software. |
michael@0 | 15 | ; |
michael@0 | 16 | ; Permission is granted to anyone to use this software for any purpose, |
michael@0 | 17 | ; including commercial applications, and to alter it and redistribute it |
michael@0 | 18 | ; freely, subject to the following restrictions: |
michael@0 | 19 | ; |
michael@0 | 20 | ; 1. The origin of this software must not be misrepresented; you must not |
michael@0 | 21 | ; claim that you wrote the original software. If you use this software |
michael@0 | 22 | ; in a product, an acknowledgment in the product documentation would be |
michael@0 | 23 | ; appreciated but is not required. |
michael@0 | 24 | ; 2. Altered source versions must be plainly marked as such, and must not be |
michael@0 | 25 | ; misrepresented as being the original software. |
michael@0 | 26 | ; 3. This notice may not be removed or altered from any source distribution. |
michael@0 | 27 | ; |
michael@0 | 28 | ; [TAB8] |
michael@0 | 29 | |
michael@0 | 30 | ; ========================================================================== |
michael@0 | 31 | ; System-dependent configurations |
michael@0 | 32 | |
michael@0 | 33 | %ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- |
michael@0 | 34 | ; * Microsoft Visual C++ |
michael@0 | 35 | ; * MinGW (Minimalist GNU for Windows) |
michael@0 | 36 | ; * CygWin |
michael@0 | 37 | ; * LCC-Win32 |
michael@0 | 38 | |
michael@0 | 39 | ; -- segment definition -- |
michael@0 | 40 | ; Under YASM only the alignment is given; otherwise public/use32/class are appended as well. |
michael@0 | 41 | %ifdef __YASM_VER__ |
michael@0 | 42 | %define SEG_TEXT .text align=16 |
michael@0 | 43 | %define SEG_CONST .rdata align=16 |
michael@0 | 44 | %else |
michael@0 | 45 | %define SEG_TEXT .text align=16 public use32 class=CODE |
michael@0 | 46 | %define SEG_CONST .rdata align=16 public use32 class=CONST |
michael@0 | 47 | %endif |
michael@0 | 48 | |
michael@0 | 49 | %elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- |
michael@0 | 50 | ; * Microsoft Visual C++ |
michael@0 | 51 | |
michael@0 | 52 | ; -- segment definition -- |
michael@0 | 53 | ; Same YASM / non-YASM split as for WIN32, with use64 in the non-YASM case. |
michael@0 | 54 | %ifdef __YASM_VER__ |
michael@0 | 55 | %define SEG_TEXT .text align=16 |
michael@0 | 56 | %define SEG_CONST .rdata align=16 |
michael@0 | 57 | %else |
michael@0 | 58 | %define SEG_TEXT .text align=16 public use64 class=CODE |
michael@0 | 59 | %define SEG_CONST .rdata align=16 public use64 class=CONST |
michael@0 | 60 | %endif |
michael@0 | 61 | %define EXTN(name) name ; foo() -> foo (no underscore prefix) |
michael@0 | 62 | |
michael@0 | 63 | %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- |
michael@0 | 64 | ; * Borland C++ (Win32) |
michael@0 | 65 | |
michael@0 | 66 | ; -- segment definition -- |
michael@0 | 67 | ; Constants are placed in .data (class=DATA) for this format. |
michael@0 | 68 | %define SEG_TEXT .text align=16 public use32 class=CODE |
michael@0 | 69 | %define SEG_CONST .data align=16 public use32 class=DATA |
michael@0 | 70 | |
michael@0 | 71 | %elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ |
michael@0 | 72 | ; * Linux |
michael@0 | 73 | ; * *BSD family Unix using elf format |
michael@0 | 74 | ; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
michael@0 | 75 | |
michael@0 | 76 | ; mark stack as non-executable |
michael@0 | 77 | section .note.GNU-stack noalloc noexec nowrite progbits |
michael@0 | 78 | |
michael@0 | 79 | ; -- segment definition -- |
michael@0 | 80 | ; With __x86_64__ only progbits/align are given; otherwise alloc/exec/nowrite are spelled out. |
michael@0 | 81 | %ifdef __x86_64__ |
michael@0 | 82 | %define SEG_TEXT .text progbits align=16 |
michael@0 | 83 | %define SEG_CONST .rodata progbits align=16 |
michael@0 | 84 | %else |
michael@0 | 85 | %define SEG_TEXT .text progbits alloc exec nowrite align=16 |
michael@0 | 86 | %define SEG_CONST .rodata progbits alloc noexec nowrite align=16 |
michael@0 | 87 | %endif |
michael@0 | 88 | |
michael@0 | 89 | ; To make the code position-independent, append -DPIC to the command line |
michael@0 | 90 | ; (PIC is kept only because GOT_SYMBOL is defined here.) |
michael@0 | 91 | %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC |
michael@0 | 92 | %define EXTN(name) name ; foo() -> foo (no underscore prefix) |
michael@0 | 93 | |
michael@0 | 94 | %elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- |
michael@0 | 95 | ; * Older Linux using a.out format (nasm -f aout -DAOUT ...) |
michael@0 | 96 | ; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) |
michael@0 | 97 | |
michael@0 | 98 | ; -- segment definition -- |
michael@0 | 99 | ; Bare section names without attributes; constants go into .data. |
michael@0 | 100 | %define SEG_TEXT .text |
michael@0 | 101 | %define SEG_CONST .data |
michael@0 | 102 | |
michael@0 | 103 | ; To make the code position-independent, append -DPIC to the command line |
michael@0 | 104 | ; (PIC is kept only because GOT_SYMBOL is defined here.) |
michael@0 | 105 | %define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC |
michael@0 | 106 | |
michael@0 | 107 | %elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
michael@0 | 108 | ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) |
michael@0 | 109 | |
michael@0 | 110 | ; -- segment definition -- |
michael@0 | 111 | ; Only SEG_CONST carries align=16; it is commented out for SEG_TEXT (see the note on that line). |
michael@0 | 112 | %define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? |
michael@0 | 113 | %define SEG_CONST .rodata align=16 |
michael@0 | 114 | |
michael@0 | 115 | ; The generation of position-independent code (PIC) is the default on Darwin. |
michael@0 | 116 | ; PIC is therefore defined here unconditionally, without needing -DPIC. |
michael@0 | 117 | %define PIC |
michael@0 | 118 | %define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing |
michael@0 | 119 | |
michael@0 | 120 | %else ; ----(Other case)---------------------- |
michael@0 | 121 | |
michael@0 | 122 | ; -- segment definition -- |
michael@0 | 123 | ; Fallback: bare section names; neither GOT_SYMBOL nor EXTN is defined in this branch. |
michael@0 | 124 | %define SEG_TEXT .text |
michael@0 | 125 | %define SEG_CONST .data |
michael@0 | 126 | |
michael@0 | 127 | %endif ; ---------------------------------------------- |
michael@0 | 128 | |
michael@0 | 129 | ; ========================================================================== |
michael@0 | 130 | |
michael@0 | 131 | ; -------------------------------------------------------------------------- |
michael@0 | 132 | ; Common types |
michael@0 | 133 | ; Each type gets a size keyword, a SIZEOF_* alias (bytes) and a *_BIT alias (bits). |
michael@0 | 134 | %ifdef __x86_64__ |
michael@0 | 135 | %define POINTER qword ; general pointer type (qword when __x86_64__ is defined) |
michael@0 | 136 | %define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) |
michael@0 | 137 | %define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
michael@0 | 138 | %else |
michael@0 | 139 | %define POINTER dword ; general pointer type (dword otherwise) |
michael@0 | 140 | %define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) |
michael@0 | 141 | %define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
michael@0 | 142 | %endif |
michael@0 | 143 | |
michael@0 | 144 | %define INT dword ; signed integer type |
michael@0 | 145 | %define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) |
michael@0 | 146 | %define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT |
michael@0 | 147 | |
michael@0 | 148 | %define FP32 dword ; IEEE754 single |
michael@0 | 149 | %define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) |
michael@0 | 150 | %define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT |
michael@0 | 151 | |
michael@0 | 152 | %define MMWORD qword ; int64 (MMX register) |
michael@0 | 153 | %define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) |
michael@0 | 154 | %define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT |
michael@0 | 155 | |
michael@0 | 156 | ; NASM is buggy and doesn't properly handle operand sizes for SSE |
michael@0 | 157 | ; instructions, so for now we have to define XMMWORD as blank. |
michael@0 | 158 | %define XMMWORD ; int128 (SSE register); expands to nothing, see the note above |
michael@0 | 159 | %define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) |
michael@0 | 160 | %define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT |
michael@0 | 161 | |
michael@0 | 162 | ; Similar hacks for when we load a dword or MMWORD into an xmm# register |
michael@0 | 163 | %define XMM_DWORD |
michael@0 | 164 | %define XMM_MMWORD |
michael@0 | 165 | |
michael@0 | 166 | %define SIZEOF_BYTE 1 ; sizeof(BYTE) |
michael@0 | 167 | %define SIZEOF_WORD 2 ; sizeof(WORD) |
michael@0 | 168 | %define SIZEOF_DWORD 4 ; sizeof(DWORD) |
michael@0 | 169 | %define SIZEOF_QWORD 8 ; sizeof(QWORD) |
michael@0 | 170 | %define SIZEOF_OWORD 16 ; sizeof(OWORD) |
michael@0 | 171 | |
michael@0 | 172 | %define BYTE_BIT 8 ; CHAR_BIT in C |
michael@0 | 173 | %define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT |
michael@0 | 174 | %define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT |
michael@0 | 175 | %define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT |
michael@0 | 176 | %define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT |
michael@0 | 177 | |
michael@0 | 178 | ; -------------------------------------------------------------------------- |
michael@0 | 179 | ; External Symbol Name |
michael@0 | 180 | ; Default, used only when EXTN has not been defined already: prepend one underscore. |
michael@0 | 181 | %ifndef EXTN |
michael@0 | 182 | %define EXTN(name) _ %+ name ; foo() -> _foo |
michael@0 | 183 | %endif |
michael@0 | 184 | |
michael@0 | 185 | ; -------------------------------------------------------------------------- |
michael@0 | 186 | ; Macros for position-independent code (PIC) support |
michael@0 | 187 | ; PIC is dropped when no GOT_SYMBOL is defined; without PIC the helper macros expand to nothing. |
michael@0 | 188 | %ifndef GOT_SYMBOL |
michael@0 | 189 | %undef PIC |
michael@0 | 190 | %endif |
michael@0 | 191 | |
michael@0 | 192 | %ifdef PIC ; ------------------------------------------- |
michael@0 | 193 | |
michael@0 | 194 | %ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- |
michael@0 | 195 | |
michael@0 | 196 | ; At present, nasm doesn't seem to support PIC generation for Mach-O. |
michael@0 | 197 | ; The PIC support code below is a little tricky. |
michael@0 | 198 | |
michael@0 | 199 | SECTION SEG_CONST |
michael@0 | 200 | const_base: |
michael@0 | 201 | |
michael@0 | 202 | %define GOTOFF(got,sym) (got) + (sym) - const_base |
michael@0 | 203 | |
michael@0 | 204 | %imacro get_GOT 1 |
michael@0 | 205 | ; NOTE: this macro destroys the ecx register. |
michael@0 | 206 | call %%geteip |
michael@0 | 207 | add ecx, byte (%%ref - $) |
michael@0 | 208 | jmp short %%adjust |
michael@0 | 209 | %%geteip: |
michael@0 | 210 | mov ecx, POINTER [esp] |
michael@0 | 211 | ret |
michael@0 | 212 | %%adjust: |
michael@0 | 213 | push ebp |
michael@0 | 214 | xor ebp,ebp ; ebp = 0, so the ebp*8 term of the lea below contributes nothing |
michael@0 | 215 | %ifidni %1,ebx ; (%1 == ebx) |
michael@0 | 216 | ; db 0x8D,0x9C + jmp near const_base = |
michael@0 | 217 | ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) |
michael@0 | 218 | db 0x8D,0x9C ; 8D,9C (first two bytes of the lea) |
michael@0 | 219 | jmp near const_base ; E9,(const_base-%%ref): these bytes complete the lea encoding |
michael@0 | 220 | %%ref: |
michael@0 | 221 | %else ; (%1 != ebx) |
michael@0 | 222 | ; db 0x8D,0x8C + jmp near const_base = |
michael@0 | 223 | ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) |
michael@0 | 224 | db 0x8D,0x8C ; 8D,8C (first two bytes of the lea) |
michael@0 | 225 | jmp near const_base ; E9,(const_base-%%ref): these bytes complete the lea encoding |
michael@0 | 226 | %%ref: mov %1, ecx |
michael@0 | 227 | %endif ; (%1 == ebx) |
michael@0 | 228 | pop ebp |
michael@0 | 229 | %endmacro |
michael@0 | 230 | |
michael@0 | 231 | %else ; GOT_SYMBOL != _MACHO_PIC_: use the ..gotoff / ..gotpc forms ---------------- |
michael@0 | 232 | |
michael@0 | 233 | %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff |
michael@0 | 234 | |
michael@0 | 235 | %imacro get_GOT 1 |
michael@0 | 236 | extern GOT_SYMBOL |
michael@0 | 237 | call %%geteip |
michael@0 | 238 | add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc |
michael@0 | 239 | jmp short %%done |
michael@0 | 240 | %%geteip: |
michael@0 | 241 | mov %1, POINTER [esp] |
michael@0 | 242 | ret |
michael@0 | 243 | %%done: |
michael@0 | 244 | %endmacro |
michael@0 | 245 | |
michael@0 | 246 | %endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- |
michael@0 | 247 | |
michael@0 | 248 | %imacro pushpic 1.nolist |
michael@0 | 249 | push %1 |
michael@0 | 250 | %endmacro |
michael@0 | 251 | %imacro poppic 1.nolist |
michael@0 | 252 | pop %1 |
michael@0 | 253 | %endmacro |
michael@0 | 254 | %imacro movpic 2.nolist |
michael@0 | 255 | mov %1,%2 |
michael@0 | 256 | %endmacro |
michael@0 | 257 | |
michael@0 | 258 | %else ; !PIC: GOTOFF is just the symbol and the helper macros are empty ------------- |
michael@0 | 259 | |
michael@0 | 260 | %define GOTOFF(got,sym) (sym) |
michael@0 | 261 | |
michael@0 | 262 | %imacro get_GOT 1.nolist |
michael@0 | 263 | %endmacro |
michael@0 | 264 | %imacro pushpic 1.nolist |
michael@0 | 265 | %endmacro |
michael@0 | 266 | %imacro poppic 1.nolist |
michael@0 | 267 | %endmacro |
michael@0 | 268 | %imacro movpic 2.nolist |
michael@0 | 269 | %endmacro |
michael@0 | 270 | |
michael@0 | 271 | %endif ; PIC ----------------------------------------- |
michael@0 | 272 | |
michael@0 | 273 | ; -------------------------------------------------------------------------- |
michael@0 | 274 | ; Align the next instruction on {2,4,8,16,..}-byte boundary. |
michael@0 | 275 | ; ".balign n,,m" in GNU as |
michael@0 | 276 | ; Usage: alignx n[,m] (m defaults to 0xFFFF). MSKLE(x,y) is a mask that is set when x <= y and 0 otherwise; FILLB(b,n) is the byte count from b up to the next n-byte boundary. |
michael@0 | 277 | %define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) |
michael@0 | 278 | %define FILLB(b,n) (($$-(b)) & ((n)-1)) |
michael@0 | 279 | |
michael@0 | 280 | %imacro alignx 1-2.nolist 0xFFFF |
michael@0 | 281 | %%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ |
michael@0 | 282 | db 0x90 ; nop (used for the whole gap when it is 16 bytes or more) |
michael@0 | 283 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ |
michael@0 | 284 | db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] |
michael@0 | 285 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ |
michael@0 | 286 | db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
michael@0 | 287 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ |
michael@0 | 288 | db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
michael@0 | 289 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ |
michael@0 | 290 | db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] |
michael@0 | 291 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ |
michael@0 | 292 | db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] |
michael@0 | 293 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ |
michael@0 | 294 | db 0x8B,0xED ; mov ebp,ebp |
michael@0 | 295 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ |
michael@0 | 296 | db 0x90 ; nop (pads whatever is still left) |
michael@0 | 297 | %endmacro |
michael@0 | 298 | |
michael@0 | 299 | ; Align the next data on {2,4,8,16,..}-byte boundary. |
michael@0 | 300 | ; Usage: alignz n -- wraps the align directive with a zero byte as the filler. |
michael@0 | 301 | %imacro alignz 1.nolist |
michael@0 | 302 | align %1, db 0 ; filling zeros |
michael@0 | 303 | %endmacro |
michael@0 | 304 | |
michael@0 | 305 | %ifdef __x86_64__ | ; argument-gathering macros, defined only when __x86_64__ is set
michael@0 | 306 | |
michael@0 | 307 | %ifdef WIN64 |
michael@0 | 308 | |
michael@0 | 309 | %imacro collect_args 0 | ; WIN64: save r12-r15, rsi, rdi, xmm6, xmm7 and gather arguments into r10-r15
michael@0 | 310 | push r12 |
michael@0 | 311 | push r13 |
michael@0 | 312 | push r14 |
michael@0 | 313 | push r15 |
michael@0 | 314 | mov r10, rcx | ; register arguments rcx, rdx, r8, r9 -> r10, r11, r12, r13
michael@0 | 315 | mov r11, rdx |
michael@0 | 316 | mov r12, r8 |
michael@0 | 317 | mov r13, r9 |
michael@0 | 318 | mov r14, [rax+48] | ; NOTE(review): rax is not set by this macro; the invoking code must have loaded it first -- presumably with a stack-frame address, verify against callers
michael@0 | 319 | mov r15, [rax+56] |
michael@0 | 320 | push rsi |
michael@0 | 321 | push rdi |
michael@0 | 322 | sub rsp, SIZEOF_XMMWORD |
michael@0 | 323 | movaps XMMWORD [rsp], xmm6 | ; NOTE(review): movaps needs rsp 16-byte aligned at this point -- TODO confirm callers guarantee it
michael@0 | 324 | sub rsp, SIZEOF_XMMWORD |
michael@0 | 325 | movaps XMMWORD [rsp], xmm7 |
michael@0 | 326 | %endmacro |
michael@0 | 327 | |
michael@0 | 328 | %imacro uncollect_args 0 | ; WIN64: restore everything collect_args saved, in reverse order
michael@0 | 329 | movaps xmm7, XMMWORD [rsp] |
michael@0 | 330 | add rsp, SIZEOF_XMMWORD |
michael@0 | 331 | movaps xmm6, XMMWORD [rsp] |
michael@0 | 332 | add rsp, SIZEOF_XMMWORD |
michael@0 | 333 | pop rdi |
michael@0 | 334 | pop rsi |
michael@0 | 335 | pop r15 |
michael@0 | 336 | pop r14 |
michael@0 | 337 | pop r13 |
michael@0 | 338 | pop r12 |
michael@0 | 339 | %endmacro |
michael@0 | 340 | |
michael@0 | 341 | %else | ; x86-64 without WIN64
michael@0 | 342 | |
michael@0 | 343 | %imacro collect_args 0 | ; save r10-r15, then copy rdi, rsi, rdx, rcx, r8, r9 into r10-r15
michael@0 | 344 | push r10 |
michael@0 | 345 | push r11 |
michael@0 | 346 | push r12 |
michael@0 | 347 | push r13 |
michael@0 | 348 | push r14 |
michael@0 | 349 | push r15 |
michael@0 | 350 | mov r10, rdi |
michael@0 | 351 | mov r11, rsi |
michael@0 | 352 | mov r12, rdx |
michael@0 | 353 | mov r13, rcx |
michael@0 | 354 | mov r14, r8 |
michael@0 | 355 | mov r15, r9 |
michael@0 | 356 | %endmacro |
michael@0 | 357 | |
michael@0 | 358 | %imacro uncollect_args 0 | ; pop r15..r10, the reverse of the pushes in collect_args
michael@0 | 359 | pop r15 |
michael@0 | 360 | pop r14 |
michael@0 | 361 | pop r13 |
michael@0 | 362 | pop r12 |
michael@0 | 363 | pop r11 |
michael@0 | 364 | pop r10 |
michael@0 | 365 | %endmacro |
michael@0 | 366 | |
michael@0 | 367 | %endif |
michael@0 | 368 | |
michael@0 | 369 | %endif |
michael@0 | 370 | |
michael@0 | 371 | ; -------------------------------------------------------------------------- |
michael@0 | 372 | ; Defines picked up from the C headers |
michael@0 | 373 | ; (kept in a separate file, jsimdcfg.inc, which the assembler must be able to locate) |
michael@0 | 374 | %include "jsimdcfg.inc" |
michael@0 | 375 | |
michael@0 | 376 | ; -------------------------------------------------------------------------- |