security/nss/lib/freebl/mpi/mpi_amd64_masm.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 ; This Source Code Form is subject to the terms of the Mozilla Public
     2 ; License, v. 2.0. If a copy of the MPL was not distributed with this
     3 ; file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 ;
     6 ; This code is converted from mpi_amd64_gas.asm for MASM for x64.
     7 ;
     9 ; ------------------------------------------------------------------------
    10 ;
    11 ;  Implementation of s_mpv_mul_set_vec which exploits
    12 ;  the 64X64->128 bit  unsigned multiply instruction.
    13 ;
    14 ; ------------------------------------------------------------------------
    16 ; r = a * digit, r and a are vectors of length len
    17 ; returns the carry digit
    18 ; r and a are 64 bit aligned.
    19 ;
    20 ; uint64_t
    21 ; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
    22 ;
    24 .CODE
    s_mpv_mul_set_vec64 PROC FRAME
            ;
            ; uint64_t
            ; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
            ;
            ; Stores the low 64 bits of a[i] * digit + carry into r[i] for each
            ; i in 0 .. len-1 and returns the final carry digit.  Returns 0
            ; and writes nothing when len is 0.
            ;
            ; ABI:  Microsoft x64
            ; In:   rcx = r, rdx = a, r8d = len, r9 = digit
            ; Out:  rax = carry digit
            ; Clobbers: rcx, rdx, r8, r9, r11, flags (rdi and rsi are
            ;           saved and restored)
            ;
            ; len is zero-extended from r8d, so a negative len is treated as
            ; a large unsigned count.
            ;
            ; FRAME / .pushreg / .endprolog describe the two pushes in the
            ; unwind data; the Windows x64 convention requires this for any
            ; function that saves nonvolatile registers.

            push rdi
            .pushreg rdi
            push rsi
            .pushreg rsi
            .endprolog

            ; compatibilities for parameter registers
            ;
            ; The loop body was converted from the GAS version, so the
            ; arguments are moved into the registers that body expects.
            mov rdi, rcx                    ; rdi = r
            mov rsi, rdx                    ; rsi = a
            mov edx, r8d                    ; rdx = len, upper 32 bits zeroed
            mov rcx, r9                     ; rcx = digit

            ; cy = 0.  This must happen before the len == 0 exit because
            ; the exit path returns r9, which held digit until now.
            xor r9, r9
            test rdx, rdx
            jz set_done                     ; if (len == 0) return 0
            mov r8, rdx                     ; r8 = digits left; mul overwrites rdx

    set_loop8:
            ; Eight digits per iteration.  r11 holds the next a[] digit,
            ; loaded one step ahead of the mul that consumes it.
            cmp r8, 8
            jb set_tail
            mov rax, [rsi]
            mov r11, [8+rsi]
            mul rcx                         ; rdx:rax = a[0] * digit
            add rax, r9
            adc rdx, 0
            mov [0+rdi], rax
            mov r9, rdx                     ; r9 = carry into the next digit
            mov rax, r11
            mov r11, [16+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [8+rdi], rax
            mov r9, rdx
            mov rax, r11
            mov r11, [24+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [16+rdi], rax
            mov r9, rdx
            mov rax, r11
            mov r11, [32+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [24+rdi], rax
            mov r9, rdx
            mov rax, r11
            mov r11, [40+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [32+rdi], rax
            mov r9, rdx
            mov rax, r11
            mov r11, [48+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [40+rdi], rax
            mov r9, rdx
            mov rax, r11
            mov r11, [56+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [48+rdi], rax
            mov r9, rdx
            mov rax, r11
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [56+rdi], rax
            mov r9, rdx
            add rsi, 64
            add rdi, 64
            sub r8, 8
            jnz set_loop8
            jmp set_done

    set_tail:
            ; 1 to 7 digits remain here: r8 is below 8 (jb above) and is
            ; never 0, because a zero count leaves through set_done.
            mov rax, [0+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [0+rdi], rax
            mov r9, rdx
            dec r8
            jz set_done
            mov rax, [8+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [8+rdi], rax
            mov r9, rdx
            dec r8
            jz set_done
            mov rax, [16+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [16+rdi], rax
            mov r9, rdx
            dec r8
            jz set_done
            mov rax, [24+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [24+rdi], rax
            mov r9, rdx
            dec r8
            jz set_done
            mov rax, [32+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [32+rdi], rax
            mov r9, rdx
            dec r8
            jz set_done
            mov rax, [40+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [40+rdi], rax
            mov r9, rdx
            dec r8
            jz set_done
            mov rax, [48+rsi]
            mul rcx
            add rax, r9
            adc rdx, 0
            mov [48+rdi], rax
            mov r9, rdx
            ; The seventh digit is always the last one, so no further
            ; count check is needed before falling into set_done.

    set_done:
            mov rax, r9                     ; return the carry digit
            pop rsi
            pop rdi
            ret

    s_mpv_mul_set_vec64 ENDP
   177 ;------------------------------------------------------------------------
   178 ;
   179 ; Implementation of s_mpv_mul_add_vec which exploits
   180 ; the 64X64->128 bit  unsigned multiply instruction.
   181 ;
   182 ;------------------------------------------------------------------------
   184 ; r += a * digit, r and a are vectors of length len
   185 ; returns the carry digit
   186 ; r and a are 64 bit aligned.
   187 ;
   188 ; uint64_t
   189 ; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
   190 ; 
   s_mpv_mul_add_vec64 PROC FRAME
           ;
           ; uint64_t
           ; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
           ;
           ; Replaces r[i] with the low 64 bits of r[i] + a[i] * digit + carry
           ; for each i in 0 .. len-1 and returns the final carry digit.
           ; Returns 0 and writes nothing when len is 0.
           ;
           ; ABI:  Microsoft x64
           ; In:   rcx = r, rdx = a, r8d = len, r9 = digit
           ; Out:  rax = carry digit
           ; Clobbers: rcx, rdx, r8, r9, r10, r11, flags (rdi and rsi are
           ;           saved and restored)
           ;
           ; len is zero-extended from r8d, so a negative len is treated as
           ; a large unsigned count.
           ;
           ; FRAME / .pushreg / .endprolog describe the two pushes in the
           ; unwind data; the Windows x64 convention requires this for any
           ; function that saves nonvolatile registers.

           push rdi
           .pushreg rdi
           push rsi
           .pushreg rsi
           .endprolog

           ; compatibilities for parameter registers
           ;
           ; The loop body was converted from the GAS version, so the
           ; arguments are moved into the registers that body expects.
           mov rdi, rcx                    ; rdi = r
           mov rsi, rdx                    ; rsi = a
           mov edx, r8d                    ; rdx = len, upper 32 bits zeroed
           mov rcx, r9                     ; rcx = digit

           ; cy = 0.  This must happen before the len == 0 exit because
           ; the exit path returns r9, which held digit until now.
           xor r9, r9
           test rdx, rdx
           jz add_done                     ; if (len == 0) return 0
           mov r8, rdx                     ; r8 = digits left; mul overwrites rdx

   add_loop8:
           ; Eight digits per iteration.  r10 holds the current r[] digit
           ; and r11 the next a[] digit, both loaded ahead of use.  Each
           ; step adds r[i] and then the carry into rdx:rax; the 128-bit
           ; sum cannot overflow since (2^64-1)^2 + 2*(2^64-1) = 2^128 - 1.
           cmp r8, 8
           jb add_tail
           mov rax, [0+rsi]
           mov r10, [0+rdi]
           mov r11, [8+rsi]
           mul rcx                         ; rdx:rax = a[0] * digit
           add rax, r10
           adc rdx, 0
           mov r10, [8+rdi]
           add rax, r9
           adc rdx, 0
           mov [0+rdi], rax
           mov r9, rdx                     ; r9 = carry into the next digit
           mov rax, r11
           mov r11, [16+rsi]
           mul rcx
           add rax, r10
           adc rdx, 0
           mov r10, [16+rdi]
           add rax, r9
           adc rdx, 0
           mov [8+rdi], rax
           mov r9, rdx
           mov rax, r11
           mov r11, [24+rsi]
           mul rcx
           add rax, r10
           adc rdx, 0
           mov r10, [24+rdi]
           add rax, r9
           adc rdx, 0
           mov [16+rdi], rax
           mov r9, rdx
           mov rax, r11
           mov r11, [32+rsi]
           mul rcx
           add rax, r10
           adc rdx, 0
           mov r10, [32+rdi]
           add rax, r9
           adc rdx, 0
           mov [24+rdi], rax
           mov r9, rdx
           mov rax, r11
           mov r11, [40+rsi]
           mul rcx
           add rax, r10
           adc rdx, 0
           mov r10, [40+rdi]
           add rax, r9
           adc rdx, 0
           mov [32+rdi], rax
           mov r9, rdx
           mov rax, r11
           mov r11, [48+rsi]
           mul rcx
           add rax, r10
           adc rdx, 0
           mov r10, [48+rdi]
           add rax, r9
           adc rdx, 0
           mov [40+rdi], rax
           mov r9, rdx
           mov rax, r11
           mov r11, [56+rsi]
           mul rcx
           add rax, r10
           adc rdx, 0
           mov r10, [56+rdi]
           add rax, r9
           adc rdx, 0
           mov [48+rdi], rax
           mov r9, rdx
           mov rax, r11
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [56+rdi], rax
           mov r9, rdx
           add rsi, 64
           add rdi, 64
           sub r8, 8
           jnz add_loop8
           jmp add_done

   add_tail:
           ; 1 to 7 digits remain here: r8 is below 8 (jb above) and is
           ; never 0, because a zero count leaves through add_done.
           mov rax, [0+rsi]
           mov r10, [0+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [0+rdi], rax
           mov r9, rdx
           dec r8
           jz add_done
           mov rax, [8+rsi]
           mov r10, [8+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [8+rdi], rax
           mov r9, rdx
           dec r8
           jz add_done
           mov rax, [16+rsi]
           mov r10, [16+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [16+rdi], rax
           mov r9, rdx
           dec r8
           jz add_done
           mov rax, [24+rsi]
           mov r10, [24+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [24+rdi], rax
           mov r9, rdx
           dec r8
           jz add_done
           mov rax, [32+rsi]
           mov r10, [32+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [32+rdi], rax
           mov r9, rdx
           dec r8
           jz add_done
           mov rax, [40+rsi]
           mov r10, [40+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [40+rdi], rax
           mov r9, rdx
           dec r8
           jz add_done
           mov rax, [48+rsi]
           mov r10, [48+rdi]
           mul rcx
           add rax, r10
           adc rdx, 0
           add rax, r9
           adc rdx, 0
           mov [48+rdi], rax
           mov r9, rdx
           ; The seventh digit is always the last one, so no further
           ; count check is needed before falling into add_done.

   add_done:
           mov rax, r9                     ; return the carry digit
           pop rsi
           pop rdi
           ret

   s_mpv_mul_add_vec64 ENDP
   388 END

mercurial