security/nss/lib/freebl/mpi/mpv_sparc.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "vis_proto.h"
     7 /***************************************************************/
     /*
      * Scalar type aliases used throughout this file: t_s32/t_u32 are
      * int/unsigned int, t_s64/t_u64 are long/unsigned long when __sparcv9
      * is defined and long long/unsigned long long otherwise, and t_d64 is
      * double.
      * NOTE(review): the names imply 32- and 64-bit widths, and later code
      * shifts t_s64 values by 32; confirm the widths for every target ABI.
      */
     9 typedef  int                t_s32;
    10 typedef  unsigned int       t_u32;
    11 #if defined(__sparcv9)
    12 typedef  long               t_s64;
    13 typedef  unsigned long      t_u64;
    14 #else
    15 typedef  long long          t_s64;
    16 typedef  unsigned long long t_u64;
    17 #endif
    18 typedef  double             t_d64;
    20 /***************************************************************/
    /*
     * Overlays one t_d64 with two t_s32 fields so that a 64-bit value
     * stored through d64 can be read back as two separate integers
     * (i32s.i0 is declared first, i32s.i1 second).  The MUL_U32_S64_2 and
     * MUL_U32_S64_2_D macros use it to split the result of vis_fxnor().
     */
    22 typedef union {
    23   t_d64 d64;
    24   struct {
    25     t_s32 i0;
    26     t_s32 i1;
    27   } i32s;
    28 } d64_2_i32;
    30 /***************************************************************/
    /*
     * Element count of the scratch buffer declared by the general-length
     * branches of mul_add() (doubled there when the multiplier is split).
     */
    32 #define BUFF_SIZE  256
    /*
     * Bit width at which mul_add() splits its multiplier: values below
     * (1 << A_BITS) are used whole, larger ones are split into their low
     * A_BITS bits and the remaining high bits.
     * NOTE(review): 19 is presumably chosen so that a 32-bit word times an
     * A_BITS-bit value (at most 51 bits) is exact in a double - confirm.
     */
    34 #define A_BITS  19
    /* Mask selecting the low A_BITS bits of a value. */
    35 #define A_MASK  ((1 << A_BITS) - 1)
    37 /***************************************************************/
    /*
     * One 64-bit constant with the top bit of each 32-bit half set.
     * DEF_VARS reads it back through a t_d64 pointer and the MUL macros
     * pass that value to vis_fxnor() as the mask operand.
     */
    39 static t_u64 mask_cnst[] = {
    40   0x8000000080000000ull
    41 };
    43 /***************************************************************/
    /*
     * DEF_VARS(N): declares the locals shared by the MUL_ and ADD_ macros.
     * Must be expanded where `y` and `a` are in scope.
     *   py   - y viewed as an array of t_d64
     *   mask - mask_cnst[0] reinterpreted as a t_d64
     *   ca   - the value (1u << 31) - 1 converted to double
     *   da   - the multiplier a converted to double
     *   buff - scratch array of N t_s64 products
     *   s    - running 64-bit sum used by the ADD_ macros
     *   dy   - union used to split a t_d64 into two t_s32 values
     * The expansion ends without a semicolon; the caller supplies it.
     * NOTE(review): py and mask are obtained by pointer casts between
     * integer and double types; this presumably relies on suitable
     * alignment of y and on the compiler tolerating the aliasing - confirm.
     */
    45 #define DEF_VARS(N)                     \
    46   t_d64 *py = (t_d64*)y;                \
    47   t_d64 mask = *((t_d64*)mask_cnst);    \
    48   t_d64 ca = (1u << 31) - 1;            \
    49   t_d64 da = (t_d64)a;                  \
    50   t_s64 buff[N], s;                     \
    51   d64_2_i32 dy
    53 /***************************************************************/
    /*
     * MUL_U32_S64_2(i): takes the i-th t_d64 of py (two 32-bit words),
     * multiplies each word by da and stores the two products in
     * buff[2*i] and buff[2*i+1].  Uses py, mask, ca, da, buff and dy from
     * the enclosing scope (as declared by DEF_VARS).  The double products
     * are converted to t_s64 by the assignment to buff[].
     * NOTE(review): assuming vis_fxnor() is a bitwise XNOR, as its name
     * suggests, each 32-bit half v becomes ~(v ^ 0x80000000), which read
     * as a signed int equals (2^31 - 1) - v; subtracting that from ca
     * (2^31 - 1) then yields v as a non-negative double.  Confirm against
     * vis_proto.h.
     */
    55 #define MUL_U32_S64_2(i)                                \
    56   dy.d64 = vis_fxnor(mask, py[i]);                      \
    57   buff[2*(i)  ] = (ca - (t_d64)dy.i32s.i0) * da;        \
    58   buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da
    /*
     * MUL_U32_S64_2_D(i): split-multiplier variant of MUL_U32_S64_2.
     * For the i-th t_d64 of py it derives two doubles d0 and d1 (one per
     * 32-bit word) and stores four products, each cast to t_s64:
     *   buff[4*i]   = d0 * da      buff[4*i+1] = d0 * db
     *   buff[4*i+2] = d1 * da      buff[4*i+3] = d1 * db
     * so every input word occupies two consecutive buff entries.
     * Uses py, mask, ca, da, buff, dy plus d0, d1 and db from the
     * enclosing scope.
     * NOTE(review): the conversion of each word to double relies on
     * vis_fxnor() being a bitwise XNOR - confirm against vis_proto.h.
     */
    60 #define MUL_U32_S64_2_D(i)              \
    61   dy.d64 = vis_fxnor(mask, py[i]);      \
    62   d0 = ca - (t_d64)dy.i32s.i0;          \
    63   d1 = ca - (t_d64)dy.i32s.i1;          \
    64   buff[4*(i)  ] = (t_s64)(d0 * da);     \
    65   buff[4*(i)+1] = (t_s64)(d0 * db);     \
    66   buff[4*(i)+2] = (t_s64)(d1 * da);     \
    67   buff[4*(i)+3] = (t_s64)(d1 * db)
    69 /***************************************************************/
    /*
     * ADD_S64_U32(i): one step of the carry chain for the unsplit
     * multiplier.  Adds buff[i], x[i] and the incoming carry c into s,
     * stores s in z[i] (converted to the element type of z) and sets c to
     * s >> 32 for the next step.  Uses s, buff, x, z and c from the
     * enclosing scope.
     */
    71 #define ADD_S64_U32(i)          \
    72   s = buff[i] + x[i] + c;       \
    73   z[i] = s;                     \
    74   c = (s >> 32)
    /*
     * ADD_S64_U32_D(i): one step of the carry chain for the split
     * multiplier.  Recombines the two partial products stored for word i
     * as buff[2*i] + (buff[2*i+1] << A_BITS), adds x[i] and the incoming
     * carry uc, stores the sum in z[i] (converted to the element type of
     * z) and sets uc to the sum shifted right by 32 as an unsigned value.
     * Uses s, buff, x, z and uc from the enclosing scope.
     */
    76 #define ADD_S64_U32_D(i)                        \
    77   s = buff[2*(i)] +(((t_s64)(buff[2*(i)+1]))<<A_BITS) + x[i] + uc;   \
    78   z[i] = s;                                     \
    79   uc = ((t_u64)s >> 32)
    81 /***************************************************************/
    /*
     * MUL_U32_S64_8(i): expands MUL_U32_S64_2 for the four consecutive
     * pair indices i .. i+3, i.e. eight 32-bit words of y.
     */
    83 #define MUL_U32_S64_8(i)        \
    84   MUL_U32_S64_2(i);             \
    85   MUL_U32_S64_2(i+1);           \
    86   MUL_U32_S64_2(i+2);           \
    87   MUL_U32_S64_2(i+3)
    /*
     * MUL_U32_S64_D_8(i): expands MUL_U32_S64_2_D for the four consecutive
     * pair indices i .. i+3, i.e. eight 32-bit words of y with the split
     * multiplier.
     */
    89 #define MUL_U32_S64_D_8(i)      \
    90   MUL_U32_S64_2_D(i);           \
    91   MUL_U32_S64_2_D(i+1);         \
    92   MUL_U32_S64_2_D(i+2);         \
    93   MUL_U32_S64_2_D(i+3)
    95 /***************************************************************/
    /*
     * ADD_S64_U32_8(i): expands ADD_S64_U32 for the eight consecutive word
     * indices i .. i+7, in increasing order so the carry c flows from each
     * word to the next.
     */
    97 #define ADD_S64_U32_8(i)        \
    98   ADD_S64_U32(i);               \
    99   ADD_S64_U32(i+1);             \
   100   ADD_S64_U32(i+2);             \
   101   ADD_S64_U32(i+3);             \
   102   ADD_S64_U32(i+4);             \
   103   ADD_S64_U32(i+5);             \
   104   ADD_S64_U32(i+6);             \
   105   ADD_S64_U32(i+7)
   /*
    * ADD_S64_U32_D_8(i): expands ADD_S64_U32_D for the eight consecutive
    * word indices i .. i+7, in increasing order so the carry uc flows from
    * each word to the next.
    */
   107 #define ADD_S64_U32_D_8(i)      \
   108   ADD_S64_U32_D(i);             \
   109   ADD_S64_U32_D(i+1);           \
   110   ADD_S64_U32_D(i+2);           \
   111   ADD_S64_U32_D(i+3);           \
   112   ADD_S64_U32_D(i+4);           \
   113   ADD_S64_U32_D(i+5);           \
   114   ADD_S64_U32_D(i+6);           \
   115   ADD_S64_U32_D(i+7)
   117 /***************************************************************/
   /*
    * mul_add: word-by-word multiply-accumulate with carry propagation.
    *
    * For i = 0 .. n-1 it forms (product for word i) + x[i] + carry, stores
    * the sum in z[i] (a t_u32, so only the low 32 bits are kept) and uses
    * the sum shifted right by 32 as the carry into word i+1.  The products
    * are computed in double precision from the words of y and the
    * multiplier a by the MUL_U32_S64_* macros and parked in a local t_s64
    * scratch buffer before the additions run.
    *
    * Parameters:
    *   z - destination, n words are written
    *   x - addend, n words are read
    *   y - multiplicand words, read two at a time through a t_d64 pointer
    *   n - number of 32-bit words to process
    *   a - 32-bit multiplier
    * Returns the carry left after the last word.
    *
    * Two strategies are selected by the size of a:
    *   a <  (1 << A_BITS): one product per word (MUL_U32_S64_2/ADD_S64_U32)
    *   a >= (1 << A_BITS): a is split into its low A_BITS bits (da) and the
    *                       remaining high bits (db); two products per word
    *                       are recombined by ADD_S64_U32_D.
    * Within each strategy n == 8 and n == 16 are fully unrolled; every
    * other n goes through the loops.
    *
    * NOTE(review): the loop branches size the scratch buffer from BUFF_SIZE
    * and do not compare n against it, so callers presumably guarantee
    * n <= BUFF_SIZE - confirm.
    * NOTE(review): the product loops run (n+1)/2 pair iterations, so for
    * odd n the t_d64 load also covers the word after y[n-1]; confirm the
    * callers' buffers allow that.
    * NOTE(review): y is dereferenced as t_d64, which presumably requires
    * 8-byte alignment on the target - confirm.
    */
   119 t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
   120 {
         /* Small multiplier: a single double product per word is used. */
   121   if (a < (1 << A_BITS)) {
   123     if (n == 8) {
   124       DEF_VARS(8);
   125       t_s32 c = 0;
             /* 4 pairs of y -> buff[0..7], then 8 carry-chain steps. */
   127       MUL_U32_S64_8(0);
   128       ADD_S64_U32_8(0);
   130       return c;
   132     } else if (n == 16) {
   133       DEF_VARS(16);
   134       t_s32 c = 0;
             /* MUL argument is a pair index: 0 and 4 cover words 0..15. */
   136       MUL_U32_S64_8(0);
   137       MUL_U32_S64_8(4);
             /* ADD argument is a word index: 0 and 8 cover words 0..15. */
   138       ADD_S64_U32_8(0);
   139       ADD_S64_U32_8(8);
   141       return c;
   143     } else {
   144       DEF_VARS(BUFF_SIZE);
   145       t_s32 i, c = 0;
             /* All products first (two words per iteration) ... */
   147 #pragma pipeloop(0)
   148       for (i = 0; i < (n+1)/2; i ++) {
   149         MUL_U32_S64_2(i);
   150       }
             /* ... then the sequential carry chain, one word per iteration. */
   152 #pragma pipeloop(0)
   153       for (i = 0; i < n; i ++) {
   154         ADD_S64_U32(i);
   155       }
   157       return c;
   159     }
   160   } else {
         /*
          * Large multiplier: each word of y needs two scratch entries (one
          * per half of a), hence the doubled DEF_VARS sizes below.
          */
   162     if (n == 8) {
   163       DEF_VARS(2*8);
   164       t_d64 d0, d1, db;
   165       t_u32 uc = 0;
             /* Replace da (set to a by DEF_VARS) with the low part of a. */
   167       da = (t_d64)(a &  A_MASK);
   168       db = (t_d64)(a >> A_BITS);
   170       MUL_U32_S64_D_8(0);
   171       ADD_S64_U32_D_8(0);
   173       return uc;
   175     } else if (n == 16) {
   176       DEF_VARS(2*16);
   177       t_d64 d0, d1, db;
   178       t_u32 uc = 0;
   180       da = (t_d64)(a &  A_MASK);
   181       db = (t_d64)(a >> A_BITS);
   183       MUL_U32_S64_D_8(0);
   184       MUL_U32_S64_D_8(4);
   185       ADD_S64_U32_D_8(0);
   186       ADD_S64_U32_D_8(8);
   188       return uc;
   190     } else {
   191       DEF_VARS(2*BUFF_SIZE);
   192       t_d64 d0, d1, db;
             /*
              * NOTE(review): i is unsigned in this branch, so the loop
              * conditions compare it with n as unsigned values.
              */
   193       t_u32 i, uc = 0;
   195       da = (t_d64)(a &  A_MASK);
   196       db = (t_d64)(a >> A_BITS);
             /* All partial products first (four buff entries per pair) ... */
   198 #pragma pipeloop(0)
   199       for (i = 0; i < (n+1)/2; i ++) {
   200         MUL_U32_S64_2_D(i);
   201       }
             /* ... then recombine and propagate the carry word by word. */
   203 #pragma pipeloop(0)
   204       for (i = 0; i < n; i ++) {
   205         ADD_S64_U32_D(i);
   206       }
   208       return uc;
   209     }
   210   }
   211 }
   213 /***************************************************************/
   /*
    * mul_add_inp: in-place form of mul_add().  Passes x as both the
    * destination and the addend, so the n words of x are overwritten with
    * the result of mul_add(x, x, y, n, a).  Returns whatever mul_add()
    * returns (its final carry).
    */
   215 t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
   216 {
   217   return mul_add(x, x, y, n, a);
   218 }
   220 /***************************************************************/

mercurial