security/nss/lib/freebl/mpi/mpv_sparc.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/mpv_sparc.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,220 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +#include "vis_proto.h"
     1.9 +
    1.10 +/***************************************************************/
    1.11 +
    /*
     * Sized scalar aliases used by the multiply-accumulate code in this file.
     * The intended widths are given by the names; they are an assumption about
     * the target ABI rather than something enforced here.  When __sparcv9 is
     * defined, plain long is used for the 64-bit integer types; otherwise
     * long long is used.
     */
    typedef  int                t_s32;
    typedef  unsigned int       t_u32;
    #if defined(__sparcv9)
    typedef  long               t_s64;
    typedef  unsigned long      t_u64;
    #else
    typedef  long long          t_s64;
    typedef  unsigned long long t_u64;
    #endif
    typedef  double             t_d64;
    1.22 +
    1.23 +/***************************************************************/
    1.24 +
    1.25 +typedef union {
    1.26 +  t_d64 d64;
    1.27 +  struct {
    1.28 +    t_s32 i0;
    1.29 +    t_s32 i1;
    1.30 +  } i32s;
    1.31 +} d64_2_i32;
    1.32 +
    1.33 +/***************************************************************/
    1.34 +
    /*
     * BUFF_SIZE: element count of the scratch product buffer used by the
     * generic (n not 8 or 16) paths of mul_add(); the split-multiplier path
     * declares 2*BUFF_SIZE elements.
     */
    #define BUFF_SIZE  256

    /*
     * A_BITS: multipliers below (1 << A_BITS) are multiplied in one piece;
     * larger multipliers are split into a low part (a & A_MASK) and a high
     * part (a >> A_BITS).  Presumably 19 was chosen so that a 32-bit word
     * times an A_BITS-wide value (at most 51 bits) is exactly representable
     * in a double.
     * A_MASK: mask selecting the low A_BITS bits.
     */
    #define A_BITS  19
    #define A_MASK  ((1 << A_BITS) - 1)
    1.39 +
    1.40 +/***************************************************************/
    1.41 +
    1.42 +static t_u64 mask_cnst[] = {
    1.43 +  0x8000000080000000ull
    1.44 +};
    1.45 +
    1.46 +/***************************************************************/
    1.47 +
    /*
     * DEF_VARS(N): declares the locals shared by the multiply/add macros.
     * Must appear where declarations are allowed, inside a function that has
     * `y` (t_u32 *) and `a` (t_u32) in scope.  The caller supplies the
     * trailing semicolon.
     *
     *   py   - y viewed as an array of 64-bit values (two words per element)
     *   mask - mask_cnst[0] reinterpreted as a t_d64
     *   ca   - 2^31 - 1 as a double
     *   da   - the multiplier a as a double (callers may overwrite it)
     *   buff - N-element scratch array of 64-bit products
     *   s    - scratch 64-bit sum used by the add macros
     *   dy   - scratch overlay used to split a 64-bit value into two words
     *
     * NOTE(review): reading mask_cnst through a t_d64 pointer is a type-pun
     * kept from the original; y is likewise accessed through a t_d64
     * pointer, so it presumably must be 8-byte aligned -- confirm with the
     * callers.
     */
    #define DEF_VARS(N)                             \
      t_d64 *py = (t_d64*)y;                        \
      t_d64 mask = *((const t_d64*)mask_cnst);      \
      t_d64 ca = (1u << 31) - 1;                    \
      t_d64 da = (t_d64)a;                          \
      t_s64 buff[N], s;                             \
      d64_2_i32 dy
    1.55 +
    1.56 +/***************************************************************/
    1.57 +
    /*
     * MUL_U32_S64_2(i): multiplies the two 32-bit words held in py[i] by da
     * and stores the products in buff[2*i] and buff[2*i+1].
     *
     * The words are turned into doubles without an unsigned conversion:
     * vis_fxnor(mask, v) is taken to be the bitwise XNOR of the VIS
     * instruction set.  With the top bit of each half set in mask, each
     * 32-bit half w becomes, read as a signed int, (2^31 - 1) - w, so
     * ca - (double)half recovers w as a non-negative double.
     *
     * Uses py, mask, ca, da, buff and dy from DEF_VARS.
     */
    #define MUL_U32_S64_2(i)                                        \
      do {                                                          \
        dy.d64 = vis_fxnor(mask, py[i]);                            \
        buff[2*(i)  ] = (t_s64)((ca - (t_d64)dy.i32s.i0) * da);     \
        buff[2*(i)+1] = (t_s64)((ca - (t_d64)dy.i32s.i1) * da);     \
      } while (0)

    /*
     * MUL_U32_S64_2_D(i): split-multiplier variant.  Each of the two words
     * in py[i] is multiplied by both da (low part of a) and db (high part of
     * a); the four products go to buff[4*i .. 4*i+3] in the order
     * (w0*da, w0*db, w1*da, w1*db).
     *
     * In addition to the DEF_VARS locals it needs t_d64 d0, d1 and db in
     * scope.
     */
    #define MUL_U32_S64_2_D(i)                  \
      do {                                      \
        dy.d64 = vis_fxnor(mask, py[i]);        \
        d0 = ca - (t_d64)dy.i32s.i0;            \
        d1 = ca - (t_d64)dy.i32s.i1;            \
        buff[4*(i)  ] = (t_s64)(d0 * da);       \
        buff[4*(i)+1] = (t_s64)(d0 * db);       \
        buff[4*(i)+2] = (t_s64)(d1 * da);       \
        buff[4*(i)+3] = (t_s64)(d1 * db);       \
      } while (0)
    1.71 +
    1.72 +/***************************************************************/
    1.73 +
    /*
     * ADD_S64_U32(i): adds the product buff[i], the word x[i] and the running
     * carry c; stores the low 32 bits of the sum in z[i] and leaves the bits
     * above 32 in c.  Needs buff, s, x, z and a t_s32 c in scope.
     */
    #define ADD_S64_U32(i)                  \
      do {                                  \
        s = buff[i] + x[i] + c;             \
        z[i] = (t_u32)s;                    \
        c = (t_s32)(s >> 32);               \
      } while (0)

    /*
     * ADD_S64_U32_D(i): split-multiplier variant.  Recombines the low-part
     * product buff[2*i] with the high-part product buff[2*i+1] shifted left
     * by A_BITS, then adds x[i] and the running carry uc; stores the low
     * 32 bits in z[i] and the high 32 bits in uc.  Needs buff, s, x, z and a
     * t_u32 uc in scope.
     *
     * The sum is formed in t_u64: the shifted high-part product can need all
     * 64 bits, and shifting it as a signed value would overflow (undefined
     * behaviour).  The full sum is at most 2^64 - 1, so it cannot wrap.
     */
    #define ADD_S64_U32_D(i)                                            \
      do {                                                              \
        s = (t_s64)((t_u64)buff[2*(i)] +                                \
                    ((t_u64)buff[2*(i)+1] << A_BITS) + x[i] + uc);      \
        z[i] = (t_u32)s;                                                \
        uc = (t_u32)((t_u64)s >> 32);                                   \
      } while (0)
    1.83 +
    1.84 +/***************************************************************/
    1.85 +
    /*
     * MUL_U32_S64_8(i): unrolled multiply of eight words, i.e. the four
     * 64-bit pairs py[i] .. py[i+3], via MUL_U32_S64_2.  The caller supplies
     * the trailing semicolon.
     */
    #define MUL_U32_S64_8(i)          \
      MUL_U32_S64_2(i);               \
      MUL_U32_S64_2((i)+1);           \
      MUL_U32_S64_2((i)+2);           \
      MUL_U32_S64_2((i)+3)

    /*
     * MUL_U32_S64_D_8(i): same unrolling for the split-multiplier variant,
     * via MUL_U32_S64_2_D.
     */
    #define MUL_U32_S64_D_8(i)        \
      MUL_U32_S64_2_D(i);             \
      MUL_U32_S64_2_D((i)+1);         \
      MUL_U32_S64_2_D((i)+2);         \
      MUL_U32_S64_2_D((i)+3)
    1.97 +
    1.98 +/***************************************************************/
    1.99 +
   /*
    * ADD_S64_U32_8(i): unrolled carry-propagating add for the eight words
    * i .. i+7, via ADD_S64_U32.  The steps must stay in ascending order
    * because each one consumes the carry left by the previous one.  The
    * caller supplies the trailing semicolon.
    */
   #define ADD_S64_U32_8(i)          \
     ADD_S64_U32(i);                 \
     ADD_S64_U32((i)+1);             \
     ADD_S64_U32((i)+2);             \
     ADD_S64_U32((i)+3);             \
     ADD_S64_U32((i)+4);             \
     ADD_S64_U32((i)+5);             \
     ADD_S64_U32((i)+6);             \
     ADD_S64_U32((i)+7)

   /*
    * ADD_S64_U32_D_8(i): same unrolling for the split-multiplier variant,
    * via ADD_S64_U32_D.
    */
   #define ADD_S64_U32_D_8(i)        \
     ADD_S64_U32_D(i);               \
     ADD_S64_U32_D((i)+1);           \
     ADD_S64_U32_D((i)+2);           \
     ADD_S64_U32_D((i)+3);           \
     ADD_S64_U32_D((i)+4);           \
     ADD_S64_U32_D((i)+5);           \
     ADD_S64_U32_D((i)+6);           \
     ADD_S64_U32_D((i)+7)
   1.119 +
   1.120 +/***************************************************************/
   1.121 +
   1.122 +t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
   1.123 +{
   1.124 +  if (a < (1 << A_BITS)) {
   1.125 +
   1.126 +    if (n == 8) {
   1.127 +      DEF_VARS(8);
   1.128 +      t_s32 c = 0;
   1.129 +
   1.130 +      MUL_U32_S64_8(0);
   1.131 +      ADD_S64_U32_8(0);
   1.132 +
   1.133 +      return c;
   1.134 +
   1.135 +    } else if (n == 16) {
   1.136 +      DEF_VARS(16);
   1.137 +      t_s32 c = 0;
   1.138 +
   1.139 +      MUL_U32_S64_8(0);
   1.140 +      MUL_U32_S64_8(4);
   1.141 +      ADD_S64_U32_8(0);
   1.142 +      ADD_S64_U32_8(8);
   1.143 +
   1.144 +      return c;
   1.145 +
   1.146 +    } else {
   1.147 +      DEF_VARS(BUFF_SIZE);
   1.148 +      t_s32 i, c = 0;
   1.149 +
   1.150 +#pragma pipeloop(0)
   1.151 +      for (i = 0; i < (n+1)/2; i ++) {
   1.152 +        MUL_U32_S64_2(i);
   1.153 +      }
   1.154 +
   1.155 +#pragma pipeloop(0)
   1.156 +      for (i = 0; i < n; i ++) {
   1.157 +        ADD_S64_U32(i);
   1.158 +      }
   1.159 +
   1.160 +      return c;
   1.161 +
   1.162 +    }
   1.163 +  } else {
   1.164 +
   1.165 +    if (n == 8) {
   1.166 +      DEF_VARS(2*8);
   1.167 +      t_d64 d0, d1, db;
   1.168 +      t_u32 uc = 0;
   1.169 +
   1.170 +      da = (t_d64)(a &  A_MASK);
   1.171 +      db = (t_d64)(a >> A_BITS);
   1.172 +
   1.173 +      MUL_U32_S64_D_8(0);
   1.174 +      ADD_S64_U32_D_8(0);
   1.175 +
   1.176 +      return uc;
   1.177 +
   1.178 +    } else if (n == 16) {
   1.179 +      DEF_VARS(2*16);
   1.180 +      t_d64 d0, d1, db;
   1.181 +      t_u32 uc = 0;
   1.182 +
   1.183 +      da = (t_d64)(a &  A_MASK);
   1.184 +      db = (t_d64)(a >> A_BITS);
   1.185 +
   1.186 +      MUL_U32_S64_D_8(0);
   1.187 +      MUL_U32_S64_D_8(4);
   1.188 +      ADD_S64_U32_D_8(0);
   1.189 +      ADD_S64_U32_D_8(8);
   1.190 +
   1.191 +      return uc;
   1.192 +
   1.193 +    } else {
   1.194 +      DEF_VARS(2*BUFF_SIZE);
   1.195 +      t_d64 d0, d1, db;
   1.196 +      t_u32 i, uc = 0;
   1.197 +
   1.198 +      da = (t_d64)(a &  A_MASK);
   1.199 +      db = (t_d64)(a >> A_BITS);
   1.200 +
   1.201 +#pragma pipeloop(0)
   1.202 +      for (i = 0; i < (n+1)/2; i ++) {
   1.203 +        MUL_U32_S64_2_D(i);
   1.204 +      }
   1.205 +
   1.206 +#pragma pipeloop(0)
   1.207 +      for (i = 0; i < n; i ++) {
   1.208 +        ADD_S64_U32_D(i);
   1.209 +      }
   1.210 +
   1.211 +      return uc;
   1.212 +    }
   1.213 +  }
   1.214 +}
   1.215 +
   1.216 +/***************************************************************/
   1.217 +
   1.218 +t_u32 mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
   1.219 +{
   1.220 +  return mul_add(x, x, y, n, a);
   1.221 +}
   1.222 +
   1.223 +/***************************************************************/

mercurial