security/nss/lib/freebl/mpi/mpi.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/freebl/mpi/mpi.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,4820 @@
     1.4 +/*
     1.5 + *  mpi.c
     1.6 + *
     1.7 + *  Arbitrary precision integer arithmetic library
     1.8 + *
     1.9 + * This Source Code Form is subject to the terms of the Mozilla Public
    1.10 + * License, v. 2.0. If a copy of the MPL was not distributed with this
    1.11 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    1.12 +
    1.13 +#include "mpi-priv.h"
    1.14 +#if defined(OSF1)
    1.15 +#include <c_asm.h>
    1.16 +#endif
    1.17 +
    1.18 +#if defined(__arm__) && \
    1.19 +    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
    1.20 +/* 16-bit thumb or ARM v3 doesn't work inlined assember version */
    1.21 +#undef MP_ASSEMBLY_MULTIPLY
    1.22 +#undef MP_ASSEMBLY_SQUARE
    1.23 +#endif
    1.24 +
    1.25 +#if MP_LOGTAB
    1.26 +/*
    1.27 +  A table of the logs of 2 for various bases (the 0 and 1 entries of
    1.28 +  this table are meaningless and should not be referenced).  
    1.29 +
    1.30 +  This table is used to compute output lengths for the mp_toradix()
    1.31 +  function.  Since a number n in radix r takes up about log_r(n)
    1.32 +  digits, we estimate the output size by taking the least integer
    1.33 +  greater than log_r(n), where:
    1.34 +
    1.35 +  log_r(n) = log_2(n) * log_r(2)
    1.36 +
    1.37 +  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
    1.38 +  which are the output bases supported.  
    1.39 + */
    1.40 +#include "logtab.h"
    1.41 +#endif
    1.42 +
    1.43 +/* {{{ Constant strings */
    1.44 +
    1.45 +/* Constant strings returned by mp_strerror() */
    1.46 +static const char *mp_err_string[] = {
    1.47 +  "unknown result code",     /* say what?            */
    1.48 +  "boolean true",            /* MP_OKAY, MP_YES      */
    1.49 +  "boolean false",           /* MP_NO                */
    1.50 +  "out of memory",           /* MP_MEM               */
    1.51 +  "argument out of range",   /* MP_RANGE             */
    1.52 +  "invalid input parameter", /* MP_BADARG            */
    1.53 +  "result is undefined"      /* MP_UNDEF             */
    1.54 +};
    1.55 +
    1.56 +/* Value to digit maps for radix conversion   */
    1.57 +
    1.58 +/* s_dmap_1 - standard digits and letters */
    1.59 +static const char *s_dmap_1 = 
    1.60 +  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
    1.61 +
    1.62 +/* }}} */
    1.63 +
    1.64 +unsigned long mp_allocs;
    1.65 +unsigned long mp_frees;
    1.66 +unsigned long mp_copies;
    1.67 +
    1.68 +/* {{{ Default precision manipulation */
    1.69 +
    1.70 +/* Default precision for newly created mp_int's      */
    1.71 +static mp_size s_mp_defprec = MP_DEFPREC;
    1.72 +
    1.73 +mp_size mp_get_prec(void)
    1.74 +{
    1.75 +  return s_mp_defprec;
    1.76 +
    1.77 +} /* end mp_get_prec() */
    1.78 +
    1.79 +void         mp_set_prec(mp_size prec)
    1.80 +{
    1.81 +  if(prec == 0)
    1.82 +    s_mp_defprec = MP_DEFPREC;
    1.83 +  else
    1.84 +    s_mp_defprec = prec;
    1.85 +
    1.86 +} /* end mp_set_prec() */
    1.87 +
    1.88 +/* }}} */
    1.89 +
    1.90 +/*------------------------------------------------------------------------*/
    1.91 +/* {{{ mp_init(mp) */
    1.92 +
    1.93 +/*
    1.94 +  mp_init(mp)
    1.95 +
    1.96 +  Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
    1.97 +  MP_MEM if memory could not be allocated for the structure.
    1.98 + */
    1.99 +
   1.100 +mp_err mp_init(mp_int *mp)
   1.101 +{
   1.102 +  return mp_init_size(mp, s_mp_defprec);
   1.103 +
   1.104 +} /* end mp_init() */
   1.105 +
   1.106 +/* }}} */
   1.107 +
   1.108 +/* {{{ mp_init_size(mp, prec) */
   1.109 +
   1.110 +/*
   1.111 +  mp_init_size(mp, prec)
   1.112 +
   1.113 +  Initialize a new zero-valued mp_int with at least the given
   1.114 +  precision; returns MP_OKAY if successful, or MP_MEM if memory could
   1.115 +  not be allocated for the structure.
   1.116 + */
   1.117 +
   1.118 +mp_err mp_init_size(mp_int *mp, mp_size prec)
   1.119 +{
   1.120 +  ARGCHK(mp != NULL && prec > 0, MP_BADARG);
   1.121 +
   1.122 +  prec = MP_ROUNDUP(prec, s_mp_defprec);
   1.123 +  if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
   1.124 +    return MP_MEM;
   1.125 +
   1.126 +  SIGN(mp) = ZPOS;
   1.127 +  USED(mp) = 1;
   1.128 +  ALLOC(mp) = prec;
   1.129 +
   1.130 +  return MP_OKAY;
   1.131 +
   1.132 +} /* end mp_init_size() */
   1.133 +
   1.134 +/* }}} */
   1.135 +
   1.136 +/* {{{ mp_init_copy(mp, from) */
   1.137 +
   1.138 +/*
   1.139 +  mp_init_copy(mp, from)
   1.140 +
   1.141 +  Initialize mp as an exact copy of from.  Returns MP_OKAY if
   1.142 +  successful, MP_MEM if memory could not be allocated for the new
   1.143 +  structure.
   1.144 + */
   1.145 +
   1.146 +mp_err mp_init_copy(mp_int *mp, const mp_int *from)
   1.147 +{
   1.148 +  ARGCHK(mp != NULL && from != NULL, MP_BADARG);
   1.149 +
   1.150 +  if(mp == from)
   1.151 +    return MP_OKAY;
   1.152 +
   1.153 +  if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
   1.154 +    return MP_MEM;
   1.155 +
   1.156 +  s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
   1.157 +  USED(mp) = USED(from);
   1.158 +  ALLOC(mp) = ALLOC(from);
   1.159 +  SIGN(mp) = SIGN(from);
   1.160 +
   1.161 +  return MP_OKAY;
   1.162 +
   1.163 +} /* end mp_init_copy() */
   1.164 +
   1.165 +/* }}} */
   1.166 +
   1.167 +/* {{{ mp_copy(from, to) */
   1.168 +
   1.169 +/*
   1.170 +  mp_copy(from, to)
   1.171 +
   1.172 +  Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
   1.173 +  'to' has already been initialized (if not, use mp_init_copy()
   1.174 +  instead). If 'from' and 'to' are identical, nothing happens.
   1.175 + */
   1.176 +
   1.177 +mp_err mp_copy(const mp_int *from, mp_int *to)
   1.178 +{
   1.179 +  ARGCHK(from != NULL && to != NULL, MP_BADARG);
   1.180 +
   1.181 +  if(from == to)
   1.182 +    return MP_OKAY;
   1.183 +
   1.184 +  { /* copy */
   1.185 +    mp_digit   *tmp;
   1.186 +
   1.187 +    /*
   1.188 +      If the allocated buffer in 'to' already has enough space to hold
   1.189 +      all the used digits of 'from', we'll re-use it to avoid hitting
   1.190 +      the memory allocater more than necessary; otherwise, we'd have
   1.191 +      to grow anyway, so we just allocate a hunk and make the copy as
   1.192 +      usual
   1.193 +     */
   1.194 +    if(ALLOC(to) >= USED(from)) {
   1.195 +      s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
   1.196 +      s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
   1.197 +      
   1.198 +    } else {
   1.199 +      if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
   1.200 +	return MP_MEM;
   1.201 +
   1.202 +      s_mp_copy(DIGITS(from), tmp, USED(from));
   1.203 +
   1.204 +      if(DIGITS(to) != NULL) {
   1.205 +#if MP_CRYPTO
   1.206 +	s_mp_setz(DIGITS(to), ALLOC(to));
   1.207 +#endif
   1.208 +	s_mp_free(DIGITS(to));
   1.209 +      }
   1.210 +
   1.211 +      DIGITS(to) = tmp;
   1.212 +      ALLOC(to) = ALLOC(from);
   1.213 +    }
   1.214 +
   1.215 +    /* Copy the precision and sign from the original */
   1.216 +    USED(to) = USED(from);
   1.217 +    SIGN(to) = SIGN(from);
   1.218 +  } /* end copy */
   1.219 +
   1.220 +  return MP_OKAY;
   1.221 +
   1.222 +} /* end mp_copy() */
   1.223 +
   1.224 +/* }}} */
   1.225 +
   1.226 +/* {{{ mp_exch(mp1, mp2) */
   1.227 +
   1.228 +/*
   1.229 +  mp_exch(mp1, mp2)
   1.230 +
   1.231 +  Exchange mp1 and mp2 without allocating any intermediate memory
   1.232 +  (well, unless you count the stack space needed for this call and the
   1.233 +  locals it creates...).  This cannot fail.
   1.234 + */
   1.235 +
   1.236 +void mp_exch(mp_int *mp1, mp_int *mp2)
   1.237 +{
   1.238 +#if MP_ARGCHK == 2
   1.239 +  assert(mp1 != NULL && mp2 != NULL);
   1.240 +#else
   1.241 +  if(mp1 == NULL || mp2 == NULL)
   1.242 +    return;
   1.243 +#endif
   1.244 +
   1.245 +  s_mp_exch(mp1, mp2);
   1.246 +
   1.247 +} /* end mp_exch() */
   1.248 +
   1.249 +/* }}} */
   1.250 +
   1.251 +/* {{{ mp_clear(mp) */
   1.252 +
   1.253 +/*
   1.254 +  mp_clear(mp)
   1.255 +
   1.256 +  Release the storage used by an mp_int, and void its fields so that
   1.257 +  if someone calls mp_clear() again for the same int later, we won't
   1.258 +  get tollchocked.
   1.259 + */
   1.260 +
   1.261 +void   mp_clear(mp_int *mp)
   1.262 +{
   1.263 +  if(mp == NULL)
   1.264 +    return;
   1.265 +
   1.266 +  if(DIGITS(mp) != NULL) {
   1.267 +#if MP_CRYPTO
   1.268 +    s_mp_setz(DIGITS(mp), ALLOC(mp));
   1.269 +#endif
   1.270 +    s_mp_free(DIGITS(mp));
   1.271 +    DIGITS(mp) = NULL;
   1.272 +  }
   1.273 +
   1.274 +  USED(mp) = 0;
   1.275 +  ALLOC(mp) = 0;
   1.276 +
   1.277 +} /* end mp_clear() */
   1.278 +
   1.279 +/* }}} */
   1.280 +
   1.281 +/* {{{ mp_zero(mp) */
   1.282 +
   1.283 +/*
   1.284 +  mp_zero(mp) 
   1.285 +
   1.286 +  Set mp to zero.  Does not change the allocated size of the structure,
   1.287 +  and therefore cannot fail (except on a bad argument, which we ignore)
   1.288 + */
   1.289 +void   mp_zero(mp_int *mp)
   1.290 +{
   1.291 +  if(mp == NULL)
   1.292 +    return;
   1.293 +
   1.294 +  s_mp_setz(DIGITS(mp), ALLOC(mp));
   1.295 +  USED(mp) = 1;
   1.296 +  SIGN(mp) = ZPOS;
   1.297 +
   1.298 +} /* end mp_zero() */
   1.299 +
   1.300 +/* }}} */
   1.301 +
   1.302 +/* {{{ mp_set(mp, d) */
   1.303 +
   1.304 +void   mp_set(mp_int *mp, mp_digit d)
   1.305 +{
   1.306 +  if(mp == NULL)
   1.307 +    return;
   1.308 +
   1.309 +  mp_zero(mp);
   1.310 +  DIGIT(mp, 0) = d;
   1.311 +
   1.312 +} /* end mp_set() */
   1.313 +
   1.314 +/* }}} */
   1.315 +
   1.316 +/* {{{ mp_set_int(mp, z) */
   1.317 +
   1.318 +mp_err mp_set_int(mp_int *mp, long z)
   1.319 +{
   1.320 +  int            ix;
   1.321 +  unsigned long  v = labs(z);
   1.322 +  mp_err         res;
   1.323 +
   1.324 +  ARGCHK(mp != NULL, MP_BADARG);
   1.325 +
   1.326 +  mp_zero(mp);
   1.327 +  if(z == 0)
   1.328 +    return MP_OKAY;  /* shortcut for zero */
   1.329 +
   1.330 +  if (sizeof v <= sizeof(mp_digit)) {
   1.331 +    DIGIT(mp,0) = v;
   1.332 +  } else {
   1.333 +    for (ix = sizeof(long) - 1; ix >= 0; ix--) {
   1.334 +      if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
   1.335 +	return res;
   1.336 +
   1.337 +      res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
   1.338 +      if (res != MP_OKAY)
   1.339 +	return res;
   1.340 +    }
   1.341 +  }
   1.342 +  if(z < 0)
   1.343 +    SIGN(mp) = NEG;
   1.344 +
   1.345 +  return MP_OKAY;
   1.346 +
   1.347 +} /* end mp_set_int() */
   1.348 +
   1.349 +/* }}} */
   1.350 +
   1.351 +/* {{{ mp_set_ulong(mp, z) */
   1.352 +
   1.353 +mp_err mp_set_ulong(mp_int *mp, unsigned long z)
   1.354 +{
   1.355 +  int            ix;
   1.356 +  mp_err         res;
   1.357 +
   1.358 +  ARGCHK(mp != NULL, MP_BADARG);
   1.359 +
   1.360 +  mp_zero(mp);
   1.361 +  if(z == 0)
   1.362 +    return MP_OKAY;  /* shortcut for zero */
   1.363 +
   1.364 +  if (sizeof z <= sizeof(mp_digit)) {
   1.365 +    DIGIT(mp,0) = z;
   1.366 +  } else {
   1.367 +    for (ix = sizeof(long) - 1; ix >= 0; ix--) {
   1.368 +      if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
   1.369 +	return res;
   1.370 +
   1.371 +      res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
   1.372 +      if (res != MP_OKAY)
   1.373 +	return res;
   1.374 +    }
   1.375 +  }
   1.376 +  return MP_OKAY;
   1.377 +} /* end mp_set_ulong() */
   1.378 +
   1.379 +/* }}} */
   1.380 +
   1.381 +/*------------------------------------------------------------------------*/
   1.382 +/* {{{ Digit arithmetic */
   1.383 +
   1.384 +/* {{{ mp_add_d(a, d, b) */
   1.385 +
   1.386 +/*
   1.387 +  mp_add_d(a, d, b)
   1.388 +
   1.389 +  Compute the sum b = a + d, for a single digit d.  Respects the sign of
   1.390 +  its primary addend (single digits are unsigned anyway).
   1.391 + */
   1.392 +
   1.393 +mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
   1.394 +{
   1.395 +  mp_int   tmp;
   1.396 +  mp_err   res;
   1.397 +
   1.398 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1.399 +
   1.400 +  if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
   1.401 +    return res;
   1.402 +
   1.403 +  if(SIGN(&tmp) == ZPOS) {
   1.404 +    if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
   1.405 +      goto CLEANUP;
   1.406 +  } else if(s_mp_cmp_d(&tmp, d) >= 0) {
   1.407 +    if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
   1.408 +      goto CLEANUP;
   1.409 +  } else {
   1.410 +    mp_neg(&tmp, &tmp);
   1.411 +
   1.412 +    DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
   1.413 +  }
   1.414 +
   1.415 +  if(s_mp_cmp_d(&tmp, 0) == 0)
   1.416 +    SIGN(&tmp) = ZPOS;
   1.417 +
   1.418 +  s_mp_exch(&tmp, b);
   1.419 +
   1.420 +CLEANUP:
   1.421 +  mp_clear(&tmp);
   1.422 +  return res;
   1.423 +
   1.424 +} /* end mp_add_d() */
   1.425 +
   1.426 +/* }}} */
   1.427 +
   1.428 +/* {{{ mp_sub_d(a, d, b) */
   1.429 +
   1.430 +/*
   1.431 +  mp_sub_d(a, d, b)
   1.432 +
   1.433 +  Compute the difference b = a - d, for a single digit d.  Respects the
   1.434 +  sign of its subtrahend (single digits are unsigned anyway).
   1.435 + */
   1.436 +
   1.437 +mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
   1.438 +{
   1.439 +  mp_int   tmp;
   1.440 +  mp_err   res;
   1.441 +
   1.442 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1.443 +
   1.444 +  if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
   1.445 +    return res;
   1.446 +
   1.447 +  if(SIGN(&tmp) == NEG) {
   1.448 +    if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
   1.449 +      goto CLEANUP;
   1.450 +  } else if(s_mp_cmp_d(&tmp, d) >= 0) {
   1.451 +    if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
   1.452 +      goto CLEANUP;
   1.453 +  } else {
   1.454 +    mp_neg(&tmp, &tmp);
   1.455 +
   1.456 +    DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
   1.457 +    SIGN(&tmp) = NEG;
   1.458 +  }
   1.459 +
   1.460 +  if(s_mp_cmp_d(&tmp, 0) == 0)
   1.461 +    SIGN(&tmp) = ZPOS;
   1.462 +
   1.463 +  s_mp_exch(&tmp, b);
   1.464 +
   1.465 +CLEANUP:
   1.466 +  mp_clear(&tmp);
   1.467 +  return res;
   1.468 +
   1.469 +} /* end mp_sub_d() */
   1.470 +
   1.471 +/* }}} */
   1.472 +
   1.473 +/* {{{ mp_mul_d(a, d, b) */
   1.474 +
   1.475 +/*
   1.476 +  mp_mul_d(a, d, b)
   1.477 +
   1.478 +  Compute the product b = a * d, for a single digit d.  Respects the sign
   1.479 +  of its multiplicand (single digits are unsigned anyway)
   1.480 + */
   1.481 +
   1.482 +mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
   1.483 +{
   1.484 +  mp_err  res;
   1.485 +
   1.486 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1.487 +
   1.488 +  if(d == 0) {
   1.489 +    mp_zero(b);
   1.490 +    return MP_OKAY;
   1.491 +  }
   1.492 +
   1.493 +  if((res = mp_copy(a, b)) != MP_OKAY)
   1.494 +    return res;
   1.495 +
   1.496 +  res = s_mp_mul_d(b, d);
   1.497 +
   1.498 +  return res;
   1.499 +
   1.500 +} /* end mp_mul_d() */
   1.501 +
   1.502 +/* }}} */
   1.503 +
   1.504 +/* {{{ mp_mul_2(a, c) */
   1.505 +
   1.506 +mp_err mp_mul_2(const mp_int *a, mp_int *c)
   1.507 +{
   1.508 +  mp_err  res;
   1.509 +
   1.510 +  ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1.511 +
   1.512 +  if((res = mp_copy(a, c)) != MP_OKAY)
   1.513 +    return res;
   1.514 +
   1.515 +  return s_mp_mul_2(c);
   1.516 +
   1.517 +} /* end mp_mul_2() */
   1.518 +
   1.519 +/* }}} */
   1.520 +
   1.521 +/* {{{ mp_div_d(a, d, q, r) */
   1.522 +
   1.523 +/*
   1.524 +  mp_div_d(a, d, q, r)
   1.525 +
   1.526 +  Compute the quotient q = a / d and remainder r = a mod d, for a
   1.527 +  single digit d.  Respects the sign of its divisor (single digits are
   1.528 +  unsigned anyway).
   1.529 + */
   1.530 +
   1.531 +mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
   1.532 +{
   1.533 +  mp_err   res;
   1.534 +  mp_int   qp;
   1.535 +  mp_digit rem;
   1.536 +  int      pow;
   1.537 +
   1.538 +  ARGCHK(a != NULL, MP_BADARG);
   1.539 +
   1.540 +  if(d == 0)
   1.541 +    return MP_RANGE;
   1.542 +
   1.543 +  /* Shortcut for powers of two ... */
   1.544 +  if((pow = s_mp_ispow2d(d)) >= 0) {
   1.545 +    mp_digit  mask;
   1.546 +
   1.547 +    mask = ((mp_digit)1 << pow) - 1;
   1.548 +    rem = DIGIT(a, 0) & mask;
   1.549 +
   1.550 +    if(q) {
   1.551 +      mp_copy(a, q);
   1.552 +      s_mp_div_2d(q, pow);
   1.553 +    }
   1.554 +
   1.555 +    if(r)
   1.556 +      *r = rem;
   1.557 +
   1.558 +    return MP_OKAY;
   1.559 +  }
   1.560 +
   1.561 +  if((res = mp_init_copy(&qp, a)) != MP_OKAY)
   1.562 +    return res;
   1.563 +
   1.564 +  res = s_mp_div_d(&qp, d, &rem);
   1.565 +
   1.566 +  if(s_mp_cmp_d(&qp, 0) == 0)
   1.567 +    SIGN(q) = ZPOS;
   1.568 +
   1.569 +  if(r)
   1.570 +    *r = rem;
   1.571 +
   1.572 +  if(q)
   1.573 +    s_mp_exch(&qp, q);
   1.574 +
   1.575 +  mp_clear(&qp);
   1.576 +  return res;
   1.577 +
   1.578 +} /* end mp_div_d() */
   1.579 +
   1.580 +/* }}} */
   1.581 +
   1.582 +/* {{{ mp_div_2(a, c) */
   1.583 +
   1.584 +/*
   1.585 +  mp_div_2(a, c)
   1.586 +
   1.587 +  Compute c = a / 2, disregarding the remainder.
   1.588 + */
   1.589 +
   1.590 +mp_err mp_div_2(const mp_int *a, mp_int *c)
   1.591 +{
   1.592 +  mp_err  res;
   1.593 +
   1.594 +  ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1.595 +
   1.596 +  if((res = mp_copy(a, c)) != MP_OKAY)
   1.597 +    return res;
   1.598 +
   1.599 +  s_mp_div_2(c);
   1.600 +
   1.601 +  return MP_OKAY;
   1.602 +
   1.603 +} /* end mp_div_2() */
   1.604 +
   1.605 +/* }}} */
   1.606 +
   1.607 +/* {{{ mp_expt_d(a, d, b) */
   1.608 +
   1.609 +mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
   1.610 +{
   1.611 +  mp_int   s, x;
   1.612 +  mp_err   res;
   1.613 +
   1.614 +  ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1.615 +
   1.616 +  if((res = mp_init(&s)) != MP_OKAY)
   1.617 +    return res;
   1.618 +  if((res = mp_init_copy(&x, a)) != MP_OKAY)
   1.619 +    goto X;
   1.620 +
   1.621 +  DIGIT(&s, 0) = 1;
   1.622 +
   1.623 +  while(d != 0) {
   1.624 +    if(d & 1) {
   1.625 +      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1.626 +	goto CLEANUP;
   1.627 +    }
   1.628 +
   1.629 +    d /= 2;
   1.630 +
   1.631 +    if((res = s_mp_sqr(&x)) != MP_OKAY)
   1.632 +      goto CLEANUP;
   1.633 +  }
   1.634 +
   1.635 +  s_mp_exch(&s, c);
   1.636 +
   1.637 +CLEANUP:
   1.638 +  mp_clear(&x);
   1.639 +X:
   1.640 +  mp_clear(&s);
   1.641 +
   1.642 +  return res;
   1.643 +
   1.644 +} /* end mp_expt_d() */
   1.645 +
   1.646 +/* }}} */
   1.647 +
   1.648 +/* }}} */
   1.649 +
   1.650 +/*------------------------------------------------------------------------*/
   1.651 +/* {{{ Full arithmetic */
   1.652 +
   1.653 +/* {{{ mp_abs(a, b) */
   1.654 +
   1.655 +/*
   1.656 +  mp_abs(a, b)
   1.657 +
   1.658 +  Compute b = |a|.  'a' and 'b' may be identical.
   1.659 + */
   1.660 +
   1.661 +mp_err mp_abs(const mp_int *a, mp_int *b)
   1.662 +{
   1.663 +  mp_err   res;
   1.664 +
   1.665 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1.666 +
   1.667 +  if((res = mp_copy(a, b)) != MP_OKAY)
   1.668 +    return res;
   1.669 +
   1.670 +  SIGN(b) = ZPOS;
   1.671 +
   1.672 +  return MP_OKAY;
   1.673 +
   1.674 +} /* end mp_abs() */
   1.675 +
   1.676 +/* }}} */
   1.677 +
   1.678 +/* {{{ mp_neg(a, b) */
   1.679 +
   1.680 +/*
   1.681 +  mp_neg(a, b)
   1.682 +
   1.683 +  Compute b = -a.  'a' and 'b' may be identical.
   1.684 + */
   1.685 +
   1.686 +mp_err mp_neg(const mp_int *a, mp_int *b)
   1.687 +{
   1.688 +  mp_err   res;
   1.689 +
   1.690 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1.691 +
   1.692 +  if((res = mp_copy(a, b)) != MP_OKAY)
   1.693 +    return res;
   1.694 +
   1.695 +  if(s_mp_cmp_d(b, 0) == MP_EQ) 
   1.696 +    SIGN(b) = ZPOS;
   1.697 +  else 
   1.698 +    SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
   1.699 +
   1.700 +  return MP_OKAY;
   1.701 +
   1.702 +} /* end mp_neg() */
   1.703 +
   1.704 +/* }}} */
   1.705 +
   1.706 +/* {{{ mp_add(a, b, c) */
   1.707 +
   1.708 +/*
   1.709 +  mp_add(a, b, c)
   1.710 +
   1.711 +  Compute c = a + b.  All parameters may be identical.
   1.712 + */
   1.713 +
   1.714 +mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
   1.715 +{
   1.716 +  mp_err  res;
   1.717 +
   1.718 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1.719 +
   1.720 +  if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
   1.721 +    MP_CHECKOK( s_mp_add_3arg(a, b, c) );
   1.722 +  } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
   1.723 +    MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
   1.724 +  } else {                          /* different sign: |a|  < |b|   */
   1.725 +    MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
   1.726 +  }
   1.727 +
   1.728 +  if (s_mp_cmp_d(c, 0) == MP_EQ)
   1.729 +    SIGN(c) = ZPOS;
   1.730 +
   1.731 +CLEANUP:
   1.732 +  return res;
   1.733 +
   1.734 +} /* end mp_add() */
   1.735 +
   1.736 +/* }}} */
   1.737 +
   1.738 +/* {{{ mp_sub(a, b, c) */
   1.739 +
   1.740 +/*
   1.741 +  mp_sub(a, b, c)
   1.742 +
   1.743 +  Compute c = a - b.  All parameters may be identical.
   1.744 + */
   1.745 +
   1.746 +mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
   1.747 +{
   1.748 +  mp_err  res;
   1.749 +  int     magDiff;
   1.750 +
   1.751 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1.752 +
   1.753 +  if (a == b) {
   1.754 +    mp_zero(c);
   1.755 +    return MP_OKAY;
   1.756 +  }
   1.757 +
   1.758 +  if (MP_SIGN(a) != MP_SIGN(b)) {
   1.759 +    MP_CHECKOK( s_mp_add_3arg(a, b, c) );
   1.760 +  } else if (!(magDiff = s_mp_cmp(a, b))) {
   1.761 +    mp_zero(c);
   1.762 +    res = MP_OKAY;
   1.763 +  } else if (magDiff > 0) {
   1.764 +    MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
   1.765 +  } else {
   1.766 +    MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
   1.767 +    MP_SIGN(c) = !MP_SIGN(a);
   1.768 +  }
   1.769 +
   1.770 +  if (s_mp_cmp_d(c, 0) == MP_EQ)
   1.771 +    MP_SIGN(c) = MP_ZPOS;
   1.772 +
   1.773 +CLEANUP:
   1.774 +  return res;
   1.775 +
   1.776 +} /* end mp_sub() */
   1.777 +
   1.778 +/* }}} */
   1.779 +
   1.780 +/* {{{ mp_mul(a, b, c) */
   1.781 +
   1.782 +/*
   1.783 +  mp_mul(a, b, c)
   1.784 +
   1.785 +  Compute c = a * b.  All parameters may be identical.
   1.786 + */
   1.787 +mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
   1.788 +{
   1.789 +  mp_digit *pb;
   1.790 +  mp_int   tmp;
   1.791 +  mp_err   res;
   1.792 +  mp_size  ib;
   1.793 +  mp_size  useda, usedb;
   1.794 +
   1.795 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1.796 +
   1.797 +  if (a == c) {
   1.798 +    if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
   1.799 +      return res;
   1.800 +    if (a == b) 
   1.801 +      b = &tmp;
   1.802 +    a = &tmp;
   1.803 +  } else if (b == c) {
   1.804 +    if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
   1.805 +      return res;
   1.806 +    b = &tmp;
   1.807 +  } else {
   1.808 +    MP_DIGITS(&tmp) = 0;
   1.809 +  }
   1.810 +
   1.811 +  if (MP_USED(a) < MP_USED(b)) {
   1.812 +    const mp_int *xch = b;	/* switch a and b, to do fewer outer loops */
   1.813 +    b = a;
   1.814 +    a = xch;
   1.815 +  }
   1.816 +
   1.817 +  MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
   1.818 +  if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
   1.819 +    goto CLEANUP;
   1.820 +
   1.821 +#ifdef NSS_USE_COMBA
   1.822 +  if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
   1.823 +      if (MP_USED(a) == 4) {
   1.824 +          s_mp_mul_comba_4(a, b, c);
   1.825 +          goto CLEANUP;
   1.826 +      }
   1.827 +      if (MP_USED(a) == 8) {
   1.828 +          s_mp_mul_comba_8(a, b, c);
   1.829 +          goto CLEANUP;
   1.830 +      }
   1.831 +      if (MP_USED(a) == 16) {
   1.832 +          s_mp_mul_comba_16(a, b, c);
   1.833 +          goto CLEANUP;
   1.834 +      }
   1.835 +      if (MP_USED(a) == 32) {
   1.836 +          s_mp_mul_comba_32(a, b, c);
   1.837 +          goto CLEANUP;
   1.838 +      } 
   1.839 +  }
   1.840 +#endif
   1.841 +
   1.842 +  pb = MP_DIGITS(b);
   1.843 +  s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
   1.844 +
   1.845 +  /* Outer loop:  Digits of b */
   1.846 +  useda = MP_USED(a);
   1.847 +  usedb = MP_USED(b);
   1.848 +  for (ib = 1; ib < usedb; ib++) {
   1.849 +    mp_digit b_i    = *pb++;
   1.850 +
   1.851 +    /* Inner product:  Digits of a */
   1.852 +    if (b_i)
   1.853 +      s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
   1.854 +    else
   1.855 +      MP_DIGIT(c, ib + useda) = b_i;
   1.856 +  }
   1.857 +
   1.858 +  s_mp_clamp(c);
   1.859 +
   1.860 +  if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
   1.861 +    SIGN(c) = ZPOS;
   1.862 +  else
   1.863 +    SIGN(c) = NEG;
   1.864 +
   1.865 +CLEANUP:
   1.866 +  mp_clear(&tmp);
   1.867 +  return res;
   1.868 +} /* end mp_mul() */
   1.869 +
   1.870 +/* }}} */
   1.871 +
   1.872 +/* {{{ mp_sqr(a, sqr) */
   1.873 +
   1.874 +#if MP_SQUARE
   1.875 +/*
   1.876 +  Computes the square of a.  This can be done more
   1.877 +  efficiently than a general multiplication, because many of the
   1.878 +  computation steps are redundant when squaring.  The inner product
   1.879 +  step is a bit more complicated, but we save a fair number of
   1.880 +  iterations of the multiplication loop.
   1.881 + */
   1.882 +
   1.883 +/* sqr = a^2;   Caller provides both a and tmp; */
   1.884 +mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
   1.885 +{
   1.886 +  mp_digit *pa;
   1.887 +  mp_digit d;
   1.888 +  mp_err   res;
   1.889 +  mp_size  ix;
   1.890 +  mp_int   tmp;
   1.891 +  int      count;
   1.892 +
   1.893 +  ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
   1.894 +
   1.895 +  if (a == sqr) {
   1.896 +    if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
   1.897 +      return res;
   1.898 +    a = &tmp;
   1.899 +  } else {
   1.900 +    DIGITS(&tmp) = 0;
   1.901 +    res = MP_OKAY;
   1.902 +  }
   1.903 +
   1.904 +  ix = 2 * MP_USED(a);
   1.905 +  if (ix > MP_ALLOC(sqr)) {
   1.906 +    MP_USED(sqr) = 1; 
   1.907 +    MP_CHECKOK( s_mp_grow(sqr, ix) );
   1.908 +  } 
   1.909 +  MP_USED(sqr) = ix;
   1.910 +  MP_DIGIT(sqr, 0) = 0;
   1.911 +
   1.912 +#ifdef NSS_USE_COMBA
   1.913 +  if (IS_POWER_OF_2(MP_USED(a))) {
   1.914 +      if (MP_USED(a) == 4) {
   1.915 +          s_mp_sqr_comba_4(a, sqr);
   1.916 +          goto CLEANUP;
   1.917 +      }
   1.918 +      if (MP_USED(a) == 8) {
   1.919 +          s_mp_sqr_comba_8(a, sqr);
   1.920 +          goto CLEANUP;
   1.921 +      }
   1.922 +      if (MP_USED(a) == 16) {
   1.923 +          s_mp_sqr_comba_16(a, sqr);
   1.924 +          goto CLEANUP;
   1.925 +      }
   1.926 +      if (MP_USED(a) == 32) {
   1.927 +          s_mp_sqr_comba_32(a, sqr);
   1.928 +          goto CLEANUP;
   1.929 +      } 
   1.930 +  }
   1.931 +#endif
   1.932 +
   1.933 +  pa = MP_DIGITS(a);
   1.934 +  count = MP_USED(a) - 1;
   1.935 +  if (count > 0) {
   1.936 +    d = *pa++;
   1.937 +    s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
   1.938 +    for (ix = 3; --count > 0; ix += 2) {
   1.939 +      d = *pa++;
   1.940 +      s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
   1.941 +    } /* for(ix ...) */
   1.942 +    MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
   1.943 +
   1.944 +    /* now sqr *= 2 */
   1.945 +    s_mp_mul_2(sqr);
   1.946 +  } else {
   1.947 +    MP_DIGIT(sqr, 1) = 0;
   1.948 +  }
   1.949 +
   1.950 +  /* now add the squares of the digits of a to sqr. */
   1.951 +  s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
   1.952 +
   1.953 +  SIGN(sqr) = ZPOS;
   1.954 +  s_mp_clamp(sqr);
   1.955 +
   1.956 +CLEANUP:
   1.957 +  mp_clear(&tmp);
   1.958 +  return res;
   1.959 +
   1.960 +} /* end mp_sqr() */
   1.961 +#endif
   1.962 +
   1.963 +/* }}} */
   1.964 +
   1.965 +/* {{{ mp_div(a, b, q, r) */
   1.966 +
   1.967 +/*
   1.968 +  mp_div(a, b, q, r)
   1.969 +
   1.970 +  Compute q = a / b and r = a mod b.  Input parameters may be re-used
   1.971 +  as output parameters.  If q or r is NULL, that portion of the
   1.972 +  computation will be discarded (although it will still be computed)
   1.973 + */
   1.974 +mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
   1.975 +{
   1.976 +  mp_err   res;
   1.977 +  mp_int   *pQ, *pR;
   1.978 +  mp_int   qtmp, rtmp, btmp;
   1.979 +  int      cmp;
   1.980 +  mp_sign  signA;
   1.981 +  mp_sign  signB;
   1.982 +
   1.983 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1.984 +  
   1.985 +  signA = MP_SIGN(a);
   1.986 +  signB = MP_SIGN(b);
   1.987 +
   1.988 +  if(mp_cmp_z(b) == MP_EQ)
   1.989 +    return MP_RANGE;
   1.990 +
   1.991 +  DIGITS(&qtmp) = 0;
   1.992 +  DIGITS(&rtmp) = 0;
   1.993 +  DIGITS(&btmp) = 0;
   1.994 +
   1.995 +  /* Set up some temporaries... */
   1.996 +  if (!r || r == a || r == b) {
   1.997 +    MP_CHECKOK( mp_init_copy(&rtmp, a) );
   1.998 +    pR = &rtmp;
   1.999 +  } else {
  1.1000 +    MP_CHECKOK( mp_copy(a, r) );
  1.1001 +    pR = r;
  1.1002 +  }
  1.1003 +
  1.1004 +  if (!q || q == a || q == b) {
  1.1005 +    MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a)) );
  1.1006 +    pQ = &qtmp;
  1.1007 +  } else {
  1.1008 +    MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
  1.1009 +    pQ = q;
  1.1010 +    mp_zero(pQ);
  1.1011 +  }
  1.1012 +
  1.1013 +  /*
  1.1014 +    If |a| <= |b|, we can compute the solution without division;
  1.1015 +    otherwise, we actually do the work required.
  1.1016 +   */
  1.1017 +  if ((cmp = s_mp_cmp(a, b)) <= 0) {
  1.1018 +    if (cmp) {
  1.1019 +      /* r was set to a above. */
  1.1020 +      mp_zero(pQ);
  1.1021 +    } else {
  1.1022 +      mp_set(pQ, 1);
  1.1023 +      mp_zero(pR);
  1.1024 +    }
  1.1025 +  } else {
  1.1026 +    MP_CHECKOK( mp_init_copy(&btmp, b) );
  1.1027 +    MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
  1.1028 +  }
  1.1029 +
  1.1030 +  /* Compute the signs for the output  */
  1.1031 +  MP_SIGN(pR) = signA;   /* Sr = Sa              */
  1.1032 +  /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
  1.1033 +  MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
  1.1034 +
  1.1035 +  if(s_mp_cmp_d(pQ, 0) == MP_EQ)
  1.1036 +    SIGN(pQ) = ZPOS;
  1.1037 +  if(s_mp_cmp_d(pR, 0) == MP_EQ)
  1.1038 +    SIGN(pR) = ZPOS;
  1.1039 +
  1.1040 +  /* Copy output, if it is needed      */
  1.1041 +  if(q && q != pQ) 
  1.1042 +    s_mp_exch(pQ, q);
  1.1043 +
  1.1044 +  if(r && r != pR) 
  1.1045 +    s_mp_exch(pR, r);
  1.1046 +
  1.1047 +CLEANUP:
  1.1048 +  mp_clear(&btmp);
  1.1049 +  mp_clear(&rtmp);
  1.1050 +  mp_clear(&qtmp);
  1.1051 +
  1.1052 +  return res;
  1.1053 +
  1.1054 +} /* end mp_div() */
  1.1055 +
  1.1056 +/* }}} */
  1.1057 +
  1.1058 +/* {{{ mp_div_2d(a, d, q, r) */
  1.1059 +
  1.1060 +mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
  1.1061 +{
  1.1062 +  mp_err  res;
  1.1063 +
  1.1064 +  ARGCHK(a != NULL, MP_BADARG);
  1.1065 +
  1.1066 +  if(q) {
  1.1067 +    if((res = mp_copy(a, q)) != MP_OKAY)
  1.1068 +      return res;
  1.1069 +  }
  1.1070 +  if(r) {
  1.1071 +    if((res = mp_copy(a, r)) != MP_OKAY)
  1.1072 +      return res;
  1.1073 +  }
  1.1074 +  if(q) {
  1.1075 +    s_mp_div_2d(q, d);
  1.1076 +  }
  1.1077 +  if(r) {
  1.1078 +    s_mp_mod_2d(r, d);
  1.1079 +  }
  1.1080 +
  1.1081 +  return MP_OKAY;
  1.1082 +
  1.1083 +} /* end mp_div_2d() */
  1.1084 +
  1.1085 +/* }}} */
  1.1086 +
  1.1087 +/* {{{ mp_expt(a, b, c) */
  1.1088 +
  1.1089 +/*
  1.1090 +  mp_expt(a, b, c)
  1.1091 +
  1.1092 +  Compute c = a ** b, that is, raise a to the b power.  Uses a
  1.1093 +  standard iterative square-and-multiply technique.
  1.1094 + */
  1.1095 +
  1.1096 +mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
  1.1097 +{
  1.1098 +  mp_int   s, x;
  1.1099 +  mp_err   res;
  1.1100 +  mp_digit d;
  1.1101 +  int      dig, bit;
  1.1102 +
  1.1103 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
  1.1104 +
  1.1105 +  if(mp_cmp_z(b) < 0)
  1.1106 +    return MP_RANGE;
  1.1107 +
  1.1108 +  if((res = mp_init(&s)) != MP_OKAY)
  1.1109 +    return res;
  1.1110 +
  1.1111 +  mp_set(&s, 1);
  1.1112 +
  1.1113 +  if((res = mp_init_copy(&x, a)) != MP_OKAY)
  1.1114 +    goto X;
  1.1115 +
  1.1116 +  /* Loop over low-order digits in ascending order */
  1.1117 +  for(dig = 0; dig < (USED(b) - 1); dig++) {
  1.1118 +    d = DIGIT(b, dig);
  1.1119 +
  1.1120 +    /* Loop over bits of each non-maximal digit */
  1.1121 +    for(bit = 0; bit < DIGIT_BIT; bit++) {
  1.1122 +      if(d & 1) {
  1.1123 +	if((res = s_mp_mul(&s, &x)) != MP_OKAY) 
  1.1124 +	  goto CLEANUP;
  1.1125 +      }
  1.1126 +
  1.1127 +      d >>= 1;
  1.1128 +      
  1.1129 +      if((res = s_mp_sqr(&x)) != MP_OKAY)
  1.1130 +	goto CLEANUP;
  1.1131 +    }
  1.1132 +  }
  1.1133 +
  1.1134 +  /* Consider now the last digit... */
  1.1135 +  d = DIGIT(b, dig);
  1.1136 +
  1.1137 +  while(d) {
  1.1138 +    if(d & 1) {
  1.1139 +      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
  1.1140 +	goto CLEANUP;
  1.1141 +    }
  1.1142 +
  1.1143 +    d >>= 1;
  1.1144 +
  1.1145 +    if((res = s_mp_sqr(&x)) != MP_OKAY)
  1.1146 +      goto CLEANUP;
  1.1147 +  }
  1.1148 +  
  1.1149 +  if(mp_iseven(b))
  1.1150 +    SIGN(&s) = SIGN(a);
  1.1151 +
  1.1152 +  res = mp_copy(&s, c);
  1.1153 +
  1.1154 +CLEANUP:
  1.1155 +  mp_clear(&x);
  1.1156 +X:
  1.1157 +  mp_clear(&s);
  1.1158 +
  1.1159 +  return res;
  1.1160 +
  1.1161 +} /* end mp_expt() */
  1.1162 +
  1.1163 +/* }}} */
  1.1164 +
  1.1165 +/* {{{ mp_2expt(a, k) */
  1.1166 +
  1.1167 +/* Compute a = 2^k */
  1.1168 +
  1.1169 +mp_err mp_2expt(mp_int *a, mp_digit k)
  1.1170 +{
  1.1171 +  ARGCHK(a != NULL, MP_BADARG);
  1.1172 +
  1.1173 +  return s_mp_2expt(a, k);
  1.1174 +
  1.1175 +} /* end mp_2expt() */
  1.1176 +
  1.1177 +/* }}} */
  1.1178 +
  1.1179 +/* {{{ mp_mod(a, m, c) */
  1.1180 +
  1.1181 +/*
  1.1182 +  mp_mod(a, m, c)
  1.1183 +
  1.1184 +  Compute c = a (mod m).  Result will always be 0 <= c < m.
  1.1185 + */
  1.1186 +
  1.1187 +mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
  1.1188 +{
  1.1189 +  mp_err  res;
  1.1190 +  int     mag;
  1.1191 +
  1.1192 +  ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
  1.1193 +
  1.1194 +  if(SIGN(m) == NEG)
  1.1195 +    return MP_RANGE;
  1.1196 +
  1.1197 +  /*
  1.1198 +     If |a| > m, we need to divide to get the remainder and take the
  1.1199 +     absolute value.  
  1.1200 +
  1.1201 +     If |a| < m, we don't need to do any division, just copy and adjust
  1.1202 +     the sign (if a is negative).
  1.1203 +
  1.1204 +     If |a| == m, we can simply set the result to zero.
  1.1205 +
  1.1206 +     This order is intended to minimize the average path length of the
  1.1207 +     comparison chain on common workloads -- the most frequent cases are
  1.1208 +     that |a| != m, so we do those first.
  1.1209 +   */
  1.1210 +  if((mag = s_mp_cmp(a, m)) > 0) {
  1.1211 +    if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
  1.1212 +      return res;
  1.1213 +    
  1.1214 +    if(SIGN(c) == NEG) {
  1.1215 +      if((res = mp_add(c, m, c)) != MP_OKAY)
  1.1216 +	return res;
  1.1217 +    }
  1.1218 +
  1.1219 +  } else if(mag < 0) {
  1.1220 +    if((res = mp_copy(a, c)) != MP_OKAY)
  1.1221 +      return res;
  1.1222 +
  1.1223 +    if(mp_cmp_z(a) < 0) {
  1.1224 +      if((res = mp_add(c, m, c)) != MP_OKAY)
  1.1225 +	return res;
  1.1226 +
  1.1227 +    }
  1.1228 +    
  1.1229 +  } else {
  1.1230 +    mp_zero(c);
  1.1231 +
  1.1232 +  }
  1.1233 +
  1.1234 +  return MP_OKAY;
  1.1235 +
  1.1236 +} /* end mp_mod() */
  1.1237 +
  1.1238 +/* }}} */
  1.1239 +
  1.1240 +/* {{{ mp_mod_d(a, d, c) */
  1.1241 +
  1.1242 +/*
  1.1243 +  mp_mod_d(a, d, c)
  1.1244 +
  1.1245 +  Compute c = a (mod d).  Result will always be 0 <= c < d
  1.1246 + */
  1.1247 +mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
  1.1248 +{
  1.1249 +  mp_err   res;
  1.1250 +  mp_digit rem;
  1.1251 +
  1.1252 +  ARGCHK(a != NULL && c != NULL, MP_BADARG);
  1.1253 +
  1.1254 +  if(s_mp_cmp_d(a, d) > 0) {
  1.1255 +    if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
  1.1256 +      return res;
  1.1257 +
  1.1258 +  } else {
  1.1259 +    if(SIGN(a) == NEG)
  1.1260 +      rem = d - DIGIT(a, 0);
  1.1261 +    else
  1.1262 +      rem = DIGIT(a, 0);
  1.1263 +  }
  1.1264 +
  1.1265 +  if(c)
  1.1266 +    *c = rem;
  1.1267 +
  1.1268 +  return MP_OKAY;
  1.1269 +
  1.1270 +} /* end mp_mod_d() */
  1.1271 +
  1.1272 +/* }}} */
  1.1273 +
  1.1274 +/* {{{ mp_sqrt(a, b) */
  1.1275 +
  1.1276 +/*
  1.1277 +  mp_sqrt(a, b)
  1.1278 +
  1.1279 +  Compute the integer square root of a, and store the result in b.
  1.1280 +  Uses an integer-arithmetic version of Newton's iterative linear
  1.1281 +  approximation technique to determine this value; the result has the
  1.1282 +  following two properties:
  1.1283 +
  1.1284 +     b^2 <= a
  1.1285 +     (b+1)^2 >= a
  1.1286 +
  1.1287 +  It is a range error to pass a negative value.
  1.1288 + */
  1.1289 +mp_err mp_sqrt(const mp_int *a, mp_int *b)
  1.1290 +{
  1.1291 +  mp_int   x, t;
  1.1292 +  mp_err   res;
  1.1293 +  mp_size  used;
  1.1294 +
  1.1295 +  ARGCHK(a != NULL && b != NULL, MP_BADARG);
  1.1296 +
  1.1297 +  /* Cannot take square root of a negative value */
  1.1298 +  if(SIGN(a) == NEG)
  1.1299 +    return MP_RANGE;
  1.1300 +
  1.1301 +  /* Special cases for zero and one, trivial     */
  1.1302 +  if(mp_cmp_d(a, 1) <= 0)
  1.1303 +    return mp_copy(a, b);
  1.1304 +    
  1.1305 +  /* Initialize the temporaries we'll use below  */
  1.1306 +  if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
  1.1307 +    return res;
  1.1308 +
  1.1309 +  /* Compute an initial guess for the iteration as a itself */
  1.1310 +  if((res = mp_init_copy(&x, a)) != MP_OKAY)
  1.1311 +    goto X;
  1.1312 +
  1.1313 +  used = MP_USED(&x);
  1.1314 +  if (used > 1) {
  1.1315 +    s_mp_rshd(&x, used / 2);
  1.1316 +  }
  1.1317 +
  1.1318 +  for(;;) {
  1.1319 +    /* t = (x * x) - a */
  1.1320 +    mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
  1.1321 +    if((res = mp_sqr(&t, &t)) != MP_OKAY ||
  1.1322 +       (res = mp_sub(&t, a, &t)) != MP_OKAY)
  1.1323 +      goto CLEANUP;
  1.1324 +
  1.1325 +    /* t = t / 2x       */
  1.1326 +    s_mp_mul_2(&x);
  1.1327 +    if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
  1.1328 +      goto CLEANUP;
  1.1329 +    s_mp_div_2(&x);
  1.1330 +
  1.1331 +    /* Terminate the loop, if the quotient is zero */
  1.1332 +    if(mp_cmp_z(&t) == MP_EQ)
  1.1333 +      break;
  1.1334 +
  1.1335 +    /* x = x - t       */
  1.1336 +    if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
  1.1337 +      goto CLEANUP;
  1.1338 +
  1.1339 +  }
  1.1340 +
  1.1341 +  /* Copy result to output parameter */
  1.1342 +  mp_sub_d(&x, 1, &x);
  1.1343 +  s_mp_exch(&x, b);
  1.1344 +
  1.1345 + CLEANUP:
  1.1346 +  mp_clear(&x);
  1.1347 + X:
  1.1348 +  mp_clear(&t); 
  1.1349 +
  1.1350 +  return res;
  1.1351 +
  1.1352 +} /* end mp_sqrt() */
  1.1353 +
  1.1354 +/* }}} */
  1.1355 +
  1.1356 +/* }}} */
  1.1357 +
  1.1358 +/*------------------------------------------------------------------------*/
  1.1359 +/* {{{ Modular arithmetic */
  1.1360 +
  1.1361 +#if MP_MODARITH
  1.1362 +/* {{{ mp_addmod(a, b, m, c) */
  1.1363 +
  1.1364 +/*
  1.1365 +  mp_addmod(a, b, m, c)
  1.1366 +
  1.1367 +  Compute c = (a + b) mod m
  1.1368 + */
  1.1369 +
  1.1370 +mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
  1.1371 +{
  1.1372 +  mp_err  res;
  1.1373 +
  1.1374 +  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
  1.1375 +
  1.1376 +  if((res = mp_add(a, b, c)) != MP_OKAY)
  1.1377 +    return res;
  1.1378 +  if((res = mp_mod(c, m, c)) != MP_OKAY)
  1.1379 +    return res;
  1.1380 +
  1.1381 +  return MP_OKAY;
  1.1382 +
  1.1383 +}
  1.1384 +
  1.1385 +/* }}} */
  1.1386 +
  1.1387 +/* {{{ mp_submod(a, b, m, c) */
  1.1388 +
  1.1389 +/*
  1.1390 +  mp_submod(a, b, m, c)
  1.1391 +
  1.1392 +  Compute c = (a - b) mod m
  1.1393 + */
  1.1394 +
  1.1395 +mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
  1.1396 +{
  1.1397 +  mp_err  res;
  1.1398 +
  1.1399 +  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
  1.1400 +
  1.1401 +  if((res = mp_sub(a, b, c)) != MP_OKAY)
  1.1402 +    return res;
  1.1403 +  if((res = mp_mod(c, m, c)) != MP_OKAY)
  1.1404 +    return res;
  1.1405 +
  1.1406 +  return MP_OKAY;
  1.1407 +
  1.1408 +}
  1.1409 +
  1.1410 +/* }}} */
  1.1411 +
  1.1412 +/* {{{ mp_mulmod(a, b, m, c) */
  1.1413 +
  1.1414 +/*
  1.1415 +  mp_mulmod(a, b, m, c)
  1.1416 +
  1.1417 +  Compute c = (a * b) mod m
  1.1418 + */
  1.1419 +
  1.1420 +mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
  1.1421 +{
  1.1422 +  mp_err  res;
  1.1423 +
  1.1424 +  ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
  1.1425 +
  1.1426 +  if((res = mp_mul(a, b, c)) != MP_OKAY)
  1.1427 +    return res;
  1.1428 +  if((res = mp_mod(c, m, c)) != MP_OKAY)
  1.1429 +    return res;
  1.1430 +
  1.1431 +  return MP_OKAY;
  1.1432 +
  1.1433 +}
  1.1434 +
  1.1435 +/* }}} */
  1.1436 +
  1.1437 +/* {{{ mp_sqrmod(a, m, c) */
  1.1438 +
  1.1439 +#if MP_SQUARE
  1.1440 +mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
  1.1441 +{
  1.1442 +  mp_err  res;
  1.1443 +
  1.1444 +  ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
  1.1445 +
  1.1446 +  if((res = mp_sqr(a, c)) != MP_OKAY)
  1.1447 +    return res;
  1.1448 +  if((res = mp_mod(c, m, c)) != MP_OKAY)
  1.1449 +    return res;
  1.1450 +
  1.1451 +  return MP_OKAY;
  1.1452 +
  1.1453 +} /* end mp_sqrmod() */
  1.1454 +#endif
  1.1455 +
  1.1456 +/* }}} */
  1.1457 +
  1.1458 +/* {{{ s_mp_exptmod(a, b, m, c) */
  1.1459 +
  1.1460 +/*
  1.1461 +  s_mp_exptmod(a, b, m, c)
  1.1462 +
  1.1463 +  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
  1.1464 +  method with modular reductions at each step. (This is basically the
  1.1465 +  same code as mp_expt(), except for the addition of the reductions)
  1.1466 +  
  1.1467 +  The modular reductions are done using Barrett's algorithm (see
  1.1468 +  s_mp_reduce() below for details)
  1.1469 + */
  1.1470 +
  1.1471 +mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
  1.1472 +{
  1.1473 +  mp_int   s, x, mu;
  1.1474 +  mp_err   res;
  1.1475 +  mp_digit d;
  1.1476 +  int      dig, bit;
  1.1477 +
  1.1478 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
  1.1479 +
  1.1480 +  if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
  1.1481 +    return MP_RANGE;
  1.1482 +
  1.1483 +  if((res = mp_init(&s)) != MP_OKAY)
  1.1484 +    return res;
  1.1485 +  if((res = mp_init_copy(&x, a)) != MP_OKAY ||
  1.1486 +     (res = mp_mod(&x, m, &x)) != MP_OKAY)
  1.1487 +    goto X;
  1.1488 +  if((res = mp_init(&mu)) != MP_OKAY)
  1.1489 +    goto MU;
  1.1490 +
  1.1491 +  mp_set(&s, 1);
  1.1492 +
  1.1493 +  /* mu = b^2k / m */
  1.1494 +  s_mp_add_d(&mu, 1); 
  1.1495 +  s_mp_lshd(&mu, 2 * USED(m));
  1.1496 +  if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
  1.1497 +    goto CLEANUP;
  1.1498 +
  1.1499 +  /* Loop over digits of b in ascending order, except highest order */
  1.1500 +  for(dig = 0; dig < (USED(b) - 1); dig++) {
  1.1501 +    d = DIGIT(b, dig);
  1.1502 +
  1.1503 +    /* Loop over the bits of the lower-order digits */
  1.1504 +    for(bit = 0; bit < DIGIT_BIT; bit++) {
  1.1505 +      if(d & 1) {
  1.1506 +	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
  1.1507 +	  goto CLEANUP;
  1.1508 +	if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
  1.1509 +	  goto CLEANUP;
  1.1510 +      }
  1.1511 +
  1.1512 +      d >>= 1;
  1.1513 +
  1.1514 +      if((res = s_mp_sqr(&x)) != MP_OKAY)
  1.1515 +	goto CLEANUP;
  1.1516 +      if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
  1.1517 +	goto CLEANUP;
  1.1518 +    }
  1.1519 +  }
  1.1520 +
  1.1521 +  /* Now do the last digit... */
  1.1522 +  d = DIGIT(b, dig);
  1.1523 +
  1.1524 +  while(d) {
  1.1525 +    if(d & 1) {
  1.1526 +      if((res = s_mp_mul(&s, &x)) != MP_OKAY)
  1.1527 +	goto CLEANUP;
  1.1528 +      if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
  1.1529 +	goto CLEANUP;
  1.1530 +    }
  1.1531 +
  1.1532 +    d >>= 1;
  1.1533 +
  1.1534 +    if((res = s_mp_sqr(&x)) != MP_OKAY)
  1.1535 +      goto CLEANUP;
  1.1536 +    if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
  1.1537 +      goto CLEANUP;
  1.1538 +  }
  1.1539 +
  1.1540 +  s_mp_exch(&s, c);
  1.1541 +
  1.1542 + CLEANUP:
  1.1543 +  mp_clear(&mu);
  1.1544 + MU:
  1.1545 +  mp_clear(&x);
  1.1546 + X:
  1.1547 +  mp_clear(&s);
  1.1548 +
  1.1549 +  return res;
  1.1550 +
  1.1551 +} /* end s_mp_exptmod() */
  1.1552 +
  1.1553 +/* }}} */
  1.1554 +
  1.1555 +/* {{{ mp_exptmod_d(a, d, m, c) */
  1.1556 +
  1.1557 +mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
  1.1558 +{
  1.1559 +  mp_int   s, x;
  1.1560 +  mp_err   res;
  1.1561 +
  1.1562 +  ARGCHK(a != NULL && c != NULL, MP_BADARG);
  1.1563 +
  1.1564 +  if((res = mp_init(&s)) != MP_OKAY)
  1.1565 +    return res;
  1.1566 +  if((res = mp_init_copy(&x, a)) != MP_OKAY)
  1.1567 +    goto X;
  1.1568 +
  1.1569 +  mp_set(&s, 1);
  1.1570 +
  1.1571 +  while(d != 0) {
  1.1572 +    if(d & 1) {
  1.1573 +      if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
  1.1574 +	 (res = mp_mod(&s, m, &s)) != MP_OKAY)
  1.1575 +	goto CLEANUP;
  1.1576 +    }
  1.1577 +
  1.1578 +    d /= 2;
  1.1579 +
  1.1580 +    if((res = s_mp_sqr(&x)) != MP_OKAY ||
  1.1581 +       (res = mp_mod(&x, m, &x)) != MP_OKAY)
  1.1582 +      goto CLEANUP;
  1.1583 +  }
  1.1584 +
  1.1585 +  s_mp_exch(&s, c);
  1.1586 +
  1.1587 +CLEANUP:
  1.1588 +  mp_clear(&x);
  1.1589 +X:
  1.1590 +  mp_clear(&s);
  1.1591 +
  1.1592 +  return res;
  1.1593 +
  1.1594 +} /* end mp_exptmod_d() */
  1.1595 +
  1.1596 +/* }}} */
  1.1597 +#endif /* if MP_MODARITH */
  1.1598 +
  1.1599 +/* }}} */
  1.1600 +
  1.1601 +/*------------------------------------------------------------------------*/
  1.1602 +/* {{{ Comparison functions */
  1.1603 +
  1.1604 +/* {{{ mp_cmp_z(a) */
  1.1605 +
  1.1606 +/*
  1.1607 +  mp_cmp_z(a)
  1.1608 +
  1.1609 +  Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
  1.1610 + */
  1.1611 +
  1.1612 +int    mp_cmp_z(const mp_int *a)
  1.1613 +{
  1.1614 +  if(SIGN(a) == NEG)
  1.1615 +    return MP_LT;
  1.1616 +  else if(USED(a) == 1 && DIGIT(a, 0) == 0)
  1.1617 +    return MP_EQ;
  1.1618 +  else
  1.1619 +    return MP_GT;
  1.1620 +
  1.1621 +} /* end mp_cmp_z() */
  1.1622 +
  1.1623 +/* }}} */
  1.1624 +
  1.1625 +/* {{{ mp_cmp_d(a, d) */
  1.1626 +
  1.1627 +/*
  1.1628 +  mp_cmp_d(a, d)
  1.1629 +
  1.1630 +  Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
  1.1631 + */
  1.1632 +
  1.1633 +int    mp_cmp_d(const mp_int *a, mp_digit d)
  1.1634 +{
  1.1635 +  ARGCHK(a != NULL, MP_EQ);
  1.1636 +
  1.1637 +  if(SIGN(a) == NEG)
  1.1638 +    return MP_LT;
  1.1639 +
  1.1640 +  return s_mp_cmp_d(a, d);
  1.1641 +
  1.1642 +} /* end mp_cmp_d() */
  1.1643 +
  1.1644 +/* }}} */
  1.1645 +
  1.1646 +/* {{{ mp_cmp(a, b) */
  1.1647 +
  1.1648 +int    mp_cmp(const mp_int *a, const mp_int *b)
  1.1649 +{
  1.1650 +  ARGCHK(a != NULL && b != NULL, MP_EQ);
  1.1651 +
  1.1652 +  if(SIGN(a) == SIGN(b)) {
  1.1653 +    int  mag;
  1.1654 +
  1.1655 +    if((mag = s_mp_cmp(a, b)) == MP_EQ)
  1.1656 +      return MP_EQ;
  1.1657 +
  1.1658 +    if(SIGN(a) == ZPOS)
  1.1659 +      return mag;
  1.1660 +    else
  1.1661 +      return -mag;
  1.1662 +
  1.1663 +  } else if(SIGN(a) == ZPOS) {
  1.1664 +    return MP_GT;
  1.1665 +  } else {
  1.1666 +    return MP_LT;
  1.1667 +  }
  1.1668 +
  1.1669 +} /* end mp_cmp() */
  1.1670 +
  1.1671 +/* }}} */
  1.1672 +
  1.1673 +/* {{{ mp_cmp_mag(a, b) */
  1.1674 +
  1.1675 +/*
  1.1676 +  mp_cmp_mag(a, b)
  1.1677 +
  1.1678 +  Compares |a| <=> |b|, and returns an appropriate comparison result
  1.1679 + */
  1.1680 +
  1.1681 +int    mp_cmp_mag(mp_int *a, mp_int *b)
  1.1682 +{
  1.1683 +  ARGCHK(a != NULL && b != NULL, MP_EQ);
  1.1684 +
  1.1685 +  return s_mp_cmp(a, b);
  1.1686 +
  1.1687 +} /* end mp_cmp_mag() */
  1.1688 +
  1.1689 +/* }}} */
  1.1690 +
  1.1691 +/* {{{ mp_cmp_int(a, z) */
  1.1692 +
  1.1693 +/*
  1.1694 +  This just converts z to an mp_int, and uses the existing comparison
  1.1695 +  routines.  This is sort of inefficient, but it's not clear to me how
  1.1696 +  frequently this wil get used anyway.  For small positive constants,
  1.1697 +  you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
  1.1698 + */
  1.1699 +int    mp_cmp_int(const mp_int *a, long z)
  1.1700 +{
  1.1701 +  mp_int  tmp;
  1.1702 +  int     out;
  1.1703 +
  1.1704 +  ARGCHK(a != NULL, MP_EQ);
  1.1705 +  
  1.1706 +  mp_init(&tmp); mp_set_int(&tmp, z);
  1.1707 +  out = mp_cmp(a, &tmp);
  1.1708 +  mp_clear(&tmp);
  1.1709 +
  1.1710 +  return out;
  1.1711 +
  1.1712 +} /* end mp_cmp_int() */
  1.1713 +
  1.1714 +/* }}} */
  1.1715 +
  1.1716 +/* {{{ mp_isodd(a) */
  1.1717 +
  1.1718 +/*
  1.1719 +  mp_isodd(a)
  1.1720 +
  1.1721 +  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
  1.1722 + */
  1.1723 +int    mp_isodd(const mp_int *a)
  1.1724 +{
  1.1725 +  ARGCHK(a != NULL, 0);
  1.1726 +
  1.1727 +  return (int)(DIGIT(a, 0) & 1);
  1.1728 +
  1.1729 +} /* end mp_isodd() */
  1.1730 +
  1.1731 +/* }}} */
  1.1732 +
  1.1733 +/* {{{ mp_iseven(a) */
  1.1734 +
  1.1735 +int    mp_iseven(const mp_int *a)
  1.1736 +{
  1.1737 +  return !mp_isodd(a);
  1.1738 +
  1.1739 +} /* end mp_iseven() */
  1.1740 +
  1.1741 +/* }}} */
  1.1742 +
  1.1743 +/* }}} */
  1.1744 +
  1.1745 +/*------------------------------------------------------------------------*/
  1.1746 +/* {{{ Number theoretic functions */
  1.1747 +
  1.1748 +#if MP_NUMTH
  1.1749 +/* {{{ mp_gcd(a, b, c) */
  1.1750 +
  1.1751 +/*
  1.1752 +  Like the old mp_gcd() function, except computes the GCD using the
  1.1753 +  binary algorithm due to Josef Stein in 1961 (via Knuth).
  1.1754 + */
  1.1755 +mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
  1.1756 +{
  1.1757 +  mp_err   res;
  1.1758 +  mp_int   u, v, t;
  1.1759 +  mp_size  k = 0;
  1.1760 +
  1.1761 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
  1.1762 +
  1.1763 +  if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
  1.1764 +      return MP_RANGE;
  1.1765 +  if(mp_cmp_z(a) == MP_EQ) {
  1.1766 +    return mp_copy(b, c);
  1.1767 +  } else if(mp_cmp_z(b) == MP_EQ) {
  1.1768 +    return mp_copy(a, c);
  1.1769 +  }
  1.1770 +
  1.1771 +  if((res = mp_init(&t)) != MP_OKAY)
  1.1772 +    return res;
  1.1773 +  if((res = mp_init_copy(&u, a)) != MP_OKAY)
  1.1774 +    goto U;
  1.1775 +  if((res = mp_init_copy(&v, b)) != MP_OKAY)
  1.1776 +    goto V;
  1.1777 +
  1.1778 +  SIGN(&u) = ZPOS;
  1.1779 +  SIGN(&v) = ZPOS;
  1.1780 +
  1.1781 +  /* Divide out common factors of 2 until at least 1 of a, b is even */
  1.1782 +  while(mp_iseven(&u) && mp_iseven(&v)) {
  1.1783 +    s_mp_div_2(&u);
  1.1784 +    s_mp_div_2(&v);
  1.1785 +    ++k;
  1.1786 +  }
  1.1787 +
  1.1788 +  /* Initialize t */
  1.1789 +  if(mp_isodd(&u)) {
  1.1790 +    if((res = mp_copy(&v, &t)) != MP_OKAY)
  1.1791 +      goto CLEANUP;
  1.1792 +    
  1.1793 +    /* t = -v */
  1.1794 +    if(SIGN(&v) == ZPOS)
  1.1795 +      SIGN(&t) = NEG;
  1.1796 +    else
  1.1797 +      SIGN(&t) = ZPOS;
  1.1798 +    
  1.1799 +  } else {
  1.1800 +    if((res = mp_copy(&u, &t)) != MP_OKAY)
  1.1801 +      goto CLEANUP;
  1.1802 +
  1.1803 +  }
  1.1804 +
  1.1805 +  for(;;) {
  1.1806 +    while(mp_iseven(&t)) {
  1.1807 +      s_mp_div_2(&t);
  1.1808 +    }
  1.1809 +
  1.1810 +    if(mp_cmp_z(&t) == MP_GT) {
  1.1811 +      if((res = mp_copy(&t, &u)) != MP_OKAY)
  1.1812 +	goto CLEANUP;
  1.1813 +
  1.1814 +    } else {
  1.1815 +      if((res = mp_copy(&t, &v)) != MP_OKAY)
  1.1816 +	goto CLEANUP;
  1.1817 +
  1.1818 +      /* v = -t */
  1.1819 +      if(SIGN(&t) == ZPOS)
  1.1820 +	SIGN(&v) = NEG;
  1.1821 +      else
  1.1822 +	SIGN(&v) = ZPOS;
  1.1823 +    }
  1.1824 +
  1.1825 +    if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
  1.1826 +      goto CLEANUP;
  1.1827 +
  1.1828 +    if(s_mp_cmp_d(&t, 0) == MP_EQ)
  1.1829 +      break;
  1.1830 +  }
  1.1831 +
  1.1832 +  s_mp_2expt(&v, k);       /* v = 2^k   */
  1.1833 +  res = mp_mul(&u, &v, c); /* c = u * v */
  1.1834 +
  1.1835 + CLEANUP:
  1.1836 +  mp_clear(&v);
  1.1837 + V:
  1.1838 +  mp_clear(&u);
  1.1839 + U:
  1.1840 +  mp_clear(&t);
  1.1841 +
  1.1842 +  return res;
  1.1843 +
  1.1844 +} /* end mp_gcd() */
  1.1845 +
  1.1846 +/* }}} */
  1.1847 +
  1.1848 +/* {{{ mp_lcm(a, b, c) */
  1.1849 +
  1.1850 +/* We compute the least common multiple using the rule:
  1.1851 +
  1.1852 +   ab = [a, b](a, b)
  1.1853 +
  1.1854 +   ... by computing the product, and dividing out the gcd.
  1.1855 + */
  1.1856 +
  1.1857 +mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
  1.1858 +{
  1.1859 +  mp_int  gcd, prod;
  1.1860 +  mp_err  res;
  1.1861 +
  1.1862 +  ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
  1.1863 +
  1.1864 +  /* Set up temporaries */
  1.1865 +  if((res = mp_init(&gcd)) != MP_OKAY)
  1.1866 +    return res;
  1.1867 +  if((res = mp_init(&prod)) != MP_OKAY)
  1.1868 +    goto GCD;
  1.1869 +
  1.1870 +  if((res = mp_mul(a, b, &prod)) != MP_OKAY)
  1.1871 +    goto CLEANUP;
  1.1872 +  if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
  1.1873 +    goto CLEANUP;
  1.1874 +
  1.1875 +  res = mp_div(&prod, &gcd, c, NULL);
  1.1876 +
  1.1877 + CLEANUP:
  1.1878 +  mp_clear(&prod);
  1.1879 + GCD:
  1.1880 +  mp_clear(&gcd);
  1.1881 +
  1.1882 +  return res;
  1.1883 +
  1.1884 +} /* end mp_lcm() */
  1.1885 +
  1.1886 +/* }}} */
  1.1887 +
  1.1888 +/* {{{ mp_xgcd(a, b, g, x, y) */
  1.1889 +
  1.1890 +/*
  1.1891 +  mp_xgcd(a, b, g, x, y)
  1.1892 +
  1.1893 +  Compute g = (a, b) and values x and y satisfying Bezout's identity
  1.1894 +  (that is, ax + by = g).  This uses the binary extended GCD algorithm
  1.1895 +  based on the Stein algorithm used for mp_gcd()
  1.1896 +  See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
  1.1897 + */
  1.1898 +
  1.1899 +mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
  1.1900 +{
  1.1901 +  mp_int   gx, xc, yc, u, v, A, B, C, D;
  1.1902 +  mp_int  *clean[9];
  1.1903 +  mp_err   res;
  1.1904 +  int      last = -1;
  1.1905 +
  1.1906 +  if(mp_cmp_z(b) == 0)
  1.1907 +    return MP_RANGE;
  1.1908 +
  1.1909 +  /* Initialize all these variables we need */
  1.1910 +  MP_CHECKOK( mp_init(&u) );
  1.1911 +  clean[++last] = &u;
  1.1912 +  MP_CHECKOK( mp_init(&v) );
  1.1913 +  clean[++last] = &v;
  1.1914 +  MP_CHECKOK( mp_init(&gx) );
  1.1915 +  clean[++last] = &gx;
  1.1916 +  MP_CHECKOK( mp_init(&A) );
  1.1917 +  clean[++last] = &A;
  1.1918 +  MP_CHECKOK( mp_init(&B) );
  1.1919 +  clean[++last] = &B;
  1.1920 +  MP_CHECKOK( mp_init(&C) );
  1.1921 +  clean[++last] = &C;
  1.1922 +  MP_CHECKOK( mp_init(&D) );
  1.1923 +  clean[++last] = &D;
  1.1924 +  MP_CHECKOK( mp_init_copy(&xc, a) );
  1.1925 +  clean[++last] = &xc;
  1.1926 +  mp_abs(&xc, &xc);
  1.1927 +  MP_CHECKOK( mp_init_copy(&yc, b) );
  1.1928 +  clean[++last] = &yc;
  1.1929 +  mp_abs(&yc, &yc);
  1.1930 +
  1.1931 +  mp_set(&gx, 1);
  1.1932 +
  1.1933 +  /* Divide by two until at least one of them is odd */
  1.1934 +  while(mp_iseven(&xc) && mp_iseven(&yc)) {
  1.1935 +    mp_size nx = mp_trailing_zeros(&xc);
  1.1936 +    mp_size ny = mp_trailing_zeros(&yc);
  1.1937 +    mp_size n  = MP_MIN(nx, ny);
  1.1938 +    s_mp_div_2d(&xc,n);
  1.1939 +    s_mp_div_2d(&yc,n);
  1.1940 +    MP_CHECKOK( s_mp_mul_2d(&gx,n) );
  1.1941 +  }
  1.1942 +
  1.1943 +  mp_copy(&xc, &u);
  1.1944 +  mp_copy(&yc, &v);
  1.1945 +  mp_set(&A, 1); mp_set(&D, 1);
  1.1946 +
  1.1947 +  /* Loop through binary GCD algorithm */
  1.1948 +  do {
  1.1949 +    while(mp_iseven(&u)) {
  1.1950 +      s_mp_div_2(&u);
  1.1951 +
  1.1952 +      if(mp_iseven(&A) && mp_iseven(&B)) {
  1.1953 +	s_mp_div_2(&A); s_mp_div_2(&B);
  1.1954 +      } else {
  1.1955 +	MP_CHECKOK( mp_add(&A, &yc, &A) );
  1.1956 +	s_mp_div_2(&A);
  1.1957 +	MP_CHECKOK( mp_sub(&B, &xc, &B) );
  1.1958 +	s_mp_div_2(&B);
  1.1959 +      }
  1.1960 +    }
  1.1961 +
  1.1962 +    while(mp_iseven(&v)) {
  1.1963 +      s_mp_div_2(&v);
  1.1964 +
  1.1965 +      if(mp_iseven(&C) && mp_iseven(&D)) {
  1.1966 +	s_mp_div_2(&C); s_mp_div_2(&D);
  1.1967 +      } else {
  1.1968 +	MP_CHECKOK( mp_add(&C, &yc, &C) );
  1.1969 +	s_mp_div_2(&C);
  1.1970 +	MP_CHECKOK( mp_sub(&D, &xc, &D) );
  1.1971 +	s_mp_div_2(&D);
  1.1972 +      }
  1.1973 +    }
  1.1974 +
  1.1975 +    if(mp_cmp(&u, &v) >= 0) {
  1.1976 +      MP_CHECKOK( mp_sub(&u, &v, &u) );
  1.1977 +      MP_CHECKOK( mp_sub(&A, &C, &A) );
  1.1978 +      MP_CHECKOK( mp_sub(&B, &D, &B) );
  1.1979 +    } else {
  1.1980 +      MP_CHECKOK( mp_sub(&v, &u, &v) );
  1.1981 +      MP_CHECKOK( mp_sub(&C, &A, &C) );
  1.1982 +      MP_CHECKOK( mp_sub(&D, &B, &D) );
  1.1983 +    }
  1.1984 +  } while (mp_cmp_z(&u) != 0);
  1.1985 +
  1.1986 +  /* copy results to output */
  1.1987 +  if(x)
  1.1988 +    MP_CHECKOK( mp_copy(&C, x) );
  1.1989 +
  1.1990 +  if(y)
  1.1991 +    MP_CHECKOK( mp_copy(&D, y) );
  1.1992 +      
  1.1993 +  if(g)
  1.1994 +    MP_CHECKOK( mp_mul(&gx, &v, g) );
  1.1995 +
  1.1996 + CLEANUP:
  1.1997 +  while(last >= 0)
  1.1998 +    mp_clear(clean[last--]);
  1.1999 +
  1.2000 +  return res;
  1.2001 +
  1.2002 +} /* end mp_xgcd() */
  1.2003 +
  1.2004 +/* }}} */
  1.2005 +
  1.2006 +mp_size mp_trailing_zeros(const mp_int *mp)
  1.2007 +{
  1.2008 +  mp_digit d;
  1.2009 +  mp_size  n = 0;
  1.2010 +  int      ix;
  1.2011 +
  1.2012 +  if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
  1.2013 +    return n;
  1.2014 +
  1.2015 +  for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
  1.2016 +    n += MP_DIGIT_BIT;
  1.2017 +  if (!d)
  1.2018 +    return 0;	/* shouldn't happen, but ... */
  1.2019 +#if !defined(MP_USE_UINT_DIGIT)
  1.2020 +  if (!(d & 0xffffffffU)) {
  1.2021 +    d >>= 32;
  1.2022 +    n  += 32;
  1.2023 +  }
  1.2024 +#endif
  1.2025 +  if (!(d & 0xffffU)) {
  1.2026 +    d >>= 16;
  1.2027 +    n  += 16;
  1.2028 +  }
  1.2029 +  if (!(d & 0xffU)) {
  1.2030 +    d >>= 8;
  1.2031 +    n  += 8;
  1.2032 +  }
  1.2033 +  if (!(d & 0xfU)) {
  1.2034 +    d >>= 4;
  1.2035 +    n  += 4;
  1.2036 +  }
  1.2037 +  if (!(d & 0x3U)) {
  1.2038 +    d >>= 2;
  1.2039 +    n  += 2;
  1.2040 +  }
  1.2041 +  if (!(d & 0x1U)) {
  1.2042 +    d >>= 1;
  1.2043 +    n  += 1;
  1.2044 +  }
  1.2045 +#if MP_ARGCHK == 2
  1.2046 +  assert(0 != (d & 1));
  1.2047 +#endif
  1.2048 +  return n;
  1.2049 +}
  1.2050 +
  1.2051 +/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
  1.2052 +** Returns k (positive) or error (negative).
  1.2053 +** This technique from the paper "Fast Modular Reciprocals" (unpublished)
  1.2054 +** by Richard Schroeppel (a.k.a. Captain Nemo).
  1.2055 +*/
  1.2056 +mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
  1.2057 +{
  1.2058 +  mp_err res;
  1.2059 +  mp_err k    = 0;
  1.2060 +  mp_int d, f, g;
  1.2061 +
  1.2062 +  ARGCHK(a && p && c, MP_BADARG);
  1.2063 +
  1.2064 +  MP_DIGITS(&d) = 0;
  1.2065 +  MP_DIGITS(&f) = 0;
  1.2066 +  MP_DIGITS(&g) = 0;
  1.2067 +  MP_CHECKOK( mp_init(&d) );
  1.2068 +  MP_CHECKOK( mp_init_copy(&f, a) );	/* f = a */
  1.2069 +  MP_CHECKOK( mp_init_copy(&g, p) );	/* g = p */
  1.2070 +
  1.2071 +  mp_set(c, 1);
  1.2072 +  mp_zero(&d);
  1.2073 +
  1.2074 +  if (mp_cmp_z(&f) == 0) {
  1.2075 +    res = MP_UNDEF;
  1.2076 +  } else 
  1.2077 +  for (;;) {
  1.2078 +    int diff_sign;
  1.2079 +    while (mp_iseven(&f)) {
  1.2080 +      mp_size n = mp_trailing_zeros(&f);
  1.2081 +      if (!n) {
  1.2082 +	res = MP_UNDEF;
  1.2083 +	goto CLEANUP;
  1.2084 +      }
  1.2085 +      s_mp_div_2d(&f, n);
  1.2086 +      MP_CHECKOK( s_mp_mul_2d(&d, n) );
  1.2087 +      k += n;
  1.2088 +    }
  1.2089 +    if (mp_cmp_d(&f, 1) == MP_EQ) {	/* f == 1 */
  1.2090 +      res = k;
  1.2091 +      break;
  1.2092 +    }
  1.2093 +    diff_sign = mp_cmp(&f, &g);
  1.2094 +    if (diff_sign < 0) {		/* f < g */
  1.2095 +      s_mp_exch(&f, &g);
  1.2096 +      s_mp_exch(c, &d);
  1.2097 +    } else if (diff_sign == 0) {		/* f == g */
  1.2098 +      res = MP_UNDEF;		/* a and p are not relatively prime */
  1.2099 +      break;
  1.2100 +    }
  1.2101 +    if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
  1.2102 +      MP_CHECKOK( mp_sub(&f, &g, &f) );	/* f = f - g */
  1.2103 +      MP_CHECKOK( mp_sub(c,  &d,  c) );	/* c = c - d */
  1.2104 +    } else {
  1.2105 +      MP_CHECKOK( mp_add(&f, &g, &f) );	/* f = f + g */
  1.2106 +      MP_CHECKOK( mp_add(c,  &d,  c) );	/* c = c + d */
  1.2107 +    }
  1.2108 +  }
  1.2109 +  if (res >= 0) {
  1.2110 +    while (MP_SIGN(c) != MP_ZPOS) {
  1.2111 +      MP_CHECKOK( mp_add(c, p, c) );
  1.2112 +    }
  1.2113 +    res = k;
  1.2114 +  }
  1.2115 +
  1.2116 +CLEANUP:
  1.2117 +  mp_clear(&d);
  1.2118 +  mp_clear(&f);
  1.2119 +  mp_clear(&g);
  1.2120 +  return res;
  1.2121 +}
  1.2122 +
  1.2123 +/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
  1.2124 +** This technique from the paper "Fast Modular Reciprocals" (unpublished)
  1.2125 +** by Richard Schroeppel (a.k.a. Captain Nemo).
  1.2126 +*/
  1.2127 +mp_digit  s_mp_invmod_radix(mp_digit P)
  1.2128 +{
  1.2129 +  mp_digit T = P;
  1.2130 +  T *= 2 - (P * T);
  1.2131 +  T *= 2 - (P * T);
  1.2132 +  T *= 2 - (P * T);
  1.2133 +  T *= 2 - (P * T);
  1.2134 +#if !defined(MP_USE_UINT_DIGIT)
  1.2135 +  T *= 2 - (P * T);
  1.2136 +  T *= 2 - (P * T);
  1.2137 +#endif
  1.2138 +  return T;
  1.2139 +}
  1.2140 +
  1.2141 +/* Given c, k, and prime p, where a*c == 2**k (mod p), 
  1.2142 +** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
  1.2143 +** This technique from the paper "Fast Modular Reciprocals" (unpublished)
  1.2144 +** by Richard Schroeppel (a.k.a. Captain Nemo).
  1.2145 +*/
  1.2146 +mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
  1.2147 +{
  1.2148 +  int      k_orig = k;
  1.2149 +  mp_digit r;
  1.2150 +  mp_size  ix;
  1.2151 +  mp_err   res;
  1.2152 +
  1.2153 +  if (mp_cmp_z(c) < 0) {		/* c < 0 */
  1.2154 +    MP_CHECKOK( mp_add(c, p, x) );	/* x = c + p */
  1.2155 +  } else {
  1.2156 +    MP_CHECKOK( mp_copy(c, x) );	/* x = c */
  1.2157 +  }
  1.2158 +
  1.2159 +  /* make sure x is large enough */
  1.2160 +  ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
  1.2161 +  ix = MP_MAX(ix, MP_USED(x));
  1.2162 +  MP_CHECKOK( s_mp_pad(x, ix) );
  1.2163 +
  1.2164 +  r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
  1.2165 +
  1.2166 +  for (ix = 0; k > 0; ix++) {
  1.2167 +    int      j = MP_MIN(k, MP_DIGIT_BIT);
  1.2168 +    mp_digit v = r * MP_DIGIT(x, ix);
  1.2169 +    if (j < MP_DIGIT_BIT) {
  1.2170 +      v &= ((mp_digit)1 << j) - 1;	/* v = v mod (2 ** j) */
  1.2171 +    }
  1.2172 +    s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
  1.2173 +    k -= j;
  1.2174 +  }
  1.2175 +  s_mp_clamp(x);
  1.2176 +  s_mp_div_2d(x, k_orig);
  1.2177 +  res = MP_OKAY;
  1.2178 +
  1.2179 +CLEANUP:
  1.2180 +  return res;
  1.2181 +}
  1.2182 +
  1.2183 +/* compute mod inverse using Schroeppel's method, only if m is odd */
  1.2184 +mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
  1.2185 +{
  1.2186 +  int k;
  1.2187 +  mp_err  res;
  1.2188 +  mp_int  x;
  1.2189 +
  1.2190 +  ARGCHK(a && m && c, MP_BADARG);
  1.2191 +
  1.2192 +  if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
  1.2193 +    return MP_RANGE;
  1.2194 +  if (mp_iseven(m))
  1.2195 +    return MP_UNDEF;
  1.2196 +
  1.2197 +  MP_DIGITS(&x) = 0;
  1.2198 +
  1.2199 +  if (a == c) {
  1.2200 +    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
  1.2201 +      return res;
  1.2202 +    if (a == m) 
  1.2203 +      m = &x;
  1.2204 +    a = &x;
  1.2205 +  } else if (m == c) {
  1.2206 +    if ((res = mp_init_copy(&x, m)) != MP_OKAY)
  1.2207 +      return res;
  1.2208 +    m = &x;
  1.2209 +  } else {
  1.2210 +    MP_DIGITS(&x) = 0;
  1.2211 +  }
  1.2212 +
  1.2213 +  MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
  1.2214 +  k = res;
  1.2215 +  MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
  1.2216 +CLEANUP:
  1.2217 +  mp_clear(&x);
  1.2218 +  return res;
  1.2219 +}
  1.2220 +
  1.2221 +/* Known good algorithm for computing modular inverse.  But slow. */
  1.2222 +mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
  1.2223 +{
  1.2224 +  mp_int  g, x;
  1.2225 +  mp_err  res;
  1.2226 +
  1.2227 +  ARGCHK(a && m && c, MP_BADARG);
  1.2228 +
  1.2229 +  if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
  1.2230 +    return MP_RANGE;
  1.2231 +
  1.2232 +  MP_DIGITS(&g) = 0;
  1.2233 +  MP_DIGITS(&x) = 0;
  1.2234 +  MP_CHECKOK( mp_init(&x) );
  1.2235 +  MP_CHECKOK( mp_init(&g) );
  1.2236 +
  1.2237 +  MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
  1.2238 +
  1.2239 +  if (mp_cmp_d(&g, 1) != MP_EQ) {
  1.2240 +    res = MP_UNDEF;
  1.2241 +    goto CLEANUP;
  1.2242 +  }
  1.2243 +
  1.2244 +  res = mp_mod(&x, m, c);
  1.2245 +  SIGN(c) = SIGN(a);
  1.2246 +
  1.2247 +CLEANUP:
  1.2248 +  mp_clear(&x);
  1.2249 +  mp_clear(&g);
  1.2250 +
  1.2251 +  return res;
  1.2252 +}
  1.2253 +
  1.2254 +/* modular inverse where modulus is 2**k. */
  1.2255 +/* c = a**-1 mod 2**k */
  1.2256 +mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
  1.2257 +{
  1.2258 +  mp_err res;
  1.2259 +  mp_size ix = k + 4;
  1.2260 +  mp_int t0, t1, val, tmp, two2k;
  1.2261 +
  1.2262 +  static const mp_digit d2 = 2;
  1.2263 +  static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
  1.2264 +
  1.2265 +  if (mp_iseven(a))
  1.2266 +    return MP_UNDEF;
  1.2267 +  if (k <= MP_DIGIT_BIT) {
  1.2268 +    mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
  1.2269 +    if (k < MP_DIGIT_BIT)
  1.2270 +      i &= ((mp_digit)1 << k) - (mp_digit)1;
  1.2271 +    mp_set(c, i);
  1.2272 +    return MP_OKAY;
  1.2273 +  }
  1.2274 +  MP_DIGITS(&t0) = 0;
  1.2275 +  MP_DIGITS(&t1) = 0;
  1.2276 +  MP_DIGITS(&val) = 0;
  1.2277 +  MP_DIGITS(&tmp) = 0;
  1.2278 +  MP_DIGITS(&two2k) = 0;
  1.2279 +  MP_CHECKOK( mp_init_copy(&val, a) );
  1.2280 +  s_mp_mod_2d(&val, k);
  1.2281 +  MP_CHECKOK( mp_init_copy(&t0, &val) );
  1.2282 +  MP_CHECKOK( mp_init_copy(&t1, &t0)  );
  1.2283 +  MP_CHECKOK( mp_init(&tmp) );
  1.2284 +  MP_CHECKOK( mp_init(&two2k) );
  1.2285 +  MP_CHECKOK( s_mp_2expt(&two2k, k) );
  1.2286 +  do {
  1.2287 +    MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
  1.2288 +    MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
  1.2289 +    MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
  1.2290 +    s_mp_mod_2d(&t1, k);
  1.2291 +    while (MP_SIGN(&t1) != MP_ZPOS) {
  1.2292 +      MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
  1.2293 +    }
  1.2294 +    if (mp_cmp(&t1, &t0) == MP_EQ) 
  1.2295 +      break;
  1.2296 +    MP_CHECKOK( mp_copy(&t1, &t0) );
  1.2297 +  } while (--ix > 0);
  1.2298 +  if (!ix) {
  1.2299 +    res = MP_UNDEF;
  1.2300 +  } else {
  1.2301 +    mp_exch(c, &t1);
  1.2302 +  }
  1.2303 +
  1.2304 +CLEANUP:
  1.2305 +  mp_clear(&t0);
  1.2306 +  mp_clear(&t1);
  1.2307 +  mp_clear(&val);
  1.2308 +  mp_clear(&tmp);
  1.2309 +  mp_clear(&two2k);
  1.2310 +  return res;
  1.2311 +}
  1.2312 +
  1.2313 +mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
  1.2314 +{
  1.2315 +  mp_err res;
  1.2316 +  mp_size k;
  1.2317 +  mp_int oddFactor, evenFactor;	/* factors of the modulus */
  1.2318 +  mp_int oddPart, evenPart;	/* parts to combine via CRT. */
  1.2319 +  mp_int C2, tmp1, tmp2;
  1.2320 +
  1.2321 +  /*static const mp_digit d1 = 1; */
  1.2322 +  /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
  1.2323 +
  1.2324 +  if ((res = s_mp_ispow2(m)) >= 0) {
  1.2325 +    k = res;
  1.2326 +    return s_mp_invmod_2d(a, k, c);
  1.2327 +  }
  1.2328 +  MP_DIGITS(&oddFactor) = 0;
  1.2329 +  MP_DIGITS(&evenFactor) = 0;
  1.2330 +  MP_DIGITS(&oddPart) = 0;
  1.2331 +  MP_DIGITS(&evenPart) = 0;
  1.2332 +  MP_DIGITS(&C2)     = 0;
  1.2333 +  MP_DIGITS(&tmp1)   = 0;
  1.2334 +  MP_DIGITS(&tmp2)   = 0;
  1.2335 +
  1.2336 +  MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
  1.2337 +  MP_CHECKOK( mp_init(&evenFactor) );
  1.2338 +  MP_CHECKOK( mp_init(&oddPart) );
  1.2339 +  MP_CHECKOK( mp_init(&evenPart) );
  1.2340 +  MP_CHECKOK( mp_init(&C2)     );
  1.2341 +  MP_CHECKOK( mp_init(&tmp1)   );
  1.2342 +  MP_CHECKOK( mp_init(&tmp2)   );
  1.2343 +
  1.2344 +  k = mp_trailing_zeros(m);
  1.2345 +  s_mp_div_2d(&oddFactor, k);
  1.2346 +  MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
  1.2347 +
  1.2348 +  /* compute a**-1 mod oddFactor. */
  1.2349 +  MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
  1.2350 +  /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
  1.2351 +  MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
  1.2352 +
  1.2353 +  /* Use Chinese Remainer theorem to compute a**-1 mod m. */
  1.2354 +  /* let m1 = oddFactor,  v1 = oddPart, 
  1.2355 +   * let m2 = evenFactor, v2 = evenPart.
  1.2356 +   */
  1.2357 +
  1.2358 +  /* Compute C2 = m1**-1 mod m2. */
  1.2359 +  MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
  1.2360 +
  1.2361 +  /* compute u = (v2 - v1)*C2 mod m2 */
  1.2362 +  MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
  1.2363 +  MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
  1.2364 +  s_mp_mod_2d(&tmp2, k);
  1.2365 +  while (MP_SIGN(&tmp2) != MP_ZPOS) {
  1.2366 +    MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
  1.2367 +  }
  1.2368 +
  1.2369 +  /* compute answer = v1 + u*m1 */
  1.2370 +  MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
  1.2371 +  MP_CHECKOK( mp_add(&oddPart,  c,          c) );
  1.2372 +  /* not sure this is necessary, but it's low cost if not. */
  1.2373 +  MP_CHECKOK( mp_mod(c,         m,          c) );
  1.2374 +
  1.2375 +CLEANUP:
  1.2376 +  mp_clear(&oddFactor);
  1.2377 +  mp_clear(&evenFactor);
  1.2378 +  mp_clear(&oddPart);
  1.2379 +  mp_clear(&evenPart);
  1.2380 +  mp_clear(&C2);
  1.2381 +  mp_clear(&tmp1);
  1.2382 +  mp_clear(&tmp2);
  1.2383 +  return res;
  1.2384 +}
  1.2385 +
  1.2386 +
  1.2387 +/* {{{ mp_invmod(a, m, c) */
  1.2388 +
  1.2389 +/*
  1.2390 +  mp_invmod(a, m, c)
  1.2391 +
  1.2392 +  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
  1.2393 +  This is equivalent to the question of whether (a, m) = 1.  If not,
  1.2394 +  MP_UNDEF is returned, and there is no inverse.
  1.2395 + */
  1.2396 +
  1.2397 +mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
  1.2398 +{
  1.2399 +
  1.2400 +  ARGCHK(a && m && c, MP_BADARG);
  1.2401 +
  1.2402 +  if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
  1.2403 +    return MP_RANGE;
  1.2404 +
  1.2405 +  if (mp_isodd(m)) {
  1.2406 +    return s_mp_invmod_odd_m(a, m, c);
  1.2407 +  }
  1.2408 +  if (mp_iseven(a))
  1.2409 +    return MP_UNDEF;	/* not invertable */
  1.2410 +
  1.2411 +  return s_mp_invmod_even_m(a, m, c);
  1.2412 +
  1.2413 +} /* end mp_invmod() */
  1.2414 +
  1.2415 +/* }}} */
  1.2416 +#endif /* if MP_NUMTH */
  1.2417 +
  1.2418 +/* }}} */
  1.2419 +
  1.2420 +/*------------------------------------------------------------------------*/
  1.2421 +/* {{{ mp_print(mp, ofp) */
  1.2422 +
  1.2423 +#if MP_IOFUNC
  1.2424 +/*
  1.2425 +  mp_print(mp, ofp)
  1.2426 +
  1.2427 +  Print a textual representation of the given mp_int on the output
  1.2428 +  stream 'ofp'.  Output is generated using the internal radix.
  1.2429 + */
  1.2430 +
  1.2431 +void   mp_print(mp_int *mp, FILE *ofp)
  1.2432 +{
  1.2433 +  int   ix;
  1.2434 +
  1.2435 +  if(mp == NULL || ofp == NULL)
  1.2436 +    return;
  1.2437 +
  1.2438 +  fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
  1.2439 +
  1.2440 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.2441 +    fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
  1.2442 +  }
  1.2443 +
  1.2444 +} /* end mp_print() */
  1.2445 +
  1.2446 +#endif /* if MP_IOFUNC */
  1.2447 +
  1.2448 +/* }}} */
  1.2449 +
  1.2450 +/*------------------------------------------------------------------------*/
  1.2451 +/* {{{ More I/O Functions */
  1.2452 +
  1.2453 +/* {{{ mp_read_raw(mp, str, len) */
  1.2454 +
  1.2455 +/* 
  1.2456 +   mp_read_raw(mp, str, len)
  1.2457 +
  1.2458 +   Read in a raw value (base 256) into the given mp_int
  1.2459 + */
  1.2460 +
  1.2461 +mp_err  mp_read_raw(mp_int *mp, char *str, int len)
  1.2462 +{
  1.2463 +  int            ix;
  1.2464 +  mp_err         res;
  1.2465 +  unsigned char *ustr = (unsigned char *)str;
  1.2466 +
  1.2467 +  ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
  1.2468 +
  1.2469 +  mp_zero(mp);
  1.2470 +
  1.2471 +  /* Get sign from first byte */
  1.2472 +  if(ustr[0])
  1.2473 +    SIGN(mp) = NEG;
  1.2474 +  else
  1.2475 +    SIGN(mp) = ZPOS;
  1.2476 +
  1.2477 +  /* Read the rest of the digits */
  1.2478 +  for(ix = 1; ix < len; ix++) {
  1.2479 +    if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
  1.2480 +      return res;
  1.2481 +    if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
  1.2482 +      return res;
  1.2483 +  }
  1.2484 +
  1.2485 +  return MP_OKAY;
  1.2486 +
  1.2487 +} /* end mp_read_raw() */
  1.2488 +
  1.2489 +/* }}} */
  1.2490 +
  1.2491 +/* {{{ mp_raw_size(mp) */
  1.2492 +
  1.2493 +int    mp_raw_size(mp_int *mp)
  1.2494 +{
  1.2495 +  ARGCHK(mp != NULL, 0);
  1.2496 +
  1.2497 +  return (USED(mp) * sizeof(mp_digit)) + 1;
  1.2498 +
  1.2499 +} /* end mp_raw_size() */
  1.2500 +
  1.2501 +/* }}} */
  1.2502 +
  1.2503 +/* {{{ mp_toraw(mp, str) */
  1.2504 +
  1.2505 +mp_err mp_toraw(mp_int *mp, char *str)
  1.2506 +{
  1.2507 +  int  ix, jx, pos = 1;
  1.2508 +
  1.2509 +  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
  1.2510 +
  1.2511 +  str[0] = (char)SIGN(mp);
  1.2512 +
  1.2513 +  /* Iterate over each digit... */
  1.2514 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.2515 +    mp_digit  d = DIGIT(mp, ix);
  1.2516 +
  1.2517 +    /* Unpack digit bytes, high order first */
  1.2518 +    for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
  1.2519 +      str[pos++] = (char)(d >> (jx * CHAR_BIT));
  1.2520 +    }
  1.2521 +  }
  1.2522 +
  1.2523 +  return MP_OKAY;
  1.2524 +
  1.2525 +} /* end mp_toraw() */
  1.2526 +
  1.2527 +/* }}} */
  1.2528 +
  1.2529 +/* {{{ mp_read_radix(mp, str, radix) */
  1.2530 +
  1.2531 +/*
  1.2532 +  mp_read_radix(mp, str, radix)
  1.2533 +
  1.2534 +  Read an integer from the given string, and set mp to the resulting
  1.2535 +  value.  The input is presumed to be in base 10.  Leading non-digit
  1.2536 +  characters are ignored, and the function reads until a non-digit
  1.2537 +  character or the end of the string.
  1.2538 + */
  1.2539 +
  1.2540 +mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
  1.2541 +{
  1.2542 +  int     ix = 0, val = 0;
  1.2543 +  mp_err  res;
  1.2544 +  mp_sign sig = ZPOS;
  1.2545 +
  1.2546 +  ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, 
  1.2547 +	 MP_BADARG);
  1.2548 +
  1.2549 +  mp_zero(mp);
  1.2550 +
  1.2551 +  /* Skip leading non-digit characters until a digit or '-' or '+' */
  1.2552 +  while(str[ix] && 
  1.2553 +	(s_mp_tovalue(str[ix], radix) < 0) && 
  1.2554 +	str[ix] != '-' &&
  1.2555 +	str[ix] != '+') {
  1.2556 +    ++ix;
  1.2557 +  }
  1.2558 +
  1.2559 +  if(str[ix] == '-') {
  1.2560 +    sig = NEG;
  1.2561 +    ++ix;
  1.2562 +  } else if(str[ix] == '+') {
  1.2563 +    sig = ZPOS; /* this is the default anyway... */
  1.2564 +    ++ix;
  1.2565 +  }
  1.2566 +
  1.2567 +  while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
  1.2568 +    if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
  1.2569 +      return res;
  1.2570 +    if((res = s_mp_add_d(mp, val)) != MP_OKAY)
  1.2571 +      return res;
  1.2572 +    ++ix;
  1.2573 +  }
  1.2574 +
  1.2575 +  if(s_mp_cmp_d(mp, 0) == MP_EQ)
  1.2576 +    SIGN(mp) = ZPOS;
  1.2577 +  else
  1.2578 +    SIGN(mp) = sig;
  1.2579 +
  1.2580 +  return MP_OKAY;
  1.2581 +
  1.2582 +} /* end mp_read_radix() */
  1.2583 +
  1.2584 +mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
  1.2585 +{
  1.2586 +  int     radix = default_radix;
  1.2587 +  int     cx;
  1.2588 +  mp_sign sig   = ZPOS;
  1.2589 +  mp_err  res;
  1.2590 +
  1.2591 +  /* Skip leading non-digit characters until a digit or '-' or '+' */
  1.2592 +  while ((cx = *str) != 0 && 
  1.2593 +	(s_mp_tovalue(cx, radix) < 0) && 
  1.2594 +	cx != '-' &&
  1.2595 +	cx != '+') {
  1.2596 +    ++str;
  1.2597 +  }
  1.2598 +
  1.2599 +  if (cx == '-') {
  1.2600 +    sig = NEG;
  1.2601 +    ++str;
  1.2602 +  } else if (cx == '+') {
  1.2603 +    sig = ZPOS; /* this is the default anyway... */
  1.2604 +    ++str;
  1.2605 +  }
  1.2606 +
  1.2607 +  if (str[0] == '0') {
  1.2608 +    if ((str[1] | 0x20) == 'x') {
  1.2609 +      radix = 16;
  1.2610 +      str += 2;
  1.2611 +    } else {
  1.2612 +      radix = 8;
  1.2613 +      str++;
  1.2614 +    }
  1.2615 +  }
  1.2616 +  res = mp_read_radix(a, str, radix);
  1.2617 +  if (res == MP_OKAY) {
  1.2618 +    MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
  1.2619 +  }
  1.2620 +  return res;
  1.2621 +}
  1.2622 +
  1.2623 +/* }}} */
  1.2624 +
  1.2625 +/* {{{ mp_radix_size(mp, radix) */
  1.2626 +
  1.2627 +int    mp_radix_size(mp_int *mp, int radix)
  1.2628 +{
  1.2629 +  int  bits;
  1.2630 +
  1.2631 +  if(!mp || radix < 2 || radix > MAX_RADIX)
  1.2632 +    return 0;
  1.2633 +
  1.2634 +  bits = USED(mp) * DIGIT_BIT - 1;
  1.2635 + 
  1.2636 +  return s_mp_outlen(bits, radix);
  1.2637 +
  1.2638 +} /* end mp_radix_size() */
  1.2639 +
  1.2640 +/* }}} */
  1.2641 +
  1.2642 +/* {{{ mp_toradix(mp, str, radix) */
  1.2643 +
  1.2644 +mp_err mp_toradix(mp_int *mp, char *str, int radix)
  1.2645 +{
  1.2646 +  int  ix, pos = 0;
  1.2647 +
  1.2648 +  ARGCHK(mp != NULL && str != NULL, MP_BADARG);
  1.2649 +  ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
  1.2650 +
  1.2651 +  if(mp_cmp_z(mp) == MP_EQ) {
  1.2652 +    str[0] = '0';
  1.2653 +    str[1] = '\0';
  1.2654 +  } else {
  1.2655 +    mp_err   res;
  1.2656 +    mp_int   tmp;
  1.2657 +    mp_sign  sgn;
  1.2658 +    mp_digit rem, rdx = (mp_digit)radix;
  1.2659 +    char     ch;
  1.2660 +
  1.2661 +    if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
  1.2662 +      return res;
  1.2663 +
  1.2664 +    /* Save sign for later, and take absolute value */
  1.2665 +    sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
  1.2666 +
  1.2667 +    /* Generate output digits in reverse order      */
  1.2668 +    while(mp_cmp_z(&tmp) != 0) {
  1.2669 +      if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
  1.2670 +	mp_clear(&tmp);
  1.2671 +	return res;
  1.2672 +      }
  1.2673 +
  1.2674 +      /* Generate digits, use capital letters */
  1.2675 +      ch = s_mp_todigit(rem, radix, 0);
  1.2676 +
  1.2677 +      str[pos++] = ch;
  1.2678 +    }
  1.2679 +
  1.2680 +    /* Add - sign if original value was negative */
  1.2681 +    if(sgn == NEG)
  1.2682 +      str[pos++] = '-';
  1.2683 +
  1.2684 +    /* Add trailing NUL to end the string        */
  1.2685 +    str[pos--] = '\0';
  1.2686 +
  1.2687 +    /* Reverse the digits and sign indicator     */
  1.2688 +    ix = 0;
  1.2689 +    while(ix < pos) {
  1.2690 +      char tmp = str[ix];
  1.2691 +
  1.2692 +      str[ix] = str[pos];
  1.2693 +      str[pos] = tmp;
  1.2694 +      ++ix;
  1.2695 +      --pos;
  1.2696 +    }
  1.2697 +    
  1.2698 +    mp_clear(&tmp);
  1.2699 +  }
  1.2700 +
  1.2701 +  return MP_OKAY;
  1.2702 +
  1.2703 +} /* end mp_toradix() */
  1.2704 +
  1.2705 +/* }}} */
  1.2706 +
  1.2707 +/* {{{ mp_tovalue(ch, r) */
  1.2708 +
  1.2709 +int    mp_tovalue(char ch, int r)
  1.2710 +{
  1.2711 +  return s_mp_tovalue(ch, r);
  1.2712 +
  1.2713 +} /* end mp_tovalue() */
  1.2714 +
  1.2715 +/* }}} */
  1.2716 +
  1.2717 +/* }}} */
  1.2718 +
  1.2719 +/* {{{ mp_strerror(ec) */
  1.2720 +
  1.2721 +/*
  1.2722 +  mp_strerror(ec)
  1.2723 +
  1.2724 +  Return a string describing the meaning of error code 'ec'.  The
  1.2725 +  string returned is allocated in static memory, so the caller should
  1.2726 +  not attempt to modify or free the memory associated with this
  1.2727 +  string.
  1.2728 + */
  1.2729 +const char  *mp_strerror(mp_err ec)
  1.2730 +{
  1.2731 +  int   aec = (ec < 0) ? -ec : ec;
  1.2732 +
  1.2733 +  /* Code values are negative, so the senses of these comparisons
  1.2734 +     are accurate */
  1.2735 +  if(ec < MP_LAST_CODE || ec > MP_OKAY) {
  1.2736 +    return mp_err_string[0];  /* unknown error code */
  1.2737 +  } else {
  1.2738 +    return mp_err_string[aec + 1];
  1.2739 +  }
  1.2740 +
  1.2741 +} /* end mp_strerror() */
  1.2742 +
  1.2743 +/* }}} */
  1.2744 +
  1.2745 +/*========================================================================*/
  1.2746 +/*------------------------------------------------------------------------*/
  1.2747 +/* Static function definitions (internal use only)                        */
  1.2748 +
  1.2749 +/* {{{ Memory management */
  1.2750 +
  1.2751 +/* {{{ s_mp_grow(mp, min) */
  1.2752 +
  1.2753 +/* Make sure there are at least 'min' digits allocated to mp              */
  1.2754 +mp_err   s_mp_grow(mp_int *mp, mp_size min)
  1.2755 +{
  1.2756 +  if(min > ALLOC(mp)) {
  1.2757 +    mp_digit   *tmp;
  1.2758 +
  1.2759 +    /* Set min to next nearest default precision block size */
  1.2760 +    min = MP_ROUNDUP(min, s_mp_defprec);
  1.2761 +
  1.2762 +    if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
  1.2763 +      return MP_MEM;
  1.2764 +
  1.2765 +    s_mp_copy(DIGITS(mp), tmp, USED(mp));
  1.2766 +
  1.2767 +#if MP_CRYPTO
  1.2768 +    s_mp_setz(DIGITS(mp), ALLOC(mp));
  1.2769 +#endif
  1.2770 +    s_mp_free(DIGITS(mp));
  1.2771 +    DIGITS(mp) = tmp;
  1.2772 +    ALLOC(mp) = min;
  1.2773 +  }
  1.2774 +
  1.2775 +  return MP_OKAY;
  1.2776 +
  1.2777 +} /* end s_mp_grow() */
  1.2778 +
  1.2779 +/* }}} */
  1.2780 +
  1.2781 +/* {{{ s_mp_pad(mp, min) */
  1.2782 +
  1.2783 +/* Make sure the used size of mp is at least 'min', growing if needed     */
  1.2784 +mp_err   s_mp_pad(mp_int *mp, mp_size min)
  1.2785 +{
  1.2786 +  if(min > USED(mp)) {
  1.2787 +    mp_err  res;
  1.2788 +
  1.2789 +    /* Make sure there is room to increase precision  */
  1.2790 +    if (min > ALLOC(mp)) {
  1.2791 +      if ((res = s_mp_grow(mp, min)) != MP_OKAY)
  1.2792 +	return res;
  1.2793 +    } else {
  1.2794 +      s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
  1.2795 +    }
  1.2796 +
  1.2797 +    /* Increase precision; should already be 0-filled */
  1.2798 +    USED(mp) = min;
  1.2799 +  }
  1.2800 +
  1.2801 +  return MP_OKAY;
  1.2802 +
  1.2803 +} /* end s_mp_pad() */
  1.2804 +
  1.2805 +/* }}} */
  1.2806 +
  1.2807 +/* {{{ s_mp_setz(dp, count) */
  1.2808 +
  1.2809 +#if MP_MACRO == 0
  1.2810 +/* Set 'count' digits pointed to by dp to be zeroes                       */
  1.2811 +void s_mp_setz(mp_digit *dp, mp_size count)
  1.2812 +{
  1.2813 +#if MP_MEMSET == 0
  1.2814 +  int  ix;
  1.2815 +
  1.2816 +  for(ix = 0; ix < count; ix++)
  1.2817 +    dp[ix] = 0;
  1.2818 +#else
  1.2819 +  memset(dp, 0, count * sizeof(mp_digit));
  1.2820 +#endif
  1.2821 +
  1.2822 +} /* end s_mp_setz() */
  1.2823 +#endif
  1.2824 +
  1.2825 +/* }}} */
  1.2826 +
  1.2827 +/* {{{ s_mp_copy(sp, dp, count) */
  1.2828 +
  1.2829 +#if MP_MACRO == 0
  1.2830 +/* Copy 'count' digits from sp to dp                                      */
  1.2831 +void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
  1.2832 +{
  1.2833 +#if MP_MEMCPY == 0
  1.2834 +  int  ix;
  1.2835 +
  1.2836 +  for(ix = 0; ix < count; ix++)
  1.2837 +    dp[ix] = sp[ix];
  1.2838 +#else
  1.2839 +  memcpy(dp, sp, count * sizeof(mp_digit));
  1.2840 +#endif
  1.2841 +  ++mp_copies;
  1.2842 +
  1.2843 +} /* end s_mp_copy() */
  1.2844 +#endif
  1.2845 +
  1.2846 +/* }}} */
  1.2847 +
  1.2848 +/* {{{ s_mp_alloc(nb, ni) */
  1.2849 +
  1.2850 +#if MP_MACRO == 0
  1.2851 +/* Allocate ni records of nb bytes each, and return a pointer to that     */
  1.2852 +void    *s_mp_alloc(size_t nb, size_t ni)
  1.2853 +{
  1.2854 +  ++mp_allocs;
  1.2855 +  return calloc(nb, ni);
  1.2856 +
  1.2857 +} /* end s_mp_alloc() */
  1.2858 +#endif
  1.2859 +
  1.2860 +/* }}} */
  1.2861 +
  1.2862 +/* {{{ s_mp_free(ptr) */
  1.2863 +
  1.2864 +#if MP_MACRO == 0
  1.2865 +/* Free the memory pointed to by ptr                                      */
  1.2866 +void     s_mp_free(void *ptr)
  1.2867 +{
  1.2868 +  if(ptr) {
  1.2869 +    ++mp_frees;
  1.2870 +    free(ptr);
  1.2871 +  }
  1.2872 +} /* end s_mp_free() */
  1.2873 +#endif
  1.2874 +
  1.2875 +/* }}} */
  1.2876 +
  1.2877 +/* {{{ s_mp_clamp(mp) */
  1.2878 +
  1.2879 +#if MP_MACRO == 0
  1.2880 +/* Remove leading zeroes from the given value                             */
  1.2881 +void     s_mp_clamp(mp_int *mp)
  1.2882 +{
  1.2883 +  mp_size used = MP_USED(mp);
  1.2884 +  while (used > 1 && DIGIT(mp, used - 1) == 0)
  1.2885 +    --used;
  1.2886 +  MP_USED(mp) = used;
  1.2887 +} /* end s_mp_clamp() */
  1.2888 +#endif
  1.2889 +
  1.2890 +/* }}} */
  1.2891 +
  1.2892 +/* {{{ s_mp_exch(a, b) */
  1.2893 +
  1.2894 +/* Exchange the data for a and b; (b, a) = (a, b)                         */
  1.2895 +void     s_mp_exch(mp_int *a, mp_int *b)
  1.2896 +{
  1.2897 +  mp_int   tmp;
  1.2898 +
  1.2899 +  tmp = *a;
  1.2900 +  *a = *b;
  1.2901 +  *b = tmp;
  1.2902 +
  1.2903 +} /* end s_mp_exch() */
  1.2904 +
  1.2905 +/* }}} */
  1.2906 +
  1.2907 +/* }}} */
  1.2908 +
  1.2909 +/* {{{ Arithmetic helpers */
  1.2910 +
  1.2911 +/* {{{ s_mp_lshd(mp, p) */
  1.2912 +
  1.2913 +/* 
  1.2914 +   Shift mp leftward by p digits, growing if needed, and zero-filling
  1.2915 +   the in-shifted digits at the right end.  This is a convenient
  1.2916 +   alternative to multiplication by powers of the radix
  1.2917 + */   
  1.2918 +
  1.2919 +mp_err   s_mp_lshd(mp_int *mp, mp_size p)
  1.2920 +{
  1.2921 +  mp_err  res;
  1.2922 +  mp_size pos;
  1.2923 +  int     ix;
  1.2924 +
  1.2925 +  if(p == 0)
  1.2926 +    return MP_OKAY;
  1.2927 +
  1.2928 +  if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
  1.2929 +    return MP_OKAY;
  1.2930 +
  1.2931 +  if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
  1.2932 +    return res;
  1.2933 +
  1.2934 +  pos = USED(mp) - 1;
  1.2935 +
  1.2936 +  /* Shift all the significant figures over as needed */
  1.2937 +  for(ix = pos - p; ix >= 0; ix--) 
  1.2938 +    DIGIT(mp, ix + p) = DIGIT(mp, ix);
  1.2939 +
  1.2940 +  /* Fill the bottom digits with zeroes */
  1.2941 +  for(ix = 0; ix < p; ix++)
  1.2942 +    DIGIT(mp, ix) = 0;
  1.2943 +
  1.2944 +  return MP_OKAY;
  1.2945 +
  1.2946 +} /* end s_mp_lshd() */
  1.2947 +
  1.2948 +/* }}} */
  1.2949 +
  1.2950 +/* {{{ s_mp_mul_2d(mp, d) */
  1.2951 +
  1.2952 +/*
  1.2953 +  Multiply the integer by 2^d, where d is a number of bits.  This
  1.2954 +  amounts to a bitwise shift of the value.
  1.2955 + */
  1.2956 +mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
  1.2957 +{
  1.2958 +  mp_err   res;
  1.2959 +  mp_digit dshift, bshift;
  1.2960 +  mp_digit mask;
  1.2961 +
  1.2962 +  ARGCHK(mp != NULL,  MP_BADARG);
  1.2963 +
  1.2964 +  dshift = d / MP_DIGIT_BIT;
  1.2965 +  bshift = d % MP_DIGIT_BIT;
  1.2966 +  /* bits to be shifted out of the top word */
  1.2967 +  mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift)); 
  1.2968 +  mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
  1.2969 +
  1.2970 +  if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
  1.2971 +    return res;
  1.2972 +
  1.2973 +  if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
  1.2974 +    return res;
  1.2975 +
  1.2976 +  if (bshift) { 
  1.2977 +    mp_digit *pa = MP_DIGITS(mp);
  1.2978 +    mp_digit *alim = pa + MP_USED(mp);
  1.2979 +    mp_digit  prev = 0;
  1.2980 +
  1.2981 +    for (pa += dshift; pa < alim; ) {
  1.2982 +      mp_digit x = *pa;
  1.2983 +      *pa++ = (x << bshift) | prev;
  1.2984 +      prev = x >> (DIGIT_BIT - bshift);
  1.2985 +    }
  1.2986 +  }
  1.2987 +
  1.2988 +  s_mp_clamp(mp);
  1.2989 +  return MP_OKAY;
  1.2990 +} /* end s_mp_mul_2d() */
  1.2991 +
  1.2992 +/* {{{ s_mp_rshd(mp, p) */
  1.2993 +
  1.2994 +/* 
  1.2995 +   Shift mp rightward by p digits.  Maintains the invariant that
  1.2996 +   digits above the precision are all zero.  Digits shifted off the
  1.2997 +   end are lost.  Cannot fail.
  1.2998 + */
  1.2999 +
  1.3000 +void     s_mp_rshd(mp_int *mp, mp_size p)
  1.3001 +{
  1.3002 +  mp_size  ix;
  1.3003 +  mp_digit *src, *dst;
  1.3004 +
  1.3005 +  if(p == 0)
  1.3006 +    return;
  1.3007 +
  1.3008 +  /* Shortcut when all digits are to be shifted off */
  1.3009 +  if(p >= USED(mp)) {
  1.3010 +    s_mp_setz(DIGITS(mp), ALLOC(mp));
  1.3011 +    USED(mp) = 1;
  1.3012 +    SIGN(mp) = ZPOS;
  1.3013 +    return;
  1.3014 +  }
  1.3015 +
  1.3016 +  /* Shift all the significant figures over as needed */
  1.3017 +  dst = MP_DIGITS(mp);
  1.3018 +  src = dst + p;
  1.3019 +  for (ix = USED(mp) - p; ix > 0; ix--)
  1.3020 +    *dst++ = *src++;
  1.3021 +
  1.3022 +  MP_USED(mp) -= p;
  1.3023 +  /* Fill the top digits with zeroes */
  1.3024 +  while (p-- > 0)
  1.3025 +    *dst++ = 0;
  1.3026 +
  1.3027 +#if 0
  1.3028 +  /* Strip off any leading zeroes    */
  1.3029 +  s_mp_clamp(mp);
  1.3030 +#endif
  1.3031 +
  1.3032 +} /* end s_mp_rshd() */
  1.3033 +
  1.3034 +/* }}} */
  1.3035 +
  1.3036 +/* {{{ s_mp_div_2(mp) */
  1.3037 +
  1.3038 +/* Divide by two -- take advantage of radix properties to do it fast      */
  1.3039 +void     s_mp_div_2(mp_int *mp)
  1.3040 +{
  1.3041 +  s_mp_div_2d(mp, 1);
  1.3042 +
  1.3043 +} /* end s_mp_div_2() */
  1.3044 +
  1.3045 +/* }}} */
  1.3046 +
  1.3047 +/* {{{ s_mp_mul_2(mp) */
  1.3048 +
  1.3049 +mp_err s_mp_mul_2(mp_int *mp)
  1.3050 +{
  1.3051 +  mp_digit *pd;
  1.3052 +  int      ix, used;
  1.3053 +  mp_digit kin = 0;
  1.3054 +
  1.3055 +  /* Shift digits leftward by 1 bit */
  1.3056 +  used = MP_USED(mp);
  1.3057 +  pd = MP_DIGITS(mp);
  1.3058 +  for (ix = 0; ix < used; ix++) {
  1.3059 +    mp_digit d = *pd;
  1.3060 +    *pd++ = (d << 1) | kin;
  1.3061 +    kin = (d >> (DIGIT_BIT - 1));
  1.3062 +  }
  1.3063 +
  1.3064 +  /* Deal with rollover from last digit */
  1.3065 +  if (kin) {
  1.3066 +    if (ix >= ALLOC(mp)) {
  1.3067 +      mp_err res;
  1.3068 +      if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
  1.3069 +	return res;
  1.3070 +    }
  1.3071 +
  1.3072 +    DIGIT(mp, ix) = kin;
  1.3073 +    USED(mp) += 1;
  1.3074 +  }
  1.3075 +
  1.3076 +  return MP_OKAY;
  1.3077 +
  1.3078 +} /* end s_mp_mul_2() */
  1.3079 +
  1.3080 +/* }}} */
  1.3081 +
  1.3082 +/* {{{ s_mp_mod_2d(mp, d) */
  1.3083 +
  1.3084 +/*
  1.3085 +  Remainder the integer by 2^d, where d is a number of bits.  This
  1.3086 +  amounts to a bitwise AND of the value, and does not require the full
  1.3087 +  division code
  1.3088 + */
  1.3089 +void     s_mp_mod_2d(mp_int *mp, mp_digit d)
  1.3090 +{
  1.3091 +  mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
  1.3092 +  mp_size  ix;
  1.3093 +  mp_digit dmask;
  1.3094 +
  1.3095 +  if(ndig >= USED(mp))
  1.3096 +    return;
  1.3097 +
  1.3098 +  /* Flush all the bits above 2^d in its digit */
  1.3099 +  dmask = ((mp_digit)1 << nbit) - 1;
  1.3100 +  DIGIT(mp, ndig) &= dmask;
  1.3101 +
  1.3102 +  /* Flush all digits above the one with 2^d in it */
  1.3103 +  for(ix = ndig + 1; ix < USED(mp); ix++)
  1.3104 +    DIGIT(mp, ix) = 0;
  1.3105 +
  1.3106 +  s_mp_clamp(mp);
  1.3107 +
  1.3108 +} /* end s_mp_mod_2d() */
  1.3109 +
  1.3110 +/* }}} */
  1.3111 +
  1.3112 +/* {{{ s_mp_div_2d(mp, d) */
  1.3113 +
  1.3114 +/*
  1.3115 +  Divide the integer by 2^d, where d is a number of bits.  This
  1.3116 +  amounts to a bitwise shift of the value, and does not require the
  1.3117 +  full division code (used in Barrett reduction, see below)
  1.3118 + */
  1.3119 +void     s_mp_div_2d(mp_int *mp, mp_digit d)
  1.3120 +{
  1.3121 +  int       ix;
  1.3122 +  mp_digit  save, next, mask;
  1.3123 +
  1.3124 +  s_mp_rshd(mp, d / DIGIT_BIT);
  1.3125 +  d %= DIGIT_BIT;
  1.3126 +  if (d) {
  1.3127 +    mask = ((mp_digit)1 << d) - 1;
  1.3128 +    save = 0;
  1.3129 +    for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.3130 +      next = DIGIT(mp, ix) & mask;
  1.3131 +      DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
  1.3132 +      save = next;
  1.3133 +    }
  1.3134 +  }
  1.3135 +  s_mp_clamp(mp);
  1.3136 +
  1.3137 +} /* end s_mp_div_2d() */
  1.3138 +
  1.3139 +/* }}} */
  1.3140 +
  1.3141 +/* {{{ s_mp_norm(a, b, *d) */
  1.3142 +
  1.3143 +/*
  1.3144 +  s_mp_norm(a, b, *d)
  1.3145 +
  1.3146 +  Normalize a and b for division, where b is the divisor.  In order
  1.3147 +  that we might make good guesses for quotient digits, we want the
  1.3148 +  leading digit of b to be at least half the radix, which we
  1.3149 +  accomplish by multiplying a and b by a power of 2.  The exponent 
  1.3150 +  (shift count) is placed in *pd, so that the remainder can be shifted 
  1.3151 +  back at the end of the division process.
  1.3152 + */
  1.3153 +
  1.3154 +mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
  1.3155 +{
  1.3156 +  mp_digit  d;
  1.3157 +  mp_digit  mask;
  1.3158 +  mp_digit  b_msd;
  1.3159 +  mp_err    res    = MP_OKAY;
  1.3160 +
  1.3161 +  d = 0;
  1.3162 +  mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);	/* mask is msb of digit */
  1.3163 +  b_msd = DIGIT(b, USED(b) - 1);
  1.3164 +  while (!(b_msd & mask)) {
  1.3165 +    b_msd <<= 1;
  1.3166 +    ++d;
  1.3167 +  }
  1.3168 +
  1.3169 +  if (d) {
  1.3170 +    MP_CHECKOK( s_mp_mul_2d(a, d) );
  1.3171 +    MP_CHECKOK( s_mp_mul_2d(b, d) );
  1.3172 +  }
  1.3173 +
  1.3174 +  *pd = d;
  1.3175 +CLEANUP:
  1.3176 +  return res;
  1.3177 +
  1.3178 +} /* end s_mp_norm() */
  1.3179 +
  1.3180 +/* }}} */
  1.3181 +
  1.3182 +/* }}} */
  1.3183 +
  1.3184 +/* {{{ Primitive digit arithmetic */
  1.3185 +
  1.3186 +/* {{{ s_mp_add_d(mp, d) */
  1.3187 +
  1.3188 +/* Add d to |mp| in place                                                 */
  1.3189 +mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
  1.3190 +{
  1.3191 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3192 +  mp_word   w, k = 0;
  1.3193 +  mp_size   ix = 1;
  1.3194 +
  1.3195 +  w = (mp_word)DIGIT(mp, 0) + d;
  1.3196 +  DIGIT(mp, 0) = ACCUM(w);
  1.3197 +  k = CARRYOUT(w);
  1.3198 +
  1.3199 +  while(ix < USED(mp) && k) {
  1.3200 +    w = (mp_word)DIGIT(mp, ix) + k;
  1.3201 +    DIGIT(mp, ix) = ACCUM(w);
  1.3202 +    k = CARRYOUT(w);
  1.3203 +    ++ix;
  1.3204 +  }
  1.3205 +
  1.3206 +  if(k != 0) {
  1.3207 +    mp_err  res;
  1.3208 +
  1.3209 +    if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
  1.3210 +      return res;
  1.3211 +
  1.3212 +    DIGIT(mp, ix) = (mp_digit)k;
  1.3213 +  }
  1.3214 +
  1.3215 +  return MP_OKAY;
  1.3216 +#else
  1.3217 +  mp_digit * pmp = MP_DIGITS(mp);
  1.3218 +  mp_digit sum, mp_i, carry = 0;
  1.3219 +  mp_err   res = MP_OKAY;
  1.3220 +  int used = (int)MP_USED(mp);
  1.3221 +
  1.3222 +  mp_i = *pmp;
  1.3223 +  *pmp++ = sum = d + mp_i;
  1.3224 +  carry = (sum < d);
  1.3225 +  while (carry && --used > 0) {
  1.3226 +    mp_i = *pmp;
  1.3227 +    *pmp++ = sum = carry + mp_i;
  1.3228 +    carry = !sum;
  1.3229 +  }
  1.3230 +  if (carry && !used) {
  1.3231 +    /* mp is growing */
  1.3232 +    used = MP_USED(mp);
  1.3233 +    MP_CHECKOK( s_mp_pad(mp, used + 1) );
  1.3234 +    MP_DIGIT(mp, used) = carry;
  1.3235 +  }
  1.3236 +CLEANUP:
  1.3237 +  return res;
  1.3238 +#endif
  1.3239 +} /* end s_mp_add_d() */
  1.3240 +
  1.3241 +/* }}} */
  1.3242 +
  1.3243 +/* {{{ s_mp_sub_d(mp, d) */
  1.3244 +
  1.3245 +/* Subtract d from |mp| in place, assumes |mp| > d                        */
  1.3246 +mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
  1.3247 +{
  1.3248 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3249 +  mp_word   w, b = 0;
  1.3250 +  mp_size   ix = 1;
  1.3251 +
  1.3252 +  /* Compute initial subtraction    */
  1.3253 +  w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
  1.3254 +  b = CARRYOUT(w) ? 0 : 1;
  1.3255 +  DIGIT(mp, 0) = ACCUM(w);
  1.3256 +
  1.3257 +  /* Propagate borrows leftward     */
  1.3258 +  while(b && ix < USED(mp)) {
  1.3259 +    w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
  1.3260 +    b = CARRYOUT(w) ? 0 : 1;
  1.3261 +    DIGIT(mp, ix) = ACCUM(w);
  1.3262 +    ++ix;
  1.3263 +  }
  1.3264 +
  1.3265 +  /* Remove leading zeroes          */
  1.3266 +  s_mp_clamp(mp);
  1.3267 +
  1.3268 +  /* If we have a borrow out, it's a violation of the input invariant */
  1.3269 +  if(b)
  1.3270 +    return MP_RANGE;
  1.3271 +  else
  1.3272 +    return MP_OKAY;
  1.3273 +#else
  1.3274 +  mp_digit *pmp = MP_DIGITS(mp);
  1.3275 +  mp_digit mp_i, diff, borrow;
  1.3276 +  mp_size  used = MP_USED(mp);
  1.3277 +
  1.3278 +  mp_i = *pmp;
  1.3279 +  *pmp++ = diff = mp_i - d;
  1.3280 +  borrow = (diff > mp_i);
  1.3281 +  while (borrow && --used) {
  1.3282 +    mp_i = *pmp;
  1.3283 +    *pmp++ = diff = mp_i - borrow;
  1.3284 +    borrow = (diff > mp_i);
  1.3285 +  }
  1.3286 +  s_mp_clamp(mp);
  1.3287 +  return (borrow && !used) ? MP_RANGE : MP_OKAY;
  1.3288 +#endif
  1.3289 +} /* end s_mp_sub_d() */
  1.3290 +
  1.3291 +/* }}} */
  1.3292 +
  1.3293 +/* {{{ s_mp_mul_d(a, d) */
  1.3294 +
  1.3295 +/* Compute a = a * d, single digit multiplication                         */
  1.3296 +mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
  1.3297 +{
  1.3298 +  mp_err  res;
  1.3299 +  mp_size used;
  1.3300 +  int     pow;
  1.3301 +
  1.3302 +  if (!d) {
  1.3303 +    mp_zero(a);
  1.3304 +    return MP_OKAY;
  1.3305 +  }
  1.3306 +  if (d == 1)
  1.3307 +    return MP_OKAY;
  1.3308 +  if (0 <= (pow = s_mp_ispow2d(d))) {
  1.3309 +    return s_mp_mul_2d(a, (mp_digit)pow);
  1.3310 +  }
  1.3311 +
  1.3312 +  used = MP_USED(a);
  1.3313 +  MP_CHECKOK( s_mp_pad(a, used + 1) );
  1.3314 +
  1.3315 +  s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
  1.3316 +
  1.3317 +  s_mp_clamp(a);
  1.3318 +
  1.3319 +CLEANUP:
  1.3320 +  return res;
  1.3321 +  
  1.3322 +} /* end s_mp_mul_d() */
  1.3323 +
  1.3324 +/* }}} */
  1.3325 +
  1.3326 +/* {{{ s_mp_div_d(mp, d, r) */
  1.3327 +
  1.3328 +/*
  1.3329 +  s_mp_div_d(mp, d, r)
  1.3330 +
  1.3331 +  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
  1.3332 +  single digit d.  If r is null, the remainder will be discarded.
  1.3333 + */
  1.3334 +
  1.3335 +mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
  1.3336 +{
  1.3337 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
  1.3338 +  mp_word   w = 0, q;
  1.3339 +#else
  1.3340 +  mp_digit  w, q;
  1.3341 +#endif
  1.3342 +  int       ix;
  1.3343 +  mp_err    res;
  1.3344 +  mp_int    quot;
  1.3345 +  mp_int    rem;
  1.3346 +
  1.3347 +  if(d == 0)
  1.3348 +    return MP_RANGE;
  1.3349 +  if (d == 1) {
  1.3350 +    if (r)
  1.3351 +      *r = 0;
  1.3352 +    return MP_OKAY;
  1.3353 +  }
  1.3354 +  /* could check for power of 2 here, but mp_div_d does that. */
  1.3355 +  if (MP_USED(mp) == 1) {
  1.3356 +    mp_digit n   = MP_DIGIT(mp,0);
  1.3357 +    mp_digit rem;
  1.3358 +
  1.3359 +    q   = n / d;
  1.3360 +    rem = n % d;
  1.3361 +    MP_DIGIT(mp,0) = q;
  1.3362 +    if (r)
  1.3363 +      *r = rem;
  1.3364 +    return MP_OKAY;
  1.3365 +  }
  1.3366 +
  1.3367 +  MP_DIGITS(&rem)  = 0;
  1.3368 +  MP_DIGITS(&quot) = 0;
  1.3369 +  /* Make room for the quotient */
  1.3370 +  MP_CHECKOK( mp_init_size(&quot, USED(mp)) );
  1.3371 +
  1.3372 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
  1.3373 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.3374 +    w = (w << DIGIT_BIT) | DIGIT(mp, ix);
  1.3375 +
  1.3376 +    if(w >= d) {
  1.3377 +      q = w / d;
  1.3378 +      w = w % d;
  1.3379 +    } else {
  1.3380 +      q = 0;
  1.3381 +    }
  1.3382 +
  1.3383 +    s_mp_lshd(&quot, 1);
  1.3384 +    DIGIT(&quot, 0) = (mp_digit)q;
  1.3385 +  }
  1.3386 +#else
  1.3387 +  {
  1.3388 +    mp_digit p;
  1.3389 +#if !defined(MP_ASSEMBLY_DIV_2DX1D)
  1.3390 +    mp_digit norm;
  1.3391 +#endif
  1.3392 +
  1.3393 +    MP_CHECKOK( mp_init_copy(&rem, mp) );
  1.3394 +
  1.3395 +#if !defined(MP_ASSEMBLY_DIV_2DX1D)
  1.3396 +    MP_DIGIT(&quot, 0) = d;
  1.3397 +    MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
  1.3398 +    if (norm)
  1.3399 +      d <<= norm;
  1.3400 +    MP_DIGIT(&quot, 0) = 0;
  1.3401 +#endif
  1.3402 +
  1.3403 +    p = 0;
  1.3404 +    for (ix = USED(&rem) - 1; ix >= 0; ix--) {
  1.3405 +      w = DIGIT(&rem, ix);
  1.3406 +
  1.3407 +      if (p) {
  1.3408 +        MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
  1.3409 +      } else if (w >= d) {
  1.3410 +	q = w / d;
  1.3411 +	w = w % d;
  1.3412 +      } else {
  1.3413 +	q = 0;
  1.3414 +      }
  1.3415 +
  1.3416 +      MP_CHECKOK( s_mp_lshd(&quot, 1) );
  1.3417 +      DIGIT(&quot, 0) = q;
  1.3418 +      p = w;
  1.3419 +    }
  1.3420 +#if !defined(MP_ASSEMBLY_DIV_2DX1D)
  1.3421 +    if (norm)
  1.3422 +      w >>= norm;
  1.3423 +#endif
  1.3424 +  }
  1.3425 +#endif
  1.3426 +
  1.3427 +  /* Deliver the remainder, if desired */
  1.3428 +  if(r)
  1.3429 +    *r = (mp_digit)w;
  1.3430 +
  1.3431 +  s_mp_clamp(&quot);
  1.3432 +  mp_exch(&quot, mp);
  1.3433 +CLEANUP:
  1.3434 +  mp_clear(&quot);
  1.3435 +  mp_clear(&rem);
  1.3436 +
  1.3437 +  return res;
  1.3438 +} /* end s_mp_div_d() */
  1.3439 +
  1.3440 +/* }}} */
  1.3441 +
  1.3442 +
  1.3443 +/* }}} */
  1.3444 +
  1.3445 +/* {{{ Primitive full arithmetic */
  1.3446 +
  1.3447 +/* {{{ s_mp_add(a, b) */
  1.3448 +
  1.3449 +/* Compute a = |a| + |b|                                                  */
  1.3450 +mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
  1.3451 +{
  1.3452 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3453 +  mp_word   w = 0;
  1.3454 +#else
  1.3455 +  mp_digit  d, sum, carry = 0;
  1.3456 +#endif
  1.3457 +  mp_digit *pa, *pb;
  1.3458 +  mp_size   ix;
  1.3459 +  mp_size   used;
  1.3460 +  mp_err    res;
  1.3461 +
  1.3462 +  /* Make sure a has enough precision for the output value */
  1.3463 +  if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
  1.3464 +    return res;
  1.3465 +
  1.3466 +  /*
  1.3467 +    Add up all digits up to the precision of b.  If b had initially
  1.3468 +    the same precision as a, or greater, we took care of it by the
  1.3469 +    padding step above, so there is no problem.  If b had initially
  1.3470 +    less precision, we'll have to make sure the carry out is duly
  1.3471 +    propagated upward among the higher-order digits of the sum.
  1.3472 +   */
  1.3473 +  pa = MP_DIGITS(a);
  1.3474 +  pb = MP_DIGITS(b);
  1.3475 +  used = MP_USED(b);
  1.3476 +  for(ix = 0; ix < used; ix++) {
  1.3477 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3478 +    w = w + *pa + *pb++;
  1.3479 +    *pa++ = ACCUM(w);
  1.3480 +    w = CARRYOUT(w);
  1.3481 +#else
  1.3482 +    d = *pa;
  1.3483 +    sum = d + *pb++;
  1.3484 +    d = (sum < d);			/* detect overflow */
  1.3485 +    *pa++ = sum += carry;
  1.3486 +    carry = d + (sum < carry);		/* detect overflow */
  1.3487 +#endif
  1.3488 +  }
  1.3489 +
  1.3490 +  /* If we run out of 'b' digits before we're actually done, make
  1.3491 +     sure the carries get propagated upward...  
  1.3492 +   */
  1.3493 +  used = MP_USED(a);
  1.3494 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3495 +  while (w && ix < used) {
  1.3496 +    w = w + *pa;
  1.3497 +    *pa++ = ACCUM(w);
  1.3498 +    w = CARRYOUT(w);
  1.3499 +    ++ix;
  1.3500 +  }
  1.3501 +#else
  1.3502 +  while (carry && ix < used) {
  1.3503 +    sum = carry + *pa;
  1.3504 +    *pa++ = sum;
  1.3505 +    carry = !sum;
  1.3506 +    ++ix;
  1.3507 +  }
  1.3508 +#endif
  1.3509 +
  1.3510 +  /* If there's an overall carry out, increase precision and include
  1.3511 +     it.  We could have done this initially, but why touch the memory
  1.3512 +     allocator unless we're sure we have to?
  1.3513 +   */
  1.3514 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3515 +  if (w) {
  1.3516 +    if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
  1.3517 +      return res;
  1.3518 +
  1.3519 +    DIGIT(a, ix) = (mp_digit)w;
  1.3520 +  }
  1.3521 +#else
  1.3522 +  if (carry) {
  1.3523 +    if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
  1.3524 +      return res;
  1.3525 +
  1.3526 +    DIGIT(a, used) = carry;
  1.3527 +  }
  1.3528 +#endif
  1.3529 +
  1.3530 +  return MP_OKAY;
  1.3531 +} /* end s_mp_add() */
  1.3532 +
  1.3533 +/* }}} */
  1.3534 +
  1.3535 +/* Compute c = |a| + |b|         */ /* magnitude addition      */
  1.3536 +mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)  
  1.3537 +{
  1.3538 +  mp_digit *pa, *pb, *pc;
  1.3539 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3540 +  mp_word   w = 0;
  1.3541 +#else
  1.3542 +  mp_digit  sum, carry = 0, d;
  1.3543 +#endif
  1.3544 +  mp_size   ix;
  1.3545 +  mp_size   used;
  1.3546 +  mp_err    res;
  1.3547 +
  1.3548 +  MP_SIGN(c) = MP_SIGN(a);
  1.3549 +  if (MP_USED(a) < MP_USED(b)) {
  1.3550 +    const mp_int *xch = a;
  1.3551 +    a = b;
  1.3552 +    b = xch;
  1.3553 +  }
  1.3554 +
  1.3555 +  /* Make sure a has enough precision for the output value */
  1.3556 +  if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
  1.3557 +    return res;
  1.3558 +
  1.3559 +  /*
  1.3560 +    Add up all digits up to the precision of b.  If b had initially
  1.3561 +    the same precision as a, or greater, we took care of it by the
  1.3562 +    exchange step above, so there is no problem.  If b had initially
  1.3563 +    less precision, we'll have to make sure the carry out is duly
  1.3564 +    propagated upward among the higher-order digits of the sum.
  1.3565 +   */
  1.3566 +  pa = MP_DIGITS(a);
  1.3567 +  pb = MP_DIGITS(b);
  1.3568 +  pc = MP_DIGITS(c);
  1.3569 +  used = MP_USED(b);
  1.3570 +  for (ix = 0; ix < used; ix++) {
  1.3571 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3572 +    w = w + *pa++ + *pb++;
  1.3573 +    *pc++ = ACCUM(w);
  1.3574 +    w = CARRYOUT(w);
  1.3575 +#else
  1.3576 +    d = *pa++;
  1.3577 +    sum = d + *pb++;
  1.3578 +    d = (sum < d);			/* detect overflow */
  1.3579 +    *pc++ = sum += carry;
  1.3580 +    carry = d + (sum < carry);		/* detect overflow */
  1.3581 +#endif
  1.3582 +  }
  1.3583 +
  1.3584 +  /* If we run out of 'b' digits before we're actually done, make
  1.3585 +     sure the carries get propagated upward...  
  1.3586 +   */
  1.3587 +  for (used = MP_USED(a); ix < used; ++ix) {
  1.3588 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3589 +    w = w + *pa++;
  1.3590 +    *pc++ = ACCUM(w);
  1.3591 +    w = CARRYOUT(w);
  1.3592 +#else
  1.3593 +    *pc++ = sum = carry + *pa++;
  1.3594 +    carry = (sum < carry);
  1.3595 +#endif
  1.3596 +  }
  1.3597 +
  1.3598 +  /* If there's an overall carry out, increase precision and include
  1.3599 +     it.  We could have done this initially, but why touch the memory
  1.3600 +     allocator unless we're sure we have to?
  1.3601 +   */
  1.3602 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3603 +  if (w) {
  1.3604 +    if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
  1.3605 +      return res;
  1.3606 +
  1.3607 +    DIGIT(c, used) = (mp_digit)w;
  1.3608 +    ++used;
  1.3609 +  }
  1.3610 +#else
  1.3611 +  if (carry) {
  1.3612 +    if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
  1.3613 +      return res;
  1.3614 +
  1.3615 +    DIGIT(c, used) = carry;
  1.3616 +    ++used;
  1.3617 +  }
  1.3618 +#endif
  1.3619 +  MP_USED(c) = used;
  1.3620 +  return MP_OKAY;
  1.3621 +}
  1.3622 +/* {{{ s_mp_add_offset(a, b, offset) */
  1.3623 +
  1.3624 +/* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
  1.3625 +mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)   
  1.3626 +{
  1.3627 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3628 +  mp_word   w, k = 0;
  1.3629 +#else
  1.3630 +  mp_digit  d, sum, carry = 0;
  1.3631 +#endif
  1.3632 +  mp_size   ib;
  1.3633 +  mp_size   ia;
  1.3634 +  mp_size   lim;
  1.3635 +  mp_err    res;
  1.3636 +
  1.3637 +  /* Make sure a has enough precision for the output value */
  1.3638 +  lim = MP_USED(b) + offset;
  1.3639 +  if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
  1.3640 +    return res;
  1.3641 +
  1.3642 +  /*
  1.3643 +    Add up all digits up to the precision of b.  If b had initially
  1.3644 +    the same precision as a, or greater, we took care of it by the
  1.3645 +    padding step above, so there is no problem.  If b had initially
  1.3646 +    less precision, we'll have to make sure the carry out is duly
  1.3647 +    propagated upward among the higher-order digits of the sum.
  1.3648 +   */
  1.3649 +  lim = USED(b);
  1.3650 +  for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
  1.3651 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3652 +    w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
  1.3653 +    DIGIT(a, ia) = ACCUM(w);
  1.3654 +    k = CARRYOUT(w);
  1.3655 +#else
  1.3656 +    d = MP_DIGIT(a, ia);
  1.3657 +    sum = d + MP_DIGIT(b, ib);
  1.3658 +    d = (sum < d);
  1.3659 +    MP_DIGIT(a,ia) = sum += carry;
  1.3660 +    carry = d + (sum < carry);
  1.3661 +#endif
  1.3662 +  }
  1.3663 +
  1.3664 +  /* If we run out of 'b' digits before we're actually done, make
  1.3665 +     sure the carries get propagated upward...  
  1.3666 +   */
  1.3667 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3668 +  for (lim = MP_USED(a); k && (ia < lim); ++ia) {
  1.3669 +    w = (mp_word)DIGIT(a, ia) + k;
  1.3670 +    DIGIT(a, ia) = ACCUM(w);
  1.3671 +    k = CARRYOUT(w);
  1.3672 +  }
  1.3673 +#else
  1.3674 +  for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
  1.3675 +    d = MP_DIGIT(a, ia);
  1.3676 +    MP_DIGIT(a,ia) = sum = d + carry;
  1.3677 +    carry = (sum < d);
  1.3678 +  }
  1.3679 +#endif
  1.3680 +
  1.3681 +  /* If there's an overall carry out, increase precision and include
  1.3682 +     it.  We could have done this initially, but why touch the memory
  1.3683 +     allocator unless we're sure we have to?
  1.3684 +   */
  1.3685 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
  1.3686 +  if(k) {
  1.3687 +    if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
  1.3688 +      return res;
  1.3689 +
  1.3690 +    DIGIT(a, ia) = (mp_digit)k;
  1.3691 +  }
  1.3692 +#else
  1.3693 +  if (carry) {
  1.3694 +    if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
  1.3695 +      return res;
  1.3696 +
  1.3697 +    DIGIT(a, lim) = carry;
  1.3698 +  }
  1.3699 +#endif
  1.3700 +  s_mp_clamp(a);
  1.3701 +
  1.3702 +  return MP_OKAY;
  1.3703 +
  1.3704 +} /* end s_mp_add_offset() */
  1.3705 +
  1.3706 +/* }}} */
  1.3707 +
  1.3708 +/* {{{ s_mp_sub(a, b) */
  1.3709 +
  1.3710 +/* Compute a = |a| - |b|, assumes |a| >= |b|                              */
  1.3711 +mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
  1.3712 +{
  1.3713 +  mp_digit *pa, *pb, *limit;
  1.3714 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3715 +  mp_sword  w = 0;
  1.3716 +#else
  1.3717 +  mp_digit  d, diff, borrow = 0;
  1.3718 +#endif
  1.3719 +
  1.3720 +  /*
  1.3721 +    Subtract and propagate borrow.  Up to the precision of b, this
  1.3722 +    accounts for the digits of b; after that, we just make sure the
  1.3723 +    carries get to the right place.  This saves having to pad b out to
  1.3724 +    the precision of a just to make the loops work right...
  1.3725 +   */
  1.3726 +  pa = MP_DIGITS(a);
  1.3727 +  pb = MP_DIGITS(b);
  1.3728 +  limit = pb + MP_USED(b);
  1.3729 +  while (pb < limit) {
  1.3730 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3731 +    w = w + *pa - *pb++;
  1.3732 +    *pa++ = ACCUM(w);
  1.3733 +    w >>= MP_DIGIT_BIT;
  1.3734 +#else
  1.3735 +    d = *pa;
  1.3736 +    diff = d - *pb++;
  1.3737 +    d = (diff > d);				/* detect borrow */
  1.3738 +    if (borrow && --diff == MP_DIGIT_MAX)
  1.3739 +      ++d;
  1.3740 +    *pa++ = diff;
  1.3741 +    borrow = d;	
  1.3742 +#endif
  1.3743 +  }
  1.3744 +  limit = MP_DIGITS(a) + MP_USED(a);
  1.3745 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3746 +  while (w && pa < limit) {
  1.3747 +    w = w + *pa;
  1.3748 +    *pa++ = ACCUM(w);
  1.3749 +    w >>= MP_DIGIT_BIT;
  1.3750 +  }
  1.3751 +#else
  1.3752 +  while (borrow && pa < limit) {
  1.3753 +    d = *pa;
  1.3754 +    *pa++ = diff = d - borrow;
  1.3755 +    borrow = (diff > d);
  1.3756 +  }
  1.3757 +#endif
  1.3758 +
  1.3759 +  /* Clobber any leading zeroes we created    */
  1.3760 +  s_mp_clamp(a);
  1.3761 +
  1.3762 +  /* 
  1.3763 +     If there was a borrow out, then |b| > |a| in violation
  1.3764 +     of our input invariant.  We've already done the work,
  1.3765 +     but we'll at least complain about it...
  1.3766 +   */
  1.3767 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3768 +  return w ? MP_RANGE : MP_OKAY;
  1.3769 +#else
  1.3770 +  return borrow ? MP_RANGE : MP_OKAY;
  1.3771 +#endif
  1.3772 +} /* end s_mp_sub() */
  1.3773 +
  1.3774 +/* }}} */
  1.3775 +
  1.3776 +/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
  1.3777 +mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)  
  1.3778 +{
  1.3779 +  mp_digit *pa, *pb, *pc;
  1.3780 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3781 +  mp_sword  w = 0;
  1.3782 +#else
  1.3783 +  mp_digit  d, diff, borrow = 0;
  1.3784 +#endif
  1.3785 +  int       ix, limit;
  1.3786 +  mp_err    res;
  1.3787 +
  1.3788 +  MP_SIGN(c) = MP_SIGN(a);
  1.3789 +
  1.3790 +  /* Make sure a has enough precision for the output value */
  1.3791 +  if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
  1.3792 +    return res;
  1.3793 +
  1.3794 +  /*
  1.3795 +    Subtract and propagate borrow.  Up to the precision of b, this
  1.3796 +    accounts for the digits of b; after that, we just make sure the
  1.3797 +    carries get to the right place.  This saves having to pad b out to
  1.3798 +    the precision of a just to make the loops work right...
  1.3799 +   */
  1.3800 +  pa = MP_DIGITS(a);
  1.3801 +  pb = MP_DIGITS(b);
  1.3802 +  pc = MP_DIGITS(c);
  1.3803 +  limit = MP_USED(b);
  1.3804 +  for (ix = 0; ix < limit; ++ix) {
  1.3805 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3806 +    w = w + *pa++ - *pb++;
  1.3807 +    *pc++ = ACCUM(w);
  1.3808 +    w >>= MP_DIGIT_BIT;
  1.3809 +#else
  1.3810 +    d = *pa++;
  1.3811 +    diff = d - *pb++;
  1.3812 +    d = (diff > d);
  1.3813 +    if (borrow && --diff == MP_DIGIT_MAX)
  1.3814 +      ++d;
  1.3815 +    *pc++ = diff;
  1.3816 +    borrow = d;
  1.3817 +#endif
  1.3818 +  }
  1.3819 +  for (limit = MP_USED(a); ix < limit; ++ix) {
  1.3820 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3821 +    w = w + *pa++;
  1.3822 +    *pc++ = ACCUM(w);
  1.3823 +    w >>= MP_DIGIT_BIT;
  1.3824 +#else
  1.3825 +    d = *pa++;
  1.3826 +    *pc++ = diff = d - borrow;
  1.3827 +    borrow = (diff > d);
  1.3828 +#endif
  1.3829 +  }
  1.3830 +
  1.3831 +  /* Clobber any leading zeroes we created    */
  1.3832 +  MP_USED(c) = ix;
  1.3833 +  s_mp_clamp(c);
  1.3834 +
  1.3835 +  /* 
  1.3836 +     If there was a borrow out, then |b| > |a| in violation
  1.3837 +     of our input invariant.  We've already done the work,
  1.3838 +     but we'll at least complain about it...
  1.3839 +   */
  1.3840 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
  1.3841 +  return w ? MP_RANGE : MP_OKAY;
  1.3842 +#else
  1.3843 +  return borrow ? MP_RANGE : MP_OKAY;
  1.3844 +#endif
  1.3845 +}
  1.3846 +/* {{{ s_mp_mul(a, b) */
  1.3847 +
  1.3848 +/* Compute a = |a| * |b|                                                  */
  1.3849 +mp_err   s_mp_mul(mp_int *a, const mp_int *b)
  1.3850 +{
  1.3851 +  return mp_mul(a, b, a);
  1.3852 +} /* end s_mp_mul() */
  1.3853 +
  1.3854 +/* }}} */
  1.3855 +
  1.3856 +#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
  1.3857 +/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
  1.3858 +#define MP_MUL_DxD(a, b, Phi, Plo) \
  1.3859 +  { unsigned long long product = (unsigned long long)a * b; \
  1.3860 +    Plo = (mp_digit)product; \
  1.3861 +    Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
  1.3862 +#elif defined(OSF1)
  1.3863 +#define MP_MUL_DxD(a, b, Phi, Plo) \
  1.3864 +  { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
  1.3865 +    Phi = asm ("umulh %a0, %a1, %v0", a, b); }
  1.3866 +#else
  1.3867 +#define MP_MUL_DxD(a, b, Phi, Plo) \
  1.3868 +  { mp_digit a0b1, a1b0; \
  1.3869 +    Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
  1.3870 +    Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
  1.3871 +    a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
  1.3872 +    a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
  1.3873 +    a1b0 += a0b1; \
  1.3874 +    Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
  1.3875 +    if (a1b0 < a0b1)  \
  1.3876 +      Phi += MP_HALF_RADIX; \
  1.3877 +    a1b0 <<= MP_HALF_DIGIT_BIT; \
  1.3878 +    Plo += a1b0; \
  1.3879 +    if (Plo < a1b0) \
  1.3880 +      ++Phi; \
  1.3881 +  }
  1.3882 +#endif
  1.3883 +
  1.3884 +#if !defined(MP_ASSEMBLY_MULTIPLY)
  1.3885 +/* c = a * b */
  1.3886 +void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  1.3887 +{
  1.3888 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
  1.3889 +  mp_digit   d = 0;
  1.3890 +
  1.3891 +  /* Inner product:  Digits of a */
  1.3892 +  while (a_len--) {
  1.3893 +    mp_word w = ((mp_word)b * *a++) + d;
  1.3894 +    *c++ = ACCUM(w);
  1.3895 +    d = CARRYOUT(w);
  1.3896 +  }
  1.3897 +  *c = d;
  1.3898 +#else
  1.3899 +  mp_digit carry = 0;
  1.3900 +  while (a_len--) {
  1.3901 +    mp_digit a_i = *a++;
  1.3902 +    mp_digit a0b0, a1b1;
  1.3903 +
  1.3904 +    MP_MUL_DxD(a_i, b, a1b1, a0b0);
  1.3905 +
  1.3906 +    a0b0 += carry;
  1.3907 +    if (a0b0 < carry)
  1.3908 +      ++a1b1;
  1.3909 +    *c++ = a0b0;
  1.3910 +    carry = a1b1;
  1.3911 +  }
  1.3912 +  *c = carry;
  1.3913 +#endif
  1.3914 +}
  1.3915 +
  1.3916 +/* c += a * b */
  1.3917 +void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, 
  1.3918 +			      mp_digit *c)
  1.3919 +{
  1.3920 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
  1.3921 +  mp_digit   d = 0;
  1.3922 +
  1.3923 +  /* Inner product:  Digits of a */
  1.3924 +  while (a_len--) {
  1.3925 +    mp_word w = ((mp_word)b * *a++) + *c + d;
  1.3926 +    *c++ = ACCUM(w);
  1.3927 +    d = CARRYOUT(w);
  1.3928 +  }
  1.3929 +  *c = d;
  1.3930 +#else
  1.3931 +  mp_digit carry = 0;
  1.3932 +  while (a_len--) {
  1.3933 +    mp_digit a_i = *a++;
  1.3934 +    mp_digit a0b0, a1b1;
  1.3935 +
  1.3936 +    MP_MUL_DxD(a_i, b, a1b1, a0b0);
  1.3937 +
  1.3938 +    a0b0 += carry;
  1.3939 +    if (a0b0 < carry)
  1.3940 +      ++a1b1;
  1.3941 +    a0b0 += a_i = *c;
  1.3942 +    if (a0b0 < a_i)
  1.3943 +      ++a1b1;
  1.3944 +    *c++ = a0b0;
  1.3945 +    carry = a1b1;
  1.3946 +  }
  1.3947 +  *c = carry;
  1.3948 +#endif
  1.3949 +}
  1.3950 +
  1.3951 +/* Presently, this is only used by the Montgomery arithmetic code. */
  1.3952 +/* c += a * b */
  1.3953 +void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
  1.3954 +{
  1.3955 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
  1.3956 +  mp_digit   d = 0;
  1.3957 +
  1.3958 +  /* Inner product:  Digits of a */
  1.3959 +  while (a_len--) {
  1.3960 +    mp_word w = ((mp_word)b * *a++) + *c + d;
  1.3961 +    *c++ = ACCUM(w);
  1.3962 +    d = CARRYOUT(w);
  1.3963 +  }
  1.3964 +
  1.3965 +  while (d) {
  1.3966 +    mp_word w = (mp_word)*c + d;
  1.3967 +    *c++ = ACCUM(w);
  1.3968 +    d = CARRYOUT(w);
  1.3969 +  }
  1.3970 +#else
  1.3971 +  mp_digit carry = 0;
  1.3972 +  while (a_len--) {
  1.3973 +    mp_digit a_i = *a++;
  1.3974 +    mp_digit a0b0, a1b1;
  1.3975 +
  1.3976 +    MP_MUL_DxD(a_i, b, a1b1, a0b0);
  1.3977 +
  1.3978 +    a0b0 += carry;
  1.3979 +    if (a0b0 < carry)
  1.3980 +      ++a1b1;
  1.3981 +
  1.3982 +    a0b0 += a_i = *c;
  1.3983 +    if (a0b0 < a_i)
  1.3984 +      ++a1b1;
  1.3985 +
  1.3986 +    *c++ = a0b0;
  1.3987 +    carry = a1b1;
  1.3988 +  }
  1.3989 +  while (carry) {
  1.3990 +    mp_digit c_i = *c;
  1.3991 +    carry += c_i;
  1.3992 +    *c++ = carry;
  1.3993 +    carry = carry < c_i;
  1.3994 +  }
  1.3995 +#endif
  1.3996 +}
  1.3997 +#endif
  1.3998 +
  1.3999 +#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
  1.4000 +/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
  1.4001 +#define MP_SQR_D(a, Phi, Plo) \
  1.4002 +  { unsigned long long square = (unsigned long long)a * a; \
  1.4003 +    Plo = (mp_digit)square; \
  1.4004 +    Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
  1.4005 +#elif defined(OSF1)
  1.4006 +#define MP_SQR_D(a, Phi, Plo) \
  1.4007 +  { Plo = asm ("mulq  %a0, %a0, %v0", a);\
  1.4008 +    Phi = asm ("umulh %a0, %a0, %v0", a); }
  1.4009 +#else
  1.4010 +#define MP_SQR_D(a, Phi, Plo) \
  1.4011 +  { mp_digit Pmid; \
  1.4012 +    Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
  1.4013 +    Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
  1.4014 +    Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
  1.4015 +    Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
  1.4016 +    Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
  1.4017 +    Plo += Pmid;  \
  1.4018 +    if (Plo < Pmid)  \
  1.4019 +      ++Phi;  \
  1.4020 +  }
  1.4021 +#endif
  1.4022 +
  1.4023 +#if !defined(MP_ASSEMBLY_SQUARE)
  1.4024 +/* Add the squares of the digits of a to the digits of b. */
  1.4025 +void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
  1.4026 +{
  1.4027 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
  1.4028 +  mp_word  w;
  1.4029 +  mp_digit d;
  1.4030 +  mp_size  ix;
  1.4031 +
  1.4032 +  w  = 0;
  1.4033 +#define ADD_SQUARE(n) \
  1.4034 +    d = pa[n]; \
  1.4035 +    w += (d * (mp_word)d) + ps[2*n]; \
  1.4036 +    ps[2*n] = ACCUM(w); \
  1.4037 +    w = (w >> DIGIT_BIT) + ps[2*n+1]; \
  1.4038 +    ps[2*n+1] = ACCUM(w); \
  1.4039 +    w = (w >> DIGIT_BIT)
  1.4040 +
  1.4041 +  for (ix = a_len; ix >= 4; ix -= 4) {
  1.4042 +    ADD_SQUARE(0);
  1.4043 +    ADD_SQUARE(1);
  1.4044 +    ADD_SQUARE(2);
  1.4045 +    ADD_SQUARE(3);
  1.4046 +    pa += 4;
  1.4047 +    ps += 8;
  1.4048 +  }
  1.4049 +  if (ix) {
  1.4050 +    ps += 2*ix;
  1.4051 +    pa += ix;
  1.4052 +    switch (ix) {
  1.4053 +    case 3: ADD_SQUARE(-3); /* FALLTHRU */
  1.4054 +    case 2: ADD_SQUARE(-2); /* FALLTHRU */
  1.4055 +    case 1: ADD_SQUARE(-1); /* FALLTHRU */
  1.4056 +    case 0: break;
  1.4057 +    }
  1.4058 +  }
  1.4059 +  while (w) {
  1.4060 +    w += *ps;
  1.4061 +    *ps++ = ACCUM(w);
  1.4062 +    w = (w >> DIGIT_BIT);
  1.4063 +  }
  1.4064 +#else
  1.4065 +  mp_digit carry = 0;
  1.4066 +  while (a_len--) {
  1.4067 +    mp_digit a_i = *pa++;
  1.4068 +    mp_digit a0a0, a1a1;
  1.4069 +
  1.4070 +    MP_SQR_D(a_i, a1a1, a0a0);
  1.4071 +
  1.4072 +    /* here a1a1 and a0a0 constitute a_i ** 2 */
  1.4073 +    a0a0 += carry;
  1.4074 +    if (a0a0 < carry)
  1.4075 +      ++a1a1;
  1.4076 +
  1.4077 +    /* now add to ps */
  1.4078 +    a0a0 += a_i = *ps;
  1.4079 +    if (a0a0 < a_i)
  1.4080 +      ++a1a1;
  1.4081 +    *ps++ = a0a0;
  1.4082 +    a1a1 += a_i = *ps;
  1.4083 +    carry = (a1a1 < a_i);
  1.4084 +    *ps++ = a1a1;
  1.4085 +  }
  1.4086 +  while (carry) {
  1.4087 +    mp_digit s_i = *ps;
  1.4088 +    carry += s_i;
  1.4089 +    *ps++ = carry;
  1.4090 +    carry = carry < s_i;
  1.4091 +  }
  1.4092 +#endif
  1.4093 +}
  1.4094 +#endif
  1.4095 +
  1.4096 +#if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
  1.4097 +&& !defined(MP_ASSEMBLY_DIV_2DX1D)
  1.4098 +/*
  1.4099 +** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized 
  1.4100 +** so its high bit is 1.   This code is from NSPR.
  1.4101 +*/
  1.4102 +mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, 
  1.4103 +		       mp_digit *qp, mp_digit *rp)
  1.4104 +{
  1.4105 +    mp_digit d1, d0, q1, q0;
  1.4106 +    mp_digit r1, r0, m;
  1.4107 +
  1.4108 +    d1 = divisor >> MP_HALF_DIGIT_BIT;
  1.4109 +    d0 = divisor & MP_HALF_DIGIT_MAX;
  1.4110 +    r1 = Nhi % d1;
  1.4111 +    q1 = Nhi / d1;
  1.4112 +    m = q1 * d0;
  1.4113 +    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
  1.4114 +    if (r1 < m) {
  1.4115 +        q1--, r1 += divisor;
  1.4116 +        if (r1 >= divisor && r1 < m) {
  1.4117 +	    q1--, r1 += divisor;
  1.4118 +	}
  1.4119 +    }
  1.4120 +    r1 -= m;
  1.4121 +    r0 = r1 % d1;
  1.4122 +    q0 = r1 / d1;
  1.4123 +    m = q0 * d0;
  1.4124 +    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
  1.4125 +    if (r0 < m) {
  1.4126 +        q0--, r0 += divisor;
  1.4127 +        if (r0 >= divisor && r0 < m) {
  1.4128 +	    q0--, r0 += divisor;
  1.4129 +	}
  1.4130 +    }
  1.4131 +    if (qp)
  1.4132 +	*qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
  1.4133 +    if (rp)
  1.4134 +	*rp = r0 - m;
  1.4135 +    return MP_OKAY;
  1.4136 +}
  1.4137 +#endif
  1.4138 +
  1.4139 +#if MP_SQUARE
  1.4140 +/* {{{ s_mp_sqr(a) */
  1.4141 +
  1.4142 +mp_err   s_mp_sqr(mp_int *a)
  1.4143 +{
  1.4144 +  mp_err   res;
  1.4145 +  mp_int   tmp;
  1.4146 +
  1.4147 +  if((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
  1.4148 +    return res;
  1.4149 +  res = mp_sqr(a, &tmp);
  1.4150 +  if (res == MP_OKAY) {
  1.4151 +    s_mp_exch(&tmp, a);
  1.4152 +  }
  1.4153 +  mp_clear(&tmp);
  1.4154 +  return res;
  1.4155 +}
  1.4156 +
  1.4157 +/* }}} */
  1.4158 +#endif
  1.4159 +
  1.4160 +/* {{{ s_mp_div(a, b) */
  1.4161 +
  1.4162 +/*
  1.4163 +  s_mp_div(a, b)
  1.4164 +
  1.4165 +  Compute a = a / b and b = a mod b.  Assumes b > a.
  1.4166 + */
  1.4167 +
  1.4168 +mp_err   s_mp_div(mp_int *rem, 	/* i: dividend, o: remainder */
  1.4169 +                  mp_int *div, 	/* i: divisor                */
  1.4170 +		  mp_int *quot)	/* i: 0;        o: quotient  */
  1.4171 +{
  1.4172 +  mp_int   part, t;
  1.4173 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
  1.4174 +  mp_word  q_msd;
  1.4175 +#else
  1.4176 +  mp_digit q_msd;
  1.4177 +#endif
  1.4178 +  mp_err   res;
  1.4179 +  mp_digit d;
  1.4180 +  mp_digit div_msd;
  1.4181 +  int      ix;
  1.4182 +
  1.4183 +  if(mp_cmp_z(div) == 0)
  1.4184 +    return MP_RANGE;
  1.4185 +
  1.4186 +  DIGITS(&t) = 0;
  1.4187 +  /* Shortcut if divisor is power of two */
  1.4188 +  if((ix = s_mp_ispow2(div)) >= 0) {
  1.4189 +    MP_CHECKOK( mp_copy(rem, quot) );
  1.4190 +    s_mp_div_2d(quot, (mp_digit)ix);
  1.4191 +    s_mp_mod_2d(rem,  (mp_digit)ix);
  1.4192 +
  1.4193 +    return MP_OKAY;
  1.4194 +  }
  1.4195 +
  1.4196 +  MP_SIGN(rem) = ZPOS;
  1.4197 +  MP_SIGN(div) = ZPOS;
  1.4198 +
  1.4199 +  /* A working temporary for division     */
  1.4200 +  MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem)));
  1.4201 +
  1.4202 +  /* Normalize to optimize guessing       */
  1.4203 +  MP_CHECKOK( s_mp_norm(rem, div, &d) );
  1.4204 +
  1.4205 +  part = *rem;
  1.4206 +
  1.4207 +  /* Perform the division itself...woo!   */
  1.4208 +  MP_USED(quot) = MP_ALLOC(quot);
  1.4209 +
  1.4210 +  /* Find a partial substring of rem which is at least div */
  1.4211 +  /* If we didn't find one, we're finished dividing    */
  1.4212 +  while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
  1.4213 +    int i;
  1.4214 +    int unusedRem;
  1.4215 +
  1.4216 +    unusedRem = MP_USED(rem) - MP_USED(div);
  1.4217 +    MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
  1.4218 +    MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
  1.4219 +    MP_USED(&part)   = MP_USED(div);
  1.4220 +    if (s_mp_cmp(&part, div) < 0) {
  1.4221 +      -- unusedRem;
  1.4222 +#if MP_ARGCHK == 2
  1.4223 +      assert(unusedRem >= 0);
  1.4224 +#endif
  1.4225 +      -- MP_DIGITS(&part);
  1.4226 +      ++ MP_USED(&part);
  1.4227 +      ++ MP_ALLOC(&part);
  1.4228 +    }
  1.4229 +
  1.4230 +    /* Compute a guess for the next quotient digit       */
  1.4231 +    q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
  1.4232 +    div_msd = MP_DIGIT(div, MP_USED(div) - 1);
  1.4233 +    if (q_msd >= div_msd) {
  1.4234 +      q_msd = 1;
  1.4235 +    } else if (MP_USED(&part) > 1) {
  1.4236 +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
  1.4237 +      q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
  1.4238 +      q_msd /= div_msd;
  1.4239 +      if (q_msd == RADIX)
  1.4240 +        --q_msd;
  1.4241 +#else
  1.4242 +      mp_digit r;
  1.4243 +      MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2), 
  1.4244 +				  div_msd, &q_msd, &r) );
  1.4245 +#endif
  1.4246 +    } else {
  1.4247 +      q_msd = 0;
  1.4248 +    }
  1.4249 +#if MP_ARGCHK == 2
  1.4250 +    assert(q_msd > 0); /* This case should never occur any more. */
  1.4251 +#endif
  1.4252 +    if (q_msd <= 0)
  1.4253 +      break;
  1.4254 +
  1.4255 +    /* See what that multiplies out to                   */
  1.4256 +    mp_copy(div, &t);
  1.4257 +    MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
  1.4258 +
  1.4259 +    /* 
  1.4260 +       If it's too big, back it off.  We should not have to do this
  1.4261 +       more than once, or, in rare cases, twice.  Knuth describes a
  1.4262 +       method by which this could be reduced to a maximum of once, but
  1.4263 +       I didn't implement that here.
  1.4264 +     * When using s_mpv_div_2dx1d, we may have to do this 3 times.
  1.4265 +     */
  1.4266 +    for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
  1.4267 +      --q_msd;
  1.4268 +      s_mp_sub(&t, div);	/* t -= div */
  1.4269 +    }
  1.4270 +    if (i < 0) {
  1.4271 +      res = MP_RANGE;
  1.4272 +      goto CLEANUP;
  1.4273 +    }
  1.4274 +
  1.4275 +    /* At this point, q_msd should be the right next digit   */
  1.4276 +    MP_CHECKOK( s_mp_sub(&part, &t) );	/* part -= t */
  1.4277 +    s_mp_clamp(rem);
  1.4278 +
  1.4279 +    /*
  1.4280 +      Include the digit in the quotient.  We allocated enough memory
  1.4281 +      for any quotient we could ever possibly get, so we should not
  1.4282 +      have to check for failures here
  1.4283 +     */
  1.4284 +    MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
  1.4285 +  }
  1.4286 +
  1.4287 +  /* Denormalize remainder                */
  1.4288 +  if (d) {
  1.4289 +    s_mp_div_2d(rem, d);
  1.4290 +  }
  1.4291 +
  1.4292 +  s_mp_clamp(quot);
  1.4293 +
  1.4294 +CLEANUP:
  1.4295 +  mp_clear(&t);
  1.4296 +
  1.4297 +  return res;
  1.4298 +
  1.4299 +} /* end s_mp_div() */
  1.4300 +
  1.4301 +
  1.4302 +/* }}} */
  1.4303 +
  1.4304 +/* {{{ s_mp_2expt(a, k) */
  1.4305 +
  1.4306 +mp_err   s_mp_2expt(mp_int *a, mp_digit k)
  1.4307 +{
  1.4308 +  mp_err    res;
  1.4309 +  mp_size   dig, bit;
  1.4310 +
  1.4311 +  dig = k / DIGIT_BIT;
  1.4312 +  bit = k % DIGIT_BIT;
  1.4313 +
  1.4314 +  mp_zero(a);
  1.4315 +  if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
  1.4316 +    return res;
  1.4317 +  
  1.4318 +  DIGIT(a, dig) |= ((mp_digit)1 << bit);
  1.4319 +
  1.4320 +  return MP_OKAY;
  1.4321 +
  1.4322 +} /* end s_mp_2expt() */
  1.4323 +
  1.4324 +/* }}} */
  1.4325 +
  1.4326 +/* {{{ s_mp_reduce(x, m, mu) */
  1.4327 +
  1.4328 +/*
  1.4329 +  Compute Barrett reduction, x (mod m), given a precomputed value for
  1.4330 +  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
  1.4331 +  faster than straight division, when many reductions by the same
  1.4332 +  value of m are required (such as in modular exponentiation).  This
  1.4333 +  can nearly halve the time required to do modular exponentiation,
  1.4334 +  as compared to using the full integer divide to reduce.
  1.4335 +
  1.4336 +  This algorithm was derived from the _Handbook of Applied
  1.4337 +  Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
  1.4338 +  pp. 603-604.  
  1.4339 + */
  1.4340 +
  1.4341 +mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
  1.4342 +{
  1.4343 +  mp_int   q;
  1.4344 +  mp_err   res;
  1.4345 +
  1.4346 +  if((res = mp_init_copy(&q, x)) != MP_OKAY)
  1.4347 +    return res;
  1.4348 +
  1.4349 +  s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
  1.4350 +  s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
  1.4351 +  s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
  1.4352 +
  1.4353 +  /* x = x mod b^(k+1), quick (no division) */
  1.4354 +  s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
  1.4355 +
  1.4356 +  /* q = q * m mod b^(k+1), quick (no division) */
  1.4357 +  s_mp_mul(&q, m);
  1.4358 +  s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
  1.4359 +
  1.4360 +  /* x = x - q */
  1.4361 +  if((res = mp_sub(x, &q, x)) != MP_OKAY)
  1.4362 +    goto CLEANUP;
  1.4363 +
  1.4364 +  /* If x < 0, add b^(k+1) to it */
  1.4365 +  if(mp_cmp_z(x) < 0) {
  1.4366 +    mp_set(&q, 1);
  1.4367 +    if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
  1.4368 +      goto CLEANUP;
  1.4369 +    if((res = mp_add(x, &q, x)) != MP_OKAY)
  1.4370 +      goto CLEANUP;
  1.4371 +  }
  1.4372 +
  1.4373 +  /* Back off if it's too big */
  1.4374 +  while(mp_cmp(x, m) >= 0) {
  1.4375 +    if((res = s_mp_sub(x, m)) != MP_OKAY)
  1.4376 +      break;
  1.4377 +  }
  1.4378 +
  1.4379 + CLEANUP:
  1.4380 +  mp_clear(&q);
  1.4381 +
  1.4382 +  return res;
  1.4383 +
  1.4384 +} /* end s_mp_reduce() */
  1.4385 +
  1.4386 +/* }}} */
  1.4387 +
  1.4388 +/* }}} */
  1.4389 +
  1.4390 +/* {{{ Primitive comparisons */
  1.4391 +
  1.4392 +/* {{{ s_mp_cmp(a, b) */
  1.4393 +
  1.4394 +/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
  1.4395 +int      s_mp_cmp(const mp_int *a, const mp_int *b)
  1.4396 +{
  1.4397 +  mp_size used_a = MP_USED(a);
  1.4398 +  {
  1.4399 +    mp_size used_b = MP_USED(b);
  1.4400 +
  1.4401 +    if (used_a > used_b)
  1.4402 +      goto IS_GT;
  1.4403 +    if (used_a < used_b)
  1.4404 +      goto IS_LT;
  1.4405 +  }
  1.4406 +  {
  1.4407 +    mp_digit *pa, *pb;
  1.4408 +    mp_digit da = 0, db = 0;
  1.4409 +
  1.4410 +#define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
  1.4411 +
  1.4412 +    pa = MP_DIGITS(a) + used_a;
  1.4413 +    pb = MP_DIGITS(b) + used_a;
  1.4414 +    while (used_a >= 4) {
  1.4415 +      pa     -= 4;
  1.4416 +      pb     -= 4;
  1.4417 +      used_a -= 4;
  1.4418 +      CMP_AB(3);
  1.4419 +      CMP_AB(2);
  1.4420 +      CMP_AB(1);
  1.4421 +      CMP_AB(0);
  1.4422 +    }
  1.4423 +    while (used_a-- > 0 && ((da = *--pa) == (db = *--pb))) 
  1.4424 +      /* do nothing */;
  1.4425 +done:
  1.4426 +    if (da > db)
  1.4427 +      goto IS_GT;
  1.4428 +    if (da < db) 
  1.4429 +      goto IS_LT;
  1.4430 +  }
  1.4431 +  return MP_EQ;
  1.4432 +IS_LT:
  1.4433 +  return MP_LT;
  1.4434 +IS_GT:
  1.4435 +  return MP_GT;
  1.4436 +} /* end s_mp_cmp() */
  1.4437 +
  1.4438 +/* }}} */
  1.4439 +
  1.4440 +/* {{{ s_mp_cmp_d(a, d) */
  1.4441 +
  1.4442 +/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
  1.4443 +int      s_mp_cmp_d(const mp_int *a, mp_digit d)
  1.4444 +{
  1.4445 +  if(USED(a) > 1)
  1.4446 +    return MP_GT;
  1.4447 +
  1.4448 +  if(DIGIT(a, 0) < d)
  1.4449 +    return MP_LT;
  1.4450 +  else if(DIGIT(a, 0) > d)
  1.4451 +    return MP_GT;
  1.4452 +  else
  1.4453 +    return MP_EQ;
  1.4454 +
  1.4455 +} /* end s_mp_cmp_d() */
  1.4456 +
  1.4457 +/* }}} */
  1.4458 +
  1.4459 +/* {{{ s_mp_ispow2(v) */
  1.4460 +
  1.4461 +/*
  1.4462 +  Returns -1 if the value is not a power of two; otherwise, it returns
  1.4463 +  k such that v = 2^k, i.e. lg(v).
  1.4464 + */
  1.4465 +int      s_mp_ispow2(const mp_int *v)
  1.4466 +{
  1.4467 +  mp_digit d;
  1.4468 +  int      extra = 0, ix;
  1.4469 +
  1.4470 +  ix = MP_USED(v) - 1;
  1.4471 +  d = MP_DIGIT(v, ix); /* most significant digit of v */
  1.4472 +
  1.4473 +  extra = s_mp_ispow2d(d);
  1.4474 +  if (extra < 0 || ix == 0)
  1.4475 +    return extra;
  1.4476 +
  1.4477 +  while (--ix >= 0) {
  1.4478 +    if (DIGIT(v, ix) != 0)
  1.4479 +      return -1; /* not a power of two */
  1.4480 +    extra += MP_DIGIT_BIT;
  1.4481 +  }
  1.4482 +
  1.4483 +  return extra;
  1.4484 +
  1.4485 +} /* end s_mp_ispow2() */
  1.4486 +
  1.4487 +/* }}} */
  1.4488 +
  1.4489 +/* {{{ s_mp_ispow2d(d) */
  1.4490 +
  1.4491 +int      s_mp_ispow2d(mp_digit d)
  1.4492 +{
  1.4493 +  if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
  1.4494 +    int pow = 0;
  1.4495 +#if defined (MP_USE_UINT_DIGIT)
  1.4496 +    if (d & 0xffff0000U)
  1.4497 +      pow += 16;
  1.4498 +    if (d & 0xff00ff00U)
  1.4499 +      pow += 8;
  1.4500 +    if (d & 0xf0f0f0f0U)
  1.4501 +      pow += 4;
  1.4502 +    if (d & 0xccccccccU)
  1.4503 +      pow += 2;
  1.4504 +    if (d & 0xaaaaaaaaU)
  1.4505 +      pow += 1;
  1.4506 +#elif defined(MP_USE_LONG_LONG_DIGIT)
  1.4507 +    if (d & 0xffffffff00000000ULL)
  1.4508 +      pow += 32;
  1.4509 +    if (d & 0xffff0000ffff0000ULL)
  1.4510 +      pow += 16;
  1.4511 +    if (d & 0xff00ff00ff00ff00ULL)
  1.4512 +      pow += 8;
  1.4513 +    if (d & 0xf0f0f0f0f0f0f0f0ULL)
  1.4514 +      pow += 4;
  1.4515 +    if (d & 0xccccccccccccccccULL)
  1.4516 +      pow += 2;
  1.4517 +    if (d & 0xaaaaaaaaaaaaaaaaULL)
  1.4518 +      pow += 1;
  1.4519 +#elif defined(MP_USE_LONG_DIGIT)
  1.4520 +    if (d & 0xffffffff00000000UL)
  1.4521 +      pow += 32;
  1.4522 +    if (d & 0xffff0000ffff0000UL)
  1.4523 +      pow += 16;
  1.4524 +    if (d & 0xff00ff00ff00ff00UL)
  1.4525 +      pow += 8;
  1.4526 +    if (d & 0xf0f0f0f0f0f0f0f0UL)
  1.4527 +      pow += 4;
  1.4528 +    if (d & 0xccccccccccccccccUL)
  1.4529 +      pow += 2;
  1.4530 +    if (d & 0xaaaaaaaaaaaaaaaaUL)
  1.4531 +      pow += 1;
  1.4532 +#else
  1.4533 +#error "unknown type for mp_digit"
  1.4534 +#endif
  1.4535 +    return pow;
  1.4536 +  }
  1.4537 +  return -1;
  1.4538 +
  1.4539 +} /* end s_mp_ispow2d() */
  1.4540 +
  1.4541 +/* }}} */
  1.4542 +
  1.4543 +/* }}} */
  1.4544 +
  1.4545 +/* {{{ Primitive I/O helpers */
  1.4546 +
  1.4547 +/* {{{ s_mp_tovalue(ch, r) */
  1.4548 +
  1.4549 +/*
  1.4550 +  Convert the given character to its digit value, in the given radix.
  1.4551 +  If the given character is not understood in the given radix, -1 is
  1.4552 +  returned.  Otherwise the digit's numeric value is returned.
  1.4553 +
  1.4554 +  The results will be odd if you use a radix < 2 or > 62, you are
  1.4555 +  expected to know what you're up to.
  1.4556 + */
  1.4557 +int      s_mp_tovalue(char ch, int r)
  1.4558 +{
  1.4559 +  int    val, xch;
  1.4560 +  
  1.4561 +  if(r > 36)
  1.4562 +    xch = ch;
  1.4563 +  else
  1.4564 +    xch = toupper(ch);
  1.4565 +
  1.4566 +  if(isdigit(xch))
  1.4567 +    val = xch - '0';
  1.4568 +  else if(isupper(xch))
  1.4569 +    val = xch - 'A' + 10;
  1.4570 +  else if(islower(xch))
  1.4571 +    val = xch - 'a' + 36;
  1.4572 +  else if(xch == '+')
  1.4573 +    val = 62;
  1.4574 +  else if(xch == '/')
  1.4575 +    val = 63;
  1.4576 +  else 
  1.4577 +    return -1;
  1.4578 +
  1.4579 +  if(val < 0 || val >= r)
  1.4580 +    return -1;
  1.4581 +
  1.4582 +  return val;
  1.4583 +
  1.4584 +} /* end s_mp_tovalue() */
  1.4585 +
  1.4586 +/* }}} */
  1.4587 +
  1.4588 +/* {{{ s_mp_todigit(val, r, low) */
  1.4589 +
  1.4590 +/*
  1.4591 +  Convert val to a radix-r digit, if possible.  If val is out of range
  1.4592 +  for r, returns zero.  Otherwise, returns an ASCII character denoting
  1.4593 +  the value in the given radix.
  1.4594 +
  1.4595 +  The results may be odd if you use a radix < 2 or > 64, you are
  1.4596 +  expected to know what you're doing.
  1.4597 + */
  1.4598 +  
  1.4599 +char     s_mp_todigit(mp_digit val, int r, int low)
  1.4600 +{
  1.4601 +  char   ch;
  1.4602 +
  1.4603 +  if(val >= r)
  1.4604 +    return 0;
  1.4605 +
  1.4606 +  ch = s_dmap_1[val];
  1.4607 +
  1.4608 +  if(r <= 36 && low)
  1.4609 +    ch = tolower(ch);
  1.4610 +
  1.4611 +  return ch;
  1.4612 +
  1.4613 +} /* end s_mp_todigit() */
  1.4614 +
  1.4615 +/* }}} */
  1.4616 +
  1.4617 +/* {{{ s_mp_outlen(bits, radix) */
  1.4618 +
  1.4619 +/* 
  1.4620 +   Return an estimate for how long a string is needed to hold a radix
  1.4621 +   r representation of a number with 'bits' significant bits, plus an
  1.4622 +   extra for a zero terminator (assuming C style strings here)
  1.4623 + */
  1.4624 +int      s_mp_outlen(int bits, int r)
  1.4625 +{
  1.4626 +  return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
  1.4627 +
  1.4628 +} /* end s_mp_outlen() */
  1.4629 +
  1.4630 +/* }}} */
  1.4631 +
  1.4632 +/* }}} */
  1.4633 +
  1.4634 +/* {{{ mp_read_unsigned_octets(mp, str, len) */
  1.4635 +/* mp_read_unsigned_octets(mp, str, len)
  1.4636 +   Read in a raw value (base 256) into the given mp_int
  1.4637 +   No sign bit, number is positive.  Leading zeros ignored.
  1.4638 + */
  1.4639 +
  1.4640 +mp_err  
  1.4641 +mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
  1.4642 +{
  1.4643 +  int            count;
  1.4644 +  mp_err         res;
  1.4645 +  mp_digit       d;
  1.4646 +
  1.4647 +  ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
  1.4648 +
  1.4649 +  mp_zero(mp);
  1.4650 +
  1.4651 +  count = len % sizeof(mp_digit);
  1.4652 +  if (count) {
  1.4653 +    for (d = 0; count-- > 0; --len) {
  1.4654 +      d = (d << 8) | *str++;
  1.4655 +    }
  1.4656 +    MP_DIGIT(mp, 0) = d;
  1.4657 +  }
  1.4658 +
  1.4659 +  /* Read the rest of the digits */
  1.4660 +  for(; len > 0; len -= sizeof(mp_digit)) {
  1.4661 +    for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
  1.4662 +      d = (d << 8) | *str++;
  1.4663 +    }
  1.4664 +    if (MP_EQ == mp_cmp_z(mp)) {
  1.4665 +      if (!d)
  1.4666 +	continue;
  1.4667 +    } else {
  1.4668 +      if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
  1.4669 +	return res;
  1.4670 +    }
  1.4671 +    MP_DIGIT(mp, 0) = d;
  1.4672 +  }
  1.4673 +  return MP_OKAY;
  1.4674 +} /* end mp_read_unsigned_octets() */
  1.4675 +/* }}} */
  1.4676 +
  1.4677 +/* {{{ mp_unsigned_octet_size(mp) */
  1.4678 +int    
  1.4679 +mp_unsigned_octet_size(const mp_int *mp)
  1.4680 +{
  1.4681 +  int  bytes;
  1.4682 +  int  ix;
  1.4683 +  mp_digit  d = 0;
  1.4684 +
  1.4685 +  ARGCHK(mp != NULL, MP_BADARG);
  1.4686 +  ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
  1.4687 +
  1.4688 +  bytes = (USED(mp) * sizeof(mp_digit));
  1.4689 +
  1.4690 +  /* subtract leading zeros. */
  1.4691 +  /* Iterate over each digit... */
  1.4692 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.4693 +    d = DIGIT(mp, ix);
  1.4694 +    if (d) 
  1.4695 +	break;
  1.4696 +    bytes -= sizeof(d);
  1.4697 +  }
  1.4698 +  if (!bytes)
  1.4699 +    return 1;
  1.4700 +
  1.4701 +  /* Have MSD, check digit bytes, high order first */
  1.4702 +  for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
  1.4703 +    unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
  1.4704 +    if (x) 
  1.4705 +	break;
  1.4706 +    --bytes;
  1.4707 +  }
  1.4708 +  return bytes;
  1.4709 +} /* end mp_unsigned_octet_size() */
  1.4710 +/* }}} */
  1.4711 +
  1.4712 +/* {{{ mp_to_unsigned_octets(mp, str) */
  1.4713 +/* output a buffer of big endian octets no longer than specified. */
  1.4714 +mp_err 
  1.4715 +mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
  1.4716 +{
  1.4717 +  int  ix, pos = 0;
  1.4718 +  int  bytes;
  1.4719 +
  1.4720 +  ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
  1.4721 +
  1.4722 +  bytes = mp_unsigned_octet_size(mp);
  1.4723 +  ARGCHK(bytes >= 0 && bytes <= maxlen, MP_BADARG);
  1.4724 +
  1.4725 +  /* Iterate over each digit... */
  1.4726 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.4727 +    mp_digit  d = DIGIT(mp, ix);
  1.4728 +    int       jx;
  1.4729 +
  1.4730 +    /* Unpack digit bytes, high order first */
  1.4731 +    for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
  1.4732 +      unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
  1.4733 +      if (!pos && !x)	/* suppress leading zeros */
  1.4734 +	continue;
  1.4735 +      str[pos++] = x;
  1.4736 +    }
  1.4737 +  }
  1.4738 +  if (!pos)
  1.4739 +    str[pos++] = 0;
  1.4740 +  return pos;
  1.4741 +} /* end mp_to_unsigned_octets() */
  1.4742 +/* }}} */
  1.4743 +
  1.4744 +/* {{{ mp_to_signed_octets(mp, str) */
  1.4745 +/* output a buffer of big endian octets no longer than specified. */
  1.4746 +mp_err 
  1.4747 +mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
  1.4748 +{
  1.4749 +  int  ix, pos = 0;
  1.4750 +  int  bytes;
  1.4751 +
  1.4752 +  ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
  1.4753 +
  1.4754 +  bytes = mp_unsigned_octet_size(mp);
  1.4755 +  ARGCHK(bytes >= 0 && bytes <= maxlen, MP_BADARG);
  1.4756 +
  1.4757 +  /* Iterate over each digit... */
  1.4758 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.4759 +    mp_digit  d = DIGIT(mp, ix);
  1.4760 +    int       jx;
  1.4761 +
  1.4762 +    /* Unpack digit bytes, high order first */
  1.4763 +    for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
  1.4764 +      unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
  1.4765 +      if (!pos) {
  1.4766 +	if (!x)		/* suppress leading zeros */
  1.4767 +	  continue;
  1.4768 +	if (x & 0x80) { /* add one leading zero to make output positive.  */
  1.4769 +	  ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
  1.4770 +	  if (bytes + 1 > maxlen)
  1.4771 +	    return MP_BADARG;
  1.4772 +	  str[pos++] = 0;
  1.4773 +	}
  1.4774 +      }
  1.4775 +      str[pos++] = x;
  1.4776 +    }
  1.4777 +  }
  1.4778 +  if (!pos)
  1.4779 +    str[pos++] = 0;
  1.4780 +  return pos;
  1.4781 +} /* end mp_to_signed_octets() */
  1.4782 +/* }}} */
  1.4783 +
  1.4784 +/* {{{ mp_to_fixlen_octets(mp, str) */
  1.4785 +/* output a buffer of big endian octets exactly as long as requested. */
  1.4786 +mp_err 
  1.4787 +mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
  1.4788 +{
  1.4789 +  int  ix, pos = 0;
  1.4790 +  int  bytes;
  1.4791 +
  1.4792 +  ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
  1.4793 +
  1.4794 +  bytes = mp_unsigned_octet_size(mp);
  1.4795 +  ARGCHK(bytes >= 0 && bytes <= length, MP_BADARG);
  1.4796 +
  1.4797 +  /* place any needed leading zeros */
  1.4798 +  for (;length > bytes; --length) {
  1.4799 +	*str++ = 0;
  1.4800 +  }
  1.4801 +
  1.4802 +  /* Iterate over each digit... */
  1.4803 +  for(ix = USED(mp) - 1; ix >= 0; ix--) {
  1.4804 +    mp_digit  d = DIGIT(mp, ix);
  1.4805 +    int       jx;
  1.4806 +
  1.4807 +    /* Unpack digit bytes, high order first */
  1.4808 +    for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
  1.4809 +      unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
  1.4810 +      if (!pos && !x)	/* suppress leading zeros */
  1.4811 +	continue;
  1.4812 +      str[pos++] = x;
  1.4813 +    }
  1.4814 +  }
  1.4815 +  if (!pos)
  1.4816 +    str[pos++] = 0;
  1.4817 +  return MP_OKAY;
  1.4818 +} /* end mp_to_fixlen_octets() */
  1.4819 +/* }}} */
  1.4820 +
  1.4821 +
  1.4822 +/*------------------------------------------------------------------------*/
  1.4823 +/* HERE THERE BE DRAGONS                                                  */

mercurial