michael@0: /* michael@0: * mpi-priv.h - Private header file for MPI michael@0: * Arbitrary precision integer arithmetic library michael@0: * michael@0: * NOTE WELL: the content of this header file is NOT part of the "public" michael@0: * API for the MPI library, and may change at any time. michael@0: * Application programs that use libmpi should NOT include this header file. michael@0: * michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #ifndef _MPI_PRIV_H_ michael@0: #define _MPI_PRIV_H_ 1 michael@0: michael@0: #include "mpi.h" michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #if MP_DEBUG michael@0: #include michael@0: michael@0: #define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);} michael@0: #else michael@0: #define DIAG(T,V) michael@0: #endif michael@0: michael@0: /* If we aren't using a wired-in logarithm table, we need to include michael@0: the math library to get the log() function michael@0: */ michael@0: michael@0: /* {{{ s_logv_2[] - log table for 2 in various bases */ michael@0: michael@0: #if MP_LOGTAB michael@0: /* michael@0: A table of the logs of 2 for various bases (the 0 and 1 entries of michael@0: this table are meaningless and should not be referenced). michael@0: michael@0: This table is used to compute output lengths for the mp_toradix() michael@0: function. Since a number n in radix r takes up about log_r(n) michael@0: digits, we estimate the output size by taking the least integer michael@0: greater than log_r(n), where: michael@0: michael@0: log_r(n) = log_2(n) * log_r(2) michael@0: michael@0: This table, therefore, is a table of log_r(2) for 2 <= r <= 36, michael@0: which are the output bases supported. michael@0: */ michael@0: michael@0: extern const float s_logv_2[]; michael@0: #define LOG_V_2(R) s_logv_2[(R)] michael@0: michael@0: #else michael@0: michael@0: /* michael@0: If MP_LOGTAB is not defined, use the math library to compute the michael@0: logarithms on the fly. Otherwise, use the table. michael@0: Pick which works best for your system. michael@0: */ michael@0: michael@0: #include michael@0: #define LOG_V_2(R) (log(2.0)/log(R)) michael@0: michael@0: #endif /* if MP_LOGTAB */ michael@0: michael@0: /* }}} */ michael@0: michael@0: /* {{{ Digit arithmetic macros */ michael@0: michael@0: /* michael@0: When adding and multiplying digits, the results can be larger than michael@0: can be contained in an mp_digit. Thus, an mp_word is used. These michael@0: macros mask off the upper and lower digits of the mp_word (the michael@0: mp_word may be more than 2 mp_digits wide, but we only concern michael@0: ourselves with the low-order 2 mp_digits) michael@0: */ michael@0: michael@0: #define CARRYOUT(W) (mp_digit)((W)>>DIGIT_BIT) michael@0: #define ACCUM(W) (mp_digit)(W) michael@0: michael@0: #define MP_MIN(a,b) (((a) < (b)) ? (a) : (b)) michael@0: #define MP_MAX(a,b) (((a) > (b)) ? (a) : (b)) michael@0: #define MP_HOWMANY(a,b) (((a) + (b) - 1)/(b)) michael@0: #define MP_ROUNDUP(a,b) (MP_HOWMANY(a,b) * (b)) michael@0: michael@0: /* }}} */ michael@0: michael@0: /* {{{ Comparison constants */ michael@0: michael@0: #define MP_LT -1 michael@0: #define MP_EQ 0 michael@0: #define MP_GT 1 michael@0: michael@0: /* }}} */ michael@0: michael@0: /* {{{ private function declarations */ michael@0: michael@0: /* michael@0: If MP_MACRO is false, these will be defined as actual functions; michael@0: otherwise, suitable macro definitions will be used. This works michael@0: around the fact that ANSI C89 doesn't support an 'inline' keyword michael@0: (although I hear C9x will ... about bloody time). At present, the michael@0: macro definitions are identical to the function bodies, but they'll michael@0: expand in place, instead of generating a function call. michael@0: michael@0: I chose these particular functions to be made into macros because michael@0: some profiling showed they are called a lot on a typical workload, michael@0: and yet they are primarily housekeeping. michael@0: */ michael@0: #if MP_MACRO == 0 michael@0: void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */ michael@0: void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */ michael@0: void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */ michael@0: void s_mp_free(void *ptr); /* general free function */ michael@0: extern unsigned long mp_allocs; michael@0: extern unsigned long mp_frees; michael@0: extern unsigned long mp_copies; michael@0: #else michael@0: michael@0: /* Even if these are defined as macros, we need to respect the settings michael@0: of the MP_MEMSET and MP_MEMCPY configuration options... michael@0: */ michael@0: #if MP_MEMSET == 0 michael@0: #define s_mp_setz(dp, count) \ michael@0: {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;} michael@0: #else michael@0: #define s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit)) michael@0: #endif /* MP_MEMSET */ michael@0: michael@0: #if MP_MEMCPY == 0 michael@0: #define s_mp_copy(sp, dp, count) \ michael@0: {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];} michael@0: #else michael@0: #define s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit)) michael@0: #endif /* MP_MEMCPY */ michael@0: michael@0: #define s_mp_alloc(nb, ni) calloc(nb, ni) michael@0: #define s_mp_free(ptr) {if(ptr) free(ptr);} michael@0: #endif /* MP_MACRO */ michael@0: michael@0: mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */ michael@0: mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */ michael@0: michael@0: #if MP_MACRO == 0 michael@0: void s_mp_clamp(mp_int *mp); /* clip leading zeroes */ michael@0: #else michael@0: #define s_mp_clamp(mp)\ michael@0: { mp_size used = MP_USED(mp); \ michael@0: while (used > 1 && DIGIT(mp, used - 1) == 0) --used; \ michael@0: MP_USED(mp) = used; \ michael@0: } michael@0: #endif /* MP_MACRO */ michael@0: michael@0: void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */ michael@0: michael@0: mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */ michael@0: void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */ michael@0: mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */ michael@0: void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */ michael@0: void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */ michael@0: void s_mp_div_2(mp_int *mp); /* divide by 2 in place */ michael@0: mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */ michael@0: mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd); michael@0: /* normalize for division */ michael@0: mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */ michael@0: mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */ michael@0: mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */ michael@0: mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r); michael@0: /* unsigned digit divide */ michael@0: mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu); michael@0: /* Barrett reduction */ michael@0: mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */ michael@0: mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c); michael@0: mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */ michael@0: mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c); michael@0: mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset); michael@0: /* a += b * RADIX^offset */ michael@0: mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */ michael@0: #if MP_SQUARE michael@0: mp_err s_mp_sqr(mp_int *a); /* magnitude square */ michael@0: #else michael@0: #define s_mp_sqr(a) s_mp_mul(a, a) michael@0: #endif michael@0: mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */ michael@0: mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); michael@0: mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */ michael@0: int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */ michael@0: int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */ michael@0: int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */ michael@0: int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */ michael@0: michael@0: int s_mp_tovalue(char ch, int r); /* convert ch to value */ michael@0: char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */ michael@0: int s_mp_outlen(int bits, int r); /* output length in bytes */ michael@0: mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */ michael@0: mp_err s_mp_invmod_odd_m( const mp_int *a, const mp_int *m, mp_int *c); michael@0: mp_err s_mp_invmod_2d( const mp_int *a, mp_size k, mp_int *c); michael@0: mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c); michael@0: michael@0: #ifdef NSS_USE_COMBA michael@0: michael@0: #define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1))) michael@0: michael@0: void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C); michael@0: void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C); michael@0: void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C); michael@0: void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C); michael@0: michael@0: void s_mp_sqr_comba_4(const mp_int *A, mp_int *B); michael@0: void s_mp_sqr_comba_8(const mp_int *A, mp_int *B); michael@0: void s_mp_sqr_comba_16(const mp_int *A, mp_int *B); michael@0: void s_mp_sqr_comba_32(const mp_int *A, mp_int *B); michael@0: michael@0: #endif /* end NSS_USE_COMBA */ michael@0: michael@0: /* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */ michael@0: #if defined (__OS2__) && defined (__IBMC__) michael@0: #define MPI_ASM_DECL __cdecl michael@0: #else michael@0: #define MPI_ASM_DECL michael@0: #endif michael@0: michael@0: #ifdef MPI_AMD64 michael@0: michael@0: mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit*, mp_digit *, mp_size, mp_digit); michael@0: mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit*, const mp_digit*, mp_size, mp_digit); michael@0: michael@0: /* c = a * b */ michael@0: #define s_mpv_mul_d(a, a_len, b, c) \ michael@0: ((mp_digit *)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b) michael@0: michael@0: /* c += a * b */ michael@0: #define s_mpv_mul_d_add(a, a_len, b, c) \ michael@0: ((mp_digit *)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b) michael@0: michael@0: michael@0: #else michael@0: michael@0: void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len, michael@0: mp_digit b, mp_digit *c); michael@0: void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, michael@0: mp_digit b, mp_digit *c); michael@0: michael@0: #endif michael@0: michael@0: void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, michael@0: mp_size a_len, mp_digit b, michael@0: mp_digit *c); michael@0: void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a, michael@0: mp_size a_len, michael@0: mp_digit *sqrs); michael@0: michael@0: mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, michael@0: mp_digit divisor, mp_digit *quot, mp_digit *rem); michael@0: michael@0: /* c += a * b * (MP_RADIX ** offset); */ michael@0: #define s_mp_mul_d_add_offset(a, b, c, off) \ michael@0: (s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY) michael@0: michael@0: typedef struct { michael@0: mp_int N; /* modulus N */ michael@0: mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */ michael@0: } mp_mont_modulus; michael@0: michael@0: mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, michael@0: mp_mont_modulus *mmm); michael@0: mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm); michael@0: michael@0: /* michael@0: * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line michael@0: * if a cache exists, or zero if there is no cache. If more than one michael@0: * cache line exists, it should return the smallest line size (which is michael@0: * usually the L1 cache). michael@0: * michael@0: * mp_modexp uses this information to make sure that private key information michael@0: * isn't being leaked through the cache. michael@0: * michael@0: * see mpcpucache.c for the implementation. michael@0: */ michael@0: unsigned long s_mpi_getProcessorLineSize(); michael@0: michael@0: /* }}} */ michael@0: #endif michael@0: