SmartAudio/lichee/brandy/u-boot-2014.07/include/openssl/bn_lcl.h

320 lines
8.3 KiB
C
Executable File

#ifndef HEADER_BN_LCL_H
#define HEADER_BN_LCL_H
#include "bn.h"
#ifdef __cplusplus
extern "C" {
#endif
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
# define BN_window_bits_for_ctime_exponent_size(b) \
((b) > 937 ? 6 : \
(b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
# define BN_window_bits_for_ctime_exponent_size(b) \
((b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
#endif
/* Pentium pro 16,16,16,32,64 */
/* Alpha 16,16,16,16.64 */
#define BN_MULL_SIZE_NORMAL (16) /* 32 */
#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
* BN_UMULT_HIGH section.
*
* No, I'm not trying to overwhelm you when stating that the
* product of N-bit numbers is 2*N bits wide:-) No, I don't expect
* you to be impressed when I say that if the compiler doesn't
* support 2*N integer type, then you have to replace every N*N
* multiplication with 4 (N/2)*(N/2) accompanied by some shifts
* and additions which unavoidably results in severe performance
* penalties. Of course provided that the hardware is capable of
* producing 2*N result... That's when you normally start
* considering assembler implementation. However! It should be
* pointed out that some CPUs (most notably Alpha, PowerPC and
* upcoming IA-64 family:-) provide *separate* instruction
* calculating the upper half of the product placing the result
* into a general purpose register. Now *if* the compiler supports
* inline assembler, then it's not impossible to implement the
* "bignum" routines (and have the compiler optimize 'em)
* exhibiting "native" performance in C. That's what BN_UMULT_HIGH
* macro is about:-)
*
* <appro@fy.chalmers.se>
*/
# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__DECC)
# include <c_asm.h>
# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
# elif defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("umulh %1,%2,%0" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("mulhdu %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret,discard; \
asm ("mulq %3" \
: "=a"(discard),"=d"(ret) \
: "a"(a), "g"(b) \
: "cc"); \
ret; })
# define BN_UMULT_LOHI(low,high,a,b) \
asm ("mulq %3" \
: "=a"(low),"=d"(high) \
: "a"(a),"g"(b) \
: "cc");
# endif
# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
# if defined(_MSC_VER) && _MSC_VER>=1400
unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b);
unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b,
unsigned __int64 *h);
# pragma intrinsic(__umulh,_umul128)
# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
# endif
# endif /* cpu */
#endif /* OPENSSL_NO_ASM */
/*************************************************************
* Using the long long type
*/
#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
#ifdef BN_DEBUG_RAND
#define bn_clear_top2max(a) \
{ \
int ind = (a)->dmax - (a)->top; \
BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
for (; ind != 0; ind--) \
*(++ftl) = 0x0; \
}
#else
#define bn_clear_top2max(a)
#endif
#ifdef BN_LLONG
#define mul_add(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (r) + (c); \
(r)= Lw(t); \
(c)= Hw(t); \
}
#define mul(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (c); \
(r)= Lw(t); \
(c)= Hw(t); \
}
#define sqr(r0,r1,a) { \
BN_ULLONG t; \
t=(BN_ULLONG)(a)*(a); \
(r0)=Lw(t); \
(r1)=Hw(t); \
}
#elif defined(BN_UMULT_LOHI)
#define mul_add(r,a,w,c) { \
BN_ULONG high,low,ret,tmp=(a); \
ret = (r); \
BN_UMULT_LOHI(low,high,w,tmp); \
ret += (c); \
(c) = (ret<(c))?1:0; \
(c) += high; \
ret += low; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define mul(r,a,w,c) { \
BN_ULONG high,low,ret,ta=(a); \
BN_UMULT_LOHI(low,high,w,ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define sqr(r0,r1,a) { \
BN_ULONG tmp=(a); \
BN_UMULT_LOHI(r0,r1,tmp,tmp); \
}
#elif defined(BN_UMULT_HIGH)
#define mul_add(r,a,w,c) { \
BN_ULONG high,low,ret,tmp=(a); \
ret = (r); \
high= BN_UMULT_HIGH(w,tmp); \
ret += (c); \
low = (w) * tmp; \
(c) = (ret<(c))?1:0; \
(c) += high; \
ret += low; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define mul(r,a,w,c) { \
BN_ULONG high,low,ret,ta=(a); \
low = (w) * ta; \
high= BN_UMULT_HIGH(w,ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define sqr(r0,r1,a) { \
BN_ULONG tmp=(a); \
(r0) = tmp * tmp; \
(r1) = BN_UMULT_HIGH(tmp,tmp); \
}
#else
/*************************************************************
* No long long type
*/
#define LBITS(a) ((a)&BN_MASK2l)
#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
#define LLBITS(a) ((a)&BN_MASKl)
#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
#define mul64(l,h,bl,bh) \
{ \
BN_ULONG m,m1,lt,ht; \
\
lt=l; \
ht=h; \
m =(bh)*(lt); \
lt=(bl)*(lt); \
m1=(bl)*(ht); \
ht =(bh)*(ht); \
m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
ht+=HBITS(m); \
m1=L2HBITS(m); \
lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
(l)=lt; \
(h)=ht; \
}
#define sqr64(lo,ho,in) \
{ \
BN_ULONG l,h,m; \
\
h=(in); \
l=LBITS(h); \
h=HBITS(h); \
m =(l)*(h); \
l*=l; \
h*=h; \
h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
m =(m&BN_MASK2l)<<(BN_BITS4+1); \
l=(l+m)&BN_MASK2; if (l < m) h++; \
(lo)=l; \
(ho)=h; \
}
#define mul_add(r,a,bl,bh,c) { \
BN_ULONG l,h; \
\
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
\
/* non-multiply part */ \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=(r); \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l; \
}
#define mul(r,a,bl,bh,c) { \
BN_ULONG l,h; \
\
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
\
/* non-multiply part */ \
l+=(c); if ((l&BN_MASK2) < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l&BN_MASK2; \
}
#endif /* !BN_LLONG */
void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
int dna,int dnb,BN_ULONG *t);
void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
int n,int tna,int tnb,BN_ULONG *t);
void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
BN_ULONG *t);
void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
BN_ULONG *t);
BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
#ifdef __cplusplus
}
#endif
#endif