From e461bc72b8149e6ef45457c1f2319bea91d30ca7 Mon Sep 17 00:00:00 2001 From: Jacob Barthelmeh Date: Fri, 5 Jun 2015 15:39:37 -0600 Subject: [PATCH] curve25519 and ed25519 low memory --- configure.ac | 22 +- src/include.am | 8 + wolfcrypt/src/curve25519.c | 68 +-- wolfcrypt/src/ed25519.c | 707 ++---------------------------- wolfcrypt/src/fe_low_mem.c | 596 +++++++++++++++++++++++++ wolfcrypt/src/fe_operations.c | 69 ++- wolfcrypt/src/ge_low_mem.c | 558 +++++++++++++++++++++++ wolfcrypt/src/ge_operations.c | 680 +++++++++++++++++++++++++++- wolfcrypt/test/test.c | 24 +- wolfssl/wolfcrypt/fe_operations.h | 91 +++- wolfssl/wolfcrypt/ge_operations.h | 27 +- 11 files changed, 2060 insertions(+), 790 deletions(-) create mode 100644 wolfcrypt/src/fe_low_mem.c create mode 100644 wolfcrypt/src/ge_low_mem.c diff --git a/configure.ac b/configure.ac index f15b552c582..d2b073efcfd 100644 --- a/configure.ac +++ b/configure.ac @@ -680,6 +680,9 @@ then fi +# for using memory optimization setting on both curve25519 and ed25519 +ENABLED_CURVED25519_SMALL=no + # CURVE25519 AC_ARG_ENABLE([curve25519], [AS_HELP_STRING([--enable-curve25519],[Enable Curve25519 (default: disabled)])], @@ -687,10 +690,18 @@ AC_ARG_ENABLE([curve25519], [ ENABLED_CURVE25519=no ] ) + +if test "$ENABLED_CURVE25519" = "small" +then + AM_CFLAGS="$AM_CFLAGS -DCURVED25519_SMALL" + ENABLED_CURVED25519_SMALL=yes + ENABLED_CURVE25519=yes +fi + if test "$ENABLED_CURVE25519" = "yes" then - ENABLED_FEMATH=yes AM_CFLAGS="$AM_CFLAGS -DHAVE_CURVE25519" + ENABLED_FEMATH=yes fi @@ -705,6 +716,13 @@ AC_ARG_ENABLE([ed25519], ) +if test "$ENABLED_ED25519" = "small" +then + AM_CFLAGS="$AM_CFLAGS -DCURVED25519_SMALL" + ENABLED_CURVED25519_SMALL=yes + ENABLED_ED25519=yes +fi + if test "$ENABLED_ED25519" = "yes" then if test "$ENABLED_SHA512" = "no" @@ -716,8 +734,8 @@ then AM_CFLAGS="$AM_CFLAGS -DHAVE_ED25519" fi - AM_CONDITIONAL([BUILD_ED25519], [test "x$ENABLED_ED25519" = "xyes"]) +AM_CONDITIONAL([BUILD_CURVED25519_SMALL], [test "x$ENABLED_CURVED25519_SMALL" = "xyes"]) AM_CONDITIONAL([BUILD_FEMATH], [test "x$ENABLED_FEMATH" = "xyes"]) AM_CONDITIONAL([BUILD_GEMATH], [test "x$ENABLED_GEMATH" = "xyes"]) diff --git a/src/include.am b/src/include.am index a89d7d4722f..8081e5e4c7a 100644 --- a/src/include.am +++ b/src/include.am @@ -176,12 +176,20 @@ src_libwolfssl_la_SOURCES += wolfcrypt/src/ed25519.c endif if BUILD_FEMATH +if BUILD_CURVED25519_SMALL +src_libwolfssl_la_SOURCES += wolfcrypt/src/fe_low_mem.c +else src_libwolfssl_la_SOURCES += wolfcrypt/src/fe_operations.c endif +endif if BUILD_GEMATH +if BUILD_CURVED25519_SMALL +src_libwolfssl_la_SOURCES += wolfcrypt/src/ge_low_mem.c +else src_libwolfssl_la_SOURCES += wolfcrypt/src/ge_operations.c endif +endif if BUILD_LIBZ src_libwolfssl_la_SOURCES += wolfcrypt/src/compress.c diff --git a/wolfcrypt/src/curve25519.c b/wolfcrypt/src/curve25519.c index 74fb53c8366..b745a046cd2 100644 --- a/wolfcrypt/src/curve25519.c +++ b/wolfcrypt/src/curve25519.c @@ -46,72 +46,6 @@ const curve25519_set_type curve25519_sets[] = { }; -/* internal function */ -static int curve25519(unsigned char* q, unsigned char* n, unsigned char* p) -{ - unsigned char e[32]; - unsigned int i; - fe x1; - fe x2; - fe z2; - fe x3; - fe z3; - fe tmp0; - fe tmp1; - int pos; - unsigned int swap; - unsigned int b; - - for (i = 0;i < 32;++i) e[i] = n[i]; - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - - fe_frombytes(x1,p); - fe_1(x2); - fe_0(z2); - fe_copy(x3,x1); - fe_1(z3); - - swap = 0; - for (pos = 254;pos >= 0;--pos) { - b = e[pos / 8] >> (pos & 7); - b &= 1; - swap ^= b; - fe_cswap(x2,x3,swap); - fe_cswap(z2,z3,swap); - swap = b; - - /* montgomery */ - fe_sub(tmp0,x3,z3); - fe_sub(tmp1,x2,z2); - fe_add(x2,x2,z2); - fe_add(z2,x3,z3); - fe_mul(z3,tmp0,x2); - fe_mul(z2,z2,tmp1); - fe_sq(tmp0,tmp1); - fe_sq(tmp1,x2); - fe_add(x3,z3,z2); - fe_sub(z2,z3,z2); - fe_mul(x2,tmp1,tmp0); - fe_sub(tmp1,tmp1,tmp0); - fe_sq(z2,z2); - fe_mul121666(z3,tmp1); - fe_sq(x3,x3); - fe_add(tmp0,tmp0,z3); - fe_mul(z3,x1,z2); - fe_mul(z2,tmp1,tmp0); - } - fe_cswap(x2,x3,swap); - fe_cswap(z2,z3,swap); - - fe_invert(z2,z2); - fe_mul(x2,x2,z2); - fe_tobytes(q,x2); - - return 0; -} - int wc_curve25519_make_key(RNG* rng, int keysize, curve25519_key* key) { @@ -138,7 +72,7 @@ int wc_curve25519_make_key(RNG* rng, int keysize, curve25519_key* key) key->k.point[31] &= 127; key->k.point[31] |= 64; - /*compute public key*/ + /* compute public key */ ret = curve25519(p, key->k.point, basepoint); /* store keys in big endian format */ diff --git a/wolfcrypt/src/ed25519.c b/wolfcrypt/src/ed25519.c index 54dc25b9728..ba0dcbe534e 100644 --- a/wolfcrypt/src/ed25519.c +++ b/wolfcrypt/src/ed25519.c @@ -39,636 +39,6 @@ #endif -void sc_reduce(byte* s); -void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c); - -/* -Input: - s[0]+256*s[1]+...+256^63*s[63] = s - -Output: - s[0]+256*s[1]+...+256^31*s[31] = s mod l - where l = 2^252 + 27742317777372353535851937790883648493. - Overwrites s in place. -*/ - -void sc_reduce(byte* s) -{ - int64_t s0 = 2097151 & load_3(s); - int64_t s1 = 2097151 & (load_4(s + 2) >> 5); - int64_t s2 = 2097151 & (load_3(s + 5) >> 2); - int64_t s3 = 2097151 & (load_4(s + 7) >> 7); - int64_t s4 = 2097151 & (load_4(s + 10) >> 4); - int64_t s5 = 2097151 & (load_3(s + 13) >> 1); - int64_t s6 = 2097151 & (load_4(s + 15) >> 6); - int64_t s7 = 2097151 & (load_3(s + 18) >> 3); - int64_t s8 = 2097151 & load_3(s + 21); - int64_t s9 = 2097151 & (load_4(s + 23) >> 5); - int64_t s10 = 2097151 & (load_3(s + 26) >> 2); - int64_t s11 = 2097151 & (load_4(s + 28) >> 7); - int64_t s12 = 2097151 & (load_4(s + 31) >> 4); - int64_t s13 = 2097151 & (load_3(s + 34) >> 1); - int64_t s14 = 2097151 & (load_4(s + 36) >> 6); - int64_t s15 = 2097151 & (load_3(s + 39) >> 3); - int64_t s16 = 2097151 & load_3(s + 42); - int64_t s17 = 2097151 & (load_4(s + 44) >> 5); - int64_t s18 = 2097151 & (load_3(s + 47) >> 2); - int64_t s19 = 2097151 & (load_4(s + 49) >> 7); - int64_t s20 = 2097151 & (load_4(s + 52) >> 4); - int64_t s21 = 2097151 & (load_3(s + 55) >> 1); - int64_t s22 = 2097151 & (load_4(s + 57) >> 6); - int64_t s23 = (load_4(s + 60) >> 3); - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - int64_t carry10; - int64_t carry11; - int64_t carry12; - int64_t carry13; - int64_t carry14; - int64_t carry15; - int64_t carry16; - - s11 += s23 * 666643; - s12 += s23 * 470296; - s13 += s23 * 654183; - s14 -= s23 * 997805; - s15 += s23 * 136657; - s16 -= s23 * 683901; - s23 = 0; - - s10 += s22 * 666643; - s11 += s22 * 470296; - s12 += s22 * 654183; - s13 -= s22 * 997805; - s14 += s22 * 136657; - s15 -= s22 * 683901; - s22 = 0; - - s9 += s21 * 666643; - s10 += s21 * 470296; - s11 += s21 * 654183; - s12 -= s21 * 997805; - s13 += s21 * 136657; - s14 -= s21 * 683901; - s21 = 0; - - s8 += s20 * 666643; - s9 += s20 * 470296; - s10 += s20 * 654183; - s11 -= s20 * 997805; - s12 += s20 * 136657; - s13 -= s20 * 683901; - s20 = 0; - - s7 += s19 * 666643; - s8 += s19 * 470296; - s9 += s19 * 654183; - s10 -= s19 * 997805; - s11 += s19 * 136657; - s12 -= s19 * 683901; - s19 = 0; - - s6 += s18 * 666643; - s7 += s18 * 470296; - s8 += s18 * 654183; - s9 -= s18 * 997805; - s10 += s18 * 136657; - s11 -= s18 * 683901; - s18 = 0; - - carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; - carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; - carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; - carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21; - carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21; - carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21; - - carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; - carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; - carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; - carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21; - carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21; - - s5 += s17 * 666643; - s6 += s17 * 470296; - s7 += s17 * 654183; - s8 -= s17 * 997805; - s9 += s17 * 136657; - s10 -= s17 * 683901; - s17 = 0; - - s4 += s16 * 666643; - s5 += s16 * 470296; - s6 += s16 * 654183; - s7 -= s16 * 997805; - s8 += s16 * 136657; - s9 -= s16 * 683901; - s16 = 0; - - s3 += s15 * 666643; - s4 += s15 * 470296; - s5 += s15 * 654183; - s6 -= s15 * 997805; - s7 += s15 * 136657; - s8 -= s15 * 683901; - s15 = 0; - - s2 += s14 * 666643; - s3 += s14 * 470296; - s4 += s14 * 654183; - s5 -= s14 * 997805; - s6 += s14 * 136657; - s7 -= s14 * 683901; - s14 = 0; - - s1 += s13 * 666643; - s2 += s13 * 470296; - s3 += s13 * 654183; - s4 -= s13 * 997805; - s5 += s13 * 136657; - s6 -= s13 * 683901; - s13 = 0; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21; - carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21; - carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21; - carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; - carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; - carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; - - carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21; - carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21; - carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21; - carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; - carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; - carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; - carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; - carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; - carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; - carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; - carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; - carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; - carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; - carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; - carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; - carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; - carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; - carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; - carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; - carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; - carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; - carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; - carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; - carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; - carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; - carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; - carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; - - s[0] = s0 >> 0; - s[1] = s0 >> 8; - s[2] = (s0 >> 16) | (s1 << 5); - s[3] = s1 >> 3; - s[4] = s1 >> 11; - s[5] = (s1 >> 19) | (s2 << 2); - s[6] = s2 >> 6; - s[7] = (s2 >> 14) | (s3 << 7); - s[8] = s3 >> 1; - s[9] = s3 >> 9; - s[10] = (s3 >> 17) | (s4 << 4); - s[11] = s4 >> 4; - s[12] = s4 >> 12; - s[13] = (s4 >> 20) | (s5 << 1); - s[14] = s5 >> 7; - s[15] = (s5 >> 15) | (s6 << 6); - s[16] = s6 >> 2; - s[17] = s6 >> 10; - s[18] = (s6 >> 18) | (s7 << 3); - s[19] = s7 >> 5; - s[20] = s7 >> 13; - s[21] = s8 >> 0; - s[22] = s8 >> 8; - s[23] = (s8 >> 16) | (s9 << 5); - s[24] = s9 >> 3; - s[25] = s9 >> 11; - s[26] = (s9 >> 19) | (s10 << 2); - s[27] = s10 >> 6; - s[28] = (s10 >> 14) | (s11 << 7); - s[29] = s11 >> 1; - s[30] = s11 >> 9; - s[31] = s11 >> 17; - - /* hush warnings after setting values to 0 */ - (void)s12; - (void)s13; - (void)s14; - (void)s15; - (void)s16; - (void)s17; - (void)s18; - (void)s19; - (void)s20; - (void)s21; - (void)s22; - (void)s23; -} - - -/* -Input: - a[0]+256*a[1]+...+256^31*a[31] = a - b[0]+256*b[1]+...+256^31*b[31] = b - c[0]+256*c[1]+...+256^31*c[31] = c - -Output: - s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l - where l = 2^252 + 27742317777372353535851937790883648493. -*/ - -void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) -{ - int64_t a0 = 2097151 & load_3(a); - int64_t a1 = 2097151 & (load_4(a + 2) >> 5); - int64_t a2 = 2097151 & (load_3(a + 5) >> 2); - int64_t a3 = 2097151 & (load_4(a + 7) >> 7); - int64_t a4 = 2097151 & (load_4(a + 10) >> 4); - int64_t a5 = 2097151 & (load_3(a + 13) >> 1); - int64_t a6 = 2097151 & (load_4(a + 15) >> 6); - int64_t a7 = 2097151 & (load_3(a + 18) >> 3); - int64_t a8 = 2097151 & load_3(a + 21); - int64_t a9 = 2097151 & (load_4(a + 23) >> 5); - int64_t a10 = 2097151 & (load_3(a + 26) >> 2); - int64_t a11 = (load_4(a + 28) >> 7); - int64_t b0 = 2097151 & load_3(b); - int64_t b1 = 2097151 & (load_4(b + 2) >> 5); - int64_t b2 = 2097151 & (load_3(b + 5) >> 2); - int64_t b3 = 2097151 & (load_4(b + 7) >> 7); - int64_t b4 = 2097151 & (load_4(b + 10) >> 4); - int64_t b5 = 2097151 & (load_3(b + 13) >> 1); - int64_t b6 = 2097151 & (load_4(b + 15) >> 6); - int64_t b7 = 2097151 & (load_3(b + 18) >> 3); - int64_t b8 = 2097151 & load_3(b + 21); - int64_t b9 = 2097151 & (load_4(b + 23) >> 5); - int64_t b10 = 2097151 & (load_3(b + 26) >> 2); - int64_t b11 = (load_4(b + 28) >> 7); - int64_t c0 = 2097151 & load_3(c); - int64_t c1 = 2097151 & (load_4(c + 2) >> 5); - int64_t c2 = 2097151 & (load_3(c + 5) >> 2); - int64_t c3 = 2097151 & (load_4(c + 7) >> 7); - int64_t c4 = 2097151 & (load_4(c + 10) >> 4); - int64_t c5 = 2097151 & (load_3(c + 13) >> 1); - int64_t c6 = 2097151 & (load_4(c + 15) >> 6); - int64_t c7 = 2097151 & (load_3(c + 18) >> 3); - int64_t c8 = 2097151 & load_3(c + 21); - int64_t c9 = 2097151 & (load_4(c + 23) >> 5); - int64_t c10 = 2097151 & (load_3(c + 26) >> 2); - int64_t c11 = (load_4(c + 28) >> 7); - int64_t s0; - int64_t s1; - int64_t s2; - int64_t s3; - int64_t s4; - int64_t s5; - int64_t s6; - int64_t s7; - int64_t s8; - int64_t s9; - int64_t s10; - int64_t s11; - int64_t s12; - int64_t s13; - int64_t s14; - int64_t s15; - int64_t s16; - int64_t s17; - int64_t s18; - int64_t s19; - int64_t s20; - int64_t s21; - int64_t s22; - int64_t s23; - int64_t carry0; - int64_t carry1; - int64_t carry2; - int64_t carry3; - int64_t carry4; - int64_t carry5; - int64_t carry6; - int64_t carry7; - int64_t carry8; - int64_t carry9; - int64_t carry10; - int64_t carry11; - int64_t carry12; - int64_t carry13; - int64_t carry14; - int64_t carry15; - int64_t carry16; - int64_t carry17; - int64_t carry18; - int64_t carry19; - int64_t carry20; - int64_t carry21; - int64_t carry22; - - s0 = c0 + a0*b0; - s1 = c1 + a0*b1 + a1*b0; - s2 = c2 + a0*b2 + a1*b1 + a2*b0; - s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0; - s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0; - s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0; - s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0; - s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0; - s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0; - s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0; - s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0; - s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0; - s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1; - s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2; - s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3; - s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4; - s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5; - s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6; - s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7; - s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8; - s20 = a9*b11 + a10*b10 + a11*b9; - s21 = a10*b11 + a11*b10; - s22 = a11*b11; - s23 = 0; - - carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21; - carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21; - carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21; - carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; - carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; - carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; - carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21; - carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21; - carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21; - carry18 = (s18 + (1<<20)) >> 21; s19 += carry18; s18 -= carry18 << 21; - carry20 = (s20 + (1<<20)) >> 21; s21 += carry20; s20 -= carry20 << 21; - carry22 = (s22 + (1<<20)) >> 21; s23 += carry22; s22 -= carry22 << 21; - - carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21; - carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21; - carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21; - carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; - carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; - carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; - carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21; - carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21; - carry17 = (s17 + (1<<20)) >> 21; s18 += carry17; s17 -= carry17 << 21; - carry19 = (s19 + (1<<20)) >> 21; s20 += carry19; s19 -= carry19 << 21; - carry21 = (s21 + (1<<20)) >> 21; s22 += carry21; s21 -= carry21 << 21; - - s11 += s23 * 666643; - s12 += s23 * 470296; - s13 += s23 * 654183; - s14 -= s23 * 997805; - s15 += s23 * 136657; - s16 -= s23 * 683901; - s23 = 0; - - s10 += s22 * 666643; - s11 += s22 * 470296; - s12 += s22 * 654183; - s13 -= s22 * 997805; - s14 += s22 * 136657; - s15 -= s22 * 683901; - s22 = 0; - - s9 += s21 * 666643; - s10 += s21 * 470296; - s11 += s21 * 654183; - s12 -= s21 * 997805; - s13 += s21 * 136657; - s14 -= s21 * 683901; - s21 = 0; - - s8 += s20 * 666643; - s9 += s20 * 470296; - s10 += s20 * 654183; - s11 -= s20 * 997805; - s12 += s20 * 136657; - s13 -= s20 * 683901; - s20 = 0; - - s7 += s19 * 666643; - s8 += s19 * 470296; - s9 += s19 * 654183; - s10 -= s19 * 997805; - s11 += s19 * 136657; - s12 -= s19 * 683901; - s19 = 0; - - s6 += s18 * 666643; - s7 += s18 * 470296; - s8 += s18 * 654183; - s9 -= s18 * 997805; - s10 += s18 * 136657; - s11 -= s18 * 683901; - s18 = 0; - - carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; - carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; - carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; - carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21; - carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21; - carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21; - - carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; - carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; - carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; - carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21; - carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21; - - s5 += s17 * 666643; - s6 += s17 * 470296; - s7 += s17 * 654183; - s8 -= s17 * 997805; - s9 += s17 * 136657; - s10 -= s17 * 683901; - s17 = 0; - - s4 += s16 * 666643; - s5 += s16 * 470296; - s6 += s16 * 654183; - s7 -= s16 * 997805; - s8 += s16 * 136657; - s9 -= s16 * 683901; - s16 = 0; - - s3 += s15 * 666643; - s4 += s15 * 470296; - s5 += s15 * 654183; - s6 -= s15 * 997805; - s7 += s15 * 136657; - s8 -= s15 * 683901; - s15 = 0; - - s2 += s14 * 666643; - s3 += s14 * 470296; - s4 += s14 * 654183; - s5 -= s14 * 997805; - s6 += s14 * 136657; - s7 -= s14 * 683901; - s14 = 0; - - s1 += s13 * 666643; - s2 += s13 * 470296; - s3 += s13 * 654183; - s4 -= s13 * 997805; - s5 += s13 * 136657; - s6 -= s13 * 683901; - s13 = 0; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21; - carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21; - carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21; - carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; - carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; - carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; - - carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21; - carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21; - carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21; - carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; - carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; - carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; - carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; - carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; - carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; - carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; - carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; - carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; - carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; - carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; - carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; - carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; - carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21; - - s0 += s12 * 666643; - s1 += s12 * 470296; - s2 += s12 * 654183; - s3 -= s12 * 997805; - s4 += s12 * 136657; - s5 -= s12 * 683901; - s12 = 0; - - carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; - carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; - carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; - carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; - carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; - carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; - carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; - carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; - carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; - carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; - carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; - - s[0] = s0 >> 0; - s[1] = s0 >> 8; - s[2] = (s0 >> 16) | (s1 << 5); - s[3] = s1 >> 3; - s[4] = s1 >> 11; - s[5] = (s1 >> 19) | (s2 << 2); - s[6] = s2 >> 6; - s[7] = (s2 >> 14) | (s3 << 7); - s[8] = s3 >> 1; - s[9] = s3 >> 9; - s[10] = (s3 >> 17) | (s4 << 4); - s[11] = s4 >> 4; - s[12] = s4 >> 12; - s[13] = (s4 >> 20) | (s5 << 1); - s[14] = s5 >> 7; - s[15] = (s5 >> 15) | (s6 << 6); - s[16] = s6 >> 2; - s[17] = s6 >> 10; - s[18] = (s6 >> 18) | (s7 << 3); - s[19] = s7 >> 5; - s[20] = s7 >> 13; - s[21] = s8 >> 0; - s[22] = s8 >> 8; - s[23] = (s8 >> 16) | (s9 << 5); - s[24] = s9 >> 3; - s[25] = s9 >> 11; - s[26] = (s9 >> 19) | (s10 << 2); - s[27] = s10 >> 6; - s[28] = (s10 >> 14) | (s11 << 7); - s[29] = s11 >> 1; - s[30] = s11 >> 9; - s[31] = s11 >> 17; - - /* hush warnings after setting values to 0 */ - (void)s12; - (void)s13; - (void)s14; - (void)s15; - (void)s16; - (void)s17; - (void)s18; - (void)s19; - (void)s20; - (void)s21; - (void)s22; - (void)s23; -} - - /* generate an ed25519 key pair. returns 0 on success @@ -676,8 +46,8 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) int wc_ed25519_make_key(RNG* rng, int keySz, ed25519_key* key) { byte az[64]; - ge_p3 A; int ret; + ge_p3 A; if (rng == NULL || key == NULL) return BAD_FUNC_ARG; @@ -713,13 +83,13 @@ int wc_ed25519_make_key(RNG* rng, int keySz, ed25519_key* key) int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out, word32 *outlen, ed25519_key* key) { - int ret = 0; - byte nonce[64]; - byte hram[64]; + ge_p3 R; + byte nonce[SHA512_DIGEST_SIZE]; + byte hram[SHA512_DIGEST_SIZE]; byte az[64]; word32 sigSz; - ge_p3 R; Sha512 sha; + int ret = 0; /* sanity check on arguments */ if (in == NULL || out == NULL || outlen == NULL || key == NULL) @@ -732,7 +102,8 @@ int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out, return BAD_FUNC_ARG; *outlen = sigSz; - /* create nonce to use */ + /* step 1: create nonce to use where nonce is r in + r = H(h_b, ... ,h_2b-1,M) */ ret |= wc_Sha512Hash(key->k,32,az); az[0] &= 248; az[31] &= 63; @@ -742,10 +113,14 @@ int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out, ret |= wc_Sha512Update(&sha, in, inlen); ret |= wc_Sha512Final(&sha, nonce); sc_reduce(nonce); + + /* step 2: computing R = rB where rB is the scalar multiplication of + r and B */ ge_scalarmult_base(&R,nonce); ge_p3_tobytes(out,&R); - /* hash scalarmult of nonce + public key + message */ + /* step 3: hash R + public key + message getting H(R,A,M) then + creating S = (r + H(R,A,M)a) mod l */ ret |= wc_InitSha512(&sha); ret |= wc_Sha512Update(&sha, out, 32); ret |= wc_Sha512Update(&sha, key->p, 32); @@ -768,13 +143,13 @@ int wc_ed25519_sign_msg(const byte* in, word32 inlen, byte* out, int wc_ed25519_verify_msg(byte* sig, word32 siglen, const byte* msg, word32 msglen, int* stat, ed25519_key* key) { - int ret; - word32 sigSz; - byte h[64]; byte rcheck[32]; - Sha512 sha; + byte h[SHA512_DIGEST_SIZE]; ge_p3 A; ge_p2 R; + word32 sigSz; + int ret; + Sha512 sha; /* sanity check on arguments */ if (sig == NULL || msg == NULL || stat == NULL || key == NULL) @@ -789,10 +164,12 @@ int wc_ed25519_verify_msg(byte* sig, word32 siglen, const byte* msg, return BAD_FUNC_ARG; if (sig[63] & 224) return BAD_FUNC_ARG; + + /* uncompress A (public key), test if valid, and negate it */ if (ge_frombytes_negate_vartime(&A, key->p) != 0) return BAD_FUNC_ARG; - /* reduce hash of r + public key + message */ + /* find H(R,A,M) and store it as h */ ret |= wc_InitSha512(&sha); ret |= wc_Sha512Update(&sha, sig, 32); ret |= wc_Sha512Update(&sha, key->p, 32); @@ -800,12 +177,16 @@ int wc_ed25519_verify_msg(byte* sig, word32 siglen, const byte* msg, ret |= wc_Sha512Final(&sha, h); sc_reduce(h); - /* scalarmult placed in R using hash + A + s */ - ge_double_scalarmult_vartime(&R, h, &A, sig + 32); - ge_tobytes(rcheck,&R); + /* + Uses a fast single-signature verification SB = R + H(R,A,M)A becomes + SB - H(R,A,M)A saving decompression of R + */ + ret |= ge_double_scalarmult_vartime(&R, h, &A, sig + 32); + ge_tobytes(rcheck, &R); - /* comparison of r created to r in sig */ + /* comparison of R created to R in sig */ ret |= ConstantCompare(rcheck, sig, 32); + *stat = (ret == 0)? 1: 0; return ret; @@ -843,6 +224,7 @@ int wc_ed25519_export_public(ed25519_key* key, byte* out, word32* outLen) { word32 keySz; + /* sanity check on arguments */ if (key == NULL || out == NULL || outLen == NULL) return BAD_FUNC_ARG; @@ -858,35 +240,6 @@ int wc_ed25519_export_public(ed25519_key* key, byte* out, word32* outLen) } -/* internal function for importing uncompressed public keys */ -static int compress_key(byte* out, const byte* xIn, const byte* yIn, - word32 keySz) -{ - fe x,y,z; - ge_p3 g; - byte bArray[ED25519_KEY_SIZE]; - word32 i; - - fe_0(x); - fe_0(y); - fe_1(z); - fe_frombytes(x, xIn); - fe_frombytes(y, yIn); - - fe_copy(g.X, x); - fe_copy(g.Y, y); - fe_copy(g.Z, z); - - ge_p3_tobytes(bArray, &g); - - for (i = 0; i < keySz; i++) { - out[keySz - 1 - i] = bArray[i]; - } - - return 0; -} - - /* Imports a compressed/uncompressed public key. in the byte array containing the public key @@ -898,6 +251,7 @@ int wc_ed25519_import_public(const byte* in, word32 inLen, ed25519_key* key) word32 keySz; int ret; + /* sanity check on arguments */ if (in == NULL || key == NULL) return BAD_FUNC_ARG; @@ -917,7 +271,7 @@ int wc_ed25519_import_public(const byte* in, word32 inLen, ed25519_key* key) /* importing uncompressed public key */ if (in[0] == 0x04) { /* pass in (x,y) and store compressed key */ - ret = compress_key(key->p, (in+1), (in+1+keySz), keySz); + ret = ge_compress_key(key->p, (in+1), (in+1+keySz), keySz); return ret; } @@ -942,6 +296,7 @@ int wc_ed25519_import_private_key(const byte* priv, word32 privSz, word32 keySz; int ret; + /* sanity check on arguments */ if (priv == NULL || pub == NULL || key == NULL) return BAD_FUNC_ARG; diff --git a/wolfcrypt/src/fe_low_mem.c b/wolfcrypt/src/fe_low_mem.c new file mode 100644 index 00000000000..80834f54ce8 --- /dev/null +++ b/wolfcrypt/src/fe_low_mem.c @@ -0,0 +1,596 @@ +/* fe_low_mem.c + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* Based from Daniel Beer's public domain word. */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) + +#include + +#ifdef NO_INLINE + #include +#else + #include +#endif + + +void fprime_copy(byte *x, const byte *a) +{ + int i; + for (i = 0; i < F25519_SIZE; i++) + x[i] = a[i]; +} + + +void fe_copy(fe x, const fe a) +{ + int i; + for (i = 0; i < F25519_SIZE; i++) + x[i] = a[i]; +} + + +/* Double an X-coordinate */ +static void xc_double(byte *x3, byte *z3, + const byte *x1, const byte *z1) +{ + /* Explicit formulas database: dbl-1987-m + * + * source 1987 Montgomery "Speeding the Pollard and elliptic + * curve methods of factorization", page 261, fourth display + * compute X3 = (X1^2-Z1^2)^2 + * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2) + */ + byte x1sq[F25519_SIZE]; + byte z1sq[F25519_SIZE]; + byte x1z1[F25519_SIZE]; + byte a[F25519_SIZE]; + + fe_mul__distinct(x1sq, x1, x1); + fe_mul__distinct(z1sq, z1, z1); + fe_mul__distinct(x1z1, x1, z1); + + fe_sub(a, x1sq, z1sq); + fe_mul__distinct(x3, a, a); + + fe_mul_c(a, x1z1, 486662); + fe_add(a, x1sq, a); + fe_add(a, z1sq, a); + fe_mul__distinct(x1sq, x1z1, a); + fe_mul_c(z3, x1sq, 4); +} + + +/* Differential addition */ +static void xc_diffadd(byte *x5, byte *z5, + const byte *x1, const byte *z1, + const byte *x2, const byte *z2, + const byte *x3, const byte *z3) +{ + /* Explicit formulas database: dbl-1987-m3 + * + * source 1987 Montgomery "Speeding the Pollard and elliptic curve + * methods of factorization", page 261, fifth display, plus + * common-subexpression elimination + * compute A = X2+Z2 + * compute B = X2-Z2 + * compute C = X3+Z3 + * compute D = X3-Z3 + * compute DA = D A + * compute CB = C B + * compute X5 = Z1(DA+CB)^2 + * compute Z5 = X1(DA-CB)^2 + */ + byte da[F25519_SIZE]; + byte cb[F25519_SIZE]; + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + + fe_add(a, x2, z2); + fe_sub(b, x3, z3); /* D */ + fe_mul__distinct(da, a, b); + + fe_sub(b, x2, z2); + fe_add(a, x3, z3); /* C */ + fe_mul__distinct(cb, a, b); + + fe_add(a, da, cb); + fe_mul__distinct(b, a, a); + fe_mul__distinct(x5, z1, b); + + fe_sub(a, da, cb); + fe_mul__distinct(b, a, a); + fe_mul__distinct(z5, x1, b); +} + + +int curve25519(byte *result, byte *e, byte *q) +{ + /* Current point: P_m */ + byte xm[F25519_SIZE]; + byte zm[F25519_SIZE] = {1}; + + /* Predecessor: P_(m-1) */ + byte xm1[F25519_SIZE] = {1}; + byte zm1[F25519_SIZE] = {0}; + + int i; + + /* Note: bit 254 is assumed to be 1 */ + fe_copy(xm, q); + + for (i = 253; i >= 0; i--) { + const int bit = (e[i >> 3] >> (i & 7)) & 1; + byte xms[F25519_SIZE]; + byte zms[F25519_SIZE]; + + /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */ + xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1); + xc_double(xm, zm, xm, zm); + + /* Compute P_(2m+1) */ + xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one); + + /* Select: + * bit = 1 --> (P_(2m+1), P_(2m)) + * bit = 0 --> (P_(2m), P_(2m-1)) + */ + fe_select(xm1, xm1, xm, bit); + fe_select(zm1, zm1, zm, bit); + fe_select(xm, xm, xms, bit); + fe_select(zm, zm, zms, bit); + } + + /* Freeze out of projective coordinates */ + fe_inv__distinct(zm1, zm); + fe_mul__distinct(result, zm1, xm); + fe_normalize(result); + return 0; +} + + +static void raw_add(byte *x, const byte *p) +{ + word16 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + c += ((word16)x[i]) + ((word16)p[i]); + x[i] = c; + c >>= 8; + } +} + + +static void raw_try_sub(byte *x, const byte *p) +{ + byte minusp[F25519_SIZE]; + word16 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + c = ((word16)x[i]) - ((word16)p[i]) - c; + minusp[i] = c; + c = (c >> 8) & 1; + } + + fprime_select(x, minusp, x, c); +} + + +static int prime_msb(const byte *p) +{ + int i; + byte x; + int shift = 1; + int z = F25519_SIZE - 1; + + /* + Test for any hot bits. + As soon as one instance is incountered set shift to 0. + */ + for (i = F25519_SIZE - 1; i >= 0; i--) { + shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1); + z -= shift; + } + x = p[z]; + z <<= 3; + shift = 1; + for (i = 0; i < 8; i++) { + shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1); + z += shift; + } + + return z - 1; +} + + +void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition) +{ + const byte mask = -condition; + int i; + + for (i = 0; i < F25519_SIZE; i++) + dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i])); +} + + +void fprime_add(byte *r, const byte *a, const byte *modulus) +{ + raw_add(r, a); + raw_try_sub(r, modulus); +} + + +void fprime_sub(byte *r, const byte *a, const byte *modulus) +{ + raw_add(r, modulus); + raw_try_sub(r, a); + raw_try_sub(r, modulus); +} + + +void fprime_mul(byte *r, const byte *a, const byte *b, + const byte *modulus) +{ + word16 c = 0; + int i,j; + + XMEMSET(r, 0, F25519_SIZE); + + for (i = prime_msb(modulus); i >= 0; i--) { + const byte bit = (b[i >> 3] >> (i & 7)) & 1; + byte plusa[F25519_SIZE]; + + for (j = 0; j < F25519_SIZE; j++) { + c |= ((word16)r[j]) << 1; + r[j] = c; + c >>= 8; + } + raw_try_sub(r, modulus); + + fprime_copy(plusa, r); + fprime_add(plusa, a, modulus); + + fprime_select(r, r, plusa, bit); + } +} + + +void fe_load(byte *x, word32 c) +{ + word32 i; + + for (i = 0; i < sizeof(c); i++) { + x[i] = c; + c >>= 8; + } + + for (; i < F25519_SIZE; i++) + x[i] = 0; +} + + +void fe_normalize(byte *x) +{ + byte minusp[F25519_SIZE]; + word16 c; + int i; + + /* Reduce using 2^255 = 19 mod p */ + c = (x[31] >> 7) * 19; + x[31] &= 127; + + for (i = 0; i < F25519_SIZE; i++) { + c += x[i]; + x[i] = c; + c >>= 8; + } + + /* The number is now less than 2^255 + 18, and therefore less than + * 2p. Try subtracting p, and conditionally load the subtracted + * value if underflow did not occur. + */ + c = 19; + + for (i = 0; i + 1 < F25519_SIZE; i++) { + c += x[i]; + minusp[i] = c; + c >>= 8; + } + + c += ((word16)x[i]) - 128; + minusp[31] = c; + + /* Load x-p if no underflow */ + fe_select(x, minusp, x, (c >> 15) & 1); +} + + +void fe_select(byte *dst, + const byte *zero, const byte *one, + byte condition) +{ + const byte mask = -condition; + int i; + + for (i = 0; i < F25519_SIZE; i++) + dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i])); +} + + +void fe_add(fe r, const fe a, const fe b) +{ + word16 c = 0; + int i; + + /* Add */ + for (i = 0; i < F25519_SIZE; i++) { + c >>= 8; + c += ((word16)a[i]) + ((word16)b[i]); + r[i] = c; + } + + /* Reduce with 2^255 = 19 mod p */ + r[31] &= 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_sub(fe r, const fe a, const fe b) +{ + word32 c = 0; + int i; + + /* Calculate a + 2p - b, to avoid underflow */ + c = 218; + for (i = 0; i + 1 < F25519_SIZE; i++) { + c += 65280 + ((word32)a[i]) - ((word32)b[i]); + r[i] = c; + c >>= 8; + } + + c += ((word32)a[31]) - ((word32)b[31]); + r[31] = c & 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_neg(fe r, const fe a) +{ + word32 c = 0; + int i; + + /* Calculate 2p - a, to avoid underflow */ + c = 218; + for (i = 0; i + 1 < F25519_SIZE; i++) { + c += 65280 - ((word32)a[i]); + r[i] = c; + c >>= 8; + } + + c -= ((word32)a[31]); + r[31] = c & 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_mul__distinct(byte *r, const byte *a, const byte *b) +{ + word32 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + int j; + + c >>= 8; + for (j = 0; j <= i; j++) + c += ((word32)a[j]) * ((word32)b[i - j]); + + for (; j < F25519_SIZE; j++) + c += ((word32)a[j]) * + ((word32)b[i + F25519_SIZE - j]) * 38; + + r[i] = c; + } + + r[31] &= 127; + c = (c >> 7) * 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_mul(fe r, const fe a, const fe b) +{ + byte tmp[F25519_SIZE]; + + fe_mul__distinct(tmp, a, b); + fe_copy(r, tmp); +} + + +void fe_mul_c(byte *r, const byte *a, word32 b) +{ + word32 c = 0; + int i; + + for (i = 0; i < F25519_SIZE; i++) { + c >>= 8; + c += b * ((word32)a[i]); + r[i] = c; + } + + r[31] &= 127; + c >>= 7; + c *= 19; + + for (i = 0; i < F25519_SIZE; i++) { + c += r[i]; + r[i] = c; + c >>= 8; + } +} + + +void fe_inv__distinct(byte *r, const byte *x) +{ + byte s[F25519_SIZE]; + int i; + + /* This is a prime field, so by Fermat's little theorem: + * + * x^(p-1) = 1 mod p + * + * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative + * inverse. + * + * This is a 255-bit binary number with the digits: + * + * 11111111... 01011 + * + * We compute the result by the usual binary chain, but + * alternate between keeping the accumulator in r and s, so as + * to avoid copying temporaries. + */ + + /* 1 1 */ + fe_mul__distinct(s, x, x); + fe_mul__distinct(r, s, x); + + /* 1 x 248 */ + for (i = 0; i < 248; i++) { + fe_mul__distinct(s, r, r); + fe_mul__distinct(r, s, x); + } + + /* 0 */ + fe_mul__distinct(s, r, r); + + /* 1 */ + fe_mul__distinct(r, s, s); + fe_mul__distinct(s, r, x); + + /* 0 */ + fe_mul__distinct(r, s, s); + + /* 1 */ + fe_mul__distinct(s, r, r); + fe_mul__distinct(r, s, x); + + /* 1 */ + fe_mul__distinct(s, r, r); + fe_mul__distinct(r, s, x); +} + + +void fe_invert(fe r, const fe x) +{ + byte tmp[F25519_SIZE]; + + fe_inv__distinct(tmp, x); + fe_copy(r, tmp); +} + + +/* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary + * storage. + */ +static void exp2523(byte *r, const byte *x, byte *s) +{ + int i; + + /* This number is a 252-bit number with the binary expansion: + * + * 111111... 01 + */ + + /* 1 1 */ + fe_mul__distinct(r, x, x); + fe_mul__distinct(s, r, x); + + /* 1 x 248 */ + for (i = 0; i < 248; i++) { + fe_mul__distinct(r, s, s); + fe_mul__distinct(s, r, x); + } + + /* 0 */ + fe_mul__distinct(r, s, s); + + /* 1 */ + fe_mul__distinct(s, r, r); + fe_mul__distinct(r, s, x); +} + + +void fe_sqrt(byte *r, const byte *a) +{ + byte v[F25519_SIZE]; + byte i[F25519_SIZE]; + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + + /* v = (2a)^((p-5)/8) [x = 2a] */ + fe_mul_c(x, a, 2); + exp2523(v, x, y); + + /* i = 2av^2 - 1 */ + fe_mul__distinct(y, v, v); + fe_mul__distinct(i, x, y); + fe_load(y, 1); + fe_sub(i, i, y); + + /* r = avi */ + fe_mul__distinct(x, v, a); + fe_mul__distinct(r, x, i); +} + +#endif /* HAVE_CURVE25519 or HAVE_ED25519 */ + diff --git a/wolfcrypt/src/fe_operations.c b/wolfcrypt/src/fe_operations.c index 5d50517cc68..da07c951c11 100644 --- a/wolfcrypt/src/fe_operations.c +++ b/wolfcrypt/src/fe_operations.c @@ -105,6 +105,72 @@ void fe_0(fe h) } +int curve25519(byte* q, byte* n, byte* p) +{ + unsigned char e[32]; + unsigned int i; + fe x1; + fe x2; + fe z2; + fe x3; + fe z3; + fe tmp0; + fe tmp1; + int pos; + unsigned int swap; + unsigned int b; + + for (i = 0;i < 32;++i) e[i] = n[i]; + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + + fe_frombytes(x1,p); + fe_1(x2); + fe_0(z2); + fe_copy(x3,x1); + fe_1(z3); + + swap = 0; + for (pos = 254;pos >= 0;--pos) { + b = e[pos / 8] >> (pos & 7); + b &= 1; + swap ^= b; + fe_cswap(x2,x3,swap); + fe_cswap(z2,z3,swap); + swap = b; + + /* montgomery */ + fe_sub(tmp0,x3,z3); + fe_sub(tmp1,x2,z2); + fe_add(x2,x2,z2); + fe_add(z2,x3,z3); + fe_mul(z3,tmp0,x2); + fe_mul(z2,z2,tmp1); + fe_sq(tmp0,tmp1); + fe_sq(tmp1,x2); + fe_add(x3,z3,z2); + fe_sub(z2,z3,z2); + fe_mul(x2,tmp1,tmp0); + fe_sub(tmp1,tmp1,tmp0); + fe_sq(z2,z2); + fe_mul121666(z3,tmp1); + fe_sq(x3,x3); + fe_add(tmp0,tmp0,z3); + fe_mul(z3,x1,z2); + fe_mul(z2,tmp1,tmp0); + } + fe_cswap(x2,x3,swap); + fe_cswap(z2,z3,swap); + + fe_invert(z2,z2); + fe_mul(x2,x2,z2); + fe_tobytes(q,x2); + + return 0; +} + + /* h = f * f Can overlap h with f. @@ -1236,9 +1302,6 @@ void fe_neg(fe h,const fe f) /* -return 1 if f == 0 -return 0 if f != 0 - Preconditions: |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ diff --git a/wolfcrypt/src/ge_low_mem.c b/wolfcrypt/src/ge_low_mem.c new file mode 100644 index 00000000000..43c533c695b --- /dev/null +++ b/wolfcrypt/src/ge_low_mem.c @@ -0,0 +1,558 @@ +/* ge_low_mem.c + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + + /* Based from Daniel Beer's public domain work. */ + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_ED25519 + +#include +#include +#ifdef NO_INLINE + #include +#else + #include +#endif + +void ed25519_smult(ge_p3 *r, const ge_p3 *a, const byte *e); +void ed25519_add(ge_p3 *r, const ge_p3 *a, const ge_p3 *b); +void ed25519_double(ge_p3 *r, const ge_p3 *a); + + +static const byte ed25519_order[F25519_SIZE] = { + 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, + 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 +}; + +/*Arithmetic modulo the group order m = 2^252 + + 27742317777372353535851937790883648493 = + 7237005577332262213973186563042994240857116359379907606001950938285454250989 */ + +static const word32 m[32] = { + 0xED,0xD3,0xF5,0x5C,0x1A,0x63,0x12,0x58,0xD6,0x9C,0xF7,0xA2,0xDE,0xF9, + 0xDE,0x14,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x10 +}; + +static const word32 mu[33] = { + 0x1B,0x13,0x2C,0x0A,0xA3,0xE5,0x9C,0xED,0xA7,0x29,0x63,0x08,0x5D,0x21, + 0x06,0x21,0xEB,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, + 0xFF,0xFF,0xFF,0xFF,0x0F +}; + + +int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, + word32 keySz) +{ + byte tmp[F25519_SIZE]; + byte parity; + int i; + + fe_copy(tmp, xIn); + parity = (tmp[0] & 1) << 7; + + byte pt[32]; + fe_copy(pt, yIn); + pt[31] |= parity; + + for(i = 0; i < 32; i++) { + out[32-i-1] = pt[i]; + } + (void)keySz; + return 0; +} + + +static word32 lt(word32 a,word32 b) /* 16-bit inputs */ +{ + unsigned int x = a; + x -= (unsigned int) b; /* 0..65535: no; 4294901761..4294967295: yes */ + x >>= 31; /* 0: no; 1: yes */ + return x; +} + + +/* Reduce coefficients of r before calling reduce_add_sub */ +static void reduce_add_sub(word32 *r) +{ + word32 pb = 0; + word32 b; + word32 mask; + int i; + unsigned char t[32]; + + for(i=0;i<32;i++) + { + pb += m[i]; + b = lt(r[i],pb); + t[i] = r[i]-pb+(b<<8); + pb = b; + } + mask = b - 1; + for(i=0;i<32;i++) + r[i] ^= mask & (r[i] ^ t[i]); +} + + +/* Reduce coefficients of x before calling barrett_reduce */ +static void barrett_reduce(word32* r, word32 x[64]) +{ + /* See HAC, Alg. 14.42 */ + int i,j; + word32 q2[66]; + word32 *q3 = q2 + 33; + word32 r1[33]; + word32 r2[33]; + word32 carry; + word32 pb = 0; + word32 b; + + for (i = 0;i < 66;++i) q2[i] = 0; + for (i = 0;i < 33;++i) r2[i] = 0; + + for(i=0;i<33;i++) + for(j=0;j<33;j++) + if(i+j >= 31) q2[i+j] += mu[i]*x[j+31]; + carry = q2[31] >> 8; + q2[32] += carry; + carry = q2[32] >> 8; + q2[33] += carry; + + for(i=0;i<33;i++)r1[i] = x[i]; + for(i=0;i<32;i++) + for(j=0;j<33;j++) + if(i+j < 33) r2[i+j] += m[i]*q3[j]; + + for(i=0;i<32;i++) + { + carry = r2[i] >> 8; + r2[i+1] += carry; + r2[i] &= 0xff; + } + + for(i=0;i<32;i++) + { + pb += r2[i]; + b = lt(r1[i],pb); + r[i] = r1[i]-pb+(b<<8); + pb = b; + } + + /* XXX: Can it really happen that r<0?, See HAC, Alg 14.42, Step 3 + * r is an unsigned type. + * If so: Handle it here! + */ + + reduce_add_sub(r); + reduce_add_sub(r); +} + + +void sc_reduce(unsigned char x[64]) +{ + int i; + word32 t[64]; + word32 r[32]; + for(i=0;i<64;i++) t[i] = x[i]; + barrett_reduce(r, t); + for(i=0;i<32;i++) x[i] = (r[i] & 0xFF); +} + + +void sc_muladd(byte* out, const byte* a, const byte* b, const byte* c) +{ + + byte s[32]; + byte e[64]; + + XMEMSET(e, 0, sizeof(e)); + XMEMCPY(e, b, 32); + + /* Obtain e */ + sc_reduce(e); + + /* Compute s = ze + k */ + fprime_mul(s, a, e, ed25519_order); + fprime_add(s, c, ed25519_order); + + XMEMCPY(out, s, 32); +} + + +/* Base point is (numbers wrapped): + * + * x = 151122213495354007725011514095885315114 + * 54012693041857206046113283949847762202 + * y = 463168356949264781694283940034751631413 + * 07993866256225615783033603165251855960 + * + * y is derived by transforming the original Montgomery base (u=9). x + * is the corresponding positive coordinate for the new curve equation. + * t is x*y. + */ +const ge_p3 ed25519_base = { + .X = { + 0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, + 0xb2, 0xa7, 0x25, 0x95, 0x60, 0xc7, 0x2c, 0x69, + 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, + 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21 + }, + .Y = { + 0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 + }, + .T = { + 0xa3, 0xdd, 0xb7, 0xa5, 0xb3, 0x8a, 0xde, 0x6d, + 0xf5, 0x52, 0x51, 0x77, 0x80, 0x9f, 0xf0, 0x20, + 0x7d, 0xe3, 0xab, 0x64, 0x8e, 0x4e, 0xea, 0x66, + 0x65, 0x76, 0x8b, 0xd7, 0x0f, 0x5f, 0x87, 0x67 + }, + .Z = {1, 0} +}; + + +const ge_p3 ed25519_neutral = { + .X = {0}, + .Y = {1, 0}, + .T = {0}, + .Z = {1, 0} +}; + + +static const byte ed25519_d[F25519_SIZE] = { + 0xa3, 0x78, 0x59, 0x13, 0xca, 0x4d, 0xeb, 0x75, + 0xab, 0xd8, 0x41, 0x41, 0x4d, 0x0a, 0x70, 0x00, + 0x98, 0xe8, 0x79, 0x77, 0x79, 0x40, 0xc7, 0x8c, + 0x73, 0xfe, 0x6f, 0x2b, 0xee, 0x6c, 0x03, 0x52 +}; + + +/* k = 2d */ +static const byte ed25519_k[F25519_SIZE] = { + 0x59, 0xf1, 0xb2, 0x26, 0x94, 0x9b, 0xd6, 0xeb, + 0x56, 0xb1, 0x83, 0x82, 0x9a, 0x14, 0xe0, 0x00, + 0x30, 0xd1, 0xf3, 0xee, 0xf2, 0x80, 0x8e, 0x19, + 0xe7, 0xfc, 0xdf, 0x56, 0xdc, 0xd9, 0x06, 0x24 +}; + + +void ed25519_add(ge_p3 *r, + const ge_p3 *p1, const ge_p3 *p2) +{ + /* Explicit formulas database: add-2008-hwcd-3 + * + * source 2008 Hisil--Wong--Carter--Dawson, + * http://eprint.iacr.org/2008/522, Section 3.1 + * appliesto extended-1 + * parameter k + * assume k = 2 d + * compute A = (Y1-X1)(Y2-X2) + * compute B = (Y1+X1)(Y2+X2) + * compute C = T1 k T2 + * compute D = Z1 2 Z2 + * compute E = B - A + * compute F = D - C + * compute G = D + C + * compute H = B + A + * compute X3 = E F + * compute Y3 = G H + * compute T3 = E H + * compute Z3 = F G + */ + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + byte c[F25519_SIZE]; + byte d[F25519_SIZE]; + byte e[F25519_SIZE]; + byte f[F25519_SIZE]; + byte g[F25519_SIZE]; + byte h[F25519_SIZE]; + + /* A = (Y1-X1)(Y2-X2) */ + fe_sub(c, p1->Y, p1->X); + fe_sub(d, p2->Y, p2->X); + fe_mul__distinct(a, c, d); + + /* B = (Y1+X1)(Y2+X2) */ + fe_add(c, p1->Y, p1->X); + fe_add(d, p2->Y, p2->X); + fe_mul__distinct(b, c, d); + + /* C = T1 k T2 */ + fe_mul__distinct(d, p1->T, p2->T); + fe_mul__distinct(c, d, ed25519_k); + + /* D = Z1 2 Z2 */ + fe_mul__distinct(d, p1->Z, p2->Z); + fe_add(d, d, d); + + /* E = B - A */ + fe_sub(e, b, a); + + /* F = D - C */ + fe_sub(f, d, c); + + /* G = D + C */ + fe_add(g, d, c); + + /* H = B + A */ + fe_add(h, b, a); + + /* X3 = E F */ + fe_mul__distinct(r->X, e, f); + + /* Y3 = G H */ + fe_mul__distinct(r->Y, g, h); + + /* T3 = E H */ + fe_mul__distinct(r->T, e, h); + + /* Z3 = F G */ + fe_mul__distinct(r->Z, f, g); +} + + +void ed25519_double(ge_p3 *r, const ge_p3 *p) +{ + /* Explicit formulas database: dbl-2008-hwcd + * + * source 2008 Hisil--Wong--Carter--Dawson, + * http://eprint.iacr.org/2008/522, Section 3.3 + * compute A = X1^2 + * compute B = Y1^2 + * compute C = 2 Z1^2 + * compute D = a A + * compute E = (X1+Y1)^2-A-B + * compute G = D + B + * compute F = G - C + * compute H = D - B + * compute X3 = E F + * compute Y3 = G H + * compute T3 = E H + * compute Z3 = F G + */ + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + byte c[F25519_SIZE]; + byte e[F25519_SIZE]; + byte f[F25519_SIZE]; + byte g[F25519_SIZE]; + byte h[F25519_SIZE]; + + /* A = X1^2 */ + fe_mul__distinct(a, p->X, p->X); + + /* B = Y1^2 */ + fe_mul__distinct(b, p->Y, p->Y); + + /* C = 2 Z1^2 */ + fe_mul__distinct(c, p->Z, p->Z); + fe_add(c, c, c); + + /* D = a A (alter sign) */ + /* E = (X1+Y1)^2-A-B */ + fe_add(f, p->X, p->Y); + fe_mul__distinct(e, f, f); + fe_sub(e, e, a); + fe_sub(e, e, b); + + /* G = D + B */ + fe_sub(g, b, a); + + /* F = G - C */ + fe_sub(f, g, c); + + /* H = D - B */ + fe_neg(h, b); + fe_sub(h, h, a); + + /* X3 = E F */ + fe_mul__distinct(r->X, e, f); + + /* Y3 = G H */ + fe_mul__distinct(r->Y, g, h); + + /* T3 = E H */ + fe_mul__distinct(r->T, e, h); + + /* Z3 = F G */ + fe_mul__distinct(r->Z, f, g); +} + + +void ed25519_smult(ge_p3 *r_out, const ge_p3 *p, const byte *e) +{ + ge_p3 r; + int i; + + XMEMCPY(&r, &ed25519_neutral, sizeof(r)); + + for (i = 255; i >= 0; i--) { + const byte bit = (e[i >> 3] >> (i & 7)) & 1; + ge_p3 s; + + ed25519_double(&r, &r); + ed25519_add(&s, &r, p); + + fe_select(r.X, r.X, s.X, bit); + fe_select(r.Y, r.Y, s.Y, bit); + fe_select(r.Z, r.Z, s.Z, bit); + fe_select(r.T, r.T, s.T, bit); + } + XMEMCPY(r_out, &r, sizeof(r)); +} + + +void ge_scalarmult_base(ge_p3 *R,const unsigned char *nonce) +{ + ed25519_smult(R, &ed25519_base, nonce); +} + + +/* pack the point h into array s */ +void ge_p3_tobytes(unsigned char *s,const ge_p3 *h) +{ + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + byte z1[F25519_SIZE]; + byte parity; + + fe_inv__distinct(z1, h->Z); + fe_mul__distinct(x, h->X, z1); + fe_mul__distinct(y, h->Y, z1); + + fe_normalize(x); + fe_normalize(y); + + parity = (x[0] & 1) << 7; + fe_copy(s, y); + fe_normalize(s); + s[31] |= parity; +} + + +/* pack the point h into array s */ +void ge_tobytes(unsigned char *s,const ge_p2 *h) +{ + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + byte z1[F25519_SIZE]; + byte parity; + + fe_inv__distinct(z1, h->Z); + fe_mul__distinct(x, h->X, z1); + fe_mul__distinct(y, h->Y, z1); + + fe_normalize(x); + fe_normalize(y); + + parity = (x[0] & 1) << 7; + fe_copy(s, y); + fe_normalize(s); + s[31] |= parity; +} + + +/* + Test if the public key can be uncommpressed and negate it (-X,Y,Z,-T) + return 0 on success + */ +int ge_frombytes_negate_vartime(ge_p3 *p,const unsigned char *s) +{ + + byte parity; + byte x[F25519_SIZE]; + byte y[F25519_SIZE]; + byte a[F25519_SIZE]; + byte b[F25519_SIZE]; + byte c[F25519_SIZE]; + int ret = 0; + + /* unpack the key s */ + parity = s[31] >> 7; + fe_copy(y, s); + y[31] &= 127; + + fe_mul__distinct(c, y, y); + fe_mul__distinct(b, c, ed25519_d); + fe_add(a, b, f25519_one); + fe_inv__distinct(b, a); + fe_sub(a, c, f25519_one); + fe_mul__distinct(c, a, b); + fe_sqrt(a, c); + fe_neg(b, a); + fe_select(x, a, b, (a[0] ^ parity) & 1); + + /* test that x^2 is equal to c */ + fe_mul__distinct(a, x, x); + fe_normalize(a); + fe_normalize(c); + ret |= ConstantCompare(a, c, F25519_SIZE); + + /* project the key s onto p */ + fe_copy(p->X, x); + fe_copy(p->Y, y); + fe_load(p->Z, 1); + fe_mul__distinct(p->T, x, y); + + /* negate, the point becomes (-X,Y,Z,-T) */ + fe_neg(p->X,p->X); + fe_neg(p->T,p->T); + + return ret; +} + + +int ge_double_scalarmult_vartime(ge_p2* R, const unsigned char *h, + const ge_p3 *inA,const unsigned char *sig) +{ + ge_p3 p, A; + int ret = 0; + + XMEMCPY(&A, inA, sizeof(ge_p3)); + + /* find SB */ + ed25519_smult(&p, &ed25519_base, sig); + + /* find H(R,A,M) * -A */ + ed25519_smult(&A, &A, h); + + /* SB + -H(R,A,M)A */ + ed25519_add(&A, &p, &A); + + fe_copy(R->X, A.X); + fe_copy(R->Y, A.Y); + fe_copy(R->Z, A.Z); + + return ret; +} + +#endif /* HAVE_ED25519 */ + diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c index 2a4885ae894..665cfe89b0a 100644 --- a/wolfcrypt/src/ge_operations.c +++ b/wolfcrypt/src/ge_operations.c @@ -51,12 +51,670 @@ where d = -121665/121666. ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T ge_precomp (Duif): (y+x,y-x,2dxy) */ + + /* +Input: + s[0]+256*s[1]+...+256^63*s[63] = s + +Output: + s[0]+256*s[1]+...+256^31*s[31] = s mod l + where l = 2^252 + 27742317777372353535851937790883648493. + Overwrites s in place. +*/ +void sc_reduce(byte* s) +{ + int64_t s0 = 2097151 & load_3(s); + int64_t s1 = 2097151 & (load_4(s + 2) >> 5); + int64_t s2 = 2097151 & (load_3(s + 5) >> 2); + int64_t s3 = 2097151 & (load_4(s + 7) >> 7); + int64_t s4 = 2097151 & (load_4(s + 10) >> 4); + int64_t s5 = 2097151 & (load_3(s + 13) >> 1); + int64_t s6 = 2097151 & (load_4(s + 15) >> 6); + int64_t s7 = 2097151 & (load_3(s + 18) >> 3); + int64_t s8 = 2097151 & load_3(s + 21); + int64_t s9 = 2097151 & (load_4(s + 23) >> 5); + int64_t s10 = 2097151 & (load_3(s + 26) >> 2); + int64_t s11 = 2097151 & (load_4(s + 28) >> 7); + int64_t s12 = 2097151 & (load_4(s + 31) >> 4); + int64_t s13 = 2097151 & (load_3(s + 34) >> 1); + int64_t s14 = 2097151 & (load_4(s + 36) >> 6); + int64_t s15 = 2097151 & (load_3(s + 39) >> 3); + int64_t s16 = 2097151 & load_3(s + 42); + int64_t s17 = 2097151 & (load_4(s + 44) >> 5); + int64_t s18 = 2097151 & (load_3(s + 47) >> 2); + int64_t s19 = 2097151 & (load_4(s + 49) >> 7); + int64_t s20 = 2097151 & (load_4(s + 52) >> 4); + int64_t s21 = 2097151 & (load_3(s + 55) >> 1); + int64_t s22 = 2097151 & (load_4(s + 57) >> 6); + int64_t s23 = (load_4(s + 60) >> 3); + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + int64_t carry10; + int64_t carry11; + int64_t carry12; + int64_t carry13; + int64_t carry14; + int64_t carry15; + int64_t carry16; + + s11 += s23 * 666643; + s12 += s23 * 470296; + s13 += s23 * 654183; + s14 -= s23 * 997805; + s15 += s23 * 136657; + s16 -= s23 * 683901; + s23 = 0; + + s10 += s22 * 666643; + s11 += s22 * 470296; + s12 += s22 * 654183; + s13 -= s22 * 997805; + s14 += s22 * 136657; + s15 -= s22 * 683901; + s22 = 0; + + s9 += s21 * 666643; + s10 += s21 * 470296; + s11 += s21 * 654183; + s12 -= s21 * 997805; + s13 += s21 * 136657; + s14 -= s21 * 683901; + s21 = 0; + + s8 += s20 * 666643; + s9 += s20 * 470296; + s10 += s20 * 654183; + s11 -= s20 * 997805; + s12 += s20 * 136657; + s13 -= s20 * 683901; + s20 = 0; + + s7 += s19 * 666643; + s8 += s19 * 470296; + s9 += s19 * 654183; + s10 -= s19 * 997805; + s11 += s19 * 136657; + s12 -= s19 * 683901; + s19 = 0; + + s6 += s18 * 666643; + s7 += s18 * 470296; + s8 += s18 * 654183; + s9 -= s18 * 997805; + s10 += s18 * 136657; + s11 -= s18 * 683901; + s18 = 0; + + carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; + carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; + carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; + carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21; + carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21; + carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21; + + carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; + carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; + carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; + carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21; + carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21; + + s5 += s17 * 666643; + s6 += s17 * 470296; + s7 += s17 * 654183; + s8 -= s17 * 997805; + s9 += s17 * 136657; + s10 -= s17 * 683901; + s17 = 0; + + s4 += s16 * 666643; + s5 += s16 * 470296; + s6 += s16 * 654183; + s7 -= s16 * 997805; + s8 += s16 * 136657; + s9 -= s16 * 683901; + s16 = 0; + + s3 += s15 * 666643; + s4 += s15 * 470296; + s5 += s15 * 654183; + s6 -= s15 * 997805; + s7 += s15 * 136657; + s8 -= s15 * 683901; + s15 = 0; + + s2 += s14 * 666643; + s3 += s14 * 470296; + s4 += s14 * 654183; + s5 -= s14 * 997805; + s6 += s14 * 136657; + s7 -= s14 * 683901; + s14 = 0; + + s1 += s13 * 666643; + s2 += s13 * 470296; + s3 += s13 * 654183; + s4 -= s13 * 997805; + s5 += s13 * 136657; + s6 -= s13 * 683901; + s13 = 0; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21; + carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21; + carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21; + carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; + carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; + carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; + + carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21; + carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21; + carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21; + carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; + carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; + carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; + carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; + carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; + carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; + carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; + carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; + carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; + carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; + carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; + carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; + carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; + carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; + carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; + carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; + carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; + carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; + carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; + carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; + carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; + carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; + carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; + carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; + + s[0] = s0 >> 0; + s[1] = s0 >> 8; + s[2] = (s0 >> 16) | (s1 << 5); + s[3] = s1 >> 3; + s[4] = s1 >> 11; + s[5] = (s1 >> 19) | (s2 << 2); + s[6] = s2 >> 6; + s[7] = (s2 >> 14) | (s3 << 7); + s[8] = s3 >> 1; + s[9] = s3 >> 9; + s[10] = (s3 >> 17) | (s4 << 4); + s[11] = s4 >> 4; + s[12] = s4 >> 12; + s[13] = (s4 >> 20) | (s5 << 1); + s[14] = s5 >> 7; + s[15] = (s5 >> 15) | (s6 << 6); + s[16] = s6 >> 2; + s[17] = s6 >> 10; + s[18] = (s6 >> 18) | (s7 << 3); + s[19] = s7 >> 5; + s[20] = s7 >> 13; + s[21] = s8 >> 0; + s[22] = s8 >> 8; + s[23] = (s8 >> 16) | (s9 << 5); + s[24] = s9 >> 3; + s[25] = s9 >> 11; + s[26] = (s9 >> 19) | (s10 << 2); + s[27] = s10 >> 6; + s[28] = (s10 >> 14) | (s11 << 7); + s[29] = s11 >> 1; + s[30] = s11 >> 9; + s[31] = s11 >> 17; + + /* hush warnings after setting values to 0 */ + (void)s12; + (void)s13; + (void)s14; + (void)s15; + (void)s16; + (void)s17; + (void)s18; + (void)s19; + (void)s20; + (void)s21; + (void)s22; + (void)s23; +} -r = p + q +/* +Input: + a[0]+256*a[1]+...+256^31*a[31] = a + b[0]+256*b[1]+...+256^31*b[31] = b + c[0]+256*c[1]+...+256^31*c[31] = c + +Output: + s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l + where l = 2^252 + 27742317777372353535851937790883648493. */ +void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) +{ + int64_t a0 = 2097151 & load_3(a); + int64_t a1 = 2097151 & (load_4(a + 2) >> 5); + int64_t a2 = 2097151 & (load_3(a + 5) >> 2); + int64_t a3 = 2097151 & (load_4(a + 7) >> 7); + int64_t a4 = 2097151 & (load_4(a + 10) >> 4); + int64_t a5 = 2097151 & (load_3(a + 13) >> 1); + int64_t a6 = 2097151 & (load_4(a + 15) >> 6); + int64_t a7 = 2097151 & (load_3(a + 18) >> 3); + int64_t a8 = 2097151 & load_3(a + 21); + int64_t a9 = 2097151 & (load_4(a + 23) >> 5); + int64_t a10 = 2097151 & (load_3(a + 26) >> 2); + int64_t a11 = (load_4(a + 28) >> 7); + int64_t b0 = 2097151 & load_3(b); + int64_t b1 = 2097151 & (load_4(b + 2) >> 5); + int64_t b2 = 2097151 & (load_3(b + 5) >> 2); + int64_t b3 = 2097151 & (load_4(b + 7) >> 7); + int64_t b4 = 2097151 & (load_4(b + 10) >> 4); + int64_t b5 = 2097151 & (load_3(b + 13) >> 1); + int64_t b6 = 2097151 & (load_4(b + 15) >> 6); + int64_t b7 = 2097151 & (load_3(b + 18) >> 3); + int64_t b8 = 2097151 & load_3(b + 21); + int64_t b9 = 2097151 & (load_4(b + 23) >> 5); + int64_t b10 = 2097151 & (load_3(b + 26) >> 2); + int64_t b11 = (load_4(b + 28) >> 7); + int64_t c0 = 2097151 & load_3(c); + int64_t c1 = 2097151 & (load_4(c + 2) >> 5); + int64_t c2 = 2097151 & (load_3(c + 5) >> 2); + int64_t c3 = 2097151 & (load_4(c + 7) >> 7); + int64_t c4 = 2097151 & (load_4(c + 10) >> 4); + int64_t c5 = 2097151 & (load_3(c + 13) >> 1); + int64_t c6 = 2097151 & (load_4(c + 15) >> 6); + int64_t c7 = 2097151 & (load_3(c + 18) >> 3); + int64_t c8 = 2097151 & load_3(c + 21); + int64_t c9 = 2097151 & (load_4(c + 23) >> 5); + int64_t c10 = 2097151 & (load_3(c + 26) >> 2); + int64_t c11 = (load_4(c + 28) >> 7); + int64_t s0; + int64_t s1; + int64_t s2; + int64_t s3; + int64_t s4; + int64_t s5; + int64_t s6; + int64_t s7; + int64_t s8; + int64_t s9; + int64_t s10; + int64_t s11; + int64_t s12; + int64_t s13; + int64_t s14; + int64_t s15; + int64_t s16; + int64_t s17; + int64_t s18; + int64_t s19; + int64_t s20; + int64_t s21; + int64_t s22; + int64_t s23; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + int64_t carry10; + int64_t carry11; + int64_t carry12; + int64_t carry13; + int64_t carry14; + int64_t carry15; + int64_t carry16; + int64_t carry17; + int64_t carry18; + int64_t carry19; + int64_t carry20; + int64_t carry21; + int64_t carry22; + + s0 = c0 + a0*b0; + s1 = c1 + a0*b1 + a1*b0; + s2 = c2 + a0*b2 + a1*b1 + a2*b0; + s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0; + s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0; + s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0; + s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0; + s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0; + s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + + a8*b0; + s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + + a8*b1 + a9*b0; + s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + + a8*b2 + a9*b1 + a10*b0; + s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + + a8*b3 + a9*b2 + a10*b1 + a11*b0; + s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + + a10*b2 + a11*b1; + s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + + a11*b2; + s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + + a11*b3; + s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4; + s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5; + s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6; + s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7; + s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8; + s20 = a9*b11 + a10*b10 + a11*b9; + s21 = a10*b11 + a11*b10; + s22 = a11*b11; + s23 = 0; + + carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21; + carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21; + carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21; + carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; + carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; + carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; + carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21; + carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21; + carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21; + carry18 = (s18 + (1<<20)) >> 21; s19 += carry18; s18 -= carry18 << 21; + carry20 = (s20 + (1<<20)) >> 21; s21 += carry20; s20 -= carry20 << 21; + carry22 = (s22 + (1<<20)) >> 21; s23 += carry22; s22 -= carry22 << 21; + + carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21; + carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21; + carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21; + carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; + carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; + carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; + carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21; + carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21; + carry17 = (s17 + (1<<20)) >> 21; s18 += carry17; s17 -= carry17 << 21; + carry19 = (s19 + (1<<20)) >> 21; s20 += carry19; s19 -= carry19 << 21; + carry21 = (s21 + (1<<20)) >> 21; s22 += carry21; s21 -= carry21 << 21; + + s11 += s23 * 666643; + s12 += s23 * 470296; + s13 += s23 * 654183; + s14 -= s23 * 997805; + s15 += s23 * 136657; + s16 -= s23 * 683901; + s23 = 0; + + s10 += s22 * 666643; + s11 += s22 * 470296; + s12 += s22 * 654183; + s13 -= s22 * 997805; + s14 += s22 * 136657; + s15 -= s22 * 683901; + s22 = 0; + + s9 += s21 * 666643; + s10 += s21 * 470296; + s11 += s21 * 654183; + s12 -= s21 * 997805; + s13 += s21 * 136657; + s14 -= s21 * 683901; + s21 = 0; + + s8 += s20 * 666643; + s9 += s20 * 470296; + s10 += s20 * 654183; + s11 -= s20 * 997805; + s12 += s20 * 136657; + s13 -= s20 * 683901; + s20 = 0; + + s7 += s19 * 666643; + s8 += s19 * 470296; + s9 += s19 * 654183; + s10 -= s19 * 997805; + s11 += s19 * 136657; + s12 -= s19 * 683901; + s19 = 0; + + s6 += s18 * 666643; + s7 += s18 * 470296; + s8 += s18 * 654183; + s9 -= s18 * 997805; + s10 += s18 * 136657; + s11 -= s18 * 683901; + s18 = 0; + + carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; + carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; + carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; + carry12 = (s12 + (1<<20)) >> 21; s13 += carry12; s12 -= carry12 << 21; + carry14 = (s14 + (1<<20)) >> 21; s15 += carry14; s14 -= carry14 << 21; + carry16 = (s16 + (1<<20)) >> 21; s17 += carry16; s16 -= carry16 << 21; + + carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; + carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; + carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; + carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21; + carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21; + + s5 += s17 * 666643; + s6 += s17 * 470296; + s7 += s17 * 654183; + s8 -= s17 * 997805; + s9 += s17 * 136657; + s10 -= s17 * 683901; + s17 = 0; + + s4 += s16 * 666643; + s5 += s16 * 470296; + s6 += s16 * 654183; + s7 -= s16 * 997805; + s8 += s16 * 136657; + s9 -= s16 * 683901; + s16 = 0; + + s3 += s15 * 666643; + s4 += s15 * 470296; + s5 += s15 * 654183; + s6 -= s15 * 997805; + s7 += s15 * 136657; + s8 -= s15 * 683901; + s15 = 0; + + s2 += s14 * 666643; + s3 += s14 * 470296; + s4 += s14 * 654183; + s5 -= s14 * 997805; + s6 += s14 * 136657; + s7 -= s14 * 683901; + s14 = 0; + + s1 += s13 * 666643; + s2 += s13 * 470296; + s3 += s13 * 654183; + s4 -= s13 * 997805; + s5 += s13 * 136657; + s6 -= s13 * 683901; + s13 = 0; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21; + carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21; + carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21; + carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21; + carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21; + carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21; + + carry1 = (s1 + (1<<20)) >> 21; s2 += carry1; s1 -= carry1 << 21; + carry3 = (s3 + (1<<20)) >> 21; s4 += carry3; s3 -= carry3 << 21; + carry5 = (s5 + (1<<20)) >> 21; s6 += carry5; s5 -= carry5 << 21; + carry7 = (s7 + (1<<20)) >> 21; s8 += carry7; s7 -= carry7 << 21; + carry9 = (s9 + (1<<20)) >> 21; s10 += carry9; s9 -= carry9 << 21; + carry11 = (s11 + (1<<20)) >> 21; s12 += carry11; s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; + carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; + carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; + carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; + carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; + carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; + carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; + carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; + carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; + carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; + carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; + carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21; + carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21; + carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21; + carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21; + carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21; + carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21; + carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21; + carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21; + carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21; + carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21; + carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21; + + s[0] = s0 >> 0; + s[1] = s0 >> 8; + s[2] = (s0 >> 16) | (s1 << 5); + s[3] = s1 >> 3; + s[4] = s1 >> 11; + s[5] = (s1 >> 19) | (s2 << 2); + s[6] = s2 >> 6; + s[7] = (s2 >> 14) | (s3 << 7); + s[8] = s3 >> 1; + s[9] = s3 >> 9; + s[10] = (s3 >> 17) | (s4 << 4); + s[11] = s4 >> 4; + s[12] = s4 >> 12; + s[13] = (s4 >> 20) | (s5 << 1); + s[14] = s5 >> 7; + s[15] = (s5 >> 15) | (s6 << 6); + s[16] = s6 >> 2; + s[17] = s6 >> 10; + s[18] = (s6 >> 18) | (s7 << 3); + s[19] = s7 >> 5; + s[20] = s7 >> 13; + s[21] = s8 >> 0; + s[22] = s8 >> 8; + s[23] = (s8 >> 16) | (s9 << 5); + s[24] = s9 >> 3; + s[25] = s9 >> 11; + s[26] = (s9 >> 19) | (s10 << 2); + s[27] = s10 >> 6; + s[28] = (s10 >> 14) | (s11 << 7); + s[29] = s11 >> 1; + s[30] = s11 >> 9; + s[31] = s11 >> 17; + + /* hush warnings after setting values to 0 */ + (void)s12; + (void)s13; + (void)s14; + (void)s15; + (void)s16; + (void)s17; + (void)s18; + (void)s19; + (void)s20; + (void)s21; + (void)s22; + (void)s23; +} + + +int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz) +{ + fe x,y,z; + ge_p3 g; + byte bArray[keySz]; + word32 i; + + fe_0(x); + fe_0(y); + fe_1(z); + fe_frombytes(x, xIn); + fe_frombytes(y, yIn); + + fe_copy(g.X, x); + fe_copy(g.Y, y); + fe_copy(g.Z, z); + ge_p3_tobytes(bArray, &g); + + for (i = 0; i < keySz; i++) { + out[keySz - 1 - i] = bArray[i]; + } + + return 0; +} + + +/* +r = p + q +*/ void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q) { fe t0; @@ -89,7 +747,8 @@ static unsigned char equal(signed char b,signed char c) static unsigned char negative(signed char b) { - unsigned long long x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */ + unsigned long long x = b; /* 18446744073709551361..18446744073709551615: + yes; 0..255: no */ x >>= 63; /* 1: yes; 0: no */ return x; } @@ -1482,7 +2141,6 @@ B is the Ed25519 base point (x,4/5) with x positive. Preconditions: a[31] <= 127 */ - void ge_scalarmult_base(ge_p3 *h,const unsigned char *a) { signed char e[64]; @@ -1610,8 +2268,8 @@ where a = a[0]+256*a[1]+...+256^31 a[31]. and b = b[0]+256*b[1]+...+256^31 b[31]. B is the Ed25519 base point (x,4/5) with x positive. */ - -void ge_double_scalarmult_vartime(ge_p2 *r,const unsigned char *a,const ge_p3 *A,const unsigned char *b) +int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, + const ge_p3 *A, const unsigned char *b) { signed char aslide[256]; signed char bslide[256]; @@ -1661,16 +2319,20 @@ void ge_double_scalarmult_vartime(ge_p2 *r,const unsigned char *a,const ge_p3 *A ge_p1p1_to_p2(r,&t); } + + return 0; } static const fe d = { --10913610,13857413,-15372611,6949391,114729,-8787816,-6275908,-3247719,-18696448,-12055116 +-10913610,13857413,-15372611,6949391,114729, +-8787816,-6275908,-3247719,-18696448,-12055116 } ; static const fe sqrtm1 = { --32595792,-7943725,9377950,3500415,12389472,-272473,-25146209,-2005654,326686,11406482 +-32595792,-7943725,9377950,3500415,12389472, +-272473,-25146209,-2005654,326686,11406482 } ; @@ -1689,6 +2351,7 @@ int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s) fe_sub(u,u,h->Z); /* u = y^2-1 */ fe_add(v,v,h->Z); /* v = dy^2+1 */ + fe_sq(v3,v); fe_mul(v3,v3,v); /* v3 = v^3 */ fe_sq(h->X,v3); @@ -1850,7 +2513,8 @@ r = p */ static const fe d2 = { --21827239,-5839606,-30745221,13898782,229458,15978800,-12551817,-6495438,29715968,9444199 +-21827239,-5839606,-30745221,13898782,229458, +15978800,-12551817,-6495438,29715968,9444199 } ; diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 6620a5fdad7..ee199092912 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -5728,54 +5728,54 @@ int ed25519_test(void) if (wc_ed25519_import_private_key(sKeys[i], ED25519_KEY_SIZE, pKeys[i], pKeySz[i], &key) != 0) - return -1021; + return -1021 - i; if (wc_ed25519_sign_msg(msgs[i], msgSz[i], out, &outlen, &key) != 0) - return -1022; + return -1027 - i; if (XMEMCMP(out, sigs[i], 64)) - return -1023; + return -1033 - i; /* test verify on good msg */ if (wc_ed25519_verify_msg(out, outlen, msgs[i], msgSz[i], &verify, &key) != 0 || verify != 1) - return -1024; + return -1039 - i; /* test verify on bad msg */ out[outlen-1] = out[outlen-1] + 1; if (wc_ed25519_verify_msg(out, outlen, msgs[i], msgSz[i], &verify, &key) == 0 || verify == 1) - return -1025; + return -1045 - i; /* test api for import/exporting keys */ exportPSz = sizeof(exportPKey); exportSSz = sizeof(exportSKey); if (wc_ed25519_export_public(&key, exportPKey, &exportPSz) != 0) - return -1026; + return -1051 - i; if (wc_ed25519_import_public(exportPKey, exportPSz, &key2) != 0) - return -1027; + return -1057 - i; if (wc_ed25519_export_private_only(&key, exportSKey, &exportSSz) != 0) - return -1028; + return -1063 - i; if (wc_ed25519_import_private_key(exportSKey, exportSSz, exportPKey, exportPSz, &key2) != 0) - return -1029; + return -1069 - i; /* clear "out" buffer and test sign with imported keys */ outlen = sizeof(out); XMEMSET(out, 0, sizeof(out)); if (wc_ed25519_sign_msg(msgs[i], msgSz[i], out, &outlen, &key2) != 0) - return -1030; + return -1075 - i; if (wc_ed25519_verify_msg(out, outlen, msgs[i], msgSz[i], &verify, &key2) != 0 || verify != 1) - return -1031; + return -1081 - i; if (XMEMCMP(out, sigs[i], 64)) - return -1032; + return -1087 - i; } /* clean up keys when done */ diff --git a/wolfssl/wolfcrypt/fe_operations.h b/wolfssl/wolfcrypt/fe_operations.h index 3cd49015a49..a779f2be7a6 100644 --- a/wolfssl/wolfcrypt/fe_operations.h +++ b/wolfssl/wolfcrypt/fe_operations.h @@ -19,9 +19,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ - /* Based On Daniel J Bernstein's curve25519 and ed25519 Public Domain ref10 - work. */ - #ifndef WOLF_CRYPT_FE_OPERATIONS_H #define WOLF_CRYPT_FE_OPERATIONS_H @@ -29,7 +26,10 @@ #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) -#include +#ifndef CURVED25519_SMALL + #include +#endif +#include /* fe means field element. @@ -39,29 +39,94 @@ t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on context. */ -typedef int32_t fe[10]; +#ifdef CURVED25519_SMALL + #define F25519_SIZE 32 + typedef byte fe[32]; +#else + typedef int32_t fe[10]; +#endif -WOLFSSL_LOCAL void fe_0(fe); -WOLFSSL_LOCAL void fe_1(fe); +WOLFSSL_LOCAL int curve25519(byte * q, byte * n, byte * p); +WOLFSSL_LOCAL void fe_copy(fe, const fe); WOLFSSL_LOCAL void fe_add(fe, const fe, const fe); -WOLFSSL_LOCAL void fe_tobytes(unsigned char *, const fe); +WOLFSSL_LOCAL void fe_neg(fe,const fe); WOLFSSL_LOCAL void fe_sub(fe, const fe, const fe); WOLFSSL_LOCAL void fe_invert(fe, const fe); +WOLFSSL_LOCAL void fe_mul(fe,const fe,const fe); + +/* default to be faster but take more memory */ +#ifndef CURVED25519_SMALL + +/* Based On Daniel J Bernstein's curve25519 and ed25519 Public Domain ref10 + work. */ + +WOLFSSL_LOCAL void fe_0(fe); +WOLFSSL_LOCAL void fe_1(fe); +WOLFSSL_LOCAL int fe_isnonzero(const fe); +WOLFSSL_LOCAL int fe_isnegative(const fe); +WOLFSSL_LOCAL void fe_tobytes(unsigned char *, const fe); WOLFSSL_LOCAL void fe_sq(fe, const fe); WOLFSSL_LOCAL void fe_sq2(fe,const fe); WOLFSSL_LOCAL void fe_frombytes(fe,const unsigned char *); -WOLFSSL_LOCAL void fe_mul(fe,const fe,const fe); -WOLFSSL_LOCAL void fe_copy(fe, const fe); WOLFSSL_LOCAL void fe_cswap(fe,fe,unsigned int); WOLFSSL_LOCAL void fe_mul121666(fe,fe); -WOLFSSL_LOCAL int fe_isnonzero(const fe); -WOLFSSL_LOCAL int fe_isnegative(const fe); WOLFSSL_LOCAL void fe_cmov(fe,const fe,unsigned int); -WOLFSSL_LOCAL void fe_neg(fe,const fe); WOLFSSL_LOCAL void fe_pow22523(fe,const fe); + +/* 64 type needed for SHA512 */ WOLFSSL_LOCAL uint64_t load_3(const unsigned char *in); WOLFSSL_LOCAL uint64_t load_4(const unsigned char *in); +#endif /* not defined CURVED25519_SMALL */ + +/* Use less memory and only 32bit types or less, but is slower + Based on Daniel Beer's public domain work. */ +#ifdef CURVED25519_SMALL +static const byte c25519_base_x[F25519_SIZE] = {9}; +static const byte f25519_zero[F25519_SIZE] = {0}; +static const byte f25519_one[F25519_SIZE] = {1}; +static const byte fprime_zero[F25519_SIZE] = {0}; +static const byte fprime_one[F25519_SIZE] = {1}; + +WOLFSSL_LOCAL void fe_load(byte *x, word32 c); +WOLFSSL_LOCAL void fe_normalize(byte *x); +WOLFSSL_LOCAL void fe_inv__distinct(byte *r, const byte *x); + +/* Conditional copy. If condition == 0, then zero is copied to dst. If + * condition == 1, then one is copied to dst. Any other value results in + * undefined behaviour. + */ +WOLFSSL_LOCAL void fe_select(byte *dst, const byte *zero, const byte *one, + byte condition); +/* Multiply a point by a small constant. The two pointers are not + * required to be distinct. + * + * The constant must be less than 2^24. + */ +WOLFSSL_LOCAL void fe_mul_c(byte *r, const byte *a, word32 b); +WOLFSSL_LOCAL void fe_mul__distinct(byte *r, const byte *a, const byte *b); + +/* Compute one of the square roots of the field element, if the element + * is square. The other square is -r. + * + * If the input is not square, the returned value is a valid field + * element, but not the correct answer. If you don't already know that + * your element is square, you should square the return value and test. + */ +WOLFSSL_LOCAL void fe_sqrt(byte *r, const byte *x); + +/* Conditional copy. If condition == 0, then zero is copied to dst. If + * condition == 1, then one is copied to dst. Any other value results in + * undefined behaviour. + */ +WOLFSSL_LOCAL void fprime_select(byte *dst, const byte *zero, const byte *one, + byte condition); +WOLFSSL_LOCAL void fprime_add(byte *r, const byte *a, const byte *modulus); +WOLFSSL_LOCAL void fprime_sub(byte *r, const byte *a, const byte *modulus); +WOLFSSL_LOCAL void fprime_mul(byte *r, const byte *a, const byte *b, + const byte *modulus); +WOLFSSL_LOCAL void fprime_copy(byte *x, const byte *a); +#endif /* CURVED25519_SMALL */ #endif /* HAVE_CURVE25519 or HAVE_ED25519 */ #endif /* WOLF_CRYPT_FE_OPERATIONS_H */ diff --git a/wolfssl/wolfcrypt/ge_operations.h b/wolfssl/wolfcrypt/ge_operations.h index 8cd09c5475a..00d1b3edc67 100644 --- a/wolfssl/wolfcrypt/ge_operations.h +++ b/wolfssl/wolfcrypt/ge_operations.h @@ -28,7 +28,9 @@ #ifdef HAVE_ED25519 -#include +#ifndef CURVED25519_SMALL + #include +#endif #include /* @@ -59,6 +61,20 @@ typedef struct { fe T; } ge_p3; +WOLFSSL_LOCAL int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, + word32 keySz); +WOLFSSL_LOCAL int ge_frombytes_negate_vartime(ge_p3 *,const unsigned char *); + +WOLFSSL_LOCAL int ge_double_scalarmult_vartime(ge_p2 *,const unsigned char *, + const ge_p3 *,const unsigned char *); +WOLFSSL_LOCAL void ge_scalarmult_base(ge_p3 *,const unsigned char *); +WOLFSSL_LOCAL void sc_reduce(byte* s); +WOLFSSL_LOCAL void sc_muladd(byte* s, const byte* a, const byte* b, + const byte* c); +WOLFSSL_LOCAL void ge_tobytes(unsigned char *,const ge_p2 *); +WOLFSSL_LOCAL void ge_p3_tobytes(unsigned char *,const ge_p3 *); + +#ifndef CURVED25519_SMALL typedef struct { fe X; fe Y; @@ -79,10 +95,6 @@ typedef struct { fe T2d; } ge_cached; -WOLFSSL_LOCAL void ge_tobytes(unsigned char *,const ge_p2 *); -WOLFSSL_LOCAL void ge_p3_tobytes(unsigned char *,const ge_p3 *); -WOLFSSL_LOCAL int ge_frombytes_negate_vartime(ge_p3 *,const unsigned char *); - WOLFSSL_LOCAL void ge_p2_0(ge_p2 *); WOLFSSL_LOCAL void ge_p3_0(ge_p3 *); WOLFSSL_LOCAL void ge_precomp_0(ge_precomp *); @@ -97,10 +109,7 @@ WOLFSSL_LOCAL void ge_madd(ge_p1p1 *,const ge_p3 *,const ge_precomp *); WOLFSSL_LOCAL void ge_msub(ge_p1p1 *,const ge_p3 *,const ge_precomp *); WOLFSSL_LOCAL void ge_add(ge_p1p1 *,const ge_p3 *,const ge_cached *); WOLFSSL_LOCAL void ge_sub(ge_p1p1 *,const ge_p3 *,const ge_cached *); -WOLFSSL_LOCAL void ge_scalarmult_base(ge_p3 *,const unsigned char *); -WOLFSSL_LOCAL void ge_double_scalarmult_vartime(ge_p2 *,const unsigned char *, - const ge_p3 *,const unsigned char *); - +#endif /* no CURVED25519_SMALL */ #endif /* HAVE_ED25519 */ #endif /* WOLF_CRYPT_GE_OPERATIONS_H */