From 3121c55e4e72e34ffe33f4462cf83434ebf7496b Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Sat, 30 May 2026 15:11:15 -0500 Subject: [PATCH 1/2] linuxkm on x86: global refactor across PK implementations of sp-asm vector register preservation, including removal of all residual can't-fail vector paths in PK algs. wolfcrypt/src/sp_x86_64.c: * fix ASSERT_SAVED_VECTOR_REGISTERS() in C wrappers: add where missing for implementations that use AVX2, and remove frivolous checks for ones that don't. * refactor vector save-restore with a single locally tracked save in sp_RsaPublic_#(), sp_RsaPrivate_#(), sp_ecc_mulmod_add_#(), sp_ecc_mulmod_base_add_#(), sp_ecc_make_key_#(), and sp_#_calc_s_#(). * fix feature test in sp_ModExp_Fp_star_1024(), sp_Pairing_1024(), and sp_Pairing_gen_precomp_1024(), to properly gate on IS_INTEL_AVX2(cpuid_flags) and SAVE_VECTOR_REGISTERS2() == 0. wolfcrypt/src/{dh.c,dsa.c,ecc.c,eccsi.c,rsa.c,sp_int.c}: * remove all vector register provisions (SAVE_VECTOR_REGISTERS(), RESTORE_VECTOR_REGISTERS(), ASSERT_SAVED_VECTOR_REGISTERS()); * add explicit WC_CHECK_FOR_INTR_SIGNALS() and WC_RELAX_LONG_LOOP() to the lengthy loops in wc_DhGenerateParams(), wc_MakeDsaParameters(), ecc_sign_hash_sw(), and wc_MakeRsaKey(). wolfssl/wolfcrypt/{error-crypt.h,logging.h,memory.h}: * make wc_backtrace_render() and wc_backtrace_set_fp() available whenever defined(WOLFSSL_DEBUG_BACKTRACE_ERROR_CODES); * add support for DEBUG_VECTOR_REGISTERS_BACKTRACE_ON_FAIL, activating backtraces on vector register errors. * also improve the debugging format from the DEBUG_VECTOR_REGISTER_ACCESS variants of SAVE_VECTOR_REGISTERS() and friends. linuxkm/lkcapi_{dh,ecdh,ecdsa,rsa}_glue.c: harmonize PK driver names with AES, SHA, and DRBG, notably adding AVX2 annotation when enabled. wolfcrypt/src/{sp_x86_64_asm.S,sp_x86_64_asm.asm}: synchronize with wolfssl/scripts#581 (removes SSE2 implementations of sp_#_get_from_table_#(), which no longer have users). --- .wolfssl_known_macro_extras | 2 +- linuxkm/lkcapi_dh_glue.c | 27 +- linuxkm/lkcapi_ecdh_glue.c | 19 +- linuxkm/lkcapi_ecdsa_glue.c | 22 +- linuxkm/lkcapi_rsa_glue.c | 37 +- wolfcrypt/src/dh.c | 48 +- wolfcrypt/src/dsa.c | 21 +- wolfcrypt/src/ecc.c | 123 +---- wolfcrypt/src/rsa.c | 65 +-- wolfcrypt/src/sp_int.c | 45 +- wolfcrypt/src/sp_x86_64.c | 847 ++++++++++++++++---------------- wolfssl/wolfcrypt/error-crypt.h | 5 +- wolfssl/wolfcrypt/logging.h | 7 +- wolfssl/wolfcrypt/memory.h | 42 +- 14 files changed, 550 insertions(+), 760 deletions(-) diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index c4a26205436..fb34d7c6f29 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -741,10 +741,10 @@ WOLFSSL_CONTIKI WOLFSSL_CRL_ALLOW_MISSING_CDP WOLFSSL_DISABLE_EARLY_SANITY_CHECKS WOLFSSL_DRBG_SHA256 +WOLFSSL_DTLS13_ECHO_LEGACY_SESSION_ID WOLFSSL_DTLS_DISALLOW_FUTURE WOLFSSL_DTLS_RECORDS_CAN_SPAN_DATAGRAMS WOLFSSL_DTLS_RESEND_ONLY_TIMEOUT -WOLFSSL_DTLS13_ECHO_LEGACY_SESSION_ID WOLFSSL_DUMP_MEMIO_STREAM WOLFSSL_DUP_CERTPOL WOLFSSL_EARLY_DATA_NO_ANTI_REPLAY diff --git a/linuxkm/lkcapi_dh_glue.c b/linuxkm/lkcapi_dh_glue.c index 6caabf06ac8..581776b230d 100644 --- a/linuxkm/lkcapi_dh_glue.c +++ b/linuxkm/lkcapi_dh_glue.c @@ -89,37 +89,40 @@ #include #define WOLFKM_DH_NAME ("dh") -#define WOLFKM_DH_DRIVER ("dh" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") + +#if defined(WOLFSSL_SP_X86_64_ASM) && !defined(NO_AVX2_SUPPORT) + #define WOLFKM_DH_DRIVER_ISA_EXT "-avx2" +#else + #define WOLFKM_DH_DRIVER_ISA_EXT "" +#endif +#define WOLFKM_DH_DRIVER_SUFFIX WOLFKM_DH_DRIVER_ISA_EXT \ + WOLFKM_DRIVER_SUFFIX_BASE + +#define WOLFKM_DH_DRIVER ("dh" WOLFKM_DH_DRIVER_SUFFIX) #ifdef HAVE_FFDHE_2048 #define WOLFKM_FFDHE2048_NAME ("ffdhe2048(dh)") - #define WOLFKM_FFDHE2048_DRIVER ("ffdhe2048" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") + #define WOLFKM_FFDHE2048_DRIVER ("ffdhe2048" WOLFKM_DH_DRIVER_SUFFIX) #endif /* HAVE_FFDHE_2048 */ #ifdef HAVE_FFDHE_3072 #define WOLFKM_FFDHE3072_NAME ("ffdhe3072(dh)") - #define WOLFKM_FFDHE3072_DRIVER ("ffdhe3072" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") + #define WOLFKM_FFDHE3072_DRIVER ("ffdhe3072" WOLFKM_DH_DRIVER_SUFFIX) #endif /* HAVE_FFDHE_3072 */ #ifdef HAVE_FFDHE_4096 #define WOLFKM_FFDHE4096_NAME ("ffdhe4096(dh)") - #define WOLFKM_FFDHE4096_DRIVER ("ffdhe4096" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") + #define WOLFKM_FFDHE4096_DRIVER ("ffdhe4096" WOLFKM_DH_DRIVER_SUFFIX) #endif /* HAVE_FFDHE_4096 */ #ifdef HAVE_FFDHE_6144 #define WOLFKM_FFDHE6144_NAME ("ffdhe6144(dh)") - #define WOLFKM_FFDHE6144_DRIVER ("ffdhe6144" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") + #define WOLFKM_FFDHE6144_DRIVER ("ffdhe6144" WOLFKM_DH_DRIVER_SUFFIX) #endif /* HAVE_FFDHE_6144 */ #ifdef HAVE_FFDHE_8192 #define WOLFKM_FFDHE8192_NAME ("ffdhe8192(dh)") - #define WOLFKM_FFDHE8192_DRIVER ("ffdhe8192" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") + #define WOLFKM_FFDHE8192_DRIVER ("ffdhe8192" WOLFKM_DH_DRIVER_SUFFIX) #endif /* HAVE_FFDHE_8192 */ static int linuxkm_test_kpp_driver(const char * driver, diff --git a/linuxkm/lkcapi_ecdh_glue.c b/linuxkm/lkcapi_ecdh_glue.c index 2f88bd20377..24755917a5c 100644 --- a/linuxkm/lkcapi_ecdh_glue.c +++ b/linuxkm/lkcapi_ecdh_glue.c @@ -63,19 +63,24 @@ #include #include -#define WOLFKM_ECDH_DRIVER ("ecdh-wolfcrypt") +#if defined(WOLFSSL_SP_X86_64_ASM) && !defined(NO_AVX2_SUPPORT) + #define WOLFKM_ECDH_DRIVER_ISA_EXT "-avx2" +#else + #define WOLFKM_ECDH_DRIVER_ISA_EXT "" +#endif +#define WOLFKM_ECDH_DRIVER_SUFFIX WOLFKM_ECDH_DRIVER_ISA_EXT \ + WOLFKM_DRIVER_SUFFIX_BASE + +#define WOLFKM_ECDH_DRIVER ("ecdh" WOLFKM_ECDH_DRIVER_SUFFIX) #define WOLFKM_ECDH_P192_NAME ("ecdh-nist-p192") -#define WOLFKM_ECDH_P192_DRIVER ("ecdh-nist-p192" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDH_P192_DRIVER ("ecdh-nist-p192" WOLFKM_ECDH_DRIVER_SUFFIX) #define WOLFKM_ECDH_P256_NAME ("ecdh-nist-p256") -#define WOLFKM_ECDH_P256_DRIVER ("ecdh-nist-p256" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDH_P256_DRIVER ("ecdh-nist-p256" WOLFKM_ECDH_DRIVER_SUFFIX) #define WOLFKM_ECDH_P384_NAME ("ecdh-nist-p384") -#define WOLFKM_ECDH_P384_DRIVER ("ecdh-nist-p384" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDH_P384_DRIVER ("ecdh-nist-p384" WOLFKM_ECDH_DRIVER_SUFFIX) static int linuxkm_test_ecdh_nist_driver(const char * driver, const byte * b_pub, diff --git a/linuxkm/lkcapi_ecdsa_glue.c b/linuxkm/lkcapi_ecdsa_glue.c index 46469131e50..13a0e90be57 100644 --- a/linuxkm/lkcapi_ecdsa_glue.c +++ b/linuxkm/lkcapi_ecdsa_glue.c @@ -82,23 +82,27 @@ #include #include -#define WOLFKM_ECDSA_DRIVER ("ecdsa-wolfcrypt") +#if defined(WOLFSSL_SP_X86_64_ASM) && !defined(NO_AVX2_SUPPORT) + #define WOLFKM_ECDSA_DRIVER_ISA_EXT "-avx2" +#else + #define WOLFKM_ECDSA_DRIVER_ISA_EXT "" +#endif +#define WOLFKM_ECDSA_DRIVER_SUFFIX WOLFKM_ECDSA_DRIVER_ISA_EXT \ + WOLFKM_DRIVER_SUFFIX_BASE + +#define WOLFKM_ECDSA_DRIVER ("ecdsa" WOLFKM_ECDSA_DRIVER_SUFFIX) #define WOLFKM_ECDSA_P192_NAME ("ecdsa-nist-p192") -#define WOLFKM_ECDSA_P192_DRIVER ("ecdsa-nist-p192" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDSA_P192_DRIVER ("ecdsa-nist-p192" WOLFKM_ECDSA_DRIVER_SUFFIX) #define WOLFKM_ECDSA_P256_NAME ("ecdsa-nist-p256") -#define WOLFKM_ECDSA_P256_DRIVER ("ecdsa-nist-p256" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDSA_P256_DRIVER ("ecdsa-nist-p256" WOLFKM_ECDSA_DRIVER_SUFFIX) #define WOLFKM_ECDSA_P384_NAME ("ecdsa-nist-p384") -#define WOLFKM_ECDSA_P384_DRIVER ("ecdsa-nist-p384" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDSA_P384_DRIVER ("ecdsa-nist-p384" WOLFKM_ECDSA_DRIVER_SUFFIX) #define WOLFKM_ECDSA_P521_NAME ("ecdsa-nist-p521") -#define WOLFKM_ECDSA_P521_DRIVER ("ecdsa-nist-p521" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt") +#define WOLFKM_ECDSA_P521_DRIVER ("ecdsa-nist-p521" WOLFKM_ECDSA_DRIVER_SUFFIX) static int linuxkm_test_ecdsa_nist_driver(const char * driver, diff --git a/linuxkm/lkcapi_rsa_glue.c b/linuxkm/lkcapi_rsa_glue.c index e38dfaf28cf..ac7f8ff5c78 100644 --- a/linuxkm/lkcapi_rsa_glue.c +++ b/linuxkm/lkcapi_rsa_glue.c @@ -99,45 +99,46 @@ #include #define WOLFKM_RSA_NAME ("rsa") -#define WOLFKM_RSA_DRIVER ("rsa" WOLFKM_DRIVER_FIPS "-wolfcrypt") + +#if defined(WOLFSSL_SP_X86_64_ASM) && !defined(NO_AVX2_SUPPORT) + #define WOLFKM_RSA_DRIVER_ISA_EXT "-avx2" +#else + #define WOLFKM_RSA_DRIVER_ISA_EXT "" +#endif +#define WOLFKM_RSA_DRIVER_SUFFIX WOLFKM_RSA_DRIVER_ISA_EXT \ + WOLFKM_DRIVER_SUFFIX_BASE + +#define WOLFKM_RSA_DRIVER ("rsa" WOLFKM_RSA_DRIVER_SUFFIX) #if defined(LINUXKM_AKCIPHER_NO_SIGNVERIFY) /* the akcipher alg */ - #define WOLFKM_PKCS1PAD_NAME ("pkcs1pad(rsa)") - #define WOLFKM_PKCS1PAD_DRIVER ("pkcs1pad(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt)") + #define WOLFKM_PKCS1PAD_NAME "pkcs1pad(rsa)" + #define WOLFKM_PKCS1PAD_DRIVER "pkcs1pad-rsa" WOLFKM_RSA_DRIVER_SUFFIX #endif /* LINUXKM_AKCIPHER_NO_SIGNVERIFY */ /* * pkcs1 sign verify alg names * */ #define WOLFKM_PKCS1_SHA224_NAME (PKCS1_NAME "(rsa,sha224)") -#define WOLFKM_PKCS1_SHA224_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha224)") +#define WOLFKM_PKCS1_SHA224_DRIVER ("pkcs1pad-rsa-sha224" WOLFKM_RSA_DRIVER_SUFFIX) #define WOLFKM_PKCS1_SHA256_NAME (PKCS1_NAME "(rsa,sha256)") -#define WOLFKM_PKCS1_SHA256_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha256)") +#define WOLFKM_PKCS1_SHA256_DRIVER ("pkcs1pad-rsa-sha256" WOLFKM_RSA_DRIVER_SUFFIX) #define WOLFKM_PKCS1_SHA384_NAME (PKCS1_NAME "(rsa,sha384)") -#define WOLFKM_PKCS1_SHA384_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha384)") +#define WOLFKM_PKCS1_SHA384_DRIVER ("pkcs1pad-rsa-sha384" WOLFKM_RSA_DRIVER_SUFFIX) #define WOLFKM_PKCS1_SHA512_NAME (PKCS1_NAME "(rsa,sha512)") -#define WOLFKM_PKCS1_SHA512_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha512)") +#define WOLFKM_PKCS1_SHA512_DRIVER ("pkcs1pad-rsa-sha512" WOLFKM_RSA_DRIVER_SUFFIX) #define WOLFKM_PKCS1_SHA3_256_NAME (PKCS1_NAME "(rsa,sha3-256)") -#define WOLFKM_PKCS1_SHA3_256_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha3-256)") +#define WOLFKM_PKCS1_SHA3_256_DRIVER ("pkcs1pad-rsa-sha3-256" WOLFKM_RSA_DRIVER_SUFFIX) #define WOLFKM_PKCS1_SHA3_384_NAME (PKCS1_NAME "(rsa,sha3-384)") -#define WOLFKM_PKCS1_SHA3_384_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha3-384)") +#define WOLFKM_PKCS1_SHA3_384_DRIVER ("pkcs1pad-rsa-sha3-384" WOLFKM_RSA_DRIVER_SUFFIX) #define WOLFKM_PKCS1_SHA3_512_NAME (PKCS1_NAME "(rsa,sha3-512)") -#define WOLFKM_PKCS1_SHA3_512_DRIVER (PKCS1_NAME "(rsa" WOLFKM_DRIVER_FIPS \ - "-wolfcrypt,sha3-512)") +#define WOLFKM_PKCS1_SHA3_512_DRIVER ("pkcs1pad-rsa-sha3-512" WOLFKM_RSA_DRIVER_SUFFIX) #if defined(WOLFSSL_KEY_GEN) #if defined(LINUXKM_DIRECT_RSA) diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c index 26166c73102..c7560d185c0 100644 --- a/wolfcrypt/src/dh.c +++ b/wolfcrypt/src/dh.c @@ -57,14 +57,6 @@ } #endif -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - /* Possible DH enable options: * NO_RSA: Overall control of DH default: on (not defined) @@ -1425,8 +1417,6 @@ int wc_DhGeneratePublic(DhKey* key, byte* priv, word32 privSz, return BAD_FUNC_ARG; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - ret = GeneratePublicDh(key, priv, privSz, pub, pubSz); #if FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_DH_KEYGEN) @@ -1436,8 +1426,6 @@ int wc_DhGeneratePublic(DhKey* key, byte* priv, word32 privSz, ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, privSz); #endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */ - RESTORE_VECTOR_REGISTERS(); - return ret; } @@ -1451,8 +1439,6 @@ static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng, return BAD_FUNC_ARG; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - ret = GeneratePrivateDh(key, rng, priv, privSz); if (ret == 0) @@ -1464,9 +1450,6 @@ static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng, ret = _ffc_pairwise_consistency_test(key, pub, *pubSz, priv, *privSz); #endif /* FIPS V5 or later || WOLFSSL_VALIDATE_DH_KEYGEN */ - - RESTORE_VECTOR_REGISTERS(); - return ret; } #endif /* !WOLFSSL_KCAPI_DH */ @@ -1589,8 +1572,6 @@ static int _ffc_validate_public_key(DhKey* key, const byte* pub, word32 pubSz, return MP_INIT_E; } - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - if (mp_read_unsigned_bin(y, pub, pubSz) != MP_OKAY) { ret = MP_READ_E; } @@ -1679,8 +1660,6 @@ static int _ffc_validate_public_key(DhKey* key, const byte* pub, word32 pubSz, mp_clear(p); mp_clear(q); - RESTORE_VECTOR_REGISTERS(); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) XFREE(q, key->heap, DYNAMIC_TYPE_DH); XFREE(p, key->heap, DYNAMIC_TYPE_DH); @@ -1919,8 +1898,6 @@ static int _ffc_pairwise_consistency_test(DhKey* key, return MP_INIT_E; } - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - /* Load the private and public keys into big integers. */ if (mp_read_unsigned_bin(publicKey, pub, pubSz) != MP_OKAY || mp_read_unsigned_bin(privateKey, priv, privSz) != MP_OKAY) { @@ -1979,8 +1956,6 @@ static int _ffc_pairwise_consistency_test(DhKey* key, mp_clear(publicKey); mp_clear(checkKey); - RESTORE_VECTOR_REGISTERS(); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) XFREE(checkKey, key->heap, DYNAMIC_TYPE_DH); XFREE(privateKey, key->heap, DYNAMIC_TYPE_DH); @@ -2174,8 +2149,6 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, ret = MP_INIT_E; if (ret == 0) { - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - if (ret == 0 && mp_read_unsigned_bin(y, otherPub, pubSz) != MP_OKAY) ret = MP_READ_E; @@ -2201,8 +2174,6 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, } mp_clear(y); - - RESTORE_VECTOR_REGISTERS(); } /* make sure agree is > 1 (SP800-56A, 5.7.1.1) */ @@ -2253,8 +2224,6 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, } #endif - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - if (mp_read_unsigned_bin(x, priv, privSz) != MP_OKAY) ret = MP_READ_E; #ifdef WOLFSSL_CHECK_MEM_ZERO @@ -2313,8 +2282,6 @@ static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz, mp_clear(y); mp_forcezero(x); - RESTORE_VECTOR_REGISTERS(); - #else (void)ct; ret = WC_KEY_SIZE_E; @@ -2601,8 +2568,6 @@ static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g, ret = BAD_FUNC_ARG; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - if (ret == 0) { /* may have leading 0 */ if (p[0] == 0) { @@ -2714,8 +2679,6 @@ static int _DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g, mp_clear(keyP); } - RESTORE_VECTOR_REGISTERS(); - return ret; } @@ -3204,8 +3167,6 @@ int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh) } #endif - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - if (ret == 0) { /* force magnitude */ buf[0] |= 0xC0; @@ -3264,9 +3225,10 @@ int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh) if (ret != 0 || primeCheck == MP_YES) break; - /* linuxkm: release the kernel for a moment before iterating. */ - RESTORE_VECTOR_REGISTERS(); - SAVE_VECTOR_REGISTERS(ret = _svr_ret; break;); + ret = WC_CHECK_FOR_INTR_SIGNALS(); + if (ret != 0) + break; + WC_RELAX_LONG_LOOP(); }; } @@ -3308,8 +3270,6 @@ int wc_DhGenerateParams(WC_RNG *rng, int modSz, DhKey *dh) mp_clear(&dh->g); } - RESTORE_VECTOR_REGISTERS(); - #ifndef WOLFSSL_NO_MALLOC if (buf != NULL) #endif diff --git a/wolfcrypt/src/dsa.c b/wolfcrypt/src/dsa.c index 0e849c23499..5df809c2dab 100644 --- a/wolfcrypt/src/dsa.c +++ b/wolfcrypt/src/dsa.c @@ -36,14 +36,6 @@ #include #endif -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - #ifdef _MSC_VER /* disable for while(0) cases (MSVC bug) */ #pragma warning(disable:4127) @@ -269,8 +261,6 @@ int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa) } #endif - SAVE_VECTOR_REGISTERS(;); - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) if ((tmpQ = (mp_int *)XMALLOC(sizeof(*tmpQ), dsa->heap, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) @@ -338,8 +328,6 @@ int wc_MakeDsaKey(WC_RNG *rng, DsaKey *dsa) mp_clear(tmpQ); #endif - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -454,6 +442,11 @@ int wc_MakeDsaParameters(WC_RNG *rng, int modulus_size, DsaKey *dsa) break; loop_check_prime++; } + + err = WC_CHECK_FOR_INTR_SIGNALS(); + if (err != 0) + break; + WC_RELAX_LONG_LOOP(); } } @@ -794,8 +787,6 @@ int wc_DsaSign_ex(const byte* digest, word32 digestSz, byte* out, DsaKey* key, return BAD_LENGTH_E; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - do { #ifdef WOLFSSL_SMALL_STACK k = (mp_int *)XMALLOC(sizeof *k, key->heap, DYNAMIC_TYPE_TMP_BUFFER); @@ -1040,8 +1031,6 @@ int wc_DsaSign_ex(const byte* digest, word32 digestSz, byte* out, DsaKey* key, } } while (0); - RESTORE_VECTOR_REGISTERS(); - #ifdef WOLFSSL_SMALL_STACK if (k) { if ((ret != WC_NO_ERR_TRACE(MP_INIT_E)) && diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 57b29197d66..11276ce6e9f 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -275,14 +275,6 @@ ECC Curve Sizes: #include #endif -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - #if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_MICROCHIP_TA100) && \ !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_SILABS_SE_ACCEL) && \ @@ -5150,8 +5142,6 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point, return ECC_BAD_ARG_E; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - switch (private_key->state) { case ECC_STATE_NONE: case ECC_STATE_SHARED_SEC_GEN: @@ -5194,8 +5184,6 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point, err = BAD_STATE_E; } /* switch */ - RESTORE_VECTOR_REGISTERS(); - /* if async pending then return and skip done cleanup below */ if (err == WC_NO_ERR_TRACE(WC_PENDING_E)) { return err; @@ -5280,8 +5268,6 @@ int wc_ecc_point_is_on_curve(ecc_point *p, int curve_idx) return ECC_BAD_ARG_E; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - ALLOC_CURVE_SPECS(3, err); if (err == MP_OKAY) { err = wc_ecc_curve_load(wc_ecc_get_curve_params(curve_idx), &curve, @@ -5296,8 +5282,6 @@ int wc_ecc_point_is_on_curve(ecc_point *p, int curve_idx) wc_ecc_curve_free(curve); FREE_CURVE_SPECS(); - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -5456,8 +5440,6 @@ static int ecc_make_pub_ex(ecc_key* key, ecc_curve_spec* curve, return BAD_FUNC_ARG; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - #ifdef HAVE_ECC_MAKE_PUB /* if ecc_point passed in then use it as output for public key point */ if (pubOut != NULL) { @@ -5603,8 +5585,6 @@ static int ecc_make_pub_ex(ecc_key* key, ecc_curve_spec* curve, key->type = ECC_PRIVATEKEY; } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -6107,8 +6087,6 @@ int wc_ecc_make_key_ex2(WC_RNG* rng, int keysize, ecc_key* key, int curve_id, { int err; - SAVE_VECTOR_REGISTERS(return _svr_ret;); - err = _ecc_make_key_ex(rng, keysize, key, curve_id, flags); #if (FIPS_VERSION_GE(5,0) || defined(WOLFSSL_VALIDATE_ECC_KEYGEN)) && \ @@ -6132,8 +6110,6 @@ int wc_ecc_make_key_ex2(WC_RNG* rng, int keysize, ecc_key* key, int curve_id, } #endif - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -7195,6 +7171,11 @@ static int ecc_sign_hash_sw(ecc_key* key, ecc_key* pubkey, WC_RNG* rng, mp_clear(pubkey->pubkey.z); #endif mp_forcezero(pubkey->k); + + err = WC_CHECK_FOR_INTR_SIGNALS(); + if (err != 0) + break; + WC_RELAX_LONG_LOOP(); } mp_forcezero(b); FREE_MP_INT_SIZE(b, key->heap, DYNAMIC_TYPE_ECC); @@ -7244,10 +7225,8 @@ static int ecc_sign_hash_sp(const byte* in, word32 inlen, WC_RNG* rng, #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_sign_256(in, inlen, rng, ecc_get_k(key), r, s, sign_k, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -7256,10 +7235,8 @@ static int ecc_sign_hash_sp(const byte* in, word32 inlen, WC_RNG* rng, #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if (ecc_sets[key->idx].id == ECC_SM2P256V1) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_sign_sm2_256(in, inlen, rng, ecc_get_k(key), r, s, sign_k, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -7284,10 +7261,8 @@ static int ecc_sign_hash_sp(const byte* in, word32 inlen, WC_RNG* rng, #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_sign_384(in, inlen, rng, ecc_get_k(key), r, s, sign_k, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -7314,10 +7289,8 @@ static int ecc_sign_hash_sp(const byte* in, word32 inlen, WC_RNG* rng, #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_sign_521(in, inlen, rng, ecc_get_k(key), r, s, sign_k, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -8938,10 +8911,8 @@ static int ecc_verify_hash_sp(mp_int *r, mp_int *s, const byte* hash, #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_verify_256(hash, hashlen, key->pubkey.x, key->pubkey.y, key->pubkey.z, r, s, res, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -8982,10 +8953,8 @@ static int ecc_verify_hash_sp(mp_int *r, mp_int *s, const byte* hash, #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_verify_384(hash, hashlen, key->pubkey.x, key->pubkey.y, key->pubkey.z, r, s, res, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -9011,10 +8980,8 @@ static int ecc_verify_hash_sp(mp_int *r, mp_int *s, const byte* hash, #if !defined(WC_ECC_NONBLOCK) || (defined(WC_ECC_NONBLOCK) && !defined(WC_ECC_NONBLOCK_ONLY)) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_verify_521(hash, hashlen, key->pubkey.x, key->pubkey.y, key->pubkey.z, r, s, res, key->heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -9652,8 +9619,6 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen, if (err != MP_OKAY) return MEMORY_E; - SAVE_VECTOR_REGISTERS(return _svr_ret;); - /* check for point type (4, 2, or 3) */ pointType = in[0]; if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN && @@ -9845,8 +9810,6 @@ int wc_ecc_import_point_der_ex(const byte* in, word32 inLen, mp_clear(point->z); } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -10197,8 +10160,6 @@ static int _ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime) return err; } - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - /* compute y^2 */ if (err == MP_OKAY) err = mp_sqr(ecp->y, t1); @@ -10267,8 +10228,6 @@ static int _ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime) mp_clear(t1); mp_clear(t2); - RESTORE_VECTOR_REGISTERS(); - WC_FREE_VAR_EX(t2, NULL, DYNAMIC_TYPE_ECC); WC_FREE_VAR_EX(t1, NULL, DYNAMIC_TYPE_ECC); @@ -10718,8 +10677,6 @@ static int _ecc_validate_public_key(ecc_key* key, int partial, int priv) DECLARE_CURVE_SPECS(4); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (key == NULL) return BAD_FUNC_ARG; @@ -10871,9 +10828,7 @@ WOLFSSL_ABI int wc_ecc_check_key(ecc_key* key) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = _ecc_validate_public_key(key, 0, 1); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -10938,8 +10893,6 @@ static int _ecc_import_x963_ex2(const byte* in, word32 inLen, ecc_key* key, mp_forcezero(key->kb); #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); - /* check for point type (4, 2, or 3) */ pointType = in[0]; if (pointType != ECC_POINT_UNCOMP && pointType != ECC_POINT_COMP_EVEN && @@ -11271,8 +11224,6 @@ static int _ecc_import_x963_ex2(const byte* in, word32 inLen, ecc_key* key, mp_forcezero(key->k); } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -11691,10 +11642,6 @@ static int _ecc_import_private_key_ex(const byte* priv, word32 privSz, } #else -#ifdef WOLFSSL_VALIDATE_ECC_IMPORT - SAVE_VECTOR_REGISTERS(return _svr_ret;); -#endif - ret = mp_read_unsigned_bin(key->k, priv, privSz); #ifdef HAVE_WOLF_BIGINT if (ret == 0 && wc_bigint_from_unsigned_bin(&key->k->raw, priv, @@ -11745,10 +11692,6 @@ static int _ecc_import_private_key_ex(const byte* priv, word32 privSz, #endif -#ifdef WOLFSSL_VALIDATE_ECC_IMPORT - RESTORE_VECTOR_REGISTERS(); -#endif - #ifdef WOLFSSL_MAXQ10XX_CRYPTO if ((ret == 0) && (key->devId != INVALID_DEVID)) { ret = wc_MAXQ10XX_EccSetKey(key, key->dp->size); @@ -12137,10 +12080,6 @@ static int _ecc_import_raw_private(ecc_key* key, const char* qx, } #endif -#ifdef WOLFSSL_VALIDATE_ECC_IMPORT - SAVE_VECTOR_REGISTERS(return _svr_ret;); -#endif - /* import private key */ if (err == MP_OKAY) { if (d != NULL) { @@ -12234,10 +12173,6 @@ static int _ecc_import_raw_private(ecc_key* key, const char* qx, } #endif -#ifdef WOLFSSL_VALIDATE_ECC_IMPORT - RESTORE_VECTOR_REGISTERS(); -#endif - #ifdef WOLFSSL_MAXQ10XX_CRYPTO if (err == MP_OKAY) { err = wc_MAXQ10XX_EccSetKey(key, key->dp->size); @@ -13858,8 +13793,6 @@ int ecc_mul2add(ecc_point* A, mp_int* kA, } #endif /* HAVE_THREAD_LS */ - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - /* find point */ idx1 = find_base(A); @@ -13942,8 +13875,6 @@ int ecc_mul2add(ecc_point* A, mp_int* kA, } } - RESTORE_VECTOR_REGISTERS(); - #ifndef HAVE_THREAD_LS wc_UnLockMutex(&ecc_fp_lock); #endif /* HAVE_THREAD_LS */ @@ -14011,8 +13942,6 @@ int wc_ecc_mulmod_ex(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, got_ecc_fp_lock = 1; #endif /* HAVE_THREAD_LS */ - SAVE_VECTOR_REGISTERS(err = _svr_ret; goto out;); - /* find point */ idx = find_base(G); @@ -14061,8 +13990,6 @@ int wc_ecc_mulmod_ex(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, } } - RESTORE_VECTOR_REGISTERS(); - out: #ifndef HAVE_THREAD_LS @@ -14088,36 +14015,28 @@ int wc_ecc_mulmod_ex(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret); ret = sp_ecc_mulmod_sm2_256(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_256(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif #ifdef WOLFSSL_SP_384 if (mp_count_bits(modulus) == 384) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_384(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif #ifdef WOLFSSL_SP_521 if (mp_count_bits(modulus) == 521) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_521(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -14182,8 +14101,6 @@ int wc_ecc_mulmod_ex2(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, got_ecc_fp_lock = 1; #endif /* HAVE_THREAD_LS */ - SAVE_VECTOR_REGISTERS(err = _svr_ret; goto out;); - /* find point */ idx = find_base(G); @@ -14232,8 +14149,6 @@ int wc_ecc_mulmod_ex2(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, } } - RESTORE_VECTOR_REGISTERS(); - out: #ifndef HAVE_THREAD_LS @@ -14262,36 +14177,28 @@ int wc_ecc_mulmod_ex2(const mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, #if defined(WOLFSSL_SM2) && defined(WOLFSSL_SP_SM2) if ((mp_count_bits(modulus) == 256) && (!mp_is_bit_set(modulus, 224))) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_sm2_256(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif #ifndef WOLFSSL_SP_NO_256 if (mp_count_bits(modulus) == 256) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_256(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif #ifdef WOLFSSL_SP_384 if (mp_count_bits(modulus) == 384) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_384(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif #ifdef WOLFSSL_SP_521 if (mp_count_bits(modulus) == 521) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = sp_ecc_mulmod_521(k, G, R, map, heap); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -14887,8 +14794,6 @@ int wc_ecc_encrypt_ex(ecc_key* privKey, ecc_key* pubKey, const byte* msg, } #endif - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - #ifdef WOLFSSL_ECIES_ISO18033 XMEMCPY(sharedSecret, out - pubKeySz, pubKeySz); sharedSz -= pubKeySz; @@ -15105,8 +15010,6 @@ int wc_ecc_encrypt_ex(ecc_key* privKey, ecc_key* pubKey, const byte* msg, #endif } - RESTORE_VECTOR_REGISTERS(); - ForceZero(sharedSecret, sharedSz); ForceZero(keys, (word32)keysLen); WC_FREE_VAR_EX(sharedSecret, ctx->heap, DYNAMIC_TYPE_ECC_BUFFER); @@ -15266,8 +15169,6 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg, } #endif - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - #ifndef WOLFSSL_ECIES_OLD if (pubKey == NULL) { WC_ALLOC_VAR_EX(peerKey, ecc_key, 1, ctx->heap, @@ -15501,8 +15402,6 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg, if (ret == 0) *outSz = msgSz - digestSz; - RESTORE_VECTOR_REGISTERS(); - #ifndef WOLFSSL_ECIES_OLD if (pubKey == peerKey) wc_ecc_free(peerKey); @@ -15576,8 +15475,6 @@ static int mp_jacobi(mp_int* a, mp_int* n, int* c) return res; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - if ((res = mp_mod(a, n, a1)) != MP_OKAY) { goto done; } @@ -15635,8 +15532,6 @@ static int mp_jacobi(mp_int* a, mp_int* n, int* c) done: - RESTORE_VECTOR_REGISTERS(); - /* cleanup */ mp_clear(n1); mp_clear(a1); @@ -15674,8 +15569,6 @@ static int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret) return MP_VAL; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - res = mp_init(&e); if (res == MP_OKAY) res = mp_mod_d(prime, 8, &i); @@ -15699,8 +15592,6 @@ static int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret) mp_clear(&e); - RESTORE_VECTOR_REGISTERS(); - return res; #else int res, legendre, done = 0; @@ -15720,8 +15611,6 @@ static int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret) mp_int t1[1], C[1], Q[1], S[1], Z[1], M[1], T[1], R[1], N[1], two[1]; #endif - SAVE_VECTOR_REGISTERS(res = _svr_ret; goto out;); - if ((mp_init_multi(t1, C, Q, S, Z, M) != MP_OKAY) || (mp_init_multi(T, R, N, two, NULL, NULL) != MP_OKAY)) { res = MP_INIT_E; @@ -15933,8 +15822,6 @@ static int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret) out: - RESTORE_VECTOR_REGISTERS(); - #ifdef WOLFSSL_SMALL_STACK if (t1) { if (res != WC_NO_ERR_TRACE(MP_INIT_E)) diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 5fbbd6bdf82..e265d69cf42 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -60,14 +60,6 @@ RSA keys can be used to encrypt, decrypt, sign and verify data. #include #endif -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - /* * RSA Build Options: * @@ -868,8 +860,6 @@ int wc_CheckRsaKey(RsaKey* key) } } - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - if (ret == 0) { if (INIT_MP_INT_SIZE(tmp, mp_bitsused(&key->n)) != MP_OKAY) ret = MP_INIT_E; @@ -982,8 +972,6 @@ int wc_CheckRsaKey(RsaKey* key) mp_forcezero(tmp); - RESTORE_VECTOR_REGISTERS(); - if ((rng != NULL) && (rng != key->rng)) { wc_FreeRng(rng); #ifdef WOLFSSL_SMALL_STACK @@ -3571,7 +3559,6 @@ static int wc_RsaFunction_ex(const byte* in, word32 inLen, byte* out, #ifdef WOLF_CRYPTO_CB_ONLY_RSA return NO_VALID_DEVID; #else /* !WOLF_CRYPTO_CB_ONLY_RSA */ - SAVE_VECTOR_REGISTERS(return _svr_ret;); #if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(TEST_UNPAD_CONSTANT_TIME) && \ !defined(NO_RSA_BOUNDS_CHECK) @@ -3580,7 +3567,6 @@ static int wc_RsaFunction_ex(const byte* in, word32 inLen, byte* out, ret = RsaFunctionCheckIn(in, inLen, key, checkSmallCt); if (ret != 0) { - RESTORE_VECTOR_REGISTERS(); return ret; } } @@ -3592,7 +3578,6 @@ static int wc_RsaFunction_ex(const byte* in, word32 inLen, byte* out, ret = RsaFunctionCheckIn(in, inLen, key, checkSmallCt); if (ret != 0) { - RESTORE_VECTOR_REGISTERS(); return ret; } } @@ -3623,8 +3608,6 @@ static int wc_RsaFunction_ex(const byte* in, word32 inLen, byte* out, ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng); } - RESTORE_VECTOR_REGISTERS(); - /* handle error */ if (ret < 0 && ret != WC_NO_ERR_TRACE(WC_PENDING_E) #ifdef WC_RSA_NONBLOCK @@ -4172,11 +4155,9 @@ int wc_RsaPublicEncrypt(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key, WC_RNG* rng) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPublicEncryptEx(in, inLen, out, outLen, key, RSA_PUBLIC_ENCRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4188,10 +4169,8 @@ int wc_RsaPublicEncrypt_ex(const byte* in, word32 inLen, byte* out, word32 labelSz) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPublicEncryptEx(in, inLen, out, outLen, key, RSA_PUBLIC_ENCRYPT, RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif /* WC_NO_RSA_OAEP */ @@ -4211,11 +4190,9 @@ int wc_RsaPrivateDecryptInline(byte* in, word32 inLen, byte** out, RsaKey* key) #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4235,11 +4212,9 @@ int wc_RsaPrivateDecryptInline_ex(byte* in, word32 inLen, byte** out, #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif /* WC_NO_RSA_OAEP */ @@ -4258,11 +4233,9 @@ int wc_RsaPrivateDecrypt(const byte* in, word32 inLen, byte* out, #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key, RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4282,11 +4255,9 @@ int wc_RsaPrivateDecrypt_ex(const byte* in, word32 inLen, byte* out, #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key, RSA_PRIVATE_DECRYPT, RSA_BLOCK_TYPE_2, type, hash, mgf, label, labelSz, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif /* WC_NO_RSA_OAEP || WC_RSA_NO_PADDING */ @@ -4305,11 +4276,9 @@ int wc_RsaSSL_VerifyInline(byte* in, word32 inLen, byte** out, RsaKey* key) #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -4325,10 +4294,8 @@ int wc_RsaSSL_Verify_ex(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key, int pad_type) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = wc_RsaSSL_Verify_ex2(in, inLen, out, outLen, key, pad_type, WC_HASH_TYPE_NONE); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4348,7 +4315,6 @@ int wc_RsaSSL_Verify_ex2(const byte* in, word32 inLen, byte* out, word32 outLen rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); #ifndef WOLFSSL_PSS_SALT_LEN_DISCOVER ret = RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key, RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, pad_type, @@ -4358,7 +4324,6 @@ int wc_RsaSSL_Verify_ex2(const byte* in, word32 inLen, byte* out, word32 outLen RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, pad_type, hash, wc_hash2mgf(hash), NULL, 0, RSA_PSS_SALT_LEN_DISCOVER, rng); #endif - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -4416,11 +4381,9 @@ int wc_RsaPSS_VerifyInline_ex(byte* in, word32 inLen, byte** out, #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, in, inLen, out, key, RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, hash, mgf, NULL, 0, saltLen, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4474,11 +4437,9 @@ int wc_RsaPSS_Verify_ex(const byte* in, word32 inLen, byte* out, word32 outLen, #else rng = NULL; #endif - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPrivateDecryptEx(in, inLen, out, outLen, NULL, key, RSA_PUBLIC_DECRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, hash, mgf, NULL, 0, saltLen, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4754,11 +4715,9 @@ int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen, RsaKey* key, WC_RNG* rng) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPublicEncryptEx(in, inLen, out, outLen, key, RSA_PRIVATE_ENCRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, WC_MGF1NONE, NULL, 0, 0, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } @@ -4805,11 +4764,9 @@ int wc_RsaPSS_Sign_ex(const byte* in, word32 inLen, byte* out, word32 outLen, WC_RNG* rng) { int ret; - SAVE_VECTOR_REGISTERS(return _svr_ret;); ret = RsaPublicEncryptEx(in, inLen, out, outLen, key, RSA_PRIVATE_ENCRYPT, RSA_BLOCK_TYPE_1, WC_RSA_PSS_PAD, hash, mgf, NULL, 0, saltLen, rng); - RESTORE_VECTOR_REGISTERS(); return ret; } #endif @@ -5350,12 +5307,8 @@ int wc_CheckProbablePrime_ex(const byte* pRaw, word32 pRawSz, if (ret == MP_OKAY) ret = mp_read_unsigned_bin(e, eRaw, eRawSz); - if (ret == MP_OKAY) - SAVE_VECTOR_REGISTERS(ret = _svr_ret;); - if (ret == 0) { ret = _CheckProbablePrime(p, Q, e, nlen, isPrime, rng); - RESTORE_VECTOR_REGISTERS(); } ret = (ret == MP_OKAY) ? 0 : PRIME_GEN_E; @@ -5549,8 +5502,6 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) } #endif - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - /* make p */ if (err == MP_OKAY) { #ifdef WOLFSSL_CHECK_MEM_ZERO @@ -5591,9 +5542,10 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) if (err != MP_OKAY || isPrime || i >= failCount) break; - /* linuxkm: release the kernel for a moment before iterating. */ - RESTORE_VECTOR_REGISTERS(); - SAVE_VECTOR_REGISTERS(err = _svr_ret; break;); + err = WC_CHECK_FOR_INTR_SIGNALS(); + if (err != 0) + break; + WC_RELAX_LONG_LOOP(); }; } @@ -5644,6 +5596,12 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) /* Keep the old retry behavior in non-FIPS build. */ (void)i; #endif + + err = WC_CHECK_FOR_INTR_SIGNALS(); + if (err != 0) + break; + WC_RELAX_LONG_LOOP(); + } while (err == MP_OKAY && !isPrime && i < failCount); } @@ -5769,9 +5727,6 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng) } #endif - if (err != WC_NO_ERR_TRACE(WC_ACCEL_INHIBIT_E)) - RESTORE_VECTOR_REGISTERS(); - /* Last value p - 1. */ mp_forcezero(tmp1); /* Last value q - 1. */ diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 71ad6c25bee..ff5a33e2e60 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -125,8 +125,6 @@ This library provides single precision (SP) integer math functions. * WOLFSSL_NO_ASM: Disable all assembly implementations * WOLFSSL_KEIL: Keil compiler in use, affects inline assembly * syntax - * WOLFSSL_USE_SAVE_VECTOR_REGISTERS: Save/restore vector registers around - * SP ASM calls * WOLFSSL_SP_INT_LARGE_COMBA: Enable large Comba multiplication and * squaring * WOLFSSL_SP_INT_SQR_VOLATILE: Declare squaring intermediate variables as @@ -198,15 +196,6 @@ This library provides single precision (SP) integer math functions. PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"") #endif -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) \ - SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - /* DECL_SP_INT: Declare one variable of type 'sp_int'. */ #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ !defined(WOLFSSL_SP_NO_MALLOC) @@ -5487,6 +5476,22 @@ static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1, sp_int* r2, int y, unsigned int used) { unsigned int i; +#ifdef WC_NO_GLOBAL_OBJECT_POINTERS + static const wc_ptr_t wc_off_on_addr[2] = + { + #if defined(WC_64BIT_CPU) + W64LIT(0x0000000000000000), + W64LIT(0xffffffffffffffff) + #elif defined(WC_16BIT_CPU) + 0x0000U, + 0xffffU + #else + /* 32 bit */ + 0x00000000U, + 0xffffffffU + #endif + }; +#endif /* Copy data - constant time. */ for (i = 0; i < used; i++) { @@ -14329,11 +14334,9 @@ int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r) if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } - SAVE_VECTOR_REGISTERS(err = _svr_ret;); if (err == MP_OKAY) { err = sp_exptmod_ex(b, e, (int)e->used, m, r); } - RESTORE_VECTOR_REGISTERS(); return err; } #endif @@ -19554,8 +19557,6 @@ int sp_prime_is_prime(const sp_int* a, int trials, int* result) haveRes = 1; } - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - /* Check against known small primes when a has 1 digit. */ if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) && (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) { @@ -19572,8 +19573,6 @@ int sp_prime_is_prime(const sp_int* a, int trials, int* result) err = _sp_prime_trials(a, trials, result); } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -19714,8 +19713,6 @@ int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng) haveRes = 1; } - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - /* Check against known small primes when a has 1 digit. */ if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) && (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) { @@ -19740,8 +19737,6 @@ int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng) *result = ret; } - RESTORE_VECTOR_REGISTERS(); - return err; } #endif /* WOLFSSL_SP_PRIME_GEN */ @@ -19783,8 +19778,6 @@ static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r) unsigned int used = (a->used >= b->used) ? a->used + 1U : b->used + 1U; DECL_SP_INT_ARRAY(d, used, 3); - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL); if (err == MP_OKAY) { u = d[0]; @@ -19849,8 +19842,6 @@ static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r) FREE_SP_INT_ARRAY(d, NULL); - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -19955,8 +19946,6 @@ static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r) _sp_init_size(t[0], used); _sp_init_size(t[1], used); - SAVE_VECTOR_REGISTERS(err = _svr_ret;); - if (err == MP_OKAY) { /* 1. t0 = gcd(a, b) */ err = sp_gcd(a, b, t[0]); @@ -19985,8 +19974,6 @@ static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r) } } } - - RESTORE_VECTOR_REGISTERS(); } FREE_SP_INT_ARRAY(t, NULL); diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 37a4bac2325..39e629ee42c 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -47,6 +47,17 @@ #include +#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) && \ + !defined(DEBUG_VECTOR_REGISTER_ACCESS) + /* force off unneeded vector register save/restore. */ + #undef SAVE_VECTOR_REGISTERS + #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) + #undef SAVE_VECTOR_REGISTERS2 + #define SAVE_VECTOR_REGISTERS2() 0 + #undef RESTORE_VECTOR_REGISTERS + #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() +#endif + #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm #define __volatile__ volatile @@ -429,8 +440,6 @@ static void sp_2048_mont_norm_16(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 16); - ASSERT_SAVED_VECTOR_REGISTERS(); - /* r = 2^n mod m */ sp_2048_sub_in_place_16(r, m); } @@ -520,7 +529,6 @@ extern sp_digit div_2048_word_asm_16(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -539,7 +547,6 @@ static WC_INLINE sp_digit div_2048_word_16(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -607,8 +614,6 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[15]; @@ -661,18 +666,38 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_2048_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_2048_div_16(a, m, NULL, r); } -#ifdef __cplusplus -extern "C" { -#endif -extern void sp_2048_get_from_table_16(sp_digit* r, sp_digit** table, int idx); -#ifdef __cplusplus -} -#endif +#ifndef WC_NO_CACHE_RESISTANT +static void sp_2048_get_from_table_16(sp_digit* r, + sp_digit** table, int idx) +{ + int e, j; + sp_digit mask; + sp_digit diff; + + for (j = 0; j < 16; j++) { + r[j] = 0; + } + + for (e = 0; e < 32; e++) { + /* Constant-time: mask = (e == idx) ? ~0 : 0 + * diff = e ^ idx is 0 iff equal. + * (diff | -diff) has its sign bit set iff diff != 0. + * Shift to get 1/0, subtract from 1 to invert, negate to mask. + */ + diff = (sp_digit)(e ^ idx); + diff = (diff | ((sp_digit)0 - diff)) + >> (sizeof(sp_digit) * 8 - 1); /* 1 if !=, 0 if == */ + mask = (sp_digit)0 - ((sp_digit)1 - diff); /* all-1 if ==, else 0 */ + for (j = 0; j < 16; j++) { + r[j] |= table[e][j] & mask; + } + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ /* Modular exponentiate a to the e mod m. (r = a^e mod m) * * r A single precision number that is the result of the operation. @@ -699,8 +724,6 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -869,6 +892,7 @@ extern void sp_2048_mont_reduce_avx2_16(sp_digit* a, const sp_digit* m, sp_digit SP_NOINLINE static void sp_2048_mont_mul_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_2048_mul_avx2_16(r, a, b); sp_2048_mont_reduce_avx2_16(r, m, mp); } @@ -885,6 +909,7 @@ SP_NOINLINE static void sp_2048_mont_mul_avx2_16(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_mont_sqr_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_2048_sqr_avx2_16(r, a); sp_2048_mont_reduce_avx2_16(r, m, mp); } @@ -926,7 +951,6 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, const sp_digit* a, const sp_digi int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -1089,8 +1113,6 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 32); - ASSERT_SAVED_VECTOR_REGISTERS(); - /* r = 2^n mod m */ sp_2048_sub_in_place_32(r, m); } @@ -1174,7 +1196,6 @@ extern sp_digit div_2048_word_asm_32(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -1193,7 +1214,6 @@ static WC_INLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -1224,8 +1244,6 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[31]; @@ -1284,7 +1302,6 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s static WC_INLINE int sp_2048_mod_32_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_2048_div_32_cond(a, m, NULL, r); } @@ -1354,8 +1371,6 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[31]; @@ -1409,19 +1424,39 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_2048_div_32(a, m, NULL, r); } #endif /* WOLFSSL_HAVE_SP_DH || !WOLFSSL_RSA_PUBLIC_ONLY */ -#ifdef __cplusplus -extern "C" { -#endif -extern void sp_2048_get_from_table_32(sp_digit* r, sp_digit** table, int idx); -#ifdef __cplusplus -} -#endif +#ifndef WC_NO_CACHE_RESISTANT +static void sp_2048_get_from_table_32(sp_digit* r, + sp_digit** table, int idx) +{ + int e, j; + sp_digit mask; + sp_digit diff; + + for (j = 0; j < 32; j++) { + r[j] = 0; + } + + for (e = 0; e < 64; e++) { + /* Constant-time: mask = (e == idx) ? ~0 : 0 + * diff = e ^ idx is 0 iff equal. + * (diff | -diff) has its sign bit set iff diff != 0. + * Shift to get 1/0, subtract from 1 to invert, negate to mask. + */ + diff = (sp_digit)(e ^ idx); + diff = (diff | ((sp_digit)0 - diff)) + >> (sizeof(sp_digit) * 8 - 1); /* 1 if !=, 0 if == */ + mask = (sp_digit)0 - ((sp_digit)1 - diff); /* all-1 if ==, else 0 */ + for (j = 0; j < 32; j++) { + r[j] |= table[e][j] & mask; + } + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ /* Modular exponentiate a to the e mod m. (r = a^e mod m) * * r A single precision number that is the result of the operation. @@ -1448,8 +1483,6 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -1652,6 +1685,7 @@ extern void sp_2048_mont_reduce_avx2_32(sp_digit* a, const sp_digit* m, sp_digit SP_NOINLINE static void sp_2048_mont_mul_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_2048_mul_avx2_32(r, a, b); sp_2048_mont_reduce_avx2_32(r, m, mp); } @@ -1668,6 +1702,7 @@ SP_NOINLINE static void sp_2048_mont_mul_avx2_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_mont_sqr_avx2_32(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_2048_sqr_avx2_32(r, a); sp_2048_mont_reduce_avx2_32(r, m, mp); } @@ -1710,7 +1745,6 @@ static int sp_2048_mod_exp_avx2_32(sp_digit* r, const sp_digit* a, const sp_digi int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -1918,10 +1952,9 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (*outLen < 256) { err = MP_TO_E; } @@ -1953,6 +1986,13 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_2048_from_mp(m, 32, mm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && + (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; +#endif + if (e == 0x10001) { int i; sp_digit mp; @@ -1967,9 +2007,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { /* r = a ^ 0x10000 => r = a squared 16 times */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { for (i = 15; i >= 0; i--) { sp_2048_mont_sqr_avx2_32(r, r, m, mp); } @@ -1977,7 +2015,6 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m */ sp_2048_mont_mul_avx2_32(r, r, ah, m, mp); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -2001,9 +2038,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } else if (e == 0x3) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { if (err == MP_OKAY) { sp_2048_sqr_avx2_32(r, ah); err = sp_2048_mod_32_cond(r, r, m); @@ -2012,7 +2047,6 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, sp_2048_mul_avx2_32(r, ah, r); err = sp_2048_mod_32_cond(r, r, m); } - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -2046,9 +2080,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, XMEMCPY(r, a, sizeof(sp_digit) * 32); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { for (i--; i>=0; i--) { sp_2048_mont_sqr_avx2_32(r, r, m, mp); if (((e >> i) & 1) == 1) { @@ -2057,7 +2089,6 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } XMEMSET(&r[32], 0, sizeof(sp_digit) * 32); sp_2048_mont_reduce_avx2_32(r, m, mp); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -2087,6 +2118,11 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, *outLen = 256; } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(a, NULL, DYNAMIC_TYPE_RSA); return err; @@ -2121,8 +2157,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_digit* r = NULL; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)pm; (void)qm; (void)dpm; @@ -2220,10 +2254,9 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)dm; (void)mm; @@ -2244,6 +2277,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, } SP_ALLOC_VAR(sp_digit, a, 16 * 11, NULL, DYNAMIC_TYPE_RSA); + if (err == MP_OKAY) { p = a + 32 * 2; q = p + 16; @@ -2259,10 +2293,11 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + + if (saved_vector_registers) err = sp_2048_mod_exp_avx2_16(tmpa, a, dp, 1024, p, 1); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_2048_mod_exp_16(tmpa, a, dp, 1024, p, 1); @@ -2270,11 +2305,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, if (err == MP_OKAY) { sp_2048_from_mp(dq, 16, dqm); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) err = sp_2048_mod_exp_avx2_16(tmpb, a, dq, 1024, q, 1); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_2048_mod_exp_16(tmpb, a, dq, 1024, q, 1); @@ -2283,11 +2315,9 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, if (err == MP_OKAY) { c = sp_2048_sub_in_place_16(tmpa, tmpb); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { c += sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -2298,31 +2328,21 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, sp_2048_from_mp(qi, 16, qim); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_2048_mul_avx2_16(tmpa, tmpa, qi); - RESTORE_VECTOR_REGISTERS(); - } else #endif - { sp_2048_mul_16(tmpa, tmpa, qi); - } err = sp_2048_mod_16(tmpa, tmpa, p); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_2048_mul_avx2_16(tmpa, q, tmpa); - RESTORE_VECTOR_REGISTERS(); - } else #endif - { sp_2048_mul_16(tmpa, q, tmpa); - } XMEMSET(&tmpb[16], 0, sizeof(sp_digit) * 16); sp_2048_add_32(r, tmpb, tmpa); @@ -2330,6 +2350,11 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, *outLen = 256; } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_ZEROFREE_VAR(sp_digit, a, 16 * 11, NULL, DYNAMIC_TYPE_RSA); return err; @@ -2429,8 +2454,6 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 2048) { err = MP_READ_E; } @@ -2512,7 +2535,6 @@ static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, const sp_digit* e, int bits, int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -2630,8 +2652,6 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -2750,8 +2770,6 @@ int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen, word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 2048) { err = MP_READ_E; } @@ -2843,8 +2861,6 @@ int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 1024) { err = MP_READ_E; } @@ -3238,8 +3254,6 @@ static void sp_3072_mont_norm_24(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 24); - ASSERT_SAVED_VECTOR_REGISTERS(); - /* r = 2^n mod m */ sp_3072_sub_in_place_24(r, m); } @@ -3329,7 +3343,6 @@ extern sp_digit div_3072_word_asm_24(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -3348,7 +3361,6 @@ static WC_INLINE sp_digit div_3072_word_24(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -3416,8 +3428,6 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[23]; @@ -3470,18 +3480,38 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_3072_mod_24(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_3072_div_24(a, m, NULL, r); } -#ifdef __cplusplus -extern "C" { -#endif -extern void sp_3072_get_from_table_24(sp_digit* r, sp_digit** table, int idx); -#ifdef __cplusplus -} -#endif +#ifndef WC_NO_CACHE_RESISTANT +static void sp_3072_get_from_table_24(sp_digit* r, + sp_digit** table, int idx) +{ + int e, j; + sp_digit mask; + sp_digit diff; + + for (j = 0; j < 24; j++) { + r[j] = 0; + } + + for (e = 0; e < 32; e++) { + /* Constant-time: mask = (e == idx) ? ~0 : 0 + * diff = e ^ idx is 0 iff equal. + * (diff | -diff) has its sign bit set iff diff != 0. + * Shift to get 1/0, subtract from 1 to invert, negate to mask. + */ + diff = (sp_digit)(e ^ idx); + diff = (diff | ((sp_digit)0 - diff)) + >> (sizeof(sp_digit) * 8 - 1); /* 1 if !=, 0 if == */ + mask = (sp_digit)0 - ((sp_digit)1 - diff); /* all-1 if ==, else 0 */ + for (j = 0; j < 24; j++) { + r[j] |= table[e][j] & mask; + } + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ /* Modular exponentiate a to the e mod m. (r = a^e mod m) * * r A single precision number that is the result of the operation. @@ -3508,8 +3538,6 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -3678,6 +3706,7 @@ extern void sp_3072_mont_reduce_avx2_24(sp_digit* a, const sp_digit* m, sp_digit SP_NOINLINE static void sp_3072_mont_mul_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_3072_mul_avx2_24(r, a, b); sp_3072_mont_reduce_avx2_24(r, m, mp); } @@ -3694,6 +3723,7 @@ SP_NOINLINE static void sp_3072_mont_mul_avx2_24(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_mont_sqr_avx2_24(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_3072_sqr_avx2_24(r, a); sp_3072_mont_reduce_avx2_24(r, m, mp); } @@ -3735,7 +3765,6 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, const sp_digit* a, const sp_digi int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -3898,8 +3927,6 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 48); - ASSERT_SAVED_VECTOR_REGISTERS(); - /* r = 2^n mod m */ sp_3072_sub_in_place_48(r, m); } @@ -3983,7 +4010,6 @@ extern sp_digit div_3072_word_asm_48(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -4002,7 +4028,6 @@ static WC_INLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -4033,8 +4058,6 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[47]; @@ -4093,7 +4116,6 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s static WC_INLINE int sp_3072_mod_48_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_3072_div_48_cond(a, m, NULL, r); } @@ -4163,8 +4185,6 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[47]; @@ -4218,19 +4238,39 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_3072_div_48(a, m, NULL, r); } #endif /* WOLFSSL_HAVE_SP_DH || !WOLFSSL_RSA_PUBLIC_ONLY */ -#ifdef __cplusplus -extern "C" { -#endif -extern void sp_3072_get_from_table_48(sp_digit* r, sp_digit** table, int idx); -#ifdef __cplusplus -} -#endif +#ifndef WC_NO_CACHE_RESISTANT +static void sp_3072_get_from_table_48(sp_digit* r, + sp_digit** table, int idx) +{ + int e, j; + sp_digit mask; + sp_digit diff; + + for (j = 0; j < 48; j++) { + r[j] = 0; + } + for (e = 0; e < 16; e++) { + /* Constant-time: mask = (e == idx) ? ~0 : 0 + * diff = e ^ idx is 0 iff equal. + * (diff | -diff) has its sign bit set iff diff != 0. + * Shift to get 1/0, subtract from 1 to invert, negate to mask. + */ + diff = (sp_digit)(e ^ idx); + diff = (diff | ((sp_digit)0 - diff)) + >> (sizeof(sp_digit) * 8 - 1); /* 1 if !=, 0 if == */ + mask = (sp_digit)0 - ((sp_digit)1 - diff); /* all-1 if ==, else 0 */ + + for (j = 0; j < 48; j++) { + r[j] |= table[e][j] & mask; + } + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ /* Modular exponentiate a to the e mod m. (r = a^e mod m) * * r A single precision number that is the result of the operation. @@ -4257,8 +4297,6 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -4409,6 +4447,7 @@ extern void sp_3072_mont_reduce_avx2_48(sp_digit* a, const sp_digit* m, sp_digit SP_NOINLINE static void sp_3072_mont_mul_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_3072_mul_avx2_48(r, a, b); sp_3072_mont_reduce_avx2_48(r, m, mp); } @@ -4425,6 +4464,7 @@ SP_NOINLINE static void sp_3072_mont_mul_avx2_48(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_mont_sqr_avx2_48(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_3072_sqr_avx2_48(r, a); sp_3072_mont_reduce_avx2_48(r, m, mp); } @@ -4467,7 +4507,6 @@ static int sp_3072_mod_exp_avx2_48(sp_digit* r, const sp_digit* a, const sp_digi int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -4623,10 +4662,9 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (*outLen < 384) { err = MP_TO_E; } @@ -4658,6 +4696,13 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_3072_from_mp(m, 48, mm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && + (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; +#endif + if (e == 0x10001) { int i; sp_digit mp; @@ -4672,9 +4717,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { /* r = a ^ 0x10000 => r = a squared 16 times */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { for (i = 15; i >= 0; i--) { sp_3072_mont_sqr_avx2_48(r, r, m, mp); } @@ -4682,7 +4725,6 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m */ sp_3072_mont_mul_avx2_48(r, r, ah, m, mp); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -4706,9 +4748,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } else if (e == 0x3) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { if (err == MP_OKAY) { sp_3072_sqr_avx2_48(r, ah); err = sp_3072_mod_48_cond(r, r, m); @@ -4717,7 +4757,6 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, sp_3072_mul_avx2_48(r, ah, r); err = sp_3072_mod_48_cond(r, r, m); } - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -4751,9 +4790,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, XMEMCPY(r, a, sizeof(sp_digit) * 48); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { for (i--; i>=0; i--) { sp_3072_mont_sqr_avx2_48(r, r, m, mp); if (((e >> i) & 1) == 1) { @@ -4762,7 +4799,6 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } XMEMSET(&r[48], 0, sizeof(sp_digit) * 48); sp_3072_mont_reduce_avx2_48(r, m, mp); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -4792,6 +4828,11 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, *outLen = 384; } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(a, NULL, DYNAMIC_TYPE_RSA); return err; @@ -4826,8 +4867,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_digit* r = NULL; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)pm; (void)qm; (void)dpm; @@ -4925,10 +4964,9 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)dm; (void)mm; @@ -4949,6 +4987,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, } SP_ALLOC_VAR(sp_digit, a, 24 * 11, NULL, DYNAMIC_TYPE_RSA); + if (err == MP_OKAY) { p = a + 48 * 2; q = p + 24; @@ -4964,10 +5003,11 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + + if (saved_vector_registers) err = sp_3072_mod_exp_avx2_24(tmpa, a, dp, 1536, p, 1); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_3072_mod_exp_24(tmpa, a, dp, 1536, p, 1); @@ -4975,11 +5015,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, if (err == MP_OKAY) { sp_3072_from_mp(dq, 24, dqm); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) err = sp_3072_mod_exp_avx2_24(tmpb, a, dq, 1536, q, 1); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_3072_mod_exp_24(tmpb, a, dq, 1536, q, 1); @@ -4988,11 +5025,9 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, if (err == MP_OKAY) { c = sp_3072_sub_in_place_24(tmpa, tmpb); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { c += sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -5003,31 +5038,21 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, sp_3072_from_mp(qi, 24, qim); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_3072_mul_avx2_24(tmpa, tmpa, qi); - RESTORE_VECTOR_REGISTERS(); - } else #endif - { sp_3072_mul_24(tmpa, tmpa, qi); - } err = sp_3072_mod_24(tmpa, tmpa, p); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_3072_mul_avx2_24(tmpa, q, tmpa); - RESTORE_VECTOR_REGISTERS(); - } else #endif - { sp_3072_mul_24(tmpa, q, tmpa); - } XMEMSET(&tmpb[24], 0, sizeof(sp_digit) * 24); sp_3072_add_48(r, tmpb, tmpa); @@ -5035,6 +5060,11 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, *outLen = 384; } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_ZEROFREE_VAR(sp_digit, a, 24 * 11, NULL, DYNAMIC_TYPE_RSA); return err; @@ -5134,8 +5164,6 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 3072) { err = MP_READ_E; } @@ -5217,7 +5245,6 @@ static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, const sp_digit* e, int bits, int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -5335,8 +5362,6 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -5455,8 +5480,6 @@ int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen, word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 3072) { err = MP_READ_E; } @@ -5548,8 +5571,6 @@ int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 1536) { err = MP_READ_E; } @@ -5859,8 +5880,6 @@ static void sp_4096_mont_norm_64(sp_digit* r, const sp_digit* m) { XMEMSET(r, 0, sizeof(sp_digit) * 64); - ASSERT_SAVED_VECTOR_REGISTERS(); - /* r = 2^n mod m */ sp_4096_sub_in_place_64(r, m); } @@ -5944,7 +5963,6 @@ extern sp_digit div_4096_word_asm_64(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -5963,7 +5981,6 @@ static WC_INLINE sp_digit div_4096_word_64(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -5994,8 +6011,6 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[63]; @@ -6054,7 +6069,6 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s static WC_INLINE int sp_4096_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_4096_div_64_cond(a, m, NULL, r); } @@ -6124,8 +6138,6 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[63]; @@ -6179,19 +6191,39 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_4096_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_4096_div_64(a, m, NULL, r); } #endif /* WOLFSSL_HAVE_SP_DH || !WOLFSSL_RSA_PUBLIC_ONLY */ -#ifdef __cplusplus -extern "C" { -#endif -extern void sp_4096_get_from_table_64(sp_digit* r, sp_digit** table, int idx); -#ifdef __cplusplus -} -#endif +#ifndef WC_NO_CACHE_RESISTANT +static void sp_4096_get_from_table_64(sp_digit* r, + sp_digit** table, int idx) +{ + int e, j; + sp_digit mask; + sp_digit diff; + + for (j = 0; j < 64; j++) { + r[j] = 0; + } + + for (e = 0; e < 16; e++) { + /* Constant-time: mask = (e == idx) ? ~0 : 0 + * diff = e ^ idx is 0 iff equal. + * (diff | -diff) has its sign bit set iff diff != 0. + * Shift to get 1/0, subtract from 1 to invert, negate to mask. + */ + diff = (sp_digit)(e ^ idx); + diff = (diff | ((sp_digit)0 - diff)) + >> (sizeof(sp_digit) * 8 - 1); /* 1 if !=, 0 if == */ + mask = (sp_digit)0 - ((sp_digit)1 - diff); /* all-1 if ==, else 0 */ + for (j = 0; j < 64; j++) { + r[j] |= table[e][j] & mask; + } + } +} +#endif /* !WC_NO_CACHE_RESISTANT */ /* Modular exponentiate a to the e mod m. (r = a^e mod m) * * r A single precision number that is the result of the operation. @@ -6218,8 +6250,6 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -6370,6 +6400,7 @@ extern void sp_4096_mont_reduce_avx2_64(sp_digit* a, const sp_digit* m, sp_digit SP_NOINLINE static void sp_4096_mont_mul_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_4096_mul_avx2_64(r, a, b); sp_4096_mont_reduce_avx2_64(r, m, mp); } @@ -6386,6 +6417,7 @@ SP_NOINLINE static void sp_4096_mont_mul_avx2_64(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_4096_mont_sqr_avx2_64(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_4096_sqr_avx2_64(r, a); sp_4096_mont_reduce_avx2_64(r, m, mp); } @@ -6428,7 +6460,6 @@ static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digi int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -6584,10 +6615,9 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (*outLen < 512) { err = MP_TO_E; } @@ -6619,6 +6649,13 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { sp_4096_from_mp(m, 64, mm); +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && + (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; +#endif + if (e == 0x10001) { int i; sp_digit mp; @@ -6633,9 +6670,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, if (err == MP_OKAY) { /* r = a ^ 0x10000 => r = a squared 16 times */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { for (i = 15; i >= 0; i--) { sp_4096_mont_sqr_avx2_64(r, r, m, mp); } @@ -6643,7 +6678,6 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * mont_red(r.R * a) = (r.R.a / R) mod m = r.a mod m */ sp_4096_mont_mul_avx2_64(r, r, ah, m, mp); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -6667,9 +6701,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } else if (e == 0x3) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { if (err == MP_OKAY) { sp_4096_sqr_avx2_64(r, ah); err = sp_4096_mod_64_cond(r, r, m); @@ -6678,7 +6710,6 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, sp_4096_mul_avx2_64(r, ah, r); err = sp_4096_mod_64_cond(r, r, m); } - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -6712,9 +6743,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, XMEMCPY(r, a, sizeof(sp_digit) * 64); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { for (i--; i>=0; i--) { sp_4096_mont_sqr_avx2_64(r, r, m, mp); if (((e >> i) & 1) == 1) { @@ -6723,7 +6752,6 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } XMEMSET(&r[64], 0, sizeof(sp_digit) * 64); sp_4096_mont_reduce_avx2_64(r, m, mp); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -6753,6 +6781,11 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, *outLen = 512; } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(a, NULL, DYNAMIC_TYPE_RSA); return err; @@ -6787,8 +6820,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_digit* r = NULL; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)pm; (void)qm; (void)dpm; @@ -6886,10 +6917,9 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)dm; (void)mm; @@ -6910,6 +6940,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, } SP_ALLOC_VAR(sp_digit, a, 32 * 11, NULL, DYNAMIC_TYPE_RSA); + if (err == MP_OKAY) { p = a + 64 * 2; q = p + 32; @@ -6925,10 +6956,11 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + + if (saved_vector_registers) err = sp_2048_mod_exp_avx2_32(tmpa, a, dp, 2048, p, 1); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_2048_mod_exp_32(tmpa, a, dp, 2048, p, 1); @@ -6936,11 +6968,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, if (err == MP_OKAY) { sp_4096_from_mp(dq, 32, dqm); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) err = sp_2048_mod_exp_avx2_32(tmpb, a, dq, 2048, q, 1); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_2048_mod_exp_32(tmpb, a, dq, 2048, q, 1); @@ -6949,11 +6978,9 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, if (err == MP_OKAY) { c = sp_2048_sub_in_place_32(tmpa, tmpb); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { c += sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -6964,31 +6991,21 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, sp_2048_from_mp(qi, 32, qim); #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_2048_mul_avx2_32(tmpa, tmpa, qi); - RESTORE_VECTOR_REGISTERS(); - } else #endif - { sp_2048_mul_32(tmpa, tmpa, qi); - } err = sp_2048_mod_32(tmpa, tmpa, p); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_2048_mul_avx2_32(tmpa, q, tmpa); - RESTORE_VECTOR_REGISTERS(); - } else #endif - { sp_2048_mul_32(tmpa, q, tmpa); - } XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32); sp_4096_add_64(r, tmpb, tmpa); @@ -6996,6 +7013,11 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, *outLen = 512; } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_ZEROFREE_VAR(sp_digit, a, 32 * 11, NULL, DYNAMIC_TYPE_RSA); return err; @@ -7095,8 +7117,6 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #endif int expBits = mp_count_bits(exp); - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 4096) { err = MP_READ_E; } @@ -7178,7 +7198,6 @@ static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits, int err = MP_OKAY; ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -7296,8 +7315,6 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - ASSERT_SAVED_VECTOR_REGISTERS(); - if (bits == 0) { err = MP_VAL; } @@ -7416,8 +7433,6 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - if (mp_count_bits(base) > 4096) { err = MP_READ_E; } @@ -7636,8 +7651,6 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* int64_t a32[8]; int64_t o; - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; a32[0] = (int64_t)(a[0] & 0xffffffff); @@ -10962,6 +10975,7 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_256, point, 2, heap, DYNAMIC_TYPE_ECC); @@ -10986,33 +11000,26 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_256_ecc_mulmod_avx2_4(point, point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_256_ecc_mulmod_4(point, point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_proj_point_add_avx2_4(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_proj_point_add_4(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_map_avx2_4(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_map_4(point, point, tmp); @@ -11021,6 +11028,11 @@ int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm, err = sp_256_point_to_ecc_point_4(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, heap, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, heap, DYNAMIC_TYPE_ECC); @@ -23718,6 +23730,7 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_256, point, 2, NULL, DYNAMIC_TYPE_ECC); @@ -23741,33 +23754,26 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_256_ecc_mulmod_base_avx2_4(point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_256_ecc_mulmod_base_4(point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_proj_point_add_avx2_4(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_proj_point_add_4(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_map_avx2_4(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_map_4(point, point, tmp); @@ -23776,6 +23782,11 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, err = sp_256_point_to_ecc_point_4(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, NULL, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, NULL, DYNAMIC_TYPE_ECC); @@ -23887,6 +23898,7 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif (void)heap; @@ -23907,10 +23919,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + + if (saved_vector_registers) err = sp_256_ecc_mulmod_base_avx2_4(point, k, 1, 1, NULL); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_256_ecc_mulmod_base_4(point, k, 1, 1, NULL); @@ -23919,11 +23932,9 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { err = sp_256_ecc_mulmod_avx2_4(infinity, point, p256_order, 1, 1, NULL); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -23936,6 +23947,11 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) } #endif +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + if (err == MP_OKAY) { err = sp_256_to_mp(k, priv); } @@ -24211,7 +24227,6 @@ extern sp_digit div_256_word_asm_4(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -24230,7 +24245,6 @@ static WC_INLINE sp_digit div_256_word_4(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -24283,8 +24297,6 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[3]; @@ -24337,7 +24349,6 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_256_div_4(a, m, NULL, r); } @@ -24351,7 +24362,6 @@ static WC_INLINE int sp_256_mod_4(sp_digit* r, const sp_digit* a, */ static void sp_256_mont_mul_order_4(sp_digit* r, const sp_digit* a, const sp_digit* b) { - ASSERT_SAVED_VECTOR_REGISTERS(); sp_256_mul_4(r, a, b); sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order); } @@ -24377,7 +24387,6 @@ static const word64 p256_order_low[2] = { */ static void sp_256_mont_sqr_order_4(sp_digit* r, const sp_digit* a) { - ASSERT_SAVED_VECTOR_REGISTERS(); sp_256_sqr_4(r, a); sp_256_mont_reduce_order_4(r, p256_order, p256_mp_order); } @@ -24393,8 +24402,6 @@ static void sp_256_mont_sqr_n_order_4(sp_digit* r, const sp_digit* a, int n) { int i; - ASSERT_SAVED_VECTOR_REGISTERS(); - sp_256_mont_sqr_order_4(r, a); for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; @@ -24483,8 +24488,6 @@ static void sp_256_mont_inv_order_4(sp_digit* r, const sp_digit* a, sp_digit* t4 = td + 6 * 4; int i; - ASSERT_SAVED_VECTOR_REGISTERS(); - /* t = a^2 */ sp_256_mont_sqr_order_4(t, a); /* t = a^3 = t * a */ @@ -24617,7 +24620,6 @@ static void sp_256_mont_sqr_n_order_avx2_4(sp_digit* r, const sp_digit* a, int n int i; ASSERT_SAVED_VECTOR_REGISTERS(); - sp_256_mont_sqr_order_avx2_4(r, a); for (i=1; i= sizeof(*sp_ctx) ? -1 : 1]; @@ -24707,7 +24708,6 @@ static void sp_256_mont_inv_order_avx2_4(sp_digit* r, const sp_digit* a, int i; ASSERT_SAVED_VECTOR_REGISTERS(); - /* t = a^2 */ sp_256_mont_sqr_order_avx2_4(t, a); /* t = a^3 = t * a */ @@ -24839,15 +24839,16 @@ static int sp_256_calc_s_4(sp_digit* s, const sp_digit* r, sp_digit* k, sp_digit* kInv = k; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif /* Conv k to Montgomery form (mod order) */ #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) sp_256_mul_avx2_4(k, k, p256_norm_order); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_mul_4(k, k, p256_norm_order); @@ -24857,11 +24858,8 @@ static int sp_256_calc_s_4(sp_digit* s, const sp_digit* r, sp_digit* k, /* kInv = 1/k mod order */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_mont_inv_order_avx2_4(kInv, k, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_mont_inv_order_4(kInv, k, tmp); @@ -24869,11 +24867,8 @@ static int sp_256_calc_s_4(sp_digit* s, const sp_digit* r, sp_digit* k, /* s = r * x + e */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_mul_avx2_4(x, x, r); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_mul_4(x, x, r); @@ -24891,17 +24886,19 @@ static int sp_256_calc_s_4(sp_digit* s, const sp_digit* r, sp_digit* k, /* s = s * k^-1 mod order */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_256_mont_mul_order_avx2_4(s, s, kInv); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_256_mont_mul_order_4(s, s, kInv); sp_256_norm_4(s); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + return err; } @@ -27843,6 +27840,7 @@ extern void sp_384_mont_reduce_order_avx2_6(sp_digit* a, const sp_digit* m, sp_d SP_NOINLINE static void sp_384_mont_mul_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_384_mul_avx2_6(r, a, b); sp_384_mont_reduce_avx2_6(r, m, mp); } @@ -27859,6 +27857,7 @@ SP_NOINLINE static void sp_384_mont_mul_avx2_6(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_384_mont_sqr_avx2_6(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_384_sqr_avx2_6(r, a); sp_384_mont_reduce_avx2_6(r, m, mp); } @@ -29755,6 +29754,7 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_384, point, 2, heap, DYNAMIC_TYPE_ECC); @@ -29779,33 +29779,26 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_384_ecc_mulmod_avx2_6(point, point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_384_ecc_mulmod_6(point, point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_proj_point_add_avx2_6(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_proj_point_add_6(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_map_avx2_6(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_map_6(point, point, tmp); @@ -29814,6 +29807,11 @@ int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm, err = sp_384_point_to_ecc_point_6(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, heap, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, heap, DYNAMIC_TYPE_ECC); @@ -48325,6 +48323,7 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_384, point, 2, NULL, DYNAMIC_TYPE_ECC); @@ -48348,33 +48347,26 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_384_ecc_mulmod_base_avx2_6(point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_384_ecc_mulmod_base_6(point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_proj_point_add_avx2_6(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_proj_point_add_6(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_map_avx2_6(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_map_6(point, point, tmp); @@ -48383,6 +48375,11 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, err = sp_384_point_to_ecc_point_6(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, NULL, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, NULL, DYNAMIC_TYPE_ECC); @@ -48494,6 +48491,7 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif (void)heap; @@ -48514,10 +48512,11 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + + if (saved_vector_registers) err = sp_384_ecc_mulmod_base_avx2_6(point, k, 1, 1, NULL); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_384_ecc_mulmod_base_6(point, k, 1, 1, NULL); @@ -48526,11 +48525,9 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { err = sp_384_ecc_mulmod_avx2_6(infinity, point, p384_order, 1, 1, NULL); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -48543,6 +48540,11 @@ int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) } #endif +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + if (err == MP_OKAY) { err = sp_384_to_mp(k, priv); } @@ -48818,7 +48820,6 @@ extern sp_digit div_384_word_asm_6(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -48837,7 +48838,6 @@ static WC_INLINE sp_digit div_384_word_6(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -48892,8 +48892,6 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[5]; @@ -48946,7 +48944,6 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit static WC_INLINE int sp_384_mod_6(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_384_div_6(a, m, NULL, r); } @@ -49304,15 +49301,16 @@ static int sp_384_calc_s_6(sp_digit* s, const sp_digit* r, sp_digit* k, sp_digit* kInv = k; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif /* Conv k to Montgomery form (mod order) */ #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) sp_384_mul_avx2_6(k, k, p384_norm_order); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_mul_6(k, k, p384_norm_order); @@ -49322,11 +49320,8 @@ static int sp_384_calc_s_6(sp_digit* s, const sp_digit* r, sp_digit* k, /* kInv = 1/k mod order */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_mont_inv_order_avx2_6(kInv, k, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_mont_inv_order_6(kInv, k, tmp); @@ -49334,11 +49329,8 @@ static int sp_384_calc_s_6(sp_digit* s, const sp_digit* r, sp_digit* k, /* s = r * x + e */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_mul_avx2_6(x, x, r); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_mul_6(x, x, r); @@ -49356,17 +49348,19 @@ static int sp_384_calc_s_6(sp_digit* s, const sp_digit* r, sp_digit* k, /* s = s * k^-1 mod order */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_384_mont_mul_order_avx2_6(s, s, kInv); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_384_mont_mul_order_6(s, s, kInv); sp_384_norm_6(s); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + return err; } @@ -54246,6 +54240,7 @@ int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_521, point, 2, heap, DYNAMIC_TYPE_ECC); @@ -54270,33 +54265,26 @@ int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_521_ecc_mulmod_avx2_9(point, point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_521_ecc_mulmod_9(point, point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_proj_point_add_avx2_9(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_proj_point_add_9(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_map_avx2_9(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_map_9(point, point, tmp); @@ -54305,6 +54293,11 @@ int sp_ecc_mulmod_add_521(const mp_int* km, const ecc_point* gm, err = sp_521_point_to_ecc_point_9(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, heap, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, heap, DYNAMIC_TYPE_ECC); @@ -89002,6 +88995,7 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_521, point, 2, NULL, DYNAMIC_TYPE_ECC); @@ -89025,33 +89019,26 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_521_ecc_mulmod_base_avx2_9(point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_521_ecc_mulmod_base_9(point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_proj_point_add_avx2_9(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_proj_point_add_9(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_map_avx2_9(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_map_9(point, point, tmp); @@ -89060,6 +89047,11 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, err = sp_521_point_to_ecc_point_9(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, NULL, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, NULL, DYNAMIC_TYPE_ECC); @@ -89172,6 +89164,7 @@ int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif (void)heap; @@ -89192,10 +89185,11 @@ int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + + if (saved_vector_registers) err = sp_521_ecc_mulmod_base_avx2_9(point, k, 1, 1, NULL); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_521_ecc_mulmod_base_9(point, k, 1, 1, NULL); @@ -89204,11 +89198,9 @@ int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) { err = sp_521_ecc_mulmod_avx2_9(infinity, point, p521_order, 1, 1, NULL); - RESTORE_VECTOR_REGISTERS(); } else #endif @@ -89221,6 +89213,11 @@ int sp_ecc_make_key_521(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) } #endif +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + if (err == MP_OKAY) { err = sp_521_to_mp(k, priv); } @@ -89517,7 +89514,6 @@ extern sp_digit div_521_word_asm_9(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_521_word_9(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -89536,7 +89532,6 @@ static WC_INLINE sp_digit div_521_word_9(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -89595,8 +89590,6 @@ static WC_INLINE int sp_521_div_9(const sp_digit* a, const sp_digit* d, sp_digit word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = (d[8] << 55) | (d[7] >> 9); @@ -89651,7 +89644,6 @@ static WC_INLINE int sp_521_div_9(const sp_digit* a, const sp_digit* d, sp_digit static WC_INLINE int sp_521_mod_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_521_div_9(a, m, NULL, r); } @@ -90037,15 +90029,16 @@ static int sp_521_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k, sp_digit* kInv = k; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif /* Conv k to Montgomery form (mod order) */ #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) sp_521_mul_avx2_9(k, k, p521_norm_order); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_mul_9(k, k, p521_norm_order); @@ -90055,11 +90048,8 @@ static int sp_521_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k, /* kInv = 1/k mod order */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_mont_inv_order_avx2_9(kInv, k, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_mont_inv_order_9(kInv, k, tmp); @@ -90067,11 +90057,8 @@ static int sp_521_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k, /* s = r * x + e */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_mul_avx2_9(x, x, r); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_mul_9(x, x, r); @@ -90089,17 +90076,19 @@ static int sp_521_calc_s_9(sp_digit* s, const sp_digit* r, sp_digit* k, /* s = s * k^-1 mod order */ #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_521_mont_mul_order_avx2_9(s, s, kInv); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_521_mont_mul_order_9(s, s, kInv); sp_521_norm_9(s); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + return err; } @@ -91607,7 +91596,6 @@ extern sp_digit div_1024_word_asm_16(sp_digit d1, sp_digit d0, sp_digit div); static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, sp_digit div) { - ASSERT_SAVED_VECTOR_REGISTERS(); #if _MSC_VER >= 1920 return _udiv128(d1, d0, div, NULL); #else @@ -91626,7 +91614,6 @@ static WC_INLINE sp_digit div_1024_word_16(sp_digit d1, sp_digit d0, sp_digit div) { register sp_digit r asm("rax"); - ASSERT_SAVED_VECTOR_REGISTERS(); __asm__ __volatile__ ( "divq %3" : "=a" (r) @@ -91694,8 +91681,6 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - (void)m; div = d[15]; @@ -91748,7 +91733,6 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig static WC_INLINE int sp_1024_mod_16(sp_digit* r, const sp_digit* a, const sp_digit* m) { - ASSERT_SAVED_VECTOR_REGISTERS(); return sp_1024_div_16(a, m, NULL, r); } @@ -93220,6 +93204,7 @@ extern void sp_1024_mont_reduce_avx2_16(sp_digit* a, const sp_digit* m, sp_digit SP_NOINLINE static void sp_1024_mont_mul_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_1024_mul_avx2_16(r, a, b); sp_1024_mont_reduce_avx2_16(r, m, mp); } @@ -93236,6 +93221,7 @@ SP_NOINLINE static void sp_1024_mont_mul_avx2_16(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_1024_mont_sqr_avx2_16(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { + ASSERT_SAVED_VECTOR_REGISTERS(); sp_1024_sqr_avx2_16(r, a); sp_1024_mont_reduce_avx2_16(r, m, mp); } @@ -98454,6 +98440,7 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, int err = MP_OKAY; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); + int saved_vector_registers = 0; #endif SP_ALLOC_VAR(sp_point_1024, point, 2, NULL, DYNAMIC_TYPE_ECC); @@ -98477,33 +98464,26 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) + saved_vector_registers = 1; + if (saved_vector_registers) err = sp_1024_ecc_mulmod_base_avx2_16(point, k, 0, 0, heap); - RESTORE_VECTOR_REGISTERS(); - } else #endif err = sp_1024_ecc_mulmod_base_16(point, k, 0, 0, heap); } if (err == MP_OKAY) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_1024_proj_point_add_avx2_16(point, point, addP, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_1024_proj_point_add_16(point, point, addP, tmp); if (map) { #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && - IS_INTEL_AVX2(cpuid_flags) && - (SAVE_VECTOR_REGISTERS2() == 0)) { + if (saved_vector_registers) sp_1024_map_avx2_16(point, point, tmp); - RESTORE_VECTOR_REGISTERS(); - } else #endif sp_1024_map_16(point, point, tmp); @@ -98512,6 +98492,11 @@ int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am, err = sp_1024_point_to_ecc_point_16(point, r); } +#ifdef HAVE_INTEL_AVX2 + if (saved_vector_registers) + RESTORE_VECTOR_REGISTERS(); +#endif + SP_FREE_VAR(k, NULL, DYNAMIC_TYPE_ECC); SP_FREE_VAR(point, NULL, DYNAMIC_TYPE_ECC); @@ -100712,11 +100697,11 @@ int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res) word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { err = sp_ModExp_Fp_star_avx2_1024(base, exp, res); + RESTORE_VECTOR_REGISTERS(); } else #endif @@ -102314,11 +102299,11 @@ int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res) word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { err = sp_Pairing_avx2_1024(pm, qm, res); + RESTORE_VECTOR_REGISTERS(); } else #endif @@ -103439,11 +103424,11 @@ int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table, word32* len) word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { err = sp_Pairing_gen_precomp_avx2_1024(pm, table, len); + RESTORE_VECTOR_REGISTERS(); } else #endif @@ -103475,11 +103460,11 @@ int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm, mp_int* re word32 cpuid_flags = cpuid_get_flags(); #endif - ASSERT_SAVED_VECTOR_REGISTERS(); - #ifdef HAVE_INTEL_AVX2 - if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { + if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags) && + IS_INTEL_AVX2(cpuid_flags) && (SAVE_VECTOR_REGISTERS2() == 0)) { err = sp_Pairing_precomp_avx2_1024(pm, qm, res, table, len); + RESTORE_VECTOR_REGISTERS(); } else #endif diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h index 8d49ffc18ef..736954a0a76 100644 --- a/wolfssl/wolfcrypt/error-crypt.h +++ b/wolfssl/wolfcrypt/error-crypt.h @@ -356,10 +356,13 @@ WOLFSSL_API void wc_ErrorString(int err, char* buff); WOLFSSL_ABI WOLFSSL_API const char* wc_GetErrorString(int error); #endif +#ifdef WOLFSSL_DEBUG_BACKTRACE_ERROR_CODES + WOLFSSL_API extern int wc_backtrace_render(void); +#endif + #if defined(WOLFSSL_DEBUG_TRACE_ERROR_CODES) && \ (defined(BUILDING_WOLFSSL) || \ defined(WOLFSSL_DEBUG_TRACE_ERROR_CODES_ALWAYS)) - WOLFSSL_API extern int wc_backtrace_render(void); #define WC_NO_ERR_TRACE(label) (CONST_NUM_ERR_ ## label) #ifndef WOLFSSL_DEBUG_BACKTRACE_RENDER_CLAUSE #ifdef WOLFSSL_DEBUG_BACKTRACE_ERROR_CODES diff --git a/wolfssl/wolfcrypt/logging.h b/wolfssl/wolfcrypt/logging.h index 414b41c2337..efed186fdb7 100644 --- a/wolfssl/wolfcrypt/logging.h +++ b/wolfssl/wolfcrypt/logging.h @@ -579,12 +579,13 @@ WOLFSSL_API void wolfSSL_SetLoggingPrefix(const char* prefix); #error "Failed: Cannot WOLFSSL_DEBUG_CERTS with WOLFSSL_DEBUG_ERRORS_ONLY" #endif +#if defined(WOLFSSL_DEBUG_BACKTRACE_ERROR_CODES) && defined(XFILE) + WOLFSSL_API XFILE wc_backtrace_set_fp(XFILE new_fp); +#endif + #ifdef WOLFSSL_DEBUG_TRACE_ERROR_CODES WOLFSSL_API int wc_debug_trace_error_codes_enabled(void); WOLFSSL_API int wc_debug_trace_error_codes_set(int state); - #ifdef XFILE - WOLFSSL_API XFILE wc_backtrace_set_fp(XFILE new_fp); - #endif #endif #ifdef __cplusplus diff --git a/wolfssl/wolfcrypt/memory.h b/wolfssl/wolfcrypt/memory.h index 06518690608..cdc6a040452 100644 --- a/wolfssl/wolfcrypt/memory.h +++ b/wolfssl/wolfcrypt/memory.h @@ -370,6 +370,8 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE abort(); #elif defined(DEBUG_VECTOR_REGISTERS_EXIT_ON_FAIL) #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE exit(1); + #elif defined(DEBUG_VECTOR_REGISTERS_BACKTRACE_ON_FAIL) + #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE wc_backtrace_render(); #elif !defined(DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE) #define DEBUG_VECTOR_REGISTERS_EXTRA_FAIL_CLAUSE #endif @@ -381,8 +383,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, ++wc_svr_count; \ if (wc_svr_count > 5) { \ fprintf(stderr, \ - ("%s @ L%d : incr : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("%s() %s @ L %d : incr : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -403,8 +406,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, #define WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(x) do { \ if (((x) != 0) && (wc_svr_count > 0)) { \ fprintf(stderr, \ - ("%s @ L%d : incr : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("%s() %s @ L %d : incr : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -422,8 +426,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, ++wc_svr_count; \ if (wc_svr_count > 5) { \ fprintf(stderr, \ - ("%s @ L%d : incr : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("%s() %s @ L %d : incr : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -445,8 +450,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, if (wc_debug_vector_registers_retval != 0) { \ if (wc_svr_count > 0) { \ fprintf(stderr, \ - ("%s @ L%d : incr : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("%s() %s @ L %d : incr : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -459,8 +465,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, ++wc_svr_count; \ if (wc_svr_count > 5) { \ fprintf(stderr, \ - ("%s @ L%d : incr : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("%s() %s @ L %d : incr : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -480,8 +487,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, #define ASSERT_SAVED_VECTOR_REGISTERS() do { \ if (wc_svr_count <= 0) { \ fprintf(stderr, \ - ("ASSERT_SAVED_VECTOR_REGISTERS : %s @ L%d : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("ASSERT_SAVED_VECTOR_REGISTERS : %s() %s @ L %d : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -493,8 +501,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, #define ASSERT_RESTORED_VECTOR_REGISTERS(fail_clause) do { \ if (wc_svr_count != 0) { \ fprintf(stderr, \ - ("ASSERT_RESTORED_VECTOR_REGISTERS : %s @ L%d" \ - " : wc_svr_count %d (last op %s L%d)\n"), \ + ("ASSERT_RESTORED_VECTOR_REGISTERS : %s() %s @ L %d" \ + " : wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ @@ -508,8 +517,9 @@ WOLFSSL_LOCAL int wc_debug_CipherLifecycleFree(void **CipherLifecycleTag, --wc_svr_count; \ if ((wc_svr_count > 4) || (wc_svr_count < 0)) { \ fprintf(stderr, \ - ("%s @ L%d : decr : " \ - "wc_svr_count %d (last op %s L%d)\n"), \ + ("%s() %s @ L %d : decr : " \ + "wc_svr_count %d (last op %s L %d)\n"), \ + __FUNCTION__, \ __FILE__, \ __LINE__, \ wc_svr_count, \ From 3a4c2cded0cde4cdb11ad3cddb5dd2e983d03027 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Sat, 30 May 2026 15:11:36 -0500 Subject: [PATCH 2/2] activate ECCSI and SAKKE in linuxkm: wolfssl/wolfcrypt/settings.h: add WC_NO_GLOBAL_OBJECT_POINTERS implicitly in WC_SYM_RELOC_TABLES section of WOLFSSL_LINUXKM setup. wolfssl/wolfcrypt/wolfmath.h, wolfcrypt/src/wolfmath.c, wolfcrypt/src/sp_int.c, wolfcrypt/src/sakke.c: when WC_NO_GLOBAL_OBJECT_POINTERS, use static local wc_off_on_addr rather than global in wolfmath.c. wolfcrypt/src/sakke.c: * in wc_DeriveSakkeSSV(), initialize a[] with explicit XMEMSET() rather than " = {0}", to avoid unmaskable implicit memset() emitted by compiler. * remove all vector register provisions (SAVE_VECTOR_REGISTERS(), RESTORE_VECTOR_REGISTERS(), ASSERT_SAVED_VECTOR_REGISTERS()). linuxkm/module_exports.c.template: add includes for eccsi.h and sakke.h. configure.ac: * tweak enable-all-crypto setup to make enable_eccsi unconditional alongside enable_fpecc; * move enable_sakke to be conditional only on !FIPS. * notably this activates ECCSI and SAKKE on kernel all-crypto builds. wolfcrypt/test/test.c: WC_*_VAR*() refactors for eccsi_test() and sakke_test(). --- configure.ac | 4 +- linuxkm/module_exports.c.template | 6 ++ wolfcrypt/src/eccsi.c | 16 ----- wolfcrypt/src/sakke.c | 48 ++++++--------- wolfcrypt/src/wolfmath.c | 13 ++-- wolfcrypt/test/test.c | 99 ++++++++++--------------------- wolfssl/wolfcrypt/settings.h | 3 + wolfssl/wolfcrypt/wolfmath.h | 13 ++-- 8 files changed, 76 insertions(+), 126 deletions(-) diff --git a/configure.ac b/configure.ac index 7cd5cf6671a..bc4bdb51d24 100644 --- a/configure.ac +++ b/configure.ac @@ -1513,6 +1513,7 @@ then test "$enable_hkdf" = "" && enable_hkdf=yes test "$enable_eccencrypt" = "" && test "$enable_ecc" != "no" && enable_eccencrypt=yes test "$enable_fpecc" = "" && test "$enable_ecc" != "no" && enable_fpecc=yes + test "$enable_eccsi" = "" && test "$enable_ecc" != "no" && enable_eccsi=yes test "$enable_psk" = "" && enable_psk=yes test "$enable_cmac" = "" && enable_cmac=yes test "$enable_cmac_kdf" = "" && enable_cmac_kdf=yes @@ -1580,13 +1581,12 @@ then test "$enable_aessiv" = "" && enable_aessiv=yes # AFALG lacks AES-EAX test "$enable_aeseax" = "" && test "$enable_afalg" != "yes" && enable_aeseax=yes + test "$enable_sakke" = "" && test "$enable_ecc" != "no" && enable_sakke=yes if test "$KERNEL_MODE_DEFAULTS" != "yes" then test "$enable_cryptocb" = "" && enable_cryptocb=yes test "$enable_pkcallbacks" = "" && enable_pkcallbacks=yes - test "$enable_eccsi" = "" && test "$enable_ecc" != "no" && enable_eccsi=yes - test "$enable_sakke" = "" && test "$enable_ecc" != "no" && enable_sakke=yes fi fi diff --git a/linuxkm/module_exports.c.template b/linuxkm/module_exports.c.template index f37eea1e498..8209c6277f9 100644 --- a/linuxkm/module_exports.c.template +++ b/linuxkm/module_exports.c.template @@ -91,6 +91,12 @@ #ifdef HAVE_ECC #include #endif +#ifdef WOLFCRYPT_HAVE_ECCSI + #include +#endif +#ifdef WOLFCRYPT_HAVE_SAKKE + #include +#endif #ifdef HAVE_HPKE #include #endif diff --git a/wolfcrypt/src/eccsi.c b/wolfcrypt/src/eccsi.c index d919dd8a341..d0417ec879a 100644 --- a/wolfcrypt/src/eccsi.c +++ b/wolfcrypt/src/eccsi.c @@ -36,14 +36,6 @@ #include #endif -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - #ifndef WOLFSSL_HAVE_ECC_KEY_GET_PRIV /* FIPS build has replaced ecc.h. */ #define wc_ecc_key_get_priv(key) (&((key)->k)) @@ -1507,8 +1499,6 @@ int wc_ValidateEccsiPair(EccsiKey* key, enum wc_HashType hashType, if (err != 0) return err; - SAVE_VECTOR_REGISTERS(return _svr_ret;); - params = &key->params; hs = &key->tmp; res = &key->pubkey.pubkey; @@ -1563,8 +1553,6 @@ int wc_ValidateEccsiPair(EccsiKey* key, enum wc_HashType hashType, } } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -2231,8 +2219,6 @@ int wc_VerifyEccsiHash(EccsiKey* key, enum wc_HashType hashType, if (err != 0) return err; - SAVE_VECTOR_REGISTERS(return _svr_ret;); - /* Decode the signature into components. */ r = wc_ecc_key_get_priv(&key->pubkey); pvt = &key->pubkey.pubkey; @@ -2318,8 +2304,6 @@ int wc_VerifyEccsiHash(EccsiKey* key, enum wc_HashType hashType, *verified = ((err == 0) && (mp_cmp(jx, r) == MP_EQ)); } - RESTORE_VECTOR_REGISTERS(); - return err; } #endif /* WOLFCRYPT_ECCSI_CLIENT */ diff --git a/wolfcrypt/src/sakke.c b/wolfcrypt/src/sakke.c index 4fc1a85b485..a7b64e32a76 100644 --- a/wolfcrypt/src/sakke.c +++ b/wolfcrypt/src/sakke.c @@ -37,14 +37,6 @@ #include #include -#if defined(WOLFSSL_USE_SAVE_VECTOR_REGISTERS) && !defined(WOLFSSL_SP_ASM) - /* force off unneeded vector register save/restore. */ - #undef SAVE_VECTOR_REGISTERS - #define SAVE_VECTOR_REGISTERS(fail_clause) SAVE_NO_VECTOR_REGISTERS(fail_clause) - #undef RESTORE_VECTOR_REGISTERS - #define RESTORE_VECTOR_REGISTERS() RESTORE_NO_VECTOR_REGISTERS() -#endif - #ifndef WOLFSSL_HAVE_ECC_KEY_GET_PRIV /* FIPS build has replaced ecc.h. */ #define wc_ecc_key_get_priv(key) (&((key)->k)) @@ -1328,13 +1320,11 @@ int wc_GenerateSakkeRskTable(const SakkeKey* key, const ecc_point* rsk, err = BAD_FUNC_ARG; } if (err == 0) { - SAVE_VECTOR_REGISTERS(return _svr_ret;); #ifdef WOLFSSL_SP_1024 err = sp_Pairing_gen_precomp_1024(rsk, table, len); #else err = NOT_COMPILED_IN; #endif - RESTORE_VECTOR_REGISTERS(); } return err; @@ -2441,8 +2431,6 @@ int wc_ValidateSakkeRsk(SakkeKey* key, const byte* id, word16 idSz, err = BAD_FUNC_ARG; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - /* Load elliptic curve parameters */ if (err == 0) { err = sakke_load_params(key); @@ -2478,8 +2466,6 @@ int wc_ValidateSakkeRsk(SakkeKey* key, const byte* id, word16 idSz, *valid = ((err == 0) && (mp_cmp(a, &key->params.g) == MP_EQ)); } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -2622,6 +2608,22 @@ static int sakke_modexp_loop(SakkeKey* key, mp_int* b, mp_int* e, mp_proj* r, mp_int* by = key->tmp.p1->z; mp_int* prime = &key->params.prime; int i; +#ifdef WC_NO_GLOBAL_OBJECT_POINTERS + static const wc_ptr_t wc_off_on_addr[2] = + { + #if defined(WC_64BIT_CPU) + W64LIT(0x0000000000000000), + W64LIT(0xffffffffffffffff) + #elif defined(WC_16BIT_CPU) + 0x0000U, + 0xffffU + #else + /* 32 bit */ + 0x00000000U, + 0xffffffffU + #endif + }; +#endif #ifdef WC_NO_CACHE_RESISTANT c[0] = r; @@ -6387,8 +6389,6 @@ int wc_MakeSakkePointI(SakkeKey* key, const byte* id, word16 idSz) err = BAD_FUNC_ARG; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - if (err == 0) { err = sakke_load_params(key); } @@ -6401,8 +6401,6 @@ int wc_MakeSakkePointI(SakkeKey* key, const byte* id, word16 idSz) key->i.idSz = idSz; } - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -6532,9 +6530,7 @@ int wc_GenerateSakkePointITable(SakkeKey* key, byte* table, word32* len) #ifdef WOLFSSL_HAVE_SP_ECC if (err == 0) { - SAVE_VECTOR_REGISTERS(return _svr_ret;); err = sp_ecc_gen_table_1024(key->i.i, table, len, key->heap); - RESTORE_VECTOR_REGISTERS(); } if (err == 0) { key->i.table = table; @@ -6722,8 +6718,6 @@ int wc_MakeSakkeEncapsulatedSSV(SakkeKey* key, enum wc_HashType hashType, err = BAD_STATE_E; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - /* Load parameters */ if (err == 0) { err = sakke_load_params(key); @@ -6799,8 +6793,6 @@ int wc_MakeSakkeEncapsulatedSSV(SakkeKey* key, enum wc_HashType hashType, /* Step 6: Output SSV - already encoded in buffer */ - RESTORE_VECTOR_REGISTERS(); - return err; } @@ -6898,7 +6890,9 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv, mp_int* ri = NULL; byte* wb = NULL; byte* test = NULL; - byte a[WC_MAX_DIGEST_SIZE] = {0}; + byte a[WC_MAX_DIGEST_SIZE]; + + XMEMSET(a, 0, sizeof(a)); if ((key == NULL) || (ssv == NULL) || (auth == NULL) || (ssvSz == 0)) { err = BAD_FUNC_ARG; @@ -6907,8 +6901,6 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv, err = BAD_STATE_E; } - SAVE_VECTOR_REGISTERS(return _svr_ret;); - /* Load parameters */ if (err == 0) { err = sakke_load_params(key); @@ -6979,8 +6971,6 @@ int wc_DeriveSakkeSSV(SakkeKey* key, enum wc_HashType hashType, byte* ssv, err = SAKKE_VERIFY_FAIL_E; } - RESTORE_VECTOR_REGISTERS(); - return err; } #endif /* WOLFCRYPT_SAKKE_CLIENT */ diff --git a/wolfcrypt/src/wolfmath.c b/wolfcrypt/src/wolfmath.c index 06fb8ed017d..f073be133e3 100644 --- a/wolfcrypt/src/wolfmath.c +++ b/wolfcrypt/src/wolfmath.c @@ -43,12 +43,13 @@ #if !defined(NO_BIG_INT) || defined(WOLFSSL_SP_MATH) -#if (!defined(WC_NO_CACHE_RESISTANT) && \ - ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \ - (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))) || \ - ((defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \ - !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \ - defined(OPENSSL_ALL) && defined(WC_PROTECT_ENCRYPTED_MEM)) +#if !defined(WC_NO_GLOBAL_OBJECT_POINTERS) && \ + ((!defined(WC_NO_CACHE_RESISTANT) && \ + ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \ + (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))) || \ + ((defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \ + defined(OPENSSL_ALL) && defined(WC_PROTECT_ENCRYPTED_MEM))) /* all off / all on pointer addresses for constant calculations */ /* ecc.c uses same table */ diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 329c88cffdf..f3fc88c8a16 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -60460,38 +60460,25 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t eccsi_test(void) wc_test_ret_t ret = 0; WC_RNG rng; int rng_inited = 0; - EccsiKey* priv = NULL; - EccsiKey* pub = NULL; - mp_int* ssk = NULL; + WC_DECLARE_VAR(priv, EccsiKey, 1, HEAP_HINT); + WC_DECLARE_VAR(pub, EccsiKey, 1, HEAP_HINT); + WC_DECLARE_VAR(ssk, mp_int, 1, HEAP_HINT); ecc_point* pvt = NULL; WOLFSSL_ENTER("eccsi_test"); - priv = (EccsiKey*)XMALLOC(sizeof(EccsiKey), HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - if (priv == NULL) - ret = WC_TEST_RET_ENC_NC; - else - XMEMSET(priv, 0, sizeof(*priv)); - + WC_ALLOC_VAR_EX(priv, EccsiKey, 1, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER, ret = WC_TEST_RET_ENC_EC(MEMORY_E)); if (ret == 0) { - pub = (EccsiKey*)XMALLOC(sizeof(EccsiKey), HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - if (pub == NULL) - ret = WC_TEST_RET_ENC_NC; - else - XMEMSET(pub, 0, sizeof(*pub)); + XMEMSET(priv, 0, sizeof(*priv)); + WC_ALLOC_VAR_EX(pub, EccsiKey, 1, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER, ret = WC_TEST_RET_ENC_EC(MEMORY_E)); } - if (ret == 0) { - ssk = (mp_int*)XMALLOC(sizeof(mp_int), HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - if (ssk == NULL) - ret = WC_TEST_RET_ENC_NC; - else - XMEMSET(ssk, 0, sizeof(*ssk)); + XMEMSET(pub, 0, sizeof(*pub)); + WC_ALLOC_VAR_EX(ssk, mp_int, 1, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER, ret = WC_TEST_RET_ENC_EC(MEMORY_E)); } if (ret == 0) { + XMEMSET(ssk, 0, sizeof(*ssk)); + #ifndef HAVE_FIPS ret = wc_InitRng_ex(&rng, HEAP_HINT, devId); #else @@ -60547,18 +60534,12 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t eccsi_test(void) wc_ecc_del_point(pvt); if (rng_inited) wc_FreeRng(&rng); - if (ssk != NULL) { - mp_free(ssk); - XFREE(ssk, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - } - if (pub != NULL) { - wc_FreeEccsiKey(pub); - XFREE(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - } - if (priv != NULL) { - wc_FreeEccsiKey(priv); - XFREE(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - } + mp_free(ssk); + WC_FREE_VAR_EX(ssk, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + wc_FreeEccsiKey(pub); + WC_FREE_VAR_EX(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + wc_FreeEccsiKey(priv); + WC_FREE_VAR_EX(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); return ret; } @@ -61704,38 +61685,25 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t sakke_test(void) wc_test_ret_t ret = 0; WC_RNG rng; int rng_inited = 0; - SakkeKey* priv = NULL; - SakkeKey* pub = NULL; - SakkeKey* key = NULL; + WC_DECLARE_VAR(priv, SakkeKey, 1, HEAP_HINT); + WC_DECLARE_VAR(pub, SakkeKey, 1, HEAP_HINT); + WC_DECLARE_VAR(key, SakkeKey, 1, HEAP_HINT); ecc_point* rsk = NULL; WOLFSSL_ENTER("sakke_test"); - priv = (SakkeKey*)XMALLOC(sizeof(SakkeKey), HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - if (priv == NULL) - ret = WC_TEST_RET_ENC_NC; - else - XMEMSET(priv, 0, sizeof(*priv)); - + WC_ALLOC_VAR_EX(priv, SakkeKey, 1, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER, ret = WC_TEST_RET_ENC_EC(MEMORY_E)); if (ret == 0) { - pub = (SakkeKey*)XMALLOC(sizeof(SakkeKey), HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - if (pub == NULL) - ret = WC_TEST_RET_ENC_NC; - else - XMEMSET(pub, 0, sizeof(*pub)); + XMEMSET(priv, 0, sizeof(*priv)); + WC_ALLOC_VAR_EX(pub, SakkeKey, 1, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER, ret = WC_TEST_RET_ENC_EC(MEMORY_E)); } - if (ret == 0) { - key = (SakkeKey*)XMALLOC(sizeof(SakkeKey), HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - if (key == NULL) - ret = WC_TEST_RET_ENC_NC; - else - XMEMSET(key, 0, sizeof(*key)); + XMEMSET(pub, 0, sizeof(*pub)); + WC_ALLOC_VAR_EX(key, SakkeKey, 1, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER, ret = WC_TEST_RET_ENC_EC(MEMORY_E)); } if (ret == 0) { + XMEMSET(key, 0, sizeof(*key)); + #ifndef HAVE_FIPS ret = wc_InitRng_ex(&rng, HEAP_HINT, devId); #else @@ -61791,15 +61759,12 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t sakke_test(void) } if (rng_inited) wc_FreeRng(&rng); - XFREE(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - if (pub != NULL) { - wc_FreeSakkeKey(pub); - XFREE(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - } - if (priv != NULL) { - wc_FreeSakkeKey(priv); - XFREE(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - } + + WC_FREE_VAR_EX(key, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + wc_FreeSakkeKey(pub); + WC_FREE_VAR_EX(pub, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + wc_FreeSakkeKey(priv); + WC_FREE_VAR_EX(priv, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); return ret; } diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index c3ced88da7f..0fb49e93510 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -3963,6 +3963,9 @@ extern void uITRON4_free(void *p) ; #ifndef WC_NO_INTERNAL_FUNCTION_POINTERS #define WC_NO_INTERNAL_FUNCTION_POINTERS #endif + #ifndef WC_NO_GLOBAL_OBJECT_POINTERS + #define WC_NO_GLOBAL_OBJECT_POINTERS + #endif #ifndef WOLFSSL_ECC_CURVE_STATIC #define WOLFSSL_ECC_CURVE_STATIC #endif diff --git a/wolfssl/wolfcrypt/wolfmath.h b/wolfssl/wolfcrypt/wolfmath.h index f6563299790..1353f40d17a 100644 --- a/wolfssl/wolfcrypt/wolfmath.h +++ b/wolfssl/wolfcrypt/wolfmath.h @@ -74,12 +74,13 @@ This library provides big integer math functions. #endif /* timing resistance array */ -#if (!defined(WC_NO_CACHE_RESISTANT) && \ - ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \ - (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))) || \ - ((defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \ - !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \ - defined(OPENSSL_ALL) && defined(WC_PROTECT_ENCRYPTED_MEM)) +#if !defined(WC_NO_GLOBAL_OBJECT_POINTERS) && \ + ((!defined(WC_NO_CACHE_RESISTANT) && \ + ((defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT)) || \ + (defined(USE_FAST_MATH) && defined(TFM_TIMING_RESISTANT)))) || \ + ((defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \ + defined(OPENSSL_ALL) && defined(WC_PROTECT_ENCRYPTED_MEM))) extern const wc_ptr_t wc_off_on_addr[2]; #endif