From 2c75be0d64772c17c8e39fafd0e8c9daff935485 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 13 Oct 2020 16:08:30 -0400 Subject: [PATCH] Do not always enable SSE4 on X64 due to old Atom chips. Enable instead only for AES-NI code which is only run if AES-NI is present, which it is not on these old chips. --- make-linux.mk | 8 ++++---- node/AES.cpp | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/make-linux.mk b/make-linux.mk index a375884ef..83877025a 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -119,15 +119,15 @@ ifeq ($(CC_MACH),x86_64) ZT_ARCHITECTURE=2 ZT_USE_X64_ASM_SALSA=1 ZT_USE_X64_ASM_ED25519=1 - override CFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul - override CXXFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul + override CFLAGS+=-msse -msse2 -maes -mpclmul + override CXXFLAGS+=-msse -msse2 -maes -mpclmul endif ifeq ($(CC_MACH),amd64) ZT_ARCHITECTURE=2 ZT_USE_X64_ASM_SALSA=1 ZT_USE_X64_ASM_ED25519=1 - override CFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul - override CXXFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul + override CFLAGS+=-msse -msse2 -maes -mpclmul + override CXXFLAGS+=-msse -msse2 -maes -mpclmul endif ifeq ($(CC_MACH),powerpc64le) ZT_ARCHITECTURE=8 diff --git a/node/AES.cpp b/node/AES.cpp index 87ca39c83..00402146f 100644 --- a/node/AES.cpp +++ b/node/AES.cpp @@ -146,6 +146,7 @@ void s_gfmul(const uint64_t hh, const uint64_t hl, uint64_t &y0, uint64_t &y1) n // SSE shuffle parameter to reverse bytes in a 128-bit vector. 
static const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2"))) static __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept { y = _mm_shuffle_epi8(y, s_sseSwapBytes); @@ -169,6 +170,7 @@ static __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept #endif +__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2"))) void AES::GMAC::update(const void *const data, unsigned int len) noexcept { const uint8_t *in = reinterpret_cast<const uint8_t *>(data); @@ -322,6 +324,7 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept _rp = len; // len is always less than 16 here } +__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2"))) void AES::GMAC::finish(uint8_t tag[16]) noexcept { #ifdef ZT_AES_AESNI @@ -593,6 +596,7 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co #endif // ZT_AES_AESNI +__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2"))) void AES::CTR::crypt(const void *const input, unsigned int len) noexcept { const uint8_t *in = reinterpret_cast<const uint8_t *>(input); @@ -1473,6 +1477,7 @@ static __m128i _init256_2_aesni(__m128i a, __m128i b) noexcept return x; } +__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2"))) void AES::_init_aesni(const uint8_t key[32]) noexcept { __m128i t1, t2, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13;