Skip to content
Permalink
Browse files

ICP: Improve AES-GCM performance

Currently SIMD accelerated AES-GCM performance is limited by two
factors:

a. The need to disable preemption and interrupts and save the FPU
state before using it and to do the reverse when done. Due to the
way the code is organized (see (b) below) we have to pay this price
twice for each 16 byte GCM block processed.

b. Most processing is done in C, operating on single GCM blocks.
The use of SIMD instructions is limited to the AES encryption of the
counter block (AES-NI) and the Galois multiplication (PCLMULQDQ).
This leads to the FPU not being fully utilized for crypto
operations.

To solve (a) we do crypto processing in larger chunks while owning
the FPU. An `icp_gcm_avx_chunk_size` module parameter was introduced
to make this chunk size tweakable. It defaults to 32 KiB. This step
alone roughly doubles performance. (b) is tackled by porting and
using the highly optimized openssl AES-GCM assembler routines, which
do all the processing (CTR, AES, GMULT) in a single routine. Both
steps together result in up to 32x reduction of the time spend in
the en/decryption routines, leading up to approximately 12x
throughput increase for large (128 KiB) blocks.

Signed-off-by: Attila Fülöp <attila@fueloep.org>
  • Loading branch information
AttilaFueloep committed Sep 23, 2019
1 parent 82e996c commit ed074a072a2c7dbe38aaaa91a261aacd28e8344d
@@ -401,3 +401,23 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ], [
AC_MSG_RESULT([no])
])
])

dnl #
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
dnl #
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
AC_MSG_CHECKING([whether host toolchain supports MOVBE])
AC_LINK_IFELSE([AC_LANG_SOURCE([
[
void main()
{
__asm__ __volatile__("movbe 0(%eax), %eax");
}
]])], [
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_MOVBE], 1, [Define if host toolchain supports MOVBE])
], [
AC_MSG_RESULT([no])
])
])
@@ -477,6 +477,19 @@ zfs_pclmulqdq_available(void)
#endif
}

/*
* Check if MOVBE instruction is available
*/
static inline boolean_t
zfs_movbe_available(void)
{
#if defined(X86_FEATURE_MOVBE)
return (!!boot_cpu_has(X86_FEATURE_MOVBE));
#else
return (B_FALSE);
#endif
}

/*
* AVX-512 family of instruction sets:
*
@@ -15,6 +15,8 @@ ASM_SOURCES_AS = \
asm-x86_64/aes/aes_amd64.S \
asm-x86_64/aes/aes_aesni.S \
asm-x86_64/modes/gcm_pclmulqdq.S \
asm-x86_64/modes/aesni-gcm-x86_64.S \
asm-x86_64/modes/ghash-x86_64.S \
asm-x86_64/sha1/sha1-x86_64.S \
asm-x86_64/sha2/sha256_impl.S \
asm-x86_64/sha2/sha512_impl.S
@@ -77,7 +77,8 @@ typedef enum cpuid_inst_sets {
AVX512ER,
AVX512VL,
AES,
PCLMULQDQ
PCLMULQDQ,
MOVBE
} cpuid_inst_sets_t;

/*
@@ -101,6 +102,7 @@ typedef struct cpuid_feature_desc {
#define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
#define _AES_BIT (1U << 25)
#define _PCLMULQDQ_BIT (1U << 1)
#define _MOVBE_BIT (1U << 22)

/*
* Descriptions of supported instruction sets
@@ -128,6 +130,7 @@ static const cpuid_feature_desc_t cpuid_features[] = {
[AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
[AES] = {1U, 0U, _AES_BIT, ECX },
[PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
[MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
};

/*
@@ -200,6 +203,7 @@ CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);

/*
* Detect register set support
@@ -332,6 +336,15 @@ zfs_pclmulqdq_available(void)
return (__cpuid_has_pclmulqdq());
}

/*
* Check if MOVBE instruction is available
*/
static inline boolean_t
zfs_movbe_available(void)
{
return (__cpuid_has_movbe());
}

/*
* AVX-512 family of instruction sets:
*
@@ -10,6 +10,8 @@ ASM_SOURCES := asm-x86_64/aes/aeskey.o
ASM_SOURCES += asm-x86_64/aes/aes_amd64.o
ASM_SOURCES += asm-x86_64/aes/aes_aesni.o
ASM_SOURCES += asm-x86_64/modes/gcm_pclmulqdq.o
ASM_SOURCES += asm-x86_64/modes/aesni-gcm-x86_64.o
ASM_SOURCES += asm-x86_64/modes/ghash-x86_64.o
ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o
ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o
ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o

0 comments on commit ed074a0

Please sign in to comment.
You can’t perform that action at this time.