This repository has been archived by the owner on Mar 3, 2021. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BACKPORT: crypto: arm64/aes - add scalar implementation
This adds a scalar implementation of AES, based on the precomputed tables that are exposed by the generic AES code. Since rotates are cheap on arm64, this implementation only uses the 4 core tables (of 1 KB each), and avoids the prerotated ones, reducing the D-cache footprint by 75%. On Cortex-A57, this code manages 13.0 cycles per byte, which is ~34% faster than the generic C code. (Note that this is still >13x slower than the code that uses the optional ARMv8 Crypto Extensions, which manages <1 cycles per byte.) Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> (cherry picked from commit bed593c0e852f5c1efd3ca4e984fd744c51cf6ee) Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
- Loading branch information
Showing
4 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* | ||
* Scalar AES core transform | ||
* | ||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License version 2 as | ||
* published by the Free Software Foundation. | ||
*/ | ||
|
||
#include <linux/linkage.h> | ||
#include <asm/assembler.h> | ||
|
||
.text | ||
|
||
rk .req x0 | ||
out .req x1 | ||
in .req x2 | ||
rounds .req x3 | ||
tt .req x4 | ||
lt .req x2 | ||
|
||
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc | ||
ldp \out0, \out1, [rk], #8 | ||
|
||
ubfx w13, \in0, #0, #8 | ||
ubfx w14, \in1, #8, #8 | ||
ldr w13, [tt, w13, uxtw #2] | ||
ldr w14, [tt, w14, uxtw #2] | ||
|
||
.if \enc | ||
ubfx w17, \in1, #0, #8 | ||
ubfx w18, \in2, #8, #8 | ||
.else | ||
ubfx w17, \in3, #0, #8 | ||
ubfx w18, \in0, #8, #8 | ||
.endif | ||
ldr w17, [tt, w17, uxtw #2] | ||
ldr w18, [tt, w18, uxtw #2] | ||
|
||
ubfx w15, \in2, #16, #8 | ||
ubfx w16, \in3, #24, #8 | ||
ldr w15, [tt, w15, uxtw #2] | ||
ldr w16, [tt, w16, uxtw #2] | ||
|
||
.if \enc | ||
ubfx \t0, \in3, #16, #8 | ||
ubfx \t1, \in0, #24, #8 | ||
.else | ||
ubfx \t0, \in1, #16, #8 | ||
ubfx \t1, \in2, #24, #8 | ||
.endif | ||
ldr \t0, [tt, \t0, uxtw #2] | ||
ldr \t1, [tt, \t1, uxtw #2] | ||
|
||
eor \out0, \out0, w13 | ||
eor \out1, \out1, w17 | ||
eor \out0, \out0, w14, ror #24 | ||
eor \out1, \out1, w18, ror #24 | ||
eor \out0, \out0, w15, ror #16 | ||
eor \out1, \out1, \t0, ror #16 | ||
eor \out0, \out0, w16, ror #8 | ||
eor \out1, \out1, \t1, ror #8 | ||
.endm | ||
|
||
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3 | ||
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1 | ||
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1 | ||
.endm | ||
|
||
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3 | ||
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0 | ||
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0 | ||
.endm | ||
|
||
.macro do_crypt, round, ttab, ltab | ||
ldp w5, w6, [in] | ||
ldp w7, w8, [in, #8] | ||
ldp w9, w10, [rk], #16 | ||
ldp w11, w12, [rk, #-8] | ||
|
||
CPU_BE( rev w5, w5 ) | ||
CPU_BE( rev w6, w6 ) | ||
CPU_BE( rev w7, w7 ) | ||
CPU_BE( rev w8, w8 ) | ||
|
||
eor w5, w5, w9 | ||
eor w6, w6, w10 | ||
eor w7, w7, w11 | ||
eor w8, w8, w12 | ||
|
||
ldr tt, =\ttab | ||
ldr lt, =\ltab | ||
|
||
tbnz rounds, #1, 1f | ||
|
||
0: \round w9, w10, w11, w12, w5, w6, w7, w8 | ||
\round w5, w6, w7, w8, w9, w10, w11, w12 | ||
|
||
1: subs rounds, rounds, #4 | ||
\round w9, w10, w11, w12, w5, w6, w7, w8 | ||
csel tt, tt, lt, hi | ||
\round w5, w6, w7, w8, w9, w10, w11, w12 | ||
b.hi 0b | ||
|
||
CPU_BE( rev w5, w5 ) | ||
CPU_BE( rev w6, w6 ) | ||
CPU_BE( rev w7, w7 ) | ||
CPU_BE( rev w8, w8 ) | ||
|
||
stp w5, w6, [out] | ||
stp w7, w8, [out, #8] | ||
ret | ||
|
||
.align 4 | ||
.ltorg | ||
.endm | ||
|
||
.align 5 | ||
ENTRY(__aes_arm64_encrypt) | ||
do_crypt fround, crypto_ft_tab, crypto_fl_tab | ||
ENDPROC(__aes_arm64_encrypt) | ||
|
||
.align 5 | ||
ENTRY(__aes_arm64_decrypt) | ||
do_crypt iround, crypto_it_tab, crypto_il_tab | ||
ENDPROC(__aes_arm64_decrypt) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Scalar AES core transform | ||
* | ||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License version 2 as | ||
* published by the Free Software Foundation. | ||
*/ | ||
|
||
#include <crypto/aes.h> | ||
#include <linux/crypto.h> | ||
#include <linux/module.h> | ||
|
||
asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); | ||
EXPORT_SYMBOL(__aes_arm64_encrypt); | ||
|
||
asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); | ||
EXPORT_SYMBOL(__aes_arm64_decrypt); | ||
|
||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
{ | ||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
int rounds = 6 + ctx->key_length / 4; | ||
|
||
__aes_arm64_encrypt(ctx->key_enc, out, in, rounds); | ||
} | ||
|
||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
{ | ||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
int rounds = 6 + ctx->key_length / 4; | ||
|
||
__aes_arm64_decrypt(ctx->key_dec, out, in, rounds); | ||
} | ||
|
||
static struct crypto_alg aes_alg = { | ||
.cra_name = "aes", | ||
.cra_driver_name = "aes-arm64", | ||
.cra_priority = 200, | ||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
.cra_blocksize = AES_BLOCK_SIZE, | ||
.cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
.cra_module = THIS_MODULE, | ||
|
||
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, | ||
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, | ||
.cra_cipher.cia_setkey = crypto_aes_set_key, | ||
.cra_cipher.cia_encrypt = aes_encrypt, | ||
.cra_cipher.cia_decrypt = aes_decrypt | ||
}; | ||
|
||
static int __init aes_init(void) | ||
{ | ||
return crypto_register_alg(&aes_alg); | ||
} | ||
|
||
static void __exit aes_fini(void) | ||
{ | ||
crypto_unregister_alg(&aes_alg); | ||
} | ||
|
||
module_init(aes_init); | ||
module_exit(aes_fini); | ||
|
||
MODULE_DESCRIPTION("Scalar AES cipher for arm64"); | ||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
MODULE_LICENSE("GPL v2"); | ||
MODULE_ALIAS_CRYPTO("aes"); |