diff --git a/wolfcrypt/src/aes_gcm_asm.S b/wolfcrypt/src/aes_gcm_asm.S index 053f8769539..e75f2c9b942 100644 --- a/wolfcrypt/src/aes_gcm_asm.S +++ b/wolfcrypt/src/aes_gcm_asm.S @@ -59,7 +59,7 @@ .p2align 4 #endif /* __APPLE__ */ L_GCM_generate_m0_aesni_rev8: -.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +.quad 0x08090a0b0c0d0e0f,0x0001020304050607 #ifndef __APPLE__ .data #else @@ -71,7 +71,7 @@ L_GCM_generate_m0_aesni_rev8: .p2align 4 #endif /* __APPLE__ */ L_GCM_generate_m0_aesni_mod2_128: -.quad 0x0, 0xe100000000000000 +.quad 0x0000000000000000,0xe100000000000000 #ifndef __APPLE__ .text .globl GCM_generate_m0_aesni @@ -325,7 +325,7 @@ _GCM_generate_m0_aesni: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_one: -.quad 0x0, 0x1 +.quad 0x0000000000000000,0x0000000000000001 #ifndef __APPLE__ .data #else @@ -337,7 +337,7 @@ L_aes_gcm_one: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_two: -.quad 0x0, 0x2 +.quad 0x0000000000000000,0x0000000000000002 #ifndef __APPLE__ .data #else @@ -349,7 +349,7 @@ L_aes_gcm_two: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_three: -.quad 0x0, 0x3 +.quad 0x0000000000000000,0x0000000000000003 #ifndef __APPLE__ .data #else @@ -361,7 +361,7 @@ L_aes_gcm_three: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_four: -.quad 0x0, 0x4 +.quad 0x0000000000000000,0x0000000000000004 #ifndef __APPLE__ .data #else @@ -373,7 +373,7 @@ L_aes_gcm_four: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_five: -.quad 0x0, 0x5 +.quad 0x0000000000000000,0x0000000000000005 #ifndef __APPLE__ .data #else @@ -385,7 +385,7 @@ L_aes_gcm_five: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_six: -.quad 0x0, 0x6 +.quad 0x0000000000000000,0x0000000000000006 #ifndef __APPLE__ .data #else @@ -397,7 +397,7 @@ L_aes_gcm_six: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_seven: -.quad 0x0, 0x7 +.quad 0x0000000000000000,0x0000000000000007 #ifndef __APPLE__ .data #else @@ -409,7 +409,7 @@ L_aes_gcm_seven: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_eight: -.quad 0x0, 0x8 +.quad 0x0000000000000000,0x0000000000000008 #ifndef __APPLE__ .data #else @@ -421,7 +421,7 @@ L_aes_gcm_eight: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_bswap_epi64: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .data #else @@ -433,7 +433,7 @@ L_aes_gcm_bswap_epi64: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_bswap_mask: -.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +.quad 0x08090a0b0c0d0e0f,0x0001020304050607 #ifndef __APPLE__ .data #else @@ -445,7 +445,7 @@ L_aes_gcm_bswap_mask: .p2align 4 #endif /* __APPLE__ */ L_aes_gcm_mod2_128: -.quad 0x1, 0xc200000000000000 +.quad 0x0000000000000001,0xc200000000000000 #ifndef __APPLE__ .text .globl AES_GCM_encrypt_aesni @@ -6490,7 +6490,7 @@ L_AES_GCM_decrypt_final_aesni_cmp_tag_done: .p2align 4 #endif /* __APPLE__ */ L_GCM_generate_m0_avx1_rev8: -.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +.quad 0x08090a0b0c0d0e0f,0x0001020304050607 #ifndef __APPLE__ .data #else @@ -6502,7 +6502,7 @@ L_GCM_generate_m0_avx1_rev8: .p2align 4 #endif /* __APPLE__ */ L_GCM_generate_m0_avx1_mod2_128: -.quad 0x0, 0xe100000000000000 +.quad 0x0000000000000000,0xe100000000000000 #ifndef __APPLE__ .text .globl GCM_generate_m0_avx1 @@ -6722,7 +6722,7 @@ _GCM_generate_m0_avx1: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_one: -.quad 0x0, 0x1 +.quad 0x0000000000000000,0x0000000000000001 #ifndef __APPLE__ .data #else @@ -6734,7 +6734,7 @@ L_avx1_aes_gcm_one: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_two: -.quad 0x0, 0x2 +.quad 0x0000000000000000,0x0000000000000002 #ifndef __APPLE__ .data #else @@ -6746,7 +6746,7 @@ L_avx1_aes_gcm_two: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_three: -.quad 0x0, 0x3 +.quad 0x0000000000000000,0x0000000000000003 #ifndef __APPLE__ .data #else @@ -6758,7 +6758,7 @@ L_avx1_aes_gcm_three: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_four: -.quad 0x0, 0x4 +.quad 0x0000000000000000,0x0000000000000004 #ifndef __APPLE__ .data #else @@ -6770,7 +6770,7 @@ L_avx1_aes_gcm_four: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_five: -.quad 0x0, 0x5 +.quad 0x0000000000000000,0x0000000000000005 #ifndef __APPLE__ .data #else @@ -6782,7 +6782,7 @@ L_avx1_aes_gcm_five: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_six: -.quad 0x0, 0x6 +.quad 0x0000000000000000,0x0000000000000006 #ifndef __APPLE__ .data #else @@ -6794,7 +6794,7 @@ L_avx1_aes_gcm_six: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_seven: -.quad 0x0, 0x7 +.quad 0x0000000000000000,0x0000000000000007 #ifndef __APPLE__ .data #else @@ -6806,7 +6806,7 @@ L_avx1_aes_gcm_seven: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_eight: -.quad 0x0, 0x8 +.quad 0x0000000000000000,0x0000000000000008 #ifndef __APPLE__ .data #else @@ -6818,7 +6818,7 @@ L_avx1_aes_gcm_eight: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_bswap_epi64: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .data #else @@ -6830,7 +6830,7 @@ L_avx1_aes_gcm_bswap_epi64: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_bswap_mask: -.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +.quad 0x08090a0b0c0d0e0f,0x0001020304050607 #ifndef __APPLE__ .data #else @@ -6842,7 +6842,7 @@ L_avx1_aes_gcm_bswap_mask: .p2align 4 #endif /* __APPLE__ */ L_avx1_aes_gcm_mod2_128: -.quad 0x1, 0xc200000000000000 +.quad 0x0000000000000001,0xc200000000000000 #ifndef __APPLE__ .text .globl AES_GCM_encrypt_avx1 @@ -11953,7 +11953,7 @@ L_AES_GCM_decrypt_final_avx1_cmp_tag_done: .p2align 4 #endif /* __APPLE__ */ L_GCM_generate_m0_avx2_rev8: -.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +.quad 0x08090a0b0c0d0e0f,0x0001020304050607 #ifndef __APPLE__ .data #else @@ -11965,7 +11965,7 @@ L_GCM_generate_m0_avx2_rev8: .p2align 4 #endif /* __APPLE__ */ L_GCM_generate_m0_avx2_mod2_128: -.quad 0x0, 0xe100000000000000 +.quad 0x0000000000000000,0xe100000000000000 #ifndef __APPLE__ .text .globl GCM_generate_m0_avx2 @@ -12185,7 +12185,7 @@ _GCM_generate_m0_avx2: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_one: -.quad 0x0, 0x1 +.quad 0x0000000000000000,0x0000000000000001 #ifndef __APPLE__ .data #else @@ -12197,7 +12197,7 @@ L_avx2_aes_gcm_one: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_two: -.quad 0x0, 0x2 +.quad 0x0000000000000000,0x0000000000000002 #ifndef __APPLE__ .data #else @@ -12209,7 +12209,7 @@ L_avx2_aes_gcm_two: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_three: -.quad 0x0, 0x3 +.quad 0x0000000000000000,0x0000000000000003 #ifndef __APPLE__ .data #else @@ -12221,7 +12221,7 @@ L_avx2_aes_gcm_three: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_four: -.quad 0x0, 0x4 +.quad 0x0000000000000000,0x0000000000000004 #ifndef __APPLE__ .data #else @@ -12233,7 +12233,7 @@ L_avx2_aes_gcm_four: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_five: -.quad 0x0, 0x5 +.quad 0x0000000000000000,0x0000000000000005 #ifndef __APPLE__ .data #else @@ -12245,7 +12245,7 @@ L_avx2_aes_gcm_five: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_six: -.quad 0x0, 0x6 +.quad 0x0000000000000000,0x0000000000000006 #ifndef __APPLE__ .data #else @@ -12257,7 +12257,7 @@ L_avx2_aes_gcm_six: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_seven: -.quad 0x0, 0x7 +.quad 0x0000000000000000,0x0000000000000007 #ifndef __APPLE__ .data #else @@ -12269,7 +12269,7 @@ L_avx2_aes_gcm_seven: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_eight: -.quad 0x0, 0x8 +.quad 0x0000000000000000,0x0000000000000008 #ifndef __APPLE__ .data #else @@ -12281,7 +12281,7 @@ L_avx2_aes_gcm_eight: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_one: -.quad 0x0, 0x100000000000000 +.quad 0x0000000000000000,0x0100000000000000 #ifndef __APPLE__ .data #else @@ -12293,7 +12293,7 @@ L_avx2_aes_gcm_bswap_one: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_epi64: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .data #else @@ -12305,7 +12305,7 @@ L_avx2_aes_gcm_bswap_epi64: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_mask: -.quad 0x8090a0b0c0d0e0f, 0x1020304050607 +.quad 0x08090a0b0c0d0e0f,0x0001020304050607 #ifndef __APPLE__ .data #else @@ -12317,7 +12317,7 @@ L_avx2_aes_gcm_bswap_mask: .p2align 4 #endif /* __APPLE__ */ L_avx2_aes_gcm_mod2_128: -.quad 0x1, 0xc200000000000000 +.quad 0x0000000000000001,0xc200000000000000 #ifndef __APPLE__ .text .globl AES_GCM_encrypt_avx2 diff --git a/wolfcrypt/src/aes_gcm_asm.asm b/wolfcrypt/src/aes_gcm_asm.asm index 61eb671bb2e..d222bc14478 100644 --- a/wolfcrypt/src/aes_gcm_asm.asm +++ b/wolfcrypt/src/aes_gcm_asm.asm @@ -18,6 +18,7 @@ ; * along with this program; if not, write to the Free Software ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA ; */ + IF @Version LT 1200 ; AVX2 instructions not recognized by old versions of MASM IFNDEF NO_AVX2_SUPPORT @@ -42,15 +43,17 @@ ENDIF _DATA SEGMENT ALIGN 16 -L_GCM_generate_m0_aesni_rev8 QWORD 579005069656919567, 283686952306183 +L_GCM_generate_m0_aesni_rev8 QWORD \ + 08090a0b0c0d0e0fh, 0001020304050607h ptr_L_GCM_generate_m0_aesni_rev8 QWORD L_GCM_generate_m0_aesni_rev8 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_GCM_generate_m0_aesni_mod2_128 QWORD 0, 16212958658533785600 +L_GCM_generate_m0_aesni_mod2_128 QWORD \ + 0000000000000000h, 0e100000000000000h ptr_L_GCM_generate_m0_aesni_mod2_128 QWORD L_GCM_generate_m0_aesni_mod2_128 _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA GCM_generate_m0_aesni PROC sub rsp, 80 movdqu OWORD PTR [rsp], xmm6 @@ -292,63 +295,74 @@ GCM_generate_m0_aesni PROC add rsp, 80 ret GCM_generate_m0_aesni ENDP -_text ENDS +_TEXT ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_one QWORD 0, 1 +L_aes_gcm_one QWORD \ + 0000000000000000h, 0000000000000001h ptr_L_aes_gcm_one QWORD L_aes_gcm_one _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_two QWORD 0, 2 +L_aes_gcm_two QWORD \ + 0000000000000000h, 0000000000000002h ptr_L_aes_gcm_two QWORD L_aes_gcm_two _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_three QWORD 0, 3 +L_aes_gcm_three QWORD \ + 0000000000000000h, 0000000000000003h ptr_L_aes_gcm_three QWORD L_aes_gcm_three _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_four QWORD 0, 4 +L_aes_gcm_four QWORD \ + 0000000000000000h, 0000000000000004h ptr_L_aes_gcm_four QWORD L_aes_gcm_four _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_five QWORD 0, 5 +L_aes_gcm_five QWORD \ + 0000000000000000h, 0000000000000005h ptr_L_aes_gcm_five QWORD L_aes_gcm_five _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_six QWORD 0, 6 +L_aes_gcm_six QWORD \ + 0000000000000000h, 0000000000000006h ptr_L_aes_gcm_six QWORD L_aes_gcm_six _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_seven QWORD 0, 7 +L_aes_gcm_seven QWORD \ + 0000000000000000h, 0000000000000007h ptr_L_aes_gcm_seven QWORD L_aes_gcm_seven _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_eight QWORD 0, 8 +L_aes_gcm_eight QWORD \ + 0000000000000000h, 0000000000000008h ptr_L_aes_gcm_eight QWORD L_aes_gcm_eight _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567 +L_aes_gcm_bswap_epi64 QWORD \ + 0001020304050607h, 08090a0b0c0d0e0fh ptr_L_aes_gcm_bswap_epi64 QWORD L_aes_gcm_bswap_epi64 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183 +L_aes_gcm_bswap_mask QWORD \ + 08090a0b0c0d0e0fh, 0001020304050607h ptr_L_aes_gcm_bswap_mask QWORD L_aes_gcm_bswap_mask _DATA ENDS _DATA SEGMENT ALIGN 16 -L_aes_gcm_mod2_128 QWORD 1, 13979173243358019584 +L_aes_gcm_mod2_128 QWORD \ + 0000000000000001h, 0c200000000000000h ptr_L_aes_gcm_mod2_128 QWORD L_aes_gcm_mod2_128 _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_aesni PROC push r13 push rdi @@ -2218,8 +2232,8 @@ L_AES_GCM_encrypt_aesni_store_tag_done: pop r13 ret AES_GCM_encrypt_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_aesni PROC push r13 push rdi @@ -3641,8 +3655,8 @@ L_AES_GCM_decrypt_aesni_cmp_tag_done: pop r13 ret AES_GCM_decrypt_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_init_aesni PROC push rdi push rsi @@ -3999,8 +4013,8 @@ L_AES_GCM_init_aesni_iv_done: pop rdi ret AES_GCM_init_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_aad_update_aesni PROC mov rax, rcx sub rsp, 32 @@ -4076,8 +4090,8 @@ L_AES_GCM_aad_update_aesni_16_loop: add rsp, 32 ret AES_GCM_aad_update_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_block_aesni PROC mov r10, r8 mov r11, r9 @@ -4116,8 +4130,8 @@ L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last: pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask ret AES_GCM_encrypt_block_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_ghash_block_aesni PROC sub rsp, 32 movdqu OWORD PTR [rsp], xmm6 @@ -4187,8 +4201,8 @@ AES_GCM_ghash_block_aesni PROC add rsp, 32 ret AES_GCM_ghash_block_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_update_aesni PROC push r13 push r12 @@ -5426,8 +5440,8 @@ L_AES_GCM_encrypt_update_aesni_done_enc: pop r13 ret AES_GCM_encrypt_update_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_final_aesni PROC push r13 push r12 @@ -5538,8 +5552,8 @@ L_AES_GCM_encrypt_final_aesni_store_tag_done: pop r13 ret AES_GCM_encrypt_final_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_update_aesni PROC push r13 push r12 @@ -6321,8 +6335,8 @@ L_AES_GCM_decrypt_update_aesni_done_dec: pop r13 ret AES_GCM_decrypt_update_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_final_aesni PROC push r13 push r12 @@ -6454,19 +6468,21 @@ L_AES_GCM_decrypt_final_aesni_cmp_tag_done: pop r13 ret AES_GCM_decrypt_final_aesni ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX1 _DATA SEGMENT ALIGN 16 -L_GCM_generate_m0_avx1_rev8 QWORD 579005069656919567, 283686952306183 +L_GCM_generate_m0_avx1_rev8 QWORD \ + 08090a0b0c0d0e0fh, 0001020304050607h ptr_L_GCM_generate_m0_avx1_rev8 QWORD L_GCM_generate_m0_avx1_rev8 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_GCM_generate_m0_avx1_mod2_128 QWORD 0, 16212958658533785600 +L_GCM_generate_m0_avx1_mod2_128 QWORD \ + 0000000000000000h, 0e100000000000000h ptr_L_GCM_generate_m0_avx1_mod2_128 QWORD L_GCM_generate_m0_avx1_mod2_128 _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA GCM_generate_m0_avx1 PROC sub rsp, 80 vmovdqu OWORD PTR [rsp], xmm6 @@ -6674,63 +6690,74 @@ GCM_generate_m0_avx1 PROC add rsp, 80 ret GCM_generate_m0_avx1 ENDP -_text ENDS +_TEXT ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_one QWORD 0, 1 +L_avx1_aes_gcm_one QWORD \ + 0000000000000000h, 0000000000000001h ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_two QWORD 0, 2 +L_avx1_aes_gcm_two QWORD \ + 0000000000000000h, 0000000000000002h ptr_L_avx1_aes_gcm_two QWORD L_avx1_aes_gcm_two _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_three QWORD 0, 3 +L_avx1_aes_gcm_three QWORD \ + 0000000000000000h, 0000000000000003h ptr_L_avx1_aes_gcm_three QWORD L_avx1_aes_gcm_three _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_four QWORD 0, 4 +L_avx1_aes_gcm_four QWORD \ + 0000000000000000h, 0000000000000004h ptr_L_avx1_aes_gcm_four QWORD L_avx1_aes_gcm_four _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_five QWORD 0, 5 +L_avx1_aes_gcm_five QWORD \ + 0000000000000000h, 0000000000000005h ptr_L_avx1_aes_gcm_five QWORD L_avx1_aes_gcm_five _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_six QWORD 0, 6 +L_avx1_aes_gcm_six QWORD \ + 0000000000000000h, 0000000000000006h ptr_L_avx1_aes_gcm_six QWORD L_avx1_aes_gcm_six _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_seven QWORD 0, 7 +L_avx1_aes_gcm_seven QWORD \ + 0000000000000000h, 0000000000000007h ptr_L_avx1_aes_gcm_seven QWORD L_avx1_aes_gcm_seven _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_eight QWORD 0, 8 +L_avx1_aes_gcm_eight QWORD \ + 0000000000000000h, 0000000000000008h ptr_L_avx1_aes_gcm_eight QWORD L_avx1_aes_gcm_eight _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567 +L_avx1_aes_gcm_bswap_epi64 QWORD \ + 0001020304050607h, 08090a0b0c0d0e0fh ptr_L_avx1_aes_gcm_bswap_epi64 QWORD L_avx1_aes_gcm_bswap_epi64 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183 +L_avx1_aes_gcm_bswap_mask QWORD \ + 08090a0b0c0d0e0fh, 0001020304050607h ptr_L_avx1_aes_gcm_bswap_mask QWORD L_avx1_aes_gcm_bswap_mask _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_gcm_mod2_128 QWORD 1, 13979173243358019584 +L_avx1_aes_gcm_mod2_128 QWORD \ + 0000000000000001h, 0c200000000000000h ptr_L_avx1_aes_gcm_mod2_128 QWORD L_avx1_aes_gcm_mod2_128 _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_avx1 PROC push r13 push rdi @@ -8328,8 +8355,8 @@ L_AES_GCM_encrypt_avx1_store_tag_done: pop r13 ret AES_GCM_encrypt_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_avx1 PROC push r13 push rdi @@ -9521,8 +9548,8 @@ L_AES_GCM_decrypt_avx1_cmp_tag_done: pop r13 ret AES_GCM_decrypt_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_init_avx1 PROC push rdi push rsi @@ -9843,8 +9870,8 @@ L_AES_GCM_init_avx1_iv_done: pop rdi ret AES_GCM_init_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_aad_update_avx1 PROC mov rax, rcx sub rsp, 32 @@ -9909,8 +9936,8 @@ L_AES_GCM_aad_update_avx1_16_loop: add rsp, 32 ret AES_GCM_aad_update_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_block_avx1 PROC mov r10, r8 mov r11, r9 @@ -9949,8 +9976,8 @@ L_AES_GCM_encrypt_block_avx1_aesenc_block_last: vzeroupper ret AES_GCM_encrypt_block_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_ghash_block_avx1 PROC sub rsp, 32 vmovdqu OWORD PTR [rsp], xmm6 @@ -10010,8 +10037,8 @@ AES_GCM_ghash_block_avx1 PROC add rsp, 32 ret AES_GCM_ghash_block_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_update_avx1 PROC push r13 push r12 @@ -11052,8 +11079,8 @@ L_AES_GCM_encrypt_update_avx1_done_enc: pop r13 ret AES_GCM_encrypt_update_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_final_avx1 PROC push r13 push r12 @@ -11153,8 +11180,8 @@ L_AES_GCM_encrypt_final_avx1_store_tag_done: pop r13 ret AES_GCM_encrypt_final_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_update_avx1 PROC push r13 push r12 @@ -11779,8 +11806,8 @@ L_AES_GCM_decrypt_update_avx1_done_dec: pop r13 ret AES_GCM_decrypt_update_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_final_avx1 PROC push r13 push r12 @@ -11901,20 +11928,22 @@ L_AES_GCM_decrypt_final_avx1_cmp_tag_done: pop r13 ret AES_GCM_decrypt_final_avx1 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 _DATA SEGMENT ALIGN 16 -L_GCM_generate_m0_avx2_rev8 QWORD 579005069656919567, 283686952306183 +L_GCM_generate_m0_avx2_rev8 QWORD \ + 08090a0b0c0d0e0fh, 0001020304050607h ptr_L_GCM_generate_m0_avx2_rev8 QWORD L_GCM_generate_m0_avx2_rev8 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_GCM_generate_m0_avx2_mod2_128 QWORD 0, 16212958658533785600 +L_GCM_generate_m0_avx2_mod2_128 QWORD \ + 0000000000000000h, 0e100000000000000h ptr_L_GCM_generate_m0_avx2_mod2_128 QWORD L_GCM_generate_m0_avx2_mod2_128 _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA GCM_generate_m0_avx2 PROC sub rsp, 80 vmovdqu OWORD PTR [rsp], xmm6 @@ -12122,68 +12151,80 @@ GCM_generate_m0_avx2 PROC add rsp, 80 ret GCM_generate_m0_avx2 ENDP -_text ENDS +_TEXT ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_one QWORD 0, 1 +L_avx2_aes_gcm_one QWORD \ + 0000000000000000h, 0000000000000001h ptr_L_avx2_aes_gcm_one QWORD L_avx2_aes_gcm_one _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_two QWORD 0, 2 +L_avx2_aes_gcm_two QWORD \ + 0000000000000000h, 0000000000000002h ptr_L_avx2_aes_gcm_two QWORD L_avx2_aes_gcm_two _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_three QWORD 0, 3 +L_avx2_aes_gcm_three QWORD \ + 0000000000000000h, 0000000000000003h ptr_L_avx2_aes_gcm_three QWORD L_avx2_aes_gcm_three _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_four QWORD 0, 4 +L_avx2_aes_gcm_four QWORD \ + 0000000000000000h, 0000000000000004h ptr_L_avx2_aes_gcm_four QWORD L_avx2_aes_gcm_four _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_five QWORD 0, 5 +L_avx2_aes_gcm_five QWORD \ + 0000000000000000h, 0000000000000005h ptr_L_avx2_aes_gcm_five QWORD L_avx2_aes_gcm_five _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_six QWORD 0, 6 +L_avx2_aes_gcm_six QWORD \ + 0000000000000000h, 0000000000000006h ptr_L_avx2_aes_gcm_six QWORD L_avx2_aes_gcm_six _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_seven QWORD 0, 7 +L_avx2_aes_gcm_seven QWORD \ + 0000000000000000h, 0000000000000007h ptr_L_avx2_aes_gcm_seven QWORD L_avx2_aes_gcm_seven _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_eight QWORD 0, 8 +L_avx2_aes_gcm_eight QWORD \ + 0000000000000000h, 0000000000000008h ptr_L_avx2_aes_gcm_eight QWORD L_avx2_aes_gcm_eight _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_bswap_one QWORD 0, 72057594037927936 +L_avx2_aes_gcm_bswap_one QWORD \ + 0000000000000000h, 0100000000000000h ptr_L_avx2_aes_gcm_bswap_one QWORD L_avx2_aes_gcm_bswap_one _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567 +L_avx2_aes_gcm_bswap_epi64 QWORD \ + 0001020304050607h, 08090a0b0c0d0e0fh ptr_L_avx2_aes_gcm_bswap_epi64 QWORD L_avx2_aes_gcm_bswap_epi64 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183 +L_avx2_aes_gcm_bswap_mask QWORD \ + 08090a0b0c0d0e0fh, 0001020304050607h ptr_L_avx2_aes_gcm_bswap_mask QWORD L_avx2_aes_gcm_bswap_mask _DATA ENDS _DATA SEGMENT ALIGN 16 -L_avx2_aes_gcm_mod2_128 QWORD 1, 13979173243358019584 +L_avx2_aes_gcm_mod2_128 QWORD \ + 0000000000000001h, 0c200000000000000h ptr_L_avx2_aes_gcm_mod2_128 QWORD L_avx2_aes_gcm_mod2_128 _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_avx2 PROC push r13 push rdi @@ -13504,8 +13545,8 @@ L_AES_GCM_encrypt_avx2_store_tag_done: pop r13 ret AES_GCM_encrypt_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_avx2 PROC push r13 push rdi @@ -14489,8 +14530,8 @@ L_AES_GCM_decrypt_avx2_cmp_tag_done: pop r13 ret AES_GCM_decrypt_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_init_avx2 PROC push rbx push rdi @@ -14763,8 +14804,8 @@ L_AES_GCM_init_avx2_iv_done: pop rbx ret AES_GCM_init_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_aad_update_avx2 PROC mov rax, rcx sub rsp, 16 @@ -14815,8 +14856,8 @@ L_AES_GCM_aad_update_avx2_16_loop: add rsp, 16 ret AES_GCM_aad_update_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_block_avx2 PROC mov r10, r8 mov r11, r9 @@ -14870,8 +14911,8 @@ L_AES_GCM_encrypt_block_avx2_aesenc_block_last: add rsp, 152 ret AES_GCM_encrypt_block_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_ghash_block_avx2 PROC sub rsp, 16 vmovdqu OWORD PTR [rsp], xmm6 @@ -14916,8 +14957,8 @@ AES_GCM_ghash_block_avx2 PROC add rsp, 16 ret AES_GCM_ghash_block_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_update_avx2 PROC push r12 push r13 @@ -15791,8 +15832,8 @@ L_AES_GCM_encrypt_update_avx2_done_enc: pop r12 ret AES_GCM_encrypt_update_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_encrypt_final_avx2 PROC push r12 push r13 @@ -15862,8 +15903,8 @@ L_AES_GCM_encrypt_final_avx2_store_tag_done: pop r12 ret AES_GCM_encrypt_final_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_update_avx2 PROC push r13 push r12 @@ -16390,8 +16431,8 @@ L_AES_GCM_decrypt_update_avx2_done_dec: pop r13 ret AES_GCM_decrypt_update_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_GCM_decrypt_final_avx2 PROC push r12 push r13 @@ -16475,6 +16516,6 @@ L_AES_GCM_decrypt_final_avx2_cmp_tag_done: pop r12 ret AES_GCM_decrypt_final_avx2 ENDP -_text ENDS +_TEXT ENDS ENDIF END diff --git a/wolfcrypt/src/aes_gcm_x86_asm.S b/wolfcrypt/src/aes_gcm_x86_asm.S index 152624298d8..d24b350d56a 100644 --- a/wolfcrypt/src/aes_gcm_x86_asm.S +++ b/wolfcrypt/src/aes_gcm_x86_asm.S @@ -33,71 +33,93 @@ #endif /* NO_AVX2_SUPPORT */ .type data, @object +.align 16 L_aes_gcm_one: -.long 0x0,0x0,0x1,0x0 +.long 0x00000000,0x00000000,0x00000001,0x00000000 .type data, @object +.align 16 L_aes_gcm_two: -.long 0x0,0x0,0x2,0x0 +.long 0x00000000,0x00000000,0x00000002,0x00000000 .type data, @object +.align 16 L_aes_gcm_three: -.long 0x0,0x0,0x3,0x0 +.long 0x00000000,0x00000000,0x00000003,0x00000000 .type data, @object +.align 16 L_aes_gcm_four: -.long 0x0,0x0,0x4,0x0 +.long 0x00000000,0x00000000,0x00000004,0x00000000 .type data, @object +.align 16 L_aes_gcm_bswap_epi64: -.long 0x4050607,0x10203,0xc0d0e0f,0x8090a0b +.long 0x04050607,0x00010203,0x0c0d0e0f,0x08090a0b .type data, @object +.align 16 L_aes_gcm_bswap_mask: -.long 0xc0d0e0f,0x8090a0b,0x4050607,0x10203 +.long 0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203 .type data, @object +.align 16 L_aes_gcm_mod2_128: -.long 0x1,0x0,0x0,0xc2000000 +.long 0x00000001,0x00000000,0x00000000,0xc2000000 .type data, @object +.align 16 L_aes_gcm_avx1_one: -.long 0x0,0x0,0x1,0x0 +.long 0x00000000,0x00000000,0x00000001,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx1_two: -.long 0x0,0x0,0x2,0x0 +.long 0x00000000,0x00000000,0x00000002,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx1_three: -.long 0x0,0x0,0x3,0x0 +.long 0x00000000,0x00000000,0x00000003,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx1_four: -.long 0x0,0x0,0x4,0x0 +.long 0x00000000,0x00000000,0x00000004,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx1_bswap_epi64: -.long 0x4050607,0x10203,0xc0d0e0f,0x8090a0b +.long 0x04050607,0x00010203,0x0c0d0e0f,0x08090a0b .type data, @object +.align 16 L_aes_gcm_avx1_bswap_mask: -.long 0xc0d0e0f,0x8090a0b,0x4050607,0x10203 +.long 0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203 .type data, @object +.align 16 L_aes_gcm_avx1_mod2_128: -.long 0x1,0x0,0x0,0xc2000000 +.long 0x00000001,0x00000000,0x00000000,0xc2000000 .type data, @object +.align 16 L_aes_gcm_avx2_one: -.long 0x0,0x0,0x1,0x0 +.long 0x00000000,0x00000000,0x00000001,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx2_two: -.long 0x0,0x0,0x2,0x0 +.long 0x00000000,0x00000000,0x00000002,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx2_three: -.long 0x0,0x0,0x3,0x0 +.long 0x00000000,0x00000000,0x00000003,0x00000000 .type data, @object +.align 16 L_aes_gcm_avx2_four: -.long 0x0,0x0,0x4,0x0 +.long 0x00000000,0x00000000,0x00000004,0x00000000 .type data, @object +.align 16 L_avx2_aes_gcm_bswap_one: -.long 0x0,0x0,0x0,0x1000000 +.long 0x00000000,0x00000000,0x00000000,0x01000000 .type data, @object +.align 16 L_aes_gcm_avx2_bswap_epi64: -.long 0x4050607,0x10203,0xc0d0e0f,0x8090a0b +.long 0x04050607,0x00010203,0x0c0d0e0f,0x08090a0b .type data, @object +.align 16 L_aes_gcm_avx2_bswap_mask: -.long 0xc0d0e0f,0x8090a0b,0x4050607,0x10203 +.long 0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203 .type data, @object +.align 16 L_aes_gcm_avx2_mod2_128: -.long 0x1,0x0,0x0,0xc2000000 +.long 0x00000001,0x00000000,0x00000000,0xc2000000 .text .globl AES_GCM_encrypt_aesni .type AES_GCM_encrypt_aesni,@function diff --git a/wolfcrypt/src/aes_xts_asm.S b/wolfcrypt/src/aes_xts_asm.S index ee646203023..09045c6d8f7 100644 --- a/wolfcrypt/src/aes_xts_asm.S +++ b/wolfcrypt/src/aes_xts_asm.S @@ -107,6 +107,11 @@ L_AES_XTS_init_aesni_tweak_aes_enc_block_last: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_xts_gc_xts: .long 0x00000087,0x00000001,0x00000001,0x00000001 #ifndef __APPLE__ @@ -1490,6 +1495,11 @@ L_AES_XTS_init_avx1_tweak_aes_enc_block_last: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_xts_gc_xts: .long 0x00000087,0x00000001,0x00000001,0x00000001 #ifndef __APPLE__ diff --git a/wolfcrypt/src/aes_xts_asm.asm b/wolfcrypt/src/aes_xts_asm.asm index c28cb2c9ad2..b0e5cebf316 100644 --- a/wolfcrypt/src/aes_xts_asm.asm +++ b/wolfcrypt/src/aes_xts_asm.asm @@ -18,6 +18,7 @@ ; * along with this program; if not, write to the Free Software ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA ; */ + IF @Version LT 1200 ; AVX2 instructions not recognized by old versions of MASM IFNDEF NO_AVX2_SUPPORT @@ -40,7 +41,7 @@ IFNDEF _WIN64 _WIN64 = 1 ENDIF -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_XTS_init_aesni PROC movdqu xmm0, OWORD PTR [rcx] ; aes_enc_block @@ -81,13 +82,14 @@ L_AES_XTS_init_aesni_tweak_aes_enc_block_last: movdqu OWORD PTR [rcx], xmm0 ret AES_XTS_init_aesni ENDP -_text ENDS +_TEXT ENDS _DATA SEGMENT ALIGN 16 -L_aes_xts_gc_xts DWORD 135,1,1,1 +L_aes_xts_gc_xts DWORD \ + 00000087h, 00000001h, 00000001h, 00000001h ptr_L_aes_xts_gc_xts QWORD L_aes_xts_gc_xts _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_XTS_encrypt_aesni PROC push rdi push rsi @@ -419,8 +421,8 @@ L_AES_XTS_encrypt_aesni_done_enc: pop rdi ret AES_XTS_encrypt_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_XTS_encrypt_update_aesni PROC push rdi push rsi @@ -715,8 +717,8 @@ L_AES_XTS_encrypt_update_aesni_done_enc: pop rdi ret AES_XTS_encrypt_update_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_XTS_decrypt_aesni PROC push rdi push rsi @@ -1102,8 +1104,8 @@ L_AES_XTS_decrypt_aesni_done_dec: pop rdi ret AES_XTS_decrypt_aesni ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_XTS_decrypt_update_aesni PROC push rdi push rsi @@ -1452,9 +1454,9 @@ L_AES_XTS_decrypt_update_aesni_done_dec: pop rdi ret AES_XTS_decrypt_update_aesni ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX1 -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_XTS_init_avx1 PROC vmovdqu xmm0, OWORD PTR [rcx] ; aes_enc_block @@ -1495,13 +1497,14 @@ L_AES_XTS_init_avx1_tweak_aes_enc_block_last: vmovdqu OWORD PTR [rcx], xmm0 ret AES_XTS_init_avx1 ENDP -_text ENDS +_TEXT ENDS _DATA SEGMENT ALIGN 16 -L_avx1_aes_xts_gc_xts DWORD 135,1,1,1 +L_avx1_aes_xts_gc_xts DWORD \ + 00000087h, 00000001h, 00000001h, 00000001h ptr_L_avx1_aes_xts_gc_xts QWORD L_avx1_aes_xts_gc_xts _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA AES_XTS_encrypt_avx1 PROC push rdi push rsi @@ -1824,8 +1827,8 @@ L_AES_XTS_encrypt_avx1_done_enc: pop rdi ret AES_XTS_encrypt_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_XTS_encrypt_update_avx1 PROC push rdi push rsi @@ -2111,8 +2114,8 @@ L_AES_XTS_encrypt_update_avx1_done_enc: pop rdi ret AES_XTS_encrypt_update_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_XTS_decrypt_avx1 PROC push rdi push rsi @@ -2487,8 +2490,8 @@ L_AES_XTS_decrypt_avx1_done_dec: pop rdi ret AES_XTS_decrypt_avx1 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA AES_XTS_decrypt_update_avx1 PROC push rdi push rsi @@ -2826,6 +2829,6 @@ L_AES_XTS_decrypt_update_avx1_done_dec: pop rdi ret AES_XTS_decrypt_update_avx1 ENDP -_text ENDS +_TEXT ENDS ENDIF END diff --git a/wolfcrypt/src/chacha_asm.S b/wolfcrypt/src/chacha_asm.S index 2e5debb9b13..6109e22f603 100644 --- a/wolfcrypt/src/chacha_asm.S +++ b/wolfcrypt/src/chacha_asm.S @@ -489,7 +489,7 @@ L_chacha_x64_done: .p2align 4 #endif /* __APPLE__ */ L_chacha20_avx1_rotl8: -.quad 0x605040702010003, 0xe0d0c0f0a09080b +.quad 0x0605040702010003,0x0e0d0c0f0a09080b #ifndef __APPLE__ .data #else @@ -501,7 +501,7 @@ L_chacha20_avx1_rotl8: .p2align 4 #endif /* __APPLE__ */ L_chacha20_avx1_rotl16: -.quad 0x504070601000302, 0xd0c0f0e09080b0a +.quad 0x0504070601000302,0x0d0c0f0e09080b0a #ifndef __APPLE__ .data #else @@ -513,7 +513,7 @@ L_chacha20_avx1_rotl16: .p2align 4 #endif /* __APPLE__ */ L_chacha20_avx1_add: -.quad 0x100000000, 0x300000002 +.quad 0x0000000100000000,0x0000000300000002 #ifndef __APPLE__ .data #else @@ -525,7 +525,7 @@ L_chacha20_avx1_add: .p2align 4 #endif /* __APPLE__ */ L_chacha20_avx1_four: -.quad 0x400000004, 0x400000004 +.quad 0x0000000400000004,0x0000000400000004 #ifndef __APPLE__ .text .globl chacha_encrypt_avx1 @@ -1057,8 +1057,8 @@ L_chacha20_avx1_partial_done: .p2align 5 #endif /* __APPLE__ */ L_chacha20_avx2_rotl8: -.quad 0x605040702010003, 0xe0d0c0f0a09080b -.quad 0x605040702010003, 0xe0d0c0f0a09080b +.quad 0x0605040702010003,0x0e0d0c0f0a09080b +.quad 0x0605040702010003,0x0e0d0c0f0a09080b #ifndef __APPLE__ .data #else @@ -1070,8 +1070,8 @@ L_chacha20_avx2_rotl8: .p2align 5 #endif /* __APPLE__ */ L_chacha20_avx2_rotl16: -.quad 0x504070601000302, 0xd0c0f0e09080b0a -.quad 0x504070601000302, 0xd0c0f0e09080b0a +.quad 0x0504070601000302,0x0d0c0f0e09080b0a +.quad 0x0504070601000302,0x0d0c0f0e09080b0a #ifndef __APPLE__ .data #else @@ -1083,8 +1083,8 @@ L_chacha20_avx2_rotl16: .p2align 5 #endif /* __APPLE__ */ L_chacha20_avx2_add: -.quad 0x100000000, 0x300000002 -.quad 0x500000004, 0x700000006 +.quad 0x0000000100000000,0x0000000300000002 +.quad 0x0000000500000004,0x0000000700000006 #ifndef __APPLE__ .data #else @@ -1096,8 +1096,8 @@ L_chacha20_avx2_add: .p2align 5 #endif /* __APPLE__ */ L_chacha20_avx2_eight: -.quad 0x800000008, 0x800000008 -.quad 0x800000008, 0x800000008 +.quad 0x0000000800000008,0x0000000800000008 +.quad 0x0000000800000008,0x0000000800000008 #ifndef __APPLE__ .text .globl chacha_encrypt_avx2 diff --git a/wolfcrypt/src/chacha_asm.asm b/wolfcrypt/src/chacha_asm.asm index e663709e8d1..b9444254c90 100644 --- a/wolfcrypt/src/chacha_asm.asm +++ b/wolfcrypt/src/chacha_asm.asm @@ -18,6 +18,7 @@ ; * along with this program; if not, write to the Free Software ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA ; */ + IF @Version LT 1200 ; AVX2 instructions not recognized by old versions of MASM IFNDEF NO_AVX2_SUPPORT @@ -40,7 +41,7 @@ IFNDEF _WIN64 _WIN64 = 1 ENDIF -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA chacha_encrypt_x64 PROC push rbx push rbp @@ -457,29 +458,33 @@ L_chacha_x64_done: pop rbx ret chacha_encrypt_x64 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX1 _DATA SEGMENT ALIGN 16 -L_chacha20_avx1_rotl8 QWORD 433757367256023043, 1012478749960636427 +L_chacha20_avx1_rotl8 QWORD \ + 0605040702010003h, 0e0d0c0f0a09080bh ptr_L_chacha20_avx1_rotl8 QWORD L_chacha20_avx1_rotl8 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_chacha20_avx1_rotl16 QWORD 361421592464458498, 940142975169071882 +L_chacha20_avx1_rotl16 QWORD \ + 0504070601000302h, 0d0c0f0e09080b0ah ptr_L_chacha20_avx1_rotl16 QWORD L_chacha20_avx1_rotl16 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_chacha20_avx1_add QWORD 4294967296, 12884901890 +L_chacha20_avx1_add QWORD \ + 0000000100000000h, 0000000300000002h ptr_L_chacha20_avx1_add QWORD L_chacha20_avx1_add _DATA ENDS _DATA SEGMENT ALIGN 16 -L_chacha20_avx1_four QWORD 17179869188, 17179869188 +L_chacha20_avx1_four QWORD \ + 0000000400000004h, 0000000400000004h ptr_L_chacha20_avx1_four QWORD L_chacha20_avx1_four _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA chacha_encrypt_avx1 PROC push r12 push r13 @@ -1009,34 +1014,38 @@ L_chacha20_avx1_partial_done: pop r12 ret chacha_encrypt_avx1 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 _DATA SEGMENT ALIGN 16 -L_chacha20_avx2_rotl8 QWORD 433757367256023043, 1012478749960636427, - 433757367256023043, 1012478749960636427 +L_chacha20_avx2_rotl8 QWORD \ + 0605040702010003h, 0e0d0c0f0a09080bh, + 0605040702010003h, 0e0d0c0f0a09080bh ptr_L_chacha20_avx2_rotl8 QWORD L_chacha20_avx2_rotl8 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_chacha20_avx2_rotl16 QWORD 361421592464458498, 940142975169071882, - 361421592464458498, 940142975169071882 +L_chacha20_avx2_rotl16 QWORD \ + 0504070601000302h, 0d0c0f0e09080b0ah, + 0504070601000302h, 0d0c0f0e09080b0ah ptr_L_chacha20_avx2_rotl16 QWORD L_chacha20_avx2_rotl16 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_chacha20_avx2_add QWORD 4294967296, 12884901890, - 21474836484, 30064771078 +L_chacha20_avx2_add QWORD \ + 0000000100000000h, 0000000300000002h, + 0000000500000004h, 0000000700000006h ptr_L_chacha20_avx2_add QWORD L_chacha20_avx2_add _DATA ENDS _DATA SEGMENT ALIGN 16 -L_chacha20_avx2_eight QWORD 34359738376, 34359738376, - 34359738376, 34359738376 +L_chacha20_avx2_eight QWORD \ + 0000000800000008h, 0000000800000008h, + 0000000800000008h, 0000000800000008h ptr_L_chacha20_avx2_eight QWORD L_chacha20_avx2_eight _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA chacha_encrypt_avx2 PROC push r12 push r13 @@ -1420,6 +1429,6 @@ L_chacha20_avx2_end256: pop r12 ret chacha_encrypt_avx2 ENDP -_text ENDS +_TEXT ENDS ENDIF END diff --git a/wolfcrypt/src/fe_x25519_asm.S b/wolfcrypt/src/fe_x25519_asm.S index f4cdf343c0a..ed53856bca8 100644 --- a/wolfcrypt/src/fe_x25519_asm.S +++ b/wolfcrypt/src/fe_x25519_asm.S @@ -2343,8 +2343,8 @@ _fe_invert_x64: .p2align 5 #endif /* __APPLE__ */ L_curve25519_base_x64_x2: -.quad 0x5cae469cdd684efb, 0x8f3f5ced1e350b5c -.quad 0xd9750c687d157114, 0x20d342d51873f1b7 +.quad 0x5cae469cdd684efb,0x8f3f5ced1e350b5c +.quad 0xd9750c687d157114,0x20d342d51873f1b7 #ifndef __APPLE__ .text .globl curve25519_base_x64 @@ -8852,7 +8852,7 @@ _ge_p2_dbl_x64: shldq $0x01, %r11, %r12 shldq $0x01, %r10, %r11 shldq $0x01, %r9, %r10 - shlq $0x01, %r9 + shlq $1, %r9 movq $0x7fffffffffffffff, %r8 shrq $62, %rax andq %r8, %r12 @@ -11629,7 +11629,7 @@ _fe_sq2_x64: shldq $0x01, %r9, %r10 shldq $0x01, %r8, %r9 shldq $0x01, %rcx, %r8 - shlq $0x01, %rcx + shlq $1, %rcx movq $0x7fffffffffffffff, %r15 shrq $62, %rax andq %r15, %r10 @@ -12157,10 +12157,10 @@ _fe_invert_nct_x64: testb $0x01, %r11b jnz fe_invert_nct_v_even_end fe_invert_nct_v_even_start: - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 movb $0x01, (%rsp,%r15,1) incq %r15 testb $0x01, %r11b @@ -12185,17 +12185,17 @@ L_fe_invert_nct_uv_u: sbbq %r12, %r8 sbbq %r13, %r9 sbbq %r14, %r10 - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrq $0x01, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrq $1, %r10 testb $0x01, %cl jnz fe_invert_nct_usubv_even_end fe_invert_nct_usubv_even_start: - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrq $0x01, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrq $1, %r10 movb $0x00, (%rsp,%r15,1) incq %r15 testb $0x01, %cl @@ -12217,17 +12217,17 @@ L_fe_invert_nct_uv_v: sbbq %r8, %r12 sbbq %r9, %r13 sbbq %r10, %r14 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 testb $0x01, %r11b jnz fe_invert_nct_vsubu_even_end fe_invert_nct_vsubu_even_start: - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 movb $0x01, (%rsp,%r15,1) incq %r15 testb $0x01, %r11b @@ -12282,10 +12282,10 @@ L_fe_invert_nct_op_div2_b: movq $0x7fffffffffffffff, %rdx adcq %rdx, %r10 L_fe_invert_nct_op_div2_b_mod: - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrq $0x01, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrq $1, %r10 movb (%rsp,%r15,1), %dl incq %r15 cmpb $0x01, %dl @@ -12317,10 +12317,10 @@ L_fe_invert_nct_op_div2_d: movq $0x7fffffffffffffff, %rdx adcq %rdx, %r14 L_fe_invert_nct_op_div2_d_mod: - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 movb (%rsp,%r15,1), %dl incq %r15 cmpb $0x01, %dl @@ -13201,8 +13201,8 @@ _fe_invert_avx2: .p2align 5 #endif /* __APPLE__ */ L_curve25519_base_avx2_x2: -.quad 0x5cae469cdd684efb, 0x8f3f5ced1e350b5c -.quad 0xd9750c687d157114, 0x20d342d51873f1b7 +.quad 0x5cae469cdd684efb,0x8f3f5ced1e350b5c +.quad 0xd9750c687d157114,0x20d342d51873f1b7 #ifndef __APPLE__ .text .globl curve25519_base_avx2 @@ -18609,7 +18609,7 @@ _ge_p2_dbl_avx2: shldq $0x01, %r12, %r13 shldq $0x01, %r11, %r12 shldq $0x01, %r10, %r11 - shlq $0x01, %r10 + shlq $1, %r10 movq $0x7fffffffffffffff, %rcx shrq $62, %r9 andq %rcx, %r13 @@ -20979,7 +20979,7 @@ _fe_sq2_avx2: shldq $0x01, %r10, %r11 shldq $0x01, %r9, %r10 shldq $0x01, %r8, %r9 - shlq $0x01, %r8 + shlq $1, %r8 movq $0x7fffffffffffffff, %rcx shrq $62, %rax andq %rcx, %r11 @@ -21415,7 +21415,12 @@ _sc_muladd_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__prime: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_prime: .long 0x03ffffed,0x03ffffff,0x03ffffff,0x03ffffff .long 0x03ffffff,0x00000000,0x00000000,0x00000000 .long 0x03ffffff,0x03ffffff,0x03ffffff,0x03ffffff @@ -21430,15 +21435,20 @@ L_sp_mod_inv_avx2__prime: #else .p2align 5 #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__one: -.quad 0x1, 0x0 -.quad 0x0, 0x0 +L_fe_invert_nct_avx2_one: +.quad 0x0000000000000001,0x0000000000000000 +.quad 0x0000000000000000,0x0000000000000000 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__all_one: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_all_one: .long 0x00000001,0x00000001,0x00000001,0x00000001 .long 0x00000001,0x00000001,0x00000001,0x00000001 #ifndef __APPLE__ @@ -21446,7 +21456,12 @@ L_sp_mod_inv_avx2__all_one: #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__mask01111: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_mask01111: .long 0x00000000,0x00000001,0x00000001,0x00000001 .long 0x00000001,0x00000000,0x00000000,0x00000000 #ifndef __APPLE__ @@ -21454,7 +21469,12 @@ L_sp_mod_inv_avx2__mask01111: #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__down_one_dword: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_down_one_dword: .long 0x00000001,0x00000002,0x00000003,0x00000004 .long 0x00000005,0x00000006,0x00000007,0x00000007 #ifndef __APPLE__ @@ -21462,7 +21482,12 @@ L_sp_mod_inv_avx2__down_one_dword: #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__neg: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_neg: .long 0x00000000,0x00000000,0x00000000,0x00000000 .long 0x80000000,0x00000000,0x00000000,0x00000000 #ifndef __APPLE__ @@ -21470,7 +21495,12 @@ L_sp_mod_inv_avx2__neg: #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__up_one_dword: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_up_one_dword: .long 0x00000007,0x00000000,0x00000001,0x00000002 .long 0x00000003,0x00000007,0x00000007,0x00000007 #ifndef __APPLE__ @@ -21478,7 +21508,12 @@ L_sp_mod_inv_avx2__up_one_dword: #else .section __DATA,__data #endif /* __APPLE__ */ -L_sp_mod_inv_avx2__mask26: +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ +L_fe_invert_nct_avx2_mask26: .long 0x03ffffff,0x03ffffff,0x03ffffff,0x03ffffff .long 0x03ffffff,0x00000000,0x00000000,0x00000000 /* Non-constant time modular inversion. @@ -21513,39 +21548,39 @@ _fe_invert_nct_avx2: movq 8(%rsi), %r11 movq 16(%rsi), %r12 movq 24(%rsi), %r13 - leaq L_sp_mod_inv_avx2__prime(%rip), %rbx + leaq L_fe_invert_nct_avx2_prime(%rip), %rbx vmovupd (%rbx), %ymm6 vmovupd 32(%rbx), %ymm7 - leaq L_sp_mod_inv_avx2__one(%rip), %rbx + leaq L_fe_invert_nct_avx2_one(%rip), %rbx vmovupd (%rbx), %ymm8 - leaq L_sp_mod_inv_avx2__mask01111(%rip), %rbx + leaq L_fe_invert_nct_avx2_mask01111(%rip), %rbx vmovupd (%rbx), %ymm9 - leaq L_sp_mod_inv_avx2__all_one(%rip), %rbx + leaq L_fe_invert_nct_avx2_all_one(%rip), %rbx vmovupd (%rbx), %ymm10 - leaq L_sp_mod_inv_avx2__down_one_dword(%rip), %rbx + leaq L_fe_invert_nct_avx2_down_one_dword(%rip), %rbx vmovupd (%rbx), %ymm11 - leaq L_sp_mod_inv_avx2__neg(%rip), %rbx + leaq L_fe_invert_nct_avx2_neg(%rip), %rbx vmovupd (%rbx), %ymm12 - leaq L_sp_mod_inv_avx2__up_one_dword(%rip), %rbx + leaq L_fe_invert_nct_avx2_up_one_dword(%rip), %rbx vmovupd (%rbx), %ymm13 - leaq L_sp_mod_inv_avx2__mask26(%rip), %rbx + leaq L_fe_invert_nct_avx2_mask26(%rip), %rbx vmovupd (%rbx), %ymm14 vpxor %xmm0, %xmm0, %xmm0 vpxor %xmm1, %xmm1, %xmm1 vmovdqu %ymm8, %ymm2 vpxor %xmm3, %xmm3, %xmm3 testb $0x01, %r10b - jnz L__mod_inv_avx2__v_even_end -L__mod_inv_avx2__v_even_start: - shrdq $0x01, %r11, %r10 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrq $0x01, %r13 + jnz L_fe_invert_nct_avx2_v_even_end +L_fe_invert_nct_avx2_v_even_start: + shrdq $1, %r11, %r10 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrq $1, %r13 vptest %ymm8, %ymm2 - jz L__mod_inv_avx2__v_even_shr1 + jz L_fe_invert_nct_avx2_v_even_shr1 vpaddd %ymm6, %ymm2, %ymm2 vpaddd %ymm7, %ymm3, %ymm3 -L__mod_inv_avx2__v_even_shr1: +L_fe_invert_nct_avx2_v_even_shr1: vpand %ymm9, %ymm2, %ymm4 vpand %ymm10, %ymm3, %ymm5 vpermd %ymm4, %ymm11, %ymm4 @@ -21556,21 +21591,21 @@ L__mod_inv_avx2__v_even_shr1: vpaddd %ymm5, %ymm2, %ymm2 vpaddd %ymm4, %ymm3, %ymm3 testb $0x01, %r10b - jz L__mod_inv_avx2__v_even_start -L__mod_inv_avx2__v_even_end: -L__mod_inv_avx2__uv_start: + jz L_fe_invert_nct_avx2_v_even_start +L_fe_invert_nct_avx2_v_even_end: +L_fe_invert_nct_avx2_uv_start: cmpq %r13, %r9 - jb L__mod_inv_avx2__uv_v - ja L__mod_inv_avx2__uv_u + jb L_fe_invert_nct_avx2_uv_v + ja L_fe_invert_nct_avx2_uv_u cmpq %r12, %r8 - jb L__mod_inv_avx2__uv_v - ja L__mod_inv_avx2__uv_u + jb L_fe_invert_nct_avx2_uv_v + ja L_fe_invert_nct_avx2_uv_u cmpq %r11, %rcx - jb L__mod_inv_avx2__uv_v - ja L__mod_inv_avx2__uv_u + jb L_fe_invert_nct_avx2_uv_v + ja L_fe_invert_nct_avx2_uv_u cmpq %r10, %rax - jb L__mod_inv_avx2__uv_v -L__mod_inv_avx2__uv_u: + jb L_fe_invert_nct_avx2_uv_v +L_fe_invert_nct_avx2_uv_u: subq %r10, %rax sbbq %r11, %rcx vpsubd %ymm2, %ymm0, %ymm0 @@ -21578,20 +21613,20 @@ L__mod_inv_avx2__uv_u: vpsubd %ymm3, %ymm1, %ymm1 sbbq %r13, %r9 vptest %ymm12, %ymm1 - jz L__mod_inv_avx2__usubv_done_neg + jz L_fe_invert_nct_avx2_usubv_done_neg vpaddd %ymm6, %ymm0, %ymm0 vpaddd %ymm7, %ymm1, %ymm1 -L__mod_inv_avx2__usubv_done_neg: -L__mod_inv_avx2__usubv_shr1: - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrq $0x01, %r9 +L_fe_invert_nct_avx2_usubv_done_neg: +L_fe_invert_nct_avx2_usubv_shr1: + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrq $1, %r9 vptest %ymm8, %ymm0 - jz L__mod_inv_avx2__usubv_sub_shr1 + jz L_fe_invert_nct_avx2_usubv_sub_shr1 vpaddd %ymm6, %ymm0, %ymm0 vpaddd %ymm7, %ymm1, %ymm1 -L__mod_inv_avx2__usubv_sub_shr1: +L_fe_invert_nct_avx2_usubv_sub_shr1: vpand %ymm9, %ymm0, %ymm4 vpand %ymm10, %ymm1, %ymm5 vpermd %ymm4, %ymm11, %ymm4 @@ -21602,14 +21637,14 @@ L__mod_inv_avx2__usubv_sub_shr1: vpaddd %ymm5, %ymm0, %ymm0 vpaddd %ymm4, %ymm1, %ymm1 testb $0x01, %al - jz L__mod_inv_avx2__usubv_shr1 + jz L_fe_invert_nct_avx2_usubv_shr1 cmpq $0x01, %rax - jne L__mod_inv_avx2__uv_start + jne L_fe_invert_nct_avx2_uv_start movq %rcx, %rdx orq %r8, %rdx - jne L__mod_inv_avx2__uv_start + jne L_fe_invert_nct_avx2_uv_start orq %r9, %rdx - jne L__mod_inv_avx2__uv_start + jne L_fe_invert_nct_avx2_uv_start vpextrd $0x00, %xmm0, %eax vpextrd $0x01, %xmm0, %r8d vpextrd $2, %xmm0, %r10d @@ -21622,8 +21657,8 @@ L__mod_inv_avx2__usubv_sub_shr1: vextracti128 $0x01, %ymm1, %xmm1 vpextrd $0x00, %xmm0, %r14d vpextrd $0x00, %xmm1, %r15d - jmp L__mod_inv_avx2__store_done -L__mod_inv_avx2__uv_v: + jmp L_fe_invert_nct_avx2_store_done +L_fe_invert_nct_avx2_uv_v: subq %rax, %r10 sbbq %rcx, %r11 vpsubd %ymm0, %ymm2, %ymm2 @@ -21631,20 +21666,20 @@ L__mod_inv_avx2__uv_v: vpsubd %ymm1, %ymm3, %ymm3 sbbq %r9, %r13 vptest %ymm12, %ymm3 - jz L__mod_inv_avx2__vsubu_done_neg + jz L_fe_invert_nct_avx2_vsubu_done_neg vpaddd %ymm6, %ymm2, %ymm2 vpaddd %ymm7, %ymm3, %ymm3 -L__mod_inv_avx2__vsubu_done_neg: -L__mod_inv_avx2__vsubu_shr1: - shrdq $0x01, %r11, %r10 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrq $0x01, %r13 +L_fe_invert_nct_avx2_vsubu_done_neg: +L_fe_invert_nct_avx2_vsubu_shr1: + shrdq $1, %r11, %r10 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrq $1, %r13 vptest %ymm8, %ymm2 - jz L__mod_inv_avx2__vsubu_sub_shr1 + jz L_fe_invert_nct_avx2_vsubu_sub_shr1 vpaddd %ymm6, %ymm2, %ymm2 vpaddd %ymm7, %ymm3, %ymm3 -L__mod_inv_avx2__vsubu_sub_shr1: +L_fe_invert_nct_avx2_vsubu_sub_shr1: vpand %ymm9, %ymm2, %ymm4 vpand %ymm10, %ymm3, %ymm5 vpermd %ymm4, %ymm11, %ymm4 @@ -21655,14 +21690,14 @@ L__mod_inv_avx2__vsubu_sub_shr1: vpaddd %ymm5, %ymm2, %ymm2 vpaddd %ymm4, %ymm3, %ymm3 testb $0x01, %r10b - jz L__mod_inv_avx2__vsubu_shr1 + jz L_fe_invert_nct_avx2_vsubu_shr1 cmpq $0x01, %r10 - jne L__mod_inv_avx2__uv_start + jne L_fe_invert_nct_avx2_uv_start movq %r11, %rdx orq %r12, %rdx - jne L__mod_inv_avx2__uv_start + jne L_fe_invert_nct_avx2_uv_start orq %r13, %rdx - jne L__mod_inv_avx2__uv_start + jne L_fe_invert_nct_avx2_uv_start vpextrd $0x00, %xmm2, %eax vpextrd $0x01, %xmm2, %r8d vpextrd $2, %xmm2, %r10d @@ -21675,7 +21710,7 @@ L__mod_inv_avx2__vsubu_sub_shr1: vextracti128 $0x01, %ymm3, %xmm3 vpextrd $0x00, %xmm2, %r14d vpextrd $0x00, %xmm3, %r15d -L__mod_inv_avx2__store_done: +L_fe_invert_nct_avx2_store_done: movl %eax, %edx andl $0x3ffffff, %eax sarl $26, %edx @@ -21732,7 +21767,7 @@ L__mod_inv_avx2__store_done: adcq %r13, %r12 movslq %r14d, %r14 adcq %r15, %r14 - jge L__mod_inv_avx2__3_no_add_prime + jge L_fe_invert_nct_avx2_uv_start_no_add_prime movq $0xfffffffffffed, %rcx movq $0xfffffffffffff, %r9 movq $0xfffffffffffff, %r11 @@ -21760,7 +21795,7 @@ L__mod_inv_avx2__store_done: andq %rdx, %r12 sarq $52, %r13 addq %r13, %r14 -L__mod_inv_avx2__3_no_add_prime: +L_fe_invert_nct_avx2_uv_start_no_add_prime: movq %r8, %rcx movq %r10, %r9 movq %r12, %r11 diff --git a/wolfcrypt/src/poly1305_asm.S b/wolfcrypt/src/poly1305_asm.S index a1e6f68dbe2..7f73e87b67e 100644 --- a/wolfcrypt/src/poly1305_asm.S +++ b/wolfcrypt/src/poly1305_asm.S @@ -672,8 +672,8 @@ _poly1305_setkey_avx2: .p2align 5 #endif /* __APPLE__ */ L_poly1305_avx2_blocks_mask: -.quad 0x3ffffff, 0x3ffffff -.quad 0x3ffffff, 0x3ffffff +.quad 0x0000000003ffffff,0x0000000003ffffff +.quad 0x0000000003ffffff,0x0000000003ffffff #ifndef __APPLE__ .data #else @@ -685,8 +685,8 @@ L_poly1305_avx2_blocks_mask: .p2align 5 #endif /* __APPLE__ */ L_poly1305_avx2_blocks_hibit: -.quad 0x1000000, 0x1000000 -.quad 0x1000000, 0x1000000 +.quad 0x0000000001000000,0x0000000001000000 +.quad 0x0000000001000000,0x0000000001000000 #ifndef __APPLE__ .text .globl poly1305_blocks_avx2 diff --git a/wolfcrypt/src/poly1305_asm.asm b/wolfcrypt/src/poly1305_asm.asm index ecabf55b96b..de7e5259ae5 100644 --- a/wolfcrypt/src/poly1305_asm.asm +++ b/wolfcrypt/src/poly1305_asm.asm @@ -18,6 +18,7 @@ ; * along with this program; if not, write to the Free Software ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA ; */ + IF @Version LT 1200 ; AVX2 instructions not recognized by old versions of MASM IFNDEF NO_AVX2_SUPPORT @@ -41,7 +42,7 @@ _WIN64 = 1 ENDIF IFDEF HAVE_INTEL_AVX1 -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA poly1305_setkey_avx PROC push r12 push r13 @@ -93,8 +94,8 @@ poly1305_setkey_avx PROC pop r12 ret poly1305_setkey_avx ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA poly1305_block_avx PROC push r15 push rbx @@ -166,8 +167,8 @@ poly1305_block_avx PROC pop r15 ret poly1305_block_avx ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA poly1305_blocks_avx PROC push rdi push rsi @@ -249,8 +250,8 @@ L_poly1305_avx_blocks_start: pop rdi ret poly1305_blocks_avx ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA poly1305_final_avx PROC push rdi push rbx @@ -318,10 +319,10 @@ L_poly1305_avx_final_no_more: pop rdi ret poly1305_final_avx ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA poly1305_calc_powers_avx2 PROC push r12 push r13 @@ -581,8 +582,8 @@ poly1305_calc_powers_avx2 PROC pop r12 ret poly1305_calc_powers_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA poly1305_setkey_avx2 PROC call poly1305_setkey_avx vpxor ymm0, ymm0, ymm0 @@ -595,20 +596,22 @@ poly1305_setkey_avx2 PROC mov WORD PTR [rcx+616], 0 ret poly1305_setkey_avx2 ENDP -_text ENDS +_TEXT ENDS _DATA SEGMENT ALIGN 16 -L_poly1305_avx2_blocks_mask QWORD 67108863, 67108863, - 67108863, 67108863 +L_poly1305_avx2_blocks_mask QWORD \ + 0000000003ffffffh, 0000000003ffffffh, + 0000000003ffffffh, 0000000003ffffffh ptr_L_poly1305_avx2_blocks_mask QWORD L_poly1305_avx2_blocks_mask _DATA ENDS _DATA SEGMENT ALIGN 16 -L_poly1305_avx2_blocks_hibit QWORD 16777216, 16777216, - 16777216, 16777216 +L_poly1305_avx2_blocks_hibit QWORD \ + 0000000001000000h, 0000000001000000h, + 0000000001000000h, 0000000001000000h ptr_L_poly1305_avx2_blocks_hibit QWORD L_poly1305_avx2_blocks_hibit _DATA ENDS -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA poly1305_blocks_avx2 PROC push r12 push rdi @@ -990,8 +993,8 @@ L_poly1305_avx2_blocks_complete: pop r12 ret poly1305_blocks_avx2 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA poly1305_final_avx2 PROC push rdi push rsi @@ -1055,6 +1058,6 @@ L_poly1305_avx2_final_cmp_copy: pop rdi ret poly1305_final_avx2 ENDP -_text ENDS +_TEXT ENDS ENDIF END diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index 88882b3a486..2112845ce06 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -968,7 +968,7 @@ L_aes_set_key_arm32_crypto_done: .globl AES_encrypt_AARCH32 .type AES_encrypt_AARCH32, %function AES_encrypt_AARCH32: - vpush {d8, d9} + vpush {d8-d9} vld1.8 {q0}, [r0] vldm r2!, {q1-q4} aese.8 q0, q1 @@ -989,19 +989,19 @@ AES_encrypt_AARCH32: aese.8 q0, q4 aesmc.8 q0, q0 subs r3, r3, #10 - vld1.32 {q1, q2}, [r2]! + vld1.32 {q1-q2}, [r2]! aese.8 q0, q1 aesmc.8 q0, q0 aese.8 q0, q2 beq L_aes_encrypt_arm32_crypto_round_done - vld1.32 {q1, q2}, [r2]! + vld1.32 {q1-q2}, [r2]! subs r3, r3, #2 aesmc.8 q0, q0 aese.8 q0, q1 aesmc.8 q0, q0 aese.8 q0, q2 beq L_aes_encrypt_arm32_crypto_round_done - vld1.32 {q1, q2}, [r2]! + vld1.32 {q1-q2}, [r2]! aesmc.8 q0, q0 aese.8 q0, q1 aesmc.8 q0, q0 @@ -1010,7 +1010,7 @@ L_aes_encrypt_arm32_crypto_round_done: vld1.32 {q1}, [r2] veor.32 q0, q0, q1 vst1.8 {q0}, [r1] - vpop {d8, d9} + vpop {d8-d9} bx lr .size AES_encrypt_AARCH32,.-AES_encrypt_AARCH32 #endif /* defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) */ @@ -1021,7 +1021,7 @@ L_aes_encrypt_arm32_crypto_round_done: .globl AES_decrypt_AARCH32 .type AES_decrypt_AARCH32, %function AES_decrypt_AARCH32: - vpush {d8, d9} + vpush {d8-d9} vld1.8 {q0}, [r0] vldm r2!, {q1-q4} aesd.8 q0, q1 @@ -1041,20 +1041,20 @@ AES_decrypt_AARCH32: aesimc.8 q0, q0 aesd.8 q0, q4 aesimc.8 q0, q0 - vld1.32 {q1, q2}, [r2]! + vld1.32 {q1-q2}, [r2]! aesd.8 q0, q1 aesimc.8 q0, q0 aesd.8 q0, q2 subs r3, r3, #10 beq L_aes_decrypt_arm32_crypto_round_done - vld1.32 {q1, q2}, [r2]! + vld1.32 {q1-q2}, [r2]! aesimc.8 q0, q0 aesd.8 q0, q1 aesimc.8 q0, q0 aesd.8 q0, q2 subs r3, r3, #2 beq L_aes_decrypt_arm32_crypto_round_done - vld1.32 {q1, q2}, [r2]! + vld1.32 {q1-q2}, [r2]! aesimc.8 q0, q0 aesd.8 q0, q1 aesimc.8 q0, q0 @@ -1063,7 +1063,7 @@ L_aes_decrypt_arm32_crypto_round_done: vld1.32 {q1}, [r2] veor.32 q0, q0, q1 vst1.8 {q0}, [r1] - vpop {d8, d9} + vpop {d8-d9} bx lr .size AES_decrypt_AARCH32,.-AES_decrypt_AARCH32 #endif /* HAVE_AES_DECRYPT */ @@ -1083,7 +1083,7 @@ AES_encrypt_blocks_AARCH32: bgt L_aes_encrypt_blocks_arm32_crypto_start_256 # AES_ECB_192 #ifndef NO_AES_192 - vld1.32 {q8, q9}, [r3]! + vld1.32 {q8-q9}, [r3]! cmp r2, #1 beq L_aes_encrypt_blocks_arm32_crypto_192_start_1 L_aes_encrypt_blocks_arm32_crypto_192_start_4: @@ -1197,7 +1197,7 @@ L_aes_encrypt_blocks_arm32_crypto_192_start_4: L_aes_encrypt_blocks_arm32_crypto_192_start_2: cmp r2, #2 blt L_aes_encrypt_blocks_arm32_crypto_192_start_1 - vld1.8 {q12, q13}, [r0]! + vld1.8 {q12-q13}, [r0]! aese.8 q12, q0 aesmc.8 q12, q12 aese.8 q13, q0 @@ -1251,7 +1251,7 @@ L_aes_encrypt_blocks_arm32_crypto_192_start_2: veor.32 q13, q13, q10 sub r3, r3, #48 sub r2, r2, #2 - vst1.8 {q12, q13}, [r1]! + vst1.8 {q12-q13}, [r1]! L_aes_encrypt_blocks_arm32_crypto_192_start_1: cmp r2, #0 beq L_aes_encrypt_blocks_arm32_crypto_192_done @@ -1291,7 +1291,7 @@ L_aes_encrypt_blocks_arm32_crypto_192_done: # AES_ECB_256 L_aes_encrypt_blocks_arm32_crypto_start_256: #ifndef NO_AES_256 - vld1.32 {q8, q9}, [r3]! + vld1.32 {q8-q9}, [r3]! cmp r2, #1 beq L_aes_encrypt_blocks_arm32_crypto_256_start_1 L_aes_encrypt_blocks_arm32_crypto_256_start_4: @@ -1423,7 +1423,7 @@ L_aes_encrypt_blocks_arm32_crypto_256_start_4: L_aes_encrypt_blocks_arm32_crypto_256_start_2: cmp r2, #2 blt L_aes_encrypt_blocks_arm32_crypto_256_start_1 - vld1.8 {q12, q13}, [r0]! + vld1.8 {q12-q13}, [r0]! aese.8 q12, q0 aesmc.8 q12, q12 aese.8 q13, q0 @@ -1487,7 +1487,7 @@ L_aes_encrypt_blocks_arm32_crypto_256_start_2: veor.32 q13, q13, q10 sub r3, r3, #0x50 sub r2, r2, #2 - vst1.8 {q12, q13}, [r1]! + vst1.8 {q12-q13}, [r1]! L_aes_encrypt_blocks_arm32_crypto_256_start_1: cmp r2, #0 beq L_aes_encrypt_blocks_arm32_crypto_256_done @@ -1627,7 +1627,7 @@ L_aes_encrypt_blocks_arm32_crypto_128_start_4: L_aes_encrypt_blocks_arm32_crypto_128_start_2: cmp r2, #2 blt L_aes_encrypt_blocks_arm32_crypto_128_start_1 - vld1.8 {q12, q13}, [r0]! + vld1.8 {q12-q13}, [r0]! aese.8 q12, q0 aesmc.8 q12, q12 aese.8 q13, q0 @@ -1669,7 +1669,7 @@ L_aes_encrypt_blocks_arm32_crypto_128_start_2: aese.8 q13, q9 veor.32 q13, q13, q10 sub r2, r2, #2 - vst1.8 {q12, q13}, [r1]! + vst1.8 {q12-q13}, [r1]! L_aes_encrypt_blocks_arm32_crypto_128_start_1: cmp r2, #0 beq L_aes_encrypt_blocks_arm32_crypto_128_done @@ -1716,7 +1716,7 @@ AES_decrypt_blocks_AARCH32: bgt L_aes_decrypt_blocks_arm32_crypto_start_256 # AES_ECB_192 #ifndef NO_AES_192 - vld1.32 {q8, q9}, [r3]! + vld1.32 {q8-q9}, [r3]! cmp r2, #1 beq L_aes_decrypt_blocks_arm32_crypto_192_start_1 cmp r2, #4 @@ -1830,7 +1830,7 @@ L_aes_decrypt_blocks_arm32_crypto_192_start_4: L_aes_decrypt_blocks_arm32_crypto_192_start_2: cmp r2, #2 blt L_aes_decrypt_blocks_arm32_crypto_192_start_1 - vld1.8 {q12, q13}, [r0]! + vld1.8 {q12-q13}, [r0]! aesd.8 q12, q0 aesimc.8 q12, q12 aesd.8 q13, q0 @@ -1884,7 +1884,7 @@ L_aes_decrypt_blocks_arm32_crypto_192_start_2: veor.32 q13, q13, q10 sub r3, r3, #48 sub r2, r2, #2 - vst1.8 {q12, q13}, [r1]! + vst1.8 {q12-q13}, [r1]! L_aes_decrypt_blocks_arm32_crypto_192_start_1: cmp r2, #0 beq L_aes_decrypt_blocks_arm32_crypto_192_done @@ -1924,7 +1924,7 @@ L_aes_decrypt_blocks_arm32_crypto_192_done: # AES_ECB_256 L_aes_decrypt_blocks_arm32_crypto_start_256: #ifndef NO_AES_256 - vld1.32 {q8, q9}, [r3]! + vld1.32 {q8-q9}, [r3]! cmp r2, #1 beq L_aes_decrypt_blocks_arm32_crypto_256_start_1 cmp r2, #4 @@ -2056,7 +2056,7 @@ L_aes_decrypt_blocks_arm32_crypto_256_start_4: L_aes_decrypt_blocks_arm32_crypto_256_start_2: cmp r2, #2 blt L_aes_decrypt_blocks_arm32_crypto_256_start_1 - vld1.8 {q12, q13}, [r0]! + vld1.8 {q12-q13}, [r0]! aesd.8 q12, q0 aesimc.8 q12, q12 aesd.8 q13, q0 @@ -2120,7 +2120,7 @@ L_aes_decrypt_blocks_arm32_crypto_256_start_2: veor.32 q13, q13, q10 sub r3, r3, #0x50 sub r2, r2, #2 - vst1.8 {q12, q13}, [r1]! + vst1.8 {q12-q13}, [r1]! L_aes_decrypt_blocks_arm32_crypto_256_start_1: cmp r2, #0 beq L_aes_decrypt_blocks_arm32_crypto_256_done @@ -2260,7 +2260,7 @@ L_aes_decrypt_blocks_arm32_crypto_128_start_4: L_aes_decrypt_blocks_arm32_crypto_128_start_2: cmp r2, #2 blt L_aes_decrypt_blocks_arm32_crypto_128_start_1 - vld1.8 {q12, q13}, [r0]! + vld1.8 {q12-q13}, [r0]! aesd.8 q12, q0 aesimc.8 q12, q12 aesd.8 q13, q0 @@ -2302,7 +2302,7 @@ L_aes_decrypt_blocks_arm32_crypto_128_start_2: aesd.8 q13, q9 veor.32 q13, q13, q10 sub r2, r2, #2 - vst1.8 {q12, q13}, [r1]! + vst1.8 {q12-q13}, [r1]! L_aes_decrypt_blocks_arm32_crypto_128_start_1: cmp r2, #0 beq L_aes_decrypt_blocks_arm32_crypto_128_done @@ -2568,7 +2568,7 @@ L_aes_cbc_encrypt_arm32_crypto_start_256: vld1.8 {q14}, [r0]! vldm.32 r12!, {q8-q11} add r12, r12, #16 - vld1.32 {q12, q13}, [r12] + vld1.32 {q12-q13}, [r12] sub r12, r12, #16 cmp r2, #1 beq L_aes_cbc_encrypt_arm32_crypto_256_start_1 @@ -3020,7 +3020,7 @@ AES_CBC_decrypt_AARCH32: cmp r2, #1 beq L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_1 L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_2: - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! vmov q11, q13 vmov q12, q14 vmov q13, q15 @@ -3080,7 +3080,7 @@ L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_2: cmp r2, #1 veor.32 q14, q14, q11 veor.32 q15, q15, q12 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! sub r12, r12, #48 blt L_aes_cbc_decrypt_blocks_arm32_crypto_192_done bgt L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_2 @@ -3128,7 +3128,7 @@ L_aes_cbc_decrypt_blocks_arm32_crypto_start_256: cmp r2, #1 beq L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_1 L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_2: - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! vmov q11, q13 vmov q12, q14 vmov q13, q15 @@ -3198,7 +3198,7 @@ L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_2: cmp r2, #1 veor.32 q14, q14, q11 veor.32 q15, q15, q12 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! sub r12, r12, #0x50 blt L_aes_cbc_decrypt_blocks_arm32_crypto_256_done bgt L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_2 @@ -3252,7 +3252,7 @@ L_aes_cbc_decrypt_blocks_arm32_crypto_start_128: cmp r2, #1 beq L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_1 L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_2: - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! vmov q11, q13 vmov q12, q14 vmov q13, q15 @@ -3300,7 +3300,7 @@ L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_2: cmp r2, #1 veor.32 q14, q14, q11 veor.32 q15, q15, q12 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! blt L_aes_cbc_decrypt_blocks_arm32_crypto_128_done bgt L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_2 L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_1: @@ -3431,7 +3431,7 @@ L_aes_ctr_encrypt_arm32_crypto_192_start_2: veor.32 q0, q0, q15 veor.32 q1, q1, q15 adds r8, r8, #1 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! adcs r7, r7, #0 sub r12, r12, #16 veor.32 q14, q14, q0 @@ -3442,7 +3442,7 @@ L_aes_ctr_encrypt_arm32_crypto_192_start_2: vmov d2, r5, r6 vmov d3, r7, r8 cmp r4, #1 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! vrev32.8 q1, q1 bgt L_aes_ctr_encrypt_arm32_crypto_192_start_2 mov lr, #0 @@ -3625,7 +3625,7 @@ L_aes_ctr_encrypt_arm32_crypto_256_start_2: veor.32 q0, q0, q15 veor.32 q1, q1, q15 adcs r6, r6, #0 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! sub r12, r12, #48 veor.32 q14, q14, q0 veor.32 q15, q15, q1 @@ -3634,7 +3634,7 @@ L_aes_ctr_encrypt_arm32_crypto_256_start_2: vmov d2, r5, r6 vmov d3, r7, r8 cmp r4, #1 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! vrev32.8 q1, q1 bgt L_aes_ctr_encrypt_arm32_crypto_256_start_2 mov lr, #0 @@ -3804,7 +3804,7 @@ L_aes_ctr_encrypt_arm32_crypto_128_start_2: aese.8 q1, q11 aesmc.8 q1, q1 adcs r6, r6, #0 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! adc r5, r5, #0 aese.8 q0, q12 aese.8 q1, q12 @@ -3817,7 +3817,7 @@ L_aes_ctr_encrypt_arm32_crypto_128_start_2: vmov d2, r5, r6 vmov d3, r7, r8 cmp r4, #1 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! vrev32.8 q1, q1 bgt L_aes_ctr_encrypt_arm32_crypto_128_start_2 mov lr, #0 @@ -3912,10 +3912,10 @@ L_aes_ctr_encrypt_arm32_crypto_done: .globl AES_GCM_set_key_AARCH32 .type AES_GCM_set_key_AARCH32, %function AES_GCM_set_key_AARCH32: - vpush {d8, d9} + vpush {d8-d9} vld1.8 {q0}, [r0] - vld1.8 {q1, q2}, [r1]! - vld1.8 {q3, q4}, [r1]! + vld1.8 {q1-q2}, [r1]! + vld1.8 {q3-q4}, [r1]! aese.8 q0, q1 aesmc.8 q0, q0 aese.8 q0, q2 @@ -3924,8 +3924,8 @@ AES_GCM_set_key_AARCH32: aesmc.8 q0, q0 aese.8 q0, q4 aesmc.8 q0, q0 - vld1.8 {q1, q2}, [r1]! - vld1.8 {q3, q4}, [r1]! + vld1.8 {q1-q2}, [r1]! + vld1.8 {q3-q4}, [r1]! aese.8 q0, q1 aesmc.8 q0, q0 aese.8 q0, q2 @@ -3935,19 +3935,19 @@ AES_GCM_set_key_AARCH32: aese.8 q0, q4 aesmc.8 q0, q0 subs r3, r3, #10 - vld1.8 {q1, q2}, [r1]! + vld1.8 {q1-q2}, [r1]! aese.8 q0, q1 aesmc.8 q0, q0 aese.8 q0, q2 beq L_aes_gcm_set_key_arm32_crypto_round_done - vld1.8 {q1, q2}, [r1]! + vld1.8 {q1-q2}, [r1]! subs r3, r3, #2 aesmc.8 q0, q0 aese.8 q0, q1 aesmc.8 q0, q0 aese.8 q0, q2 beq L_aes_gcm_set_key_arm32_crypto_round_done - vld1.8 {q1, q2}, [r1]! + vld1.8 {q1-q2}, [r1]! aesmc.8 q0, q0 aese.8 q0, q1 aesmc.8 q0, q0 @@ -3966,7 +3966,7 @@ L_aes_gcm_set_key_arm32_crypto_round_done: vshl.u8 q0, q2, #4 vsri.u8 q0, q2, #4 vst1.32 {q0}, [r2] - vpop {d8, d9} + vpop {d8-d9} bx lr .size AES_GCM_set_key_AARCH32,.-AES_GCM_set_key_AARCH32 .text @@ -4207,11 +4207,11 @@ L_aes_gcm_encrypt_arm32_crypto_192_start_2: veor.8 q4, q4, q15 aese.8 q5, q14 veor.8 q5, q5, q15 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! sub r7, r7, #16 veor.8 q14, q14, q4 veor.8 q15, q15, q5 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! cmp r10, #1 bgt L_aes_gcm_encrypt_arm32_crypto_192_start_2 blt L_aes_gcm_encrypt_arm32_crypto_192_done @@ -4447,11 +4447,11 @@ L_aes_gcm_encrypt_arm32_crypto_256_start_2: veor.8 q4, q4, q15 aese.8 q5, q14 veor.8 q5, q5, q15 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! sub r7, r7, #48 veor.8 q14, q14, q4 veor.8 q15, q15, q5 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! cmp r10, #1 bgt L_aes_gcm_encrypt_arm32_crypto_256_start_2 blt L_aes_gcm_encrypt_arm32_crypto_256_done @@ -4681,14 +4681,14 @@ L_aes_gcm_encrypt_arm32_crypto_128_start_2: aesmc.8 q4, q4 aese.8 q5, q11 aesmc.8 q5, q5 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! aese.8 q4, q12 veor.8 q4, q4, q13 aese.8 q5, q12 veor.8 q5, q5, q13 veor.8 q14, q14, q4 veor.8 q15, q15, q5 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! cmp r10, #1 bgt L_aes_gcm_encrypt_arm32_crypto_128_start_2 blt L_aes_gcm_encrypt_arm32_crypto_128_done @@ -4973,7 +4973,7 @@ L_aes_gcm_encrypt_arm32_crypto_aad_start_4: blt L_aes_gcm_encrypt_arm32_crypto_aad_done beq L_aes_gcm_encrypt_arm32_crypto_aad_start_1 L_aes_gcm_encrypt_arm32_crypto_aad_start_2: - vld1.32 {q14, q15}, [r5]! + vld1.32 {q14-q15}, [r5]! vmov.i8 q12, #0x55 vshl.u8 q0, q14, #1 vshl.u8 q1, q15, #1 @@ -5204,7 +5204,7 @@ L_aes_gcm_encrypt_arm32_crypto_out_start_4: blt L_aes_gcm_encrypt_arm32_crypto_out_done beq L_aes_gcm_encrypt_arm32_crypto_out_start_1 L_aes_gcm_encrypt_arm32_crypto_out_start_2: - vld1.32 {q14, q15}, [r1]! + vld1.32 {q14-q15}, [r1]! vmov.i8 q12, #0x55 vshl.u8 q0, q14, #1 vshl.u8 q1, q15, #1 @@ -5567,7 +5567,7 @@ L_aes_gcm_decrypt_arm32_crypto_aad_start_4: blt L_aes_gcm_decrypt_arm32_crypto_aad_done beq L_aes_gcm_decrypt_arm32_crypto_aad_start_1 L_aes_gcm_decrypt_arm32_crypto_aad_start_2: - vld1.32 {q14, q15}, [r5]! + vld1.32 {q14-q15}, [r5]! vmov.i8 q12, #0x55 vshl.u8 q0, q14, #1 vshl.u8 q1, q15, #1 @@ -5798,7 +5798,7 @@ L_aes_gcm_decrypt_arm32_crypto_in_start_4: blt L_aes_gcm_decrypt_arm32_crypto_in_done beq L_aes_gcm_decrypt_arm32_crypto_in_start_1 L_aes_gcm_decrypt_arm32_crypto_in_start_2: - vld1.32 {q14, q15}, [r0]! + vld1.32 {q14-q15}, [r0]! vmov.i8 q12, #0x55 vshl.u8 q0, q14, #1 vshl.u8 q1, q15, #1 @@ -6156,11 +6156,11 @@ L_aes_gcm_decrypt_arm32_crypto_192_start_2: veor.8 q4, q4, q15 aese.8 q5, q14 veor.8 q5, q5, q15 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! sub r7, r7, #16 veor.8 q14, q14, q4 veor.8 q15, q15, q5 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! cmp r10, #1 bgt L_aes_gcm_decrypt_arm32_crypto_192_start_2 blt L_aes_gcm_decrypt_arm32_crypto_192_done @@ -6396,11 +6396,11 @@ L_aes_gcm_decrypt_arm32_crypto_256_start_2: veor.8 q4, q4, q15 aese.8 q5, q14 veor.8 q5, q5, q15 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! sub r7, r7, #48 veor.8 q14, q14, q4 veor.8 q15, q15, q5 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! cmp r10, #1 bgt L_aes_gcm_decrypt_arm32_crypto_256_start_2 blt L_aes_gcm_decrypt_arm32_crypto_256_done @@ -6630,14 +6630,14 @@ L_aes_gcm_decrypt_arm32_crypto_128_start_2: aesmc.8 q4, q4 aese.8 q5, q11 aesmc.8 q5, q5 - vld1.8 {q14, q15}, [r0]! + vld1.8 {q14-q15}, [r0]! aese.8 q4, q12 veor.8 q4, q4, q13 aese.8 q5, q12 veor.8 q5, q5, q13 veor.8 q14, q14, q4 veor.8 q15, q15, q5 - vst1.8 {q14, q15}, [r1]! + vst1.8 {q14-q15}, [r1]! cmp r10, #1 bgt L_aes_gcm_decrypt_arm32_crypto_128_start_2 blt L_aes_gcm_decrypt_arm32_crypto_128_done @@ -7820,551 +7820,203 @@ L_aes_xts_decrypt_arm32_crypto_done: #endif /* WOLFSSL_AES_XTS */ #else #ifdef HAVE_AES_DECRYPT +#ifndef __APPLE__ .text .type L_AES_ARM32_td_data, %object .size L_AES_ARM32_td_data, 1024 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_td_data: - .word 0x5051f4a7 - .word 0x537e4165 - .word 0xc31a17a4 - .word 0x963a275e - .word 0xcb3bab6b - .word 0xf11f9d45 - .word 0xabacfa58 - .word 0x934be303 - .word 0x552030fa - .word 0xf6ad766d - .word 0x9188cc76 - .word 0x25f5024c - .word 0xfc4fe5d7 - .word 0xd7c52acb - .word 0x80263544 - .word 0x8fb562a3 - .word 0x49deb15a - .word 0x6725ba1b - .word 0x9845ea0e - .word 0xe15dfec0 - .word 0x2c32f75 - .word 0x12814cf0 - .word 0xa38d4697 - .word 0xc66bd3f9 - .word 0xe7038f5f - .word 0x9515929c - .word 0xebbf6d7a - .word 0xda955259 - .word 0x2dd4be83 - .word 0xd3587421 - .word 0x2949e069 - .word 0x448ec9c8 - .word 0x6a75c289 - .word 0x78f48e79 - .word 0x6b99583e - .word 0xdd27b971 - .word 0xb6bee14f - .word 0x17f088ad - .word 0x66c920ac - .word 0xb47dce3a - .word 0x1863df4a - .word 0x82e51a31 - .word 0x60975133 - .word 0x4562537f - .word 0xe0b16477 - .word 0x84bb6bae - .word 0x1cfe81a0 - .word 0x94f9082b - .word 0x58704868 - .word 0x198f45fd - .word 0x8794de6c - .word 0xb7527bf8 - .word 0x23ab73d3 - .word 0xe2724b02 - .word 0x57e31f8f - .word 0x2a6655ab - .word 0x7b2eb28 - .word 0x32fb5c2 - .word 0x9a86c57b - .word 0xa5d33708 - .word 0xf2302887 - .word 0xb223bfa5 - .word 0xba02036a - .word 0x5ced1682 - .word 0x2b8acf1c - .word 0x92a779b4 - .word 0xf0f307f2 - .word 0xa14e69e2 - .word 0xcd65daf4 - .word 0xd50605be - .word 0x1fd13462 - .word 0x8ac4a6fe - .word 0x9d342e53 - .word 0xa0a2f355 - .word 0x32058ae1 - .word 0x75a4f6eb - .word 0x390b83ec - .word 0xaa4060ef - .word 0x65e719f - .word 0x51bd6e10 - .word 0xf93e218a - .word 0x3d96dd06 - .word 0xaedd3e05 - .word 0x464de6bd - .word 0xb591548d - .word 0x571c45d - .word 0x6f0406d4 - .word 0xff605015 - .word 0x241998fb - .word 0x97d6bde9 - .word 0xcc894043 - .word 0x7767d99e - .word 0xbdb0e842 - .word 0x8807898b - .word 0x38e7195b - .word 0xdb79c8ee - .word 0x47a17c0a - .word 0xe97c420f - .word 0xc9f8841e - .word 0x0 - .word 0x83098086 - .word 0x48322bed - .word 0xac1e1170 - .word 0x4e6c5a72 - .word 0xfbfd0eff - .word 0x560f8538 - .word 0x1e3daed5 - .word 0x27362d39 - .word 0x640a0fd9 - .word 0x21685ca6 - .word 0xd19b5b54 - .word 0x3a24362e - .word 0xb10c0a67 - .word 0xf9357e7 - .word 0xd2b4ee96 - .word 0x9e1b9b91 - .word 0x4f80c0c5 - .word 0xa261dc20 - .word 0x695a774b - .word 0x161c121a - .word 0xae293ba - .word 0xe5c0a02a - .word 0x433c22e0 - .word 0x1d121b17 - .word 0xb0e090d - .word 0xadf28bc7 - .word 0xb92db6a8 - .word 0xc8141ea9 - .word 0x8557f119 - .word 0x4caf7507 - .word 0xbbee99dd - .word 0xfda37f60 - .word 0x9ff70126 - .word 0xbc5c72f5 - .word 0xc544663b - .word 0x345bfb7e - .word 0x768b4329 - .word 0xdccb23c6 - .word 0x68b6edfc - .word 0x63b8e4f1 - .word 0xcad731dc - .word 0x10426385 - .word 0x40139722 - .word 0x2084c611 - .word 0x7d854a24 - .word 0xf8d2bb3d - .word 0x11aef932 - .word 0x6dc729a1 - .word 0x4b1d9e2f - .word 0xf3dcb230 - .word 0xec0d8652 - .word 0xd077c1e3 - .word 0x6c2bb316 - .word 0x99a970b9 - .word 0xfa119448 - .word 0x2247e964 - .word 0xc4a8fc8c - .word 0x1aa0f03f - .word 0xd8567d2c - .word 0xef223390 - .word 0xc787494e - .word 0xc1d938d1 - .word 0xfe8ccaa2 - .word 0x3698d40b - .word 0xcfa6f581 - .word 0x28a57ade - .word 0x26dab78e - .word 0xa43fadbf - .word 0xe42c3a9d - .word 0xd507892 - .word 0x9b6a5fcc - .word 0x62547e46 - .word 0xc2f68d13 - .word 0xe890d8b8 - .word 0x5e2e39f7 - .word 0xf582c3af - .word 0xbe9f5d80 - .word 0x7c69d093 - .word 0xa96fd52d - .word 0xb3cf2512 - .word 0x3bc8ac99 - .word 0xa710187d - .word 0x6ee89c63 - .word 0x7bdb3bbb - .word 0x9cd2678 - .word 0xf46e5918 - .word 0x1ec9ab7 - .word 0xa8834f9a - .word 0x65e6956e - .word 0x7eaaffe6 - .word 0x821bccf - .word 0xe6ef15e8 - .word 0xd9bae79b - .word 0xce4a6f36 - .word 0xd4ea9f09 - .word 0xd629b07c - .word 0xaf31a4b2 - .word 0x312a3f23 - .word 0x30c6a594 - .word 0xc035a266 - .word 0x37744ebc - .word 0xa6fc82ca - .word 0xb0e090d0 - .word 0x1533a7d8 - .word 0x4af10498 - .word 0xf741ecda - .word 0xe7fcd50 - .word 0x2f1791f6 - .word 0x8d764dd6 - .word 0x4d43efb0 - .word 0x54ccaa4d - .word 0xdfe49604 - .word 0xe39ed1b5 - .word 0x1b4c6a88 - .word 0xb8c12c1f - .word 0x7f466551 - .word 0x49d5eea - .word 0x5d018c35 - .word 0x73fa8774 - .word 0x2efb0b41 - .word 0x5ab3671d - .word 0x5292dbd2 - .word 0x33e91056 - .word 0x136dd647 - .word 0x8c9ad761 - .word 0x7a37a10c - .word 0x8e59f814 - .word 0x89eb133c - .word 0xeecea927 - .word 0x35b761c9 - .word 0xede11ce5 - .word 0x3c7a47b1 - .word 0x599cd2df - .word 0x3f55f273 - .word 0x791814ce - .word 0xbf73c737 - .word 0xea53f7cd - .word 0x5b5ffdaa - .word 0x14df3d6f - .word 0x867844db - .word 0x81caaff3 - .word 0x3eb968c4 - .word 0x2c382434 - .word 0x5fc2a340 - .word 0x72161dc3 - .word 0xcbce225 - .word 0x8b283c49 - .word 0x41ff0d95 - .word 0x7139a801 - .word 0xde080cb3 - .word 0x9cd8b4e4 - .word 0x906456c1 - .word 0x617bcb84 - .word 0x70d532b6 - .word 0x74486c5c - .word 0x42d0b857 + .long 0x5051f4a7,0x537e4165,0xc31a17a4,0x963a275e + .long 0xcb3bab6b,0xf11f9d45,0xabacfa58,0x934be303 + .long 0x552030fa,0xf6ad766d,0x9188cc76,0x25f5024c + .long 0xfc4fe5d7,0xd7c52acb,0x80263544,0x8fb562a3 + .long 0x49deb15a,0x6725ba1b,0x9845ea0e,0xe15dfec0 + .long 0x02c32f75,0x12814cf0,0xa38d4697,0xc66bd3f9 + .long 0xe7038f5f,0x9515929c,0xebbf6d7a,0xda955259 + .long 0x2dd4be83,0xd3587421,0x2949e069,0x448ec9c8 + .long 0x6a75c289,0x78f48e79,0x6b99583e,0xdd27b971 + .long 0xb6bee14f,0x17f088ad,0x66c920ac,0xb47dce3a + .long 0x1863df4a,0x82e51a31,0x60975133,0x4562537f + .long 0xe0b16477,0x84bb6bae,0x1cfe81a0,0x94f9082b + .long 0x58704868,0x198f45fd,0x8794de6c,0xb7527bf8 + .long 0x23ab73d3,0xe2724b02,0x57e31f8f,0x2a6655ab + .long 0x07b2eb28,0x032fb5c2,0x9a86c57b,0xa5d33708 + .long 0xf2302887,0xb223bfa5,0xba02036a,0x5ced1682 + .long 0x2b8acf1c,0x92a779b4,0xf0f307f2,0xa14e69e2 + .long 0xcd65daf4,0xd50605be,0x1fd13462,0x8ac4a6fe + .long 0x9d342e53,0xa0a2f355,0x32058ae1,0x75a4f6eb + .long 0x390b83ec,0xaa4060ef,0x065e719f,0x51bd6e10 + .long 0xf93e218a,0x3d96dd06,0xaedd3e05,0x464de6bd + .long 0xb591548d,0x0571c45d,0x6f0406d4,0xff605015 + .long 0x241998fb,0x97d6bde9,0xcc894043,0x7767d99e + .long 0xbdb0e842,0x8807898b,0x38e7195b,0xdb79c8ee + .long 0x47a17c0a,0xe97c420f,0xc9f8841e,0x00000000 + .long 0x83098086,0x48322bed,0xac1e1170,0x4e6c5a72 + .long 0xfbfd0eff,0x560f8538,0x1e3daed5,0x27362d39 + .long 0x640a0fd9,0x21685ca6,0xd19b5b54,0x3a24362e + .long 0xb10c0a67,0x0f9357e7,0xd2b4ee96,0x9e1b9b91 + .long 0x4f80c0c5,0xa261dc20,0x695a774b,0x161c121a + .long 0x0ae293ba,0xe5c0a02a,0x433c22e0,0x1d121b17 + .long 0x0b0e090d,0xadf28bc7,0xb92db6a8,0xc8141ea9 + .long 0x8557f119,0x4caf7507,0xbbee99dd,0xfda37f60 + .long 0x9ff70126,0xbc5c72f5,0xc544663b,0x345bfb7e + .long 0x768b4329,0xdccb23c6,0x68b6edfc,0x63b8e4f1 + .long 0xcad731dc,0x10426385,0x40139722,0x2084c611 + .long 0x7d854a24,0xf8d2bb3d,0x11aef932,0x6dc729a1 + .long 0x4b1d9e2f,0xf3dcb230,0xec0d8652,0xd077c1e3 + .long 0x6c2bb316,0x99a970b9,0xfa119448,0x2247e964 + .long 0xc4a8fc8c,0x1aa0f03f,0xd8567d2c,0xef223390 + .long 0xc787494e,0xc1d938d1,0xfe8ccaa2,0x3698d40b + .long 0xcfa6f581,0x28a57ade,0x26dab78e,0xa43fadbf + .long 0xe42c3a9d,0x0d507892,0x9b6a5fcc,0x62547e46 + .long 0xc2f68d13,0xe890d8b8,0x5e2e39f7,0xf582c3af + .long 0xbe9f5d80,0x7c69d093,0xa96fd52d,0xb3cf2512 + .long 0x3bc8ac99,0xa710187d,0x6ee89c63,0x7bdb3bbb + .long 0x09cd2678,0xf46e5918,0x01ec9ab7,0xa8834f9a + .long 0x65e6956e,0x7eaaffe6,0x0821bccf,0xe6ef15e8 + .long 0xd9bae79b,0xce4a6f36,0xd4ea9f09,0xd629b07c + .long 0xaf31a4b2,0x312a3f23,0x30c6a594,0xc035a266 + .long 0x37744ebc,0xa6fc82ca,0xb0e090d0,0x1533a7d8 + .long 0x4af10498,0xf741ecda,0x0e7fcd50,0x2f1791f6 + .long 0x8d764dd6,0x4d43efb0,0x54ccaa4d,0xdfe49604 + .long 0xe39ed1b5,0x1b4c6a88,0xb8c12c1f,0x7f466551 + .long 0x049d5eea,0x5d018c35,0x73fa8774,0x2efb0b41 + .long 0x5ab3671d,0x5292dbd2,0x33e91056,0x136dd647 + .long 0x8c9ad761,0x7a37a10c,0x8e59f814,0x89eb133c + .long 0xeecea927,0x35b761c9,0xede11ce5,0x3c7a47b1 + .long 0x599cd2df,0x3f55f273,0x791814ce,0xbf73c737 + .long 0xea53f7cd,0x5b5ffdaa,0x14df3d6f,0x867844db + .long 0x81caaff3,0x3eb968c4,0x2c382434,0x5fc2a340 + .long 0x72161dc3,0x0cbce225,0x8b283c49,0x41ff0d95 + .long 0x7139a801,0xde080cb3,0x9cd8b4e4,0x906456c1 + .long 0x617bcb84,0x70d532b6,0x74486c5c,0x42d0b857 #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#ifndef __APPLE__ .text .type L_AES_ARM32_te_data, %object .size L_AES_ARM32_te_data, 1024 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_te_data: - .word 0xa5c66363 - .word 0x84f87c7c - .word 0x99ee7777 - .word 0x8df67b7b - .word 0xdfff2f2 - .word 0xbdd66b6b - .word 0xb1de6f6f - .word 0x5491c5c5 - .word 0x50603030 - .word 0x3020101 - .word 0xa9ce6767 - .word 0x7d562b2b - .word 0x19e7fefe - .word 0x62b5d7d7 - .word 0xe64dabab - .word 0x9aec7676 - .word 0x458fcaca - .word 0x9d1f8282 - .word 0x4089c9c9 - .word 0x87fa7d7d - .word 0x15effafa - .word 0xebb25959 - .word 0xc98e4747 - .word 0xbfbf0f0 - .word 0xec41adad - .word 0x67b3d4d4 - .word 0xfd5fa2a2 - .word 0xea45afaf - .word 0xbf239c9c - .word 0xf753a4a4 - .word 0x96e47272 - .word 0x5b9bc0c0 - .word 0xc275b7b7 - .word 0x1ce1fdfd - .word 0xae3d9393 - .word 0x6a4c2626 - .word 0x5a6c3636 - .word 0x417e3f3f - .word 0x2f5f7f7 - .word 0x4f83cccc - .word 0x5c683434 - .word 0xf451a5a5 - .word 0x34d1e5e5 - .word 0x8f9f1f1 - .word 0x93e27171 - .word 0x73abd8d8 - .word 0x53623131 - .word 0x3f2a1515 - .word 0xc080404 - .word 0x5295c7c7 - .word 0x65462323 - .word 0x5e9dc3c3 - .word 0x28301818 - .word 0xa1379696 - .word 0xf0a0505 - .word 0xb52f9a9a - .word 0x90e0707 - .word 0x36241212 - .word 0x9b1b8080 - .word 0x3ddfe2e2 - .word 0x26cdebeb - .word 0x694e2727 - .word 0xcd7fb2b2 - .word 0x9fea7575 - .word 0x1b120909 - .word 0x9e1d8383 - .word 0x74582c2c - .word 0x2e341a1a - .word 0x2d361b1b - .word 0xb2dc6e6e - .word 0xeeb45a5a - .word 0xfb5ba0a0 - .word 0xf6a45252 - .word 0x4d763b3b - .word 0x61b7d6d6 - .word 0xce7db3b3 - .word 0x7b522929 - .word 0x3edde3e3 - .word 0x715e2f2f - .word 0x97138484 - .word 0xf5a65353 - .word 0x68b9d1d1 - .word 0x0 - .word 0x2cc1eded - .word 0x60402020 - .word 0x1fe3fcfc - .word 0xc879b1b1 - .word 0xedb65b5b - .word 0xbed46a6a - .word 0x468dcbcb - .word 0xd967bebe - .word 0x4b723939 - .word 0xde944a4a - .word 0xd4984c4c - .word 0xe8b05858 - .word 0x4a85cfcf - .word 0x6bbbd0d0 - .word 0x2ac5efef - .word 0xe54faaaa - .word 0x16edfbfb - .word 0xc5864343 - .word 0xd79a4d4d - .word 0x55663333 - .word 0x94118585 - .word 0xcf8a4545 - .word 0x10e9f9f9 - .word 0x6040202 - .word 0x81fe7f7f - .word 0xf0a05050 - .word 0x44783c3c - .word 0xba259f9f - .word 0xe34ba8a8 - .word 0xf3a25151 - .word 0xfe5da3a3 - .word 0xc0804040 - .word 0x8a058f8f - .word 0xad3f9292 - .word 0xbc219d9d - .word 0x48703838 - .word 0x4f1f5f5 - .word 0xdf63bcbc - .word 0xc177b6b6 - .word 0x75afdada - .word 0x63422121 - .word 0x30201010 - .word 0x1ae5ffff - .word 0xefdf3f3 - .word 0x6dbfd2d2 - .word 0x4c81cdcd - .word 0x14180c0c - .word 0x35261313 - .word 0x2fc3ecec - .word 0xe1be5f5f - .word 0xa2359797 - .word 0xcc884444 - .word 0x392e1717 - .word 0x5793c4c4 - .word 0xf255a7a7 - .word 0x82fc7e7e - .word 0x477a3d3d - .word 0xacc86464 - .word 0xe7ba5d5d - .word 0x2b321919 - .word 0x95e67373 - .word 0xa0c06060 - .word 0x98198181 - .word 0xd19e4f4f - .word 0x7fa3dcdc - .word 0x66442222 - .word 0x7e542a2a - .word 0xab3b9090 - .word 0x830b8888 - .word 0xca8c4646 - .word 0x29c7eeee - .word 0xd36bb8b8 - .word 0x3c281414 - .word 0x79a7dede - .word 0xe2bc5e5e - .word 0x1d160b0b - .word 0x76addbdb - .word 0x3bdbe0e0 - .word 0x56643232 - .word 0x4e743a3a - .word 0x1e140a0a - .word 0xdb924949 - .word 0xa0c0606 - .word 0x6c482424 - .word 0xe4b85c5c - .word 0x5d9fc2c2 - .word 0x6ebdd3d3 - .word 0xef43acac - .word 0xa6c46262 - .word 0xa8399191 - .word 0xa4319595 - .word 0x37d3e4e4 - .word 0x8bf27979 - .word 0x32d5e7e7 - .word 0x438bc8c8 - .word 0x596e3737 - .word 0xb7da6d6d - .word 0x8c018d8d - .word 0x64b1d5d5 - .word 0xd29c4e4e - .word 0xe049a9a9 - .word 0xb4d86c6c - .word 0xfaac5656 - .word 0x7f3f4f4 - .word 0x25cfeaea - .word 0xafca6565 - .word 0x8ef47a7a - .word 0xe947aeae - .word 0x18100808 - .word 0xd56fbaba - .word 0x88f07878 - .word 0x6f4a2525 - .word 0x725c2e2e - .word 0x24381c1c - .word 0xf157a6a6 - .word 0xc773b4b4 - .word 0x5197c6c6 - .word 0x23cbe8e8 - .word 0x7ca1dddd - .word 0x9ce87474 - .word 0x213e1f1f - .word 0xdd964b4b - .word 0xdc61bdbd - .word 0x860d8b8b - .word 0x850f8a8a - .word 0x90e07070 - .word 0x427c3e3e - .word 0xc471b5b5 - .word 0xaacc6666 - .word 0xd8904848 - .word 0x5060303 - .word 0x1f7f6f6 - .word 0x121c0e0e - .word 0xa3c26161 - .word 0x5f6a3535 - .word 0xf9ae5757 - .word 0xd069b9b9 - .word 0x91178686 - .word 0x5899c1c1 - .word 0x273a1d1d - .word 0xb9279e9e - .word 0x38d9e1e1 - .word 0x13ebf8f8 - .word 0xb32b9898 - .word 0x33221111 - .word 0xbbd26969 - .word 0x70a9d9d9 - .word 0x89078e8e - .word 0xa7339494 - .word 0xb62d9b9b - .word 0x223c1e1e - .word 0x92158787 - .word 0x20c9e9e9 - .word 0x4987cece - .word 0xffaa5555 - .word 0x78502828 - .word 0x7aa5dfdf - .word 0x8f038c8c - .word 0xf859a1a1 - .word 0x80098989 - .word 0x171a0d0d - .word 0xda65bfbf - .word 0x31d7e6e6 - .word 0xc6844242 - .word 0xb8d06868 - .word 0xc3824141 - .word 0xb0299999 - .word 0x775a2d2d - .word 0x111e0f0f - .word 0xcb7bb0b0 - .word 0xfca85454 - .word 0xd66dbbbb - .word 0x3a2c1616 + .long 0xa5c66363,0x84f87c7c,0x99ee7777,0x8df67b7b + .long 0x0dfff2f2,0xbdd66b6b,0xb1de6f6f,0x5491c5c5 + .long 0x50603030,0x03020101,0xa9ce6767,0x7d562b2b + .long 0x19e7fefe,0x62b5d7d7,0xe64dabab,0x9aec7676 + .long 0x458fcaca,0x9d1f8282,0x4089c9c9,0x87fa7d7d + .long 0x15effafa,0xebb25959,0xc98e4747,0x0bfbf0f0 + .long 0xec41adad,0x67b3d4d4,0xfd5fa2a2,0xea45afaf + .long 0xbf239c9c,0xf753a4a4,0x96e47272,0x5b9bc0c0 + .long 0xc275b7b7,0x1ce1fdfd,0xae3d9393,0x6a4c2626 + .long 0x5a6c3636,0x417e3f3f,0x02f5f7f7,0x4f83cccc + .long 0x5c683434,0xf451a5a5,0x34d1e5e5,0x08f9f1f1 + .long 0x93e27171,0x73abd8d8,0x53623131,0x3f2a1515 + .long 0x0c080404,0x5295c7c7,0x65462323,0x5e9dc3c3 + .long 0x28301818,0xa1379696,0x0f0a0505,0xb52f9a9a + .long 0x090e0707,0x36241212,0x9b1b8080,0x3ddfe2e2 + .long 0x26cdebeb,0x694e2727,0xcd7fb2b2,0x9fea7575 + .long 0x1b120909,0x9e1d8383,0x74582c2c,0x2e341a1a + .long 0x2d361b1b,0xb2dc6e6e,0xeeb45a5a,0xfb5ba0a0 + .long 0xf6a45252,0x4d763b3b,0x61b7d6d6,0xce7db3b3 + .long 0x7b522929,0x3edde3e3,0x715e2f2f,0x97138484 + .long 0xf5a65353,0x68b9d1d1,0x00000000,0x2cc1eded + .long 0x60402020,0x1fe3fcfc,0xc879b1b1,0xedb65b5b + .long 0xbed46a6a,0x468dcbcb,0xd967bebe,0x4b723939 + .long 0xde944a4a,0xd4984c4c,0xe8b05858,0x4a85cfcf + .long 0x6bbbd0d0,0x2ac5efef,0xe54faaaa,0x16edfbfb + .long 0xc5864343,0xd79a4d4d,0x55663333,0x94118585 + .long 0xcf8a4545,0x10e9f9f9,0x06040202,0x81fe7f7f + .long 0xf0a05050,0x44783c3c,0xba259f9f,0xe34ba8a8 + .long 0xf3a25151,0xfe5da3a3,0xc0804040,0x8a058f8f + .long 0xad3f9292,0xbc219d9d,0x48703838,0x04f1f5f5 + .long 0xdf63bcbc,0xc177b6b6,0x75afdada,0x63422121 + .long 0x30201010,0x1ae5ffff,0x0efdf3f3,0x6dbfd2d2 + .long 0x4c81cdcd,0x14180c0c,0x35261313,0x2fc3ecec + .long 0xe1be5f5f,0xa2359797,0xcc884444,0x392e1717 + .long 0x5793c4c4,0xf255a7a7,0x82fc7e7e,0x477a3d3d + .long 0xacc86464,0xe7ba5d5d,0x2b321919,0x95e67373 + .long 0xa0c06060,0x98198181,0xd19e4f4f,0x7fa3dcdc + .long 0x66442222,0x7e542a2a,0xab3b9090,0x830b8888 + .long 0xca8c4646,0x29c7eeee,0xd36bb8b8,0x3c281414 + .long 0x79a7dede,0xe2bc5e5e,0x1d160b0b,0x76addbdb + .long 0x3bdbe0e0,0x56643232,0x4e743a3a,0x1e140a0a + .long 0xdb924949,0x0a0c0606,0x6c482424,0xe4b85c5c + .long 0x5d9fc2c2,0x6ebdd3d3,0xef43acac,0xa6c46262 + .long 0xa8399191,0xa4319595,0x37d3e4e4,0x8bf27979 + .long 0x32d5e7e7,0x438bc8c8,0x596e3737,0xb7da6d6d + .long 0x8c018d8d,0x64b1d5d5,0xd29c4e4e,0xe049a9a9 + .long 0xb4d86c6c,0xfaac5656,0x07f3f4f4,0x25cfeaea + .long 0xafca6565,0x8ef47a7a,0xe947aeae,0x18100808 + .long 0xd56fbaba,0x88f07878,0x6f4a2525,0x725c2e2e + .long 0x24381c1c,0xf157a6a6,0xc773b4b4,0x5197c6c6 + .long 0x23cbe8e8,0x7ca1dddd,0x9ce87474,0x213e1f1f + .long 0xdd964b4b,0xdc61bdbd,0x860d8b8b,0x850f8a8a + .long 0x90e07070,0x427c3e3e,0xc471b5b5,0xaacc6666 + .long 0xd8904848,0x05060303,0x01f7f6f6,0x121c0e0e + .long 0xa3c26161,0x5f6a3535,0xf9ae5757,0xd069b9b9 + .long 0x91178686,0x5899c1c1,0x273a1d1d,0xb9279e9e + .long 0x38d9e1e1,0x13ebf8f8,0xb32b9898,0x33221111 + .long 0xbbd26969,0x70a9d9d9,0x89078e8e,0xa7339494 + .long 0xb62d9b9b,0x223c1e1e,0x92158787,0x20c9e9e9 + .long 0x4987cece,0xffaa5555,0x78502828,0x7aa5dfdf + .long 0x8f038c8c,0xf859a1a1,0x80098989,0x171a0d0d + .long 0xda65bfbf,0x31d7e6e6,0xc6844242,0xb8d06868 + .long 0xc3824141,0xb0299999,0x775a2d2d,0x111e0f0f + .long 0xcb7bb0b0,0xfca85454,0xd66dbbbb,0x3a2c1616 #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT +#ifndef __APPLE__ .text .type L_AES_ARM32_td, %object .size L_AES_ARM32_td, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_td: - .word L_AES_ARM32_td_data + .long L_AES_ARM32_td_data #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#ifndef __APPLE__ .text .type L_AES_ARM32_te, %object .size L_AES_ARM32_te, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_te: - .word L_AES_ARM32_te_data + .long L_AES_ARM32_te_data #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT @@ -8570,21 +8222,23 @@ L_AES_invert_key_mix_loop: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size AES_invert_key,.-AES_invert_key #endif /* HAVE_AES_DECRYPT */ +#ifndef __APPLE__ .text .type L_AES_ARM32_rcon, %object .size L_AES_ARM32_rcon, 40 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_rcon: - .word 0x1000000 - .word 0x2000000 - .word 0x4000000 - .word 0x8000000 - .word 0x10000000 - .word 0x20000000 - .word 0x40000000 - .word 0x80000000 - .word 0x1b000000 - .word 0x36000000 + .long 0x01000000,0x02000000,0x04000000,0x08000000 + .long 0x10000000,0x20000000,0x40000000,0x80000000 + .long 0x1b000000,0x36000000 .text .align 4 .globl AES_set_encrypt_key @@ -9698,12 +9352,21 @@ L_AES_encrypt_block_nr: #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_ECB) +#ifndef __APPLE__ .text .type L_AES_ARM32_te_ecb, %object .size L_AES_ARM32_te_ecb, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_te_ecb: - .word L_AES_ARM32_te_data + .long L_AES_ARM32_te_data .text .align 4 .globl AES_ECB_encrypt @@ -11854,12 +11517,21 @@ L_AES_ECB_encrypt_end: #endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || * WOLFSSL_AES_COUNTER || HAVE_AES_ECB */ #ifdef HAVE_AES_CBC +#ifndef __APPLE__ .text .type L_AES_ARM32_te_cbc, %object .size L_AES_ARM32_te_cbc, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_te_cbc: - .word L_AES_ARM32_te_data + .long L_AES_ARM32_te_data .text .align 4 .globl AES_CBC_encrypt @@ -14024,12 +13696,21 @@ L_AES_CBC_encrypt_end: .size AES_CBC_encrypt,.-AES_CBC_encrypt #endif /* HAVE_AES_CBC */ #ifdef WOLFSSL_AES_COUNTER +#ifndef __APPLE__ .text .type L_AES_ARM32_te_ctr, %object .size L_AES_ARM32_te_ctr, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_te_ctr: - .word L_AES_ARM32_te_data + .long L_AES_ARM32_te_data .text .align 4 .globl AES_CTR_encrypt @@ -16843,274 +16524,68 @@ L_AES_decrypt_block_nr: pop {pc} .size AES_decrypt_block,.-AES_decrypt_block #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#ifndef __APPLE__ .text .type L_AES_ARM32_td_ecb, %object .size L_AES_ARM32_td_ecb, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_td_ecb: - .word L_AES_ARM32_td_data + .long L_AES_ARM32_td_data #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_ECB) +#ifndef __APPLE__ .text .type L_AES_ARM32_ecb_td4, %object .size L_AES_ARM32_ecb_td4, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 4-byte aligned, 32-bit aligned +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_AES_ARM32_ecb_td4: - .byte 0x52 - .byte 0x9 - .byte 0x6a - .byte 0xd5 - .byte 0x30 - .byte 0x36 - .byte 0xa5 - .byte 0x38 - .byte 0xbf - .byte 0x40 - .byte 0xa3 - .byte 0x9e - .byte 0x81 - .byte 0xf3 - .byte 0xd7 - .byte 0xfb - .byte 0x7c - .byte 0xe3 - .byte 0x39 - .byte 0x82 - .byte 0x9b - .byte 0x2f - .byte 0xff - .byte 0x87 - .byte 0x34 - .byte 0x8e - .byte 0x43 - .byte 0x44 - .byte 0xc4 - .byte 0xde - .byte 0xe9 - .byte 0xcb - .byte 0x54 - .byte 0x7b - .byte 0x94 - .byte 0x32 - .byte 0xa6 - .byte 0xc2 - .byte 0x23 - .byte 0x3d - .byte 0xee - .byte 0x4c - .byte 0x95 - .byte 0xb - .byte 0x42 - .byte 0xfa - .byte 0xc3 - .byte 0x4e - .byte 0x8 - .byte 0x2e - .byte 0xa1 - .byte 0x66 - .byte 0x28 - .byte 0xd9 - .byte 0x24 - .byte 0xb2 - .byte 0x76 - .byte 0x5b - .byte 0xa2 - .byte 0x49 - .byte 0x6d - .byte 0x8b - .byte 0xd1 - .byte 0x25 - .byte 0x72 - .byte 0xf8 - .byte 0xf6 - .byte 0x64 - .byte 0x86 - .byte 0x68 - .byte 0x98 - .byte 0x16 - .byte 0xd4 - .byte 0xa4 - .byte 0x5c - .byte 0xcc - .byte 0x5d - .byte 0x65 - .byte 0xb6 - .byte 0x92 - .byte 0x6c - .byte 0x70 - .byte 0x48 - .byte 0x50 - .byte 0xfd - .byte 0xed - .byte 0xb9 - .byte 0xda - .byte 0x5e - .byte 0x15 - .byte 0x46 - .byte 0x57 - .byte 0xa7 - .byte 0x8d - .byte 0x9d - .byte 0x84 - .byte 0x90 - .byte 0xd8 - .byte 0xab - .byte 0x0 - .byte 0x8c - .byte 0xbc - .byte 0xd3 - .byte 0xa - .byte 0xf7 - .byte 0xe4 - .byte 0x58 - .byte 0x5 - .byte 0xb8 - .byte 0xb3 - .byte 0x45 - .byte 0x6 - .byte 0xd0 - .byte 0x2c - .byte 0x1e - .byte 0x8f - .byte 0xca - .byte 0x3f - .byte 0xf - .byte 0x2 - .byte 0xc1 - .byte 0xaf - .byte 0xbd - .byte 0x3 - .byte 0x1 - .byte 0x13 - .byte 0x8a - .byte 0x6b - .byte 0x3a - .byte 0x91 - .byte 0x11 - .byte 0x41 - .byte 0x4f - .byte 0x67 - .byte 0xdc - .byte 0xea - .byte 0x97 - .byte 0xf2 - .byte 0xcf - .byte 0xce - .byte 0xf0 - .byte 0xb4 - .byte 0xe6 - .byte 0x73 - .byte 0x96 - .byte 0xac - .byte 0x74 - .byte 0x22 - .byte 0xe7 - .byte 0xad - .byte 0x35 - .byte 0x85 - .byte 0xe2 - .byte 0xf9 - .byte 0x37 - .byte 0xe8 - .byte 0x1c - .byte 0x75 - .byte 0xdf - .byte 0x6e - .byte 0x47 - .byte 0xf1 - .byte 0x1a - .byte 0x71 - .byte 0x1d - .byte 0x29 - .byte 0xc5 - .byte 0x89 - .byte 0x6f - .byte 0xb7 - .byte 0x62 - .byte 0xe - .byte 0xaa - .byte 0x18 - .byte 0xbe - .byte 0x1b - .byte 0xfc - .byte 0x56 - .byte 0x3e - .byte 0x4b - .byte 0xc6 - .byte 0xd2 - .byte 0x79 - .byte 0x20 - .byte 0x9a - .byte 0xdb - .byte 0xc0 - .byte 0xfe - .byte 0x78 - .byte 0xcd - .byte 0x5a - .byte 0xf4 - .byte 0x1f - .byte 0xdd - .byte 0xa8 - .byte 0x33 - .byte 0x88 - .byte 0x7 - .byte 0xc7 - .byte 0x31 - .byte 0xb1 - .byte 0x12 - .byte 0x10 - .byte 0x59 - .byte 0x27 - .byte 0x80 - .byte 0xec - .byte 0x5f - .byte 0x60 - .byte 0x51 - .byte 0x7f - .byte 0xa9 - .byte 0x19 - .byte 0xb5 - .byte 0x4a - .byte 0xd - .byte 0x2d - .byte 0xe5 - .byte 0x7a - .byte 0x9f - .byte 0x93 - .byte 0xc9 - .byte 0x9c - .byte 0xef - .byte 0xa0 - .byte 0xe0 - .byte 0x3b - .byte 0x4d - .byte 0xae - .byte 0x2a - .byte 0xf5 - .byte 0xb0 - .byte 0xc8 - .byte 0xeb - .byte 0xbb - .byte 0x3c - .byte 0x83 - .byte 0x53 - .byte 0x99 - .byte 0x61 - .byte 0x17 - .byte 0x2b - .byte 0x4 - .byte 0x7e - .byte 0xba - .byte 0x77 - .byte 0xd6 - .byte 0x26 - .byte 0xe1 - .byte 0x69 - .byte 0x14 - .byte 0x63 - .byte 0x55 - .byte 0x21 - .byte 0xc - .byte 0x7d + .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 + .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb + .byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 + .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb + .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d + .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e + .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 + .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 + .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 + .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 + .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda + .byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 + .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a + .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 + .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 + .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b + .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea + .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 + .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 + .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e + .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 + .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b + .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 + .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 + .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 + .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f + .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d + .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef + .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 + .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 + .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 + .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d .text .align 4 .globl AES_ECB_decrypt @@ -19257,267 +18732,52 @@ L_AES_ECB_decrypt_end: .size AES_ECB_decrypt,.-AES_ECB_decrypt #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || defined(HAVE_AES_ECB) */ #ifdef HAVE_AES_CBC +#ifndef __APPLE__ .text .type L_AES_ARM32_cbc_td4, %object .size L_AES_ARM32_cbc_td4, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 4-byte aligned, 32-bit aligned +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_AES_ARM32_cbc_td4: - .byte 0x52 - .byte 0x9 - .byte 0x6a - .byte 0xd5 - .byte 0x30 - .byte 0x36 - .byte 0xa5 - .byte 0x38 - .byte 0xbf - .byte 0x40 - .byte 0xa3 - .byte 0x9e - .byte 0x81 - .byte 0xf3 - .byte 0xd7 - .byte 0xfb - .byte 0x7c - .byte 0xe3 - .byte 0x39 - .byte 0x82 - .byte 0x9b - .byte 0x2f - .byte 0xff - .byte 0x87 - .byte 0x34 - .byte 0x8e - .byte 0x43 - .byte 0x44 - .byte 0xc4 - .byte 0xde - .byte 0xe9 - .byte 0xcb - .byte 0x54 - .byte 0x7b - .byte 0x94 - .byte 0x32 - .byte 0xa6 - .byte 0xc2 - .byte 0x23 - .byte 0x3d - .byte 0xee - .byte 0x4c - .byte 0x95 - .byte 0xb - .byte 0x42 - .byte 0xfa - .byte 0xc3 - .byte 0x4e - .byte 0x8 - .byte 0x2e - .byte 0xa1 - .byte 0x66 - .byte 0x28 - .byte 0xd9 - .byte 0x24 - .byte 0xb2 - .byte 0x76 - .byte 0x5b - .byte 0xa2 - .byte 0x49 - .byte 0x6d - .byte 0x8b - .byte 0xd1 - .byte 0x25 - .byte 0x72 - .byte 0xf8 - .byte 0xf6 - .byte 0x64 - .byte 0x86 - .byte 0x68 - .byte 0x98 - .byte 0x16 - .byte 0xd4 - .byte 0xa4 - .byte 0x5c - .byte 0xcc - .byte 0x5d - .byte 0x65 - .byte 0xb6 - .byte 0x92 - .byte 0x6c - .byte 0x70 - .byte 0x48 - .byte 0x50 - .byte 0xfd - .byte 0xed - .byte 0xb9 - .byte 0xda - .byte 0x5e - .byte 0x15 - .byte 0x46 - .byte 0x57 - .byte 0xa7 - .byte 0x8d - .byte 0x9d - .byte 0x84 - .byte 0x90 - .byte 0xd8 - .byte 0xab - .byte 0x0 - .byte 0x8c - .byte 0xbc - .byte 0xd3 - .byte 0xa - .byte 0xf7 - .byte 0xe4 - .byte 0x58 - .byte 0x5 - .byte 0xb8 - .byte 0xb3 - .byte 0x45 - .byte 0x6 - .byte 0xd0 - .byte 0x2c - .byte 0x1e - .byte 0x8f - .byte 0xca - .byte 0x3f - .byte 0xf - .byte 0x2 - .byte 0xc1 - .byte 0xaf - .byte 0xbd - .byte 0x3 - .byte 0x1 - .byte 0x13 - .byte 0x8a - .byte 0x6b - .byte 0x3a - .byte 0x91 - .byte 0x11 - .byte 0x41 - .byte 0x4f - .byte 0x67 - .byte 0xdc - .byte 0xea - .byte 0x97 - .byte 0xf2 - .byte 0xcf - .byte 0xce - .byte 0xf0 - .byte 0xb4 - .byte 0xe6 - .byte 0x73 - .byte 0x96 - .byte 0xac - .byte 0x74 - .byte 0x22 - .byte 0xe7 - .byte 0xad - .byte 0x35 - .byte 0x85 - .byte 0xe2 - .byte 0xf9 - .byte 0x37 - .byte 0xe8 - .byte 0x1c - .byte 0x75 - .byte 0xdf - .byte 0x6e - .byte 0x47 - .byte 0xf1 - .byte 0x1a - .byte 0x71 - .byte 0x1d - .byte 0x29 - .byte 0xc5 - .byte 0x89 - .byte 0x6f - .byte 0xb7 - .byte 0x62 - .byte 0xe - .byte 0xaa - .byte 0x18 - .byte 0xbe - .byte 0x1b - .byte 0xfc - .byte 0x56 - .byte 0x3e - .byte 0x4b - .byte 0xc6 - .byte 0xd2 - .byte 0x79 - .byte 0x20 - .byte 0x9a - .byte 0xdb - .byte 0xc0 - .byte 0xfe - .byte 0x78 - .byte 0xcd - .byte 0x5a - .byte 0xf4 - .byte 0x1f - .byte 0xdd - .byte 0xa8 - .byte 0x33 - .byte 0x88 - .byte 0x7 - .byte 0xc7 - .byte 0x31 - .byte 0xb1 - .byte 0x12 - .byte 0x10 - .byte 0x59 - .byte 0x27 - .byte 0x80 - .byte 0xec - .byte 0x5f - .byte 0x60 - .byte 0x51 - .byte 0x7f - .byte 0xa9 - .byte 0x19 - .byte 0xb5 - .byte 0x4a - .byte 0xd - .byte 0x2d - .byte 0xe5 - .byte 0x7a - .byte 0x9f - .byte 0x93 - .byte 0xc9 - .byte 0x9c - .byte 0xef - .byte 0xa0 - .byte 0xe0 - .byte 0x3b - .byte 0x4d - .byte 0xae - .byte 0x2a - .byte 0xf5 - .byte 0xb0 - .byte 0xc8 - .byte 0xeb - .byte 0xbb - .byte 0x3c - .byte 0x83 - .byte 0x53 - .byte 0x99 - .byte 0x61 - .byte 0x17 - .byte 0x2b - .byte 0x4 - .byte 0x7e - .byte 0xba - .byte 0x77 - .byte 0xd6 - .byte 0x26 - .byte 0xe1 - .byte 0x69 - .byte 0x14 - .byte 0x63 - .byte 0x55 - .byte 0x21 - .byte 0xc - .byte 0x7d + .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 + .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb + .byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 + .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb + .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d + .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e + .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 + .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 + .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 + .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 + .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda + .byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 + .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a + .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 + .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 + .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b + .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea + .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 + .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 + .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e + .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 + .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b + .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 + .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 + .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 + .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f + .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d + .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef + .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 + .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 + .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 + .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d .text .align 4 .globl AES_CBC_decrypt @@ -19531,7 +18791,7 @@ AES_CBC_decrypt: adr r2, L_AES_ARM32_cbc_td4 ldr r8, [sp, #36] ldr r4, [sp, #40] - push {r3, r4} + push {r3-r4} cmp r8, #10 beq L_AES_CBC_decrypt_loop_block_128 cmp r8, #12 @@ -23954,7 +23214,7 @@ L_AES_CBC_decrypt_end_odd: strd r10, r11, [r4, #8] #endif L_AES_CBC_decrypt_end: - pop {r3, r4} + pop {r3-r4} pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size AES_CBC_decrypt,.-AES_CBC_decrypt #endif /* HAVE_AES_CBC */ @@ -23962,27 +23222,24 @@ L_AES_CBC_decrypt_end: * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM +#ifndef __APPLE__ .text .type L_GCM_gmult_len_r, %object .size L_GCM_gmult_len_r, 64 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_GCM_gmult_len_r: - .word 0x0 - .word 0x1c200000 - .word 0x38400000 - .word 0x24600000 - .word 0x70800000 - .word 0x6ca00000 - .word 0x48c00000 - .word 0x54e00000 - .word 0xe1000000 - .word 0xfd200000 - .word 0xd9400000 - .word 0xc5600000 - .word 0x91800000 - .word 0x8da00000 - .word 0xa9c00000 - .word 0xb5e00000 + .long 0x00000000,0x1c200000,0x38400000,0x24600000 + .long 0x70800000,0x6ca00000,0x48c00000,0x54e00000 + .long 0xe1000000,0xfd200000,0xd9400000,0xc5600000 + .long 0x91800000,0x8da00000,0xa9c00000,0xb5e00000 .text .align 4 .globl GCM_gmult_len @@ -24561,12 +23818,21 @@ L_GCM_gmult_len_start_block: bne L_GCM_gmult_len_start_block pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size GCM_gmult_len,.-GCM_gmult_len +#ifndef __APPLE__ .text .type L_AES_ARM32_te_gcm, %object .size L_AES_ARM32_te_gcm, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_ARM32_te_gcm: - .word L_AES_ARM32_te_data + .long L_AES_ARM32_te_data .text .align 4 .globl AES_GCM_encrypt diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 360e0fc5981..52adcfc5f24 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -59,13 +57,13 @@ WC_OMIT_FRAME_POINTER void AES_set_key_AARCH32(const byte* userKey_p, #else WC_OMIT_FRAME_POINTER void AES_set_key_AARCH32(const byte* userKey, int keylen, byte* key, int dir) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* userKey asm ("r0") = (const byte*)userKey_p; - register int keylen asm ("r1") = (int)keylen_p; - register byte* key asm ("r2") = (byte*)key_p; - register int dir asm ("r3") = (int)dir_p; + register const byte* userKey __asm__ ("r0") = (const byte*)userKey_p; + register int keylen __asm__ ("r1") = (int)keylen_p; + register byte* key __asm__ ("r2") = (byte*)key_p; + register int dir __asm__ ("r3") = (int)dir_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -368,7 +366,7 @@ WC_OMIT_FRAME_POINTER void AES_set_key_AARCH32(const byte* userKey, int keylen, "vst1.32 {q0}, [%[key]]\n\t" "b L_aes_set_key_arm32_crypto_done_%=\n\t" "\n" - "L_aes_set_key_arm32_crypto_start_256_%=: \n\t" + "L_aes_set_key_arm32_crypto_start_256_%=:\n\t" "ldr r4, [%[userKey]], #4\n\t" "ldr r5, [%[userKey]], #4\n\t" "ldr r6, [%[userKey]], #4\n\t" @@ -721,7 +719,7 @@ WC_OMIT_FRAME_POINTER void AES_set_key_AARCH32(const byte* userKey, int keylen, "vst1.32 {q0}, [%[key]]\n\t" "b L_aes_set_key_arm32_crypto_done_%=\n\t" "\n" - "L_aes_set_key_arm32_crypto_start_128_%=: \n\t" + "L_aes_set_key_arm32_crypto_start_128_%=:\n\t" "ldr r4, [%[userKey]], #4\n\t" "ldr r5, [%[userKey]], #4\n\t" "ldr r6, [%[userKey]], #4\n\t" @@ -993,7 +991,7 @@ WC_OMIT_FRAME_POINTER void AES_set_key_AARCH32(const byte* userKey, int keylen, "aesimc.8 q0, q0\n\t" "vst1.32 {q0}, [%[key]]\n\t" "\n" - "L_aes_set_key_arm32_crypto_done_%=: \n\t" + "L_aes_set_key_arm32_crypto_done_%=:\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [userKey] "+r" (userKey), [keylen] "+r" (keylen), [key] "+r" (key), [dir] "+r" (dir) @@ -1017,13 +1015,13 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_AARCH32(const byte* inBlock_p, #else WC_OMIT_FRAME_POINTER void AES_encrypt_AARCH32(const byte* inBlock, byte* outBlock, byte* key, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* inBlock asm ("r0") = (const byte*)inBlock_p; - register byte* outBlock asm ("r1") = (byte*)outBlock_p; - register byte* key asm ("r2") = (byte*)key_p; - register int nr asm ("r3") = (int)nr_p; + register const byte* inBlock __asm__ ("r0") = (const byte*)inBlock_p; + register byte* outBlock __asm__ ("r1") = (byte*)outBlock_p; + register byte* key __asm__ ("r2") = (byte*)key_p; + register int nr __asm__ ("r3") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1065,7 +1063,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_AARCH32(const byte* inBlock, "aesmc.8 q0, q0\n\t" "aese.8 q0, q2\n\t" "\n" - "L_aes_encrypt_arm32_crypto_round_done_%=: \n\t" + "L_aes_encrypt_arm32_crypto_round_done_%=:\n\t" "vld1.32 {q1}, [%[key]]\n\t" "veor.32 q0, q0, q1\n\t" "vst1.8 {q0}, [%[outBlock]]\n\t" @@ -1094,13 +1092,13 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_AARCH32(const byte* inBlock_p, #else WC_OMIT_FRAME_POINTER void AES_decrypt_AARCH32(const byte* inBlock, byte* outBlock, byte* key, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* inBlock asm ("r0") = (const byte*)inBlock_p; - register byte* outBlock asm ("r1") = (byte*)outBlock_p; - register byte* key asm ("r2") = (byte*)key_p; - register int nr asm ("r3") = (int)nr_p; + register const byte* inBlock __asm__ ("r0") = (const byte*)inBlock_p; + register byte* outBlock __asm__ ("r1") = (byte*)outBlock_p; + register byte* key __asm__ ("r2") = (byte*)key_p; + register int nr __asm__ ("r3") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1142,7 +1140,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_AARCH32(const byte* inBlock, "aesimc.8 q0, q0\n\t" "aesd.8 q0, q2\n\t" "\n" - "L_aes_decrypt_arm32_crypto_round_done_%=: \n\t" + "L_aes_decrypt_arm32_crypto_round_done_%=:\n\t" "vld1.32 {q1}, [%[key]]\n\t" "veor.32 q0, q0, q1\n\t" "vst1.8 {q0}, [%[outBlock]]\n\t" @@ -1169,14 +1167,14 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, word32 sz, byte* key, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register byte* key asm ("r3") = (byte*)key_p; - register int nr asm ("r12") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register byte* key __asm__ ("r3") = (byte*)key_p; + register int nr __asm__ ("r12") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1193,7 +1191,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #1\n\t" "beq L_aes_encrypt_blocks_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_192_start_4_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_192_start_4_%=:\n\t" "cmp %[sz], #4\n\t" "blt L_aes_encrypt_blocks_arm32_crypto_192_start_2_%=\n\t" "vldm.8 %[in]!, {q12-q15}\n\t" @@ -1302,7 +1300,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "bge L_aes_encrypt_blocks_arm32_crypto_192_start_4_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_192_start_2_%=:\n\t" "cmp %[sz], #2\n\t" "blt L_aes_encrypt_blocks_arm32_crypto_192_start_1_%=\n\t" "vld1.8 {q12-q13}, [%[in]]!\n\t" @@ -1361,7 +1359,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[sz], %[sz], #2\n\t" "vst1.8 {q12-q13}, [%[out]]!\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_192_start_1_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_encrypt_blocks_arm32_crypto_192_done_%=\n\t" "vld1.8 {q12}, [%[in]]!\n\t" @@ -1395,18 +1393,18 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[key], %[key], #48\n\t" "vst1.8 {q12}, [%[out]]!\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_192_done_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_encrypt_blocks_arm32_crypto_done_%=\n\t" /* AES_ECB_256 */ "\n" - "L_aes_encrypt_blocks_arm32_crypto_start_256_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vld1.32 {q8-q9}, [%[key]]!\n\t" "cmp %[sz], #1\n\t" "beq L_aes_encrypt_blocks_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_256_start_4_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_256_start_4_%=:\n\t" "cmp %[sz], #4\n\t" "blt L_aes_encrypt_blocks_arm32_crypto_256_start_2_%=\n\t" "vldm.8 %[in]!, {q12-q15}\n\t" @@ -1533,7 +1531,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "bge L_aes_encrypt_blocks_arm32_crypto_256_start_4_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_256_start_2_%=:\n\t" "cmp %[sz], #2\n\t" "blt L_aes_encrypt_blocks_arm32_crypto_256_start_1_%=\n\t" "vld1.8 {q12-q13}, [%[in]]!\n\t" @@ -1602,7 +1600,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[sz], %[sz], #2\n\t" "vst1.8 {q12-q13}, [%[out]]!\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_256_start_1_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_encrypt_blocks_arm32_crypto_256_done_%=\n\t" "vld1.8 {q12}, [%[in]]!\n\t" @@ -1642,18 +1640,18 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[key], %[key], #0x50\n\t" "vst1.8 {q12}, [%[out]]!\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_256_done_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_encrypt_blocks_arm32_crypto_done_%=\n\t" /* AES_ECB_128 */ "\n" - "L_aes_encrypt_blocks_arm32_crypto_start_128_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vldm.32 %[key]!, {q8-q10}\n\t" "cmp %[sz], #1\n\t" "beq L_aes_encrypt_blocks_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_128_start_4_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_128_start_4_%=:\n\t" "cmp %[sz], #4\n\t" "blt L_aes_encrypt_blocks_arm32_crypto_128_start_2_%=\n\t" "vldm.8 %[in]!, {q12-q15}\n\t" @@ -1742,7 +1740,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "bge L_aes_encrypt_blocks_arm32_crypto_128_start_4_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_128_start_2_%=:\n\t" "cmp %[sz], #2\n\t" "blt L_aes_encrypt_blocks_arm32_crypto_128_start_1_%=\n\t" "vld1.8 {q12-q13}, [%[in]]!\n\t" @@ -1789,7 +1787,7 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[sz], %[sz], #2\n\t" "vst1.8 {q12-q13}, [%[out]]!\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_128_start_1_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_encrypt_blocks_arm32_crypto_128_done_%=\n\t" "vld1.8 {q12}, [%[in]]!\n\t" @@ -1815,10 +1813,10 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_blocks_AARCH32(const byte* in, byte* out, "veor.32 q12, q12, q10\n\t" "vst1.8 {q12}, [%[out]]!\n\t" "\n" - "L_aes_encrypt_blocks_arm32_crypto_128_done_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_encrypt_blocks_arm32_crypto_done_%=: \n\t" + "L_aes_encrypt_blocks_arm32_crypto_done_%=:\n\t" "pop {%[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), @@ -1841,14 +1839,14 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, word32 sz, byte* key, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register byte* key asm ("r3") = (byte*)key_p; - register int nr asm ("r12") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register byte* key __asm__ ("r3") = (byte*)key_p; + register int nr __asm__ ("r12") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1867,7 +1865,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "blt L_aes_decrypt_blocks_arm32_crypto_192_start_2_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_192_start_4_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_192_start_4_%=:\n\t" "vldm.8 %[in]!, {q12-q15}\n\t" "aesd.8 q12, q0\n\t" "aesimc.8 q12, q12\n\t" @@ -1974,7 +1972,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "bge L_aes_decrypt_blocks_arm32_crypto_192_start_4_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_192_start_2_%=:\n\t" "cmp %[sz], #2\n\t" "blt L_aes_decrypt_blocks_arm32_crypto_192_start_1_%=\n\t" "vld1.8 {q12-q13}, [%[in]]!\n\t" @@ -2033,7 +2031,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[sz], %[sz], #2\n\t" "vst1.8 {q12-q13}, [%[out]]!\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_192_start_1_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_decrypt_blocks_arm32_crypto_192_done_%=\n\t" "vld1.8 {q12}, [%[in]]!\n\t" @@ -2067,12 +2065,12 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[key], %[key], #48\n\t" "vst1.8 {q12}, [%[out]]!\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_192_done_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_decrypt_blocks_arm32_crypto_done_%=\n\t" /* AES_ECB_256 */ "\n" - "L_aes_decrypt_blocks_arm32_crypto_start_256_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vld1.32 {q8-q9}, [%[key]]!\n\t" "cmp %[sz], #1\n\t" @@ -2080,7 +2078,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "blt L_aes_decrypt_blocks_arm32_crypto_256_start_2_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_256_start_4_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_256_start_4_%=:\n\t" "vldm.8 %[in]!, {q12-q15}\n\t" "aesd.8 q12, q0\n\t" "aesimc.8 q12, q12\n\t" @@ -2205,7 +2203,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "bge L_aes_decrypt_blocks_arm32_crypto_256_start_4_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_256_start_2_%=:\n\t" "cmp %[sz], #2\n\t" "blt L_aes_decrypt_blocks_arm32_crypto_256_start_1_%=\n\t" "vld1.8 {q12-q13}, [%[in]]!\n\t" @@ -2274,7 +2272,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[sz], %[sz], #2\n\t" "vst1.8 {q12-q13}, [%[out]]!\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_256_start_1_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_decrypt_blocks_arm32_crypto_256_done_%=\n\t" "vld1.8 {q12}, [%[in]]!\n\t" @@ -2314,12 +2312,12 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[key], %[key], #0x50\n\t" "vst1.8 {q12}, [%[out]]!\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_256_done_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_decrypt_blocks_arm32_crypto_done_%=\n\t" /* AES_ECB_128 */ "\n" - "L_aes_decrypt_blocks_arm32_crypto_start_128_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vldm.32 %[key]!, {q8-q10}\n\t" "cmp %[sz], #1\n\t" @@ -2327,7 +2325,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "blt L_aes_decrypt_blocks_arm32_crypto_128_start_2_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_128_start_4_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_128_start_4_%=:\n\t" "vldm.8 %[in]!, {q12-q15}\n\t" "aesd.8 q12, q0\n\t" "aesimc.8 q12, q12\n\t" @@ -2414,7 +2412,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "bge L_aes_decrypt_blocks_arm32_crypto_128_start_4_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_128_start_2_%=:\n\t" "cmp %[sz], #2\n\t" "blt L_aes_decrypt_blocks_arm32_crypto_128_start_1_%=\n\t" "vld1.8 {q12-q13}, [%[in]]!\n\t" @@ -2461,7 +2459,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "sub %[sz], %[sz], #2\n\t" "vst1.8 {q12-q13}, [%[out]]!\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_128_start_1_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_decrypt_blocks_arm32_crypto_128_done_%=\n\t" "vld1.8 {q12}, [%[in]]!\n\t" @@ -2487,10 +2485,10 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_blocks_AARCH32(const byte* in, byte* out, "veor.32 q12, q12, q10\n\t" "vst1.8 {q12}, [%[out]]!\n\t" "\n" - "L_aes_decrypt_blocks_arm32_crypto_128_done_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_decrypt_blocks_arm32_crypto_done_%=: \n\t" + "L_aes_decrypt_blocks_arm32_crypto_done_%=:\n\t" "pop {%[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), @@ -2515,15 +2513,15 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, word32 sz, byte* reg, byte* key, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register byte* reg asm ("r3") = (byte*)reg_p; - register byte* key asm ("r12") = (byte*)key_p; - register int nr asm ("lr") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register byte* reg __asm__ ("r3") = (byte*)reg_p; + register byte* key __asm__ ("r12") = (byte*)key_p; + register int nr __asm__ ("lr") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2545,7 +2543,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "blt L_aes_cbc_encrypt_arm32_crypto_192_start_2_%=\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_192_start_4_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_192_start_4_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -2661,7 +2659,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #2\n\t" "blt L_aes_cbc_encrypt_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_192_start_2_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -2719,7 +2717,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "vld1.8 {q14}, [%[in]]!\n\t" "vst1.8 {q15}, [%[out]]!\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_192_start_1_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -2746,13 +2744,13 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "aese.8 q15, q11\n\t" "veor.32 q15, q15, q12\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_192_done_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_192_done_%=:\n\t" "vst1.8 {q15}, [%[out]]!\n\t" #endif /* !NO_AES_192 */ "b L_aes_cbc_encrypt_arm32_crypto_done_%=\n\t" /* AES_CBC_256 */ "\n" - "L_aes_cbc_encrypt_arm32_crypto_start_256_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vld1.8 {q14}, [%[in]]!\n\t" "vldm.32 r12!, {q8-q11}\n\t" @@ -2764,7 +2762,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "blt L_aes_cbc_encrypt_arm32_crypto_256_start_2_%=\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_256_start_4_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_256_start_4_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -2900,7 +2898,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #2\n\t" "blt L_aes_cbc_encrypt_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_256_start_2_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -2968,7 +2966,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "vld1.8 {q14}, [%[in]]!\n\t" "vst1.8 {q15}, [%[out]]!\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_256_start_1_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -3000,13 +2998,13 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "aese.8 q15, q12\n\t" "veor.32 q15, q15, q13\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_256_done_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_256_done_%=:\n\t" "vst1.8 {q15}, [%[out]]!\n\t" #endif /* !NO_AES_256 */ "b L_aes_cbc_encrypt_arm32_crypto_done_%=\n\t" /* AES_CBC_128 */ "\n" - "L_aes_cbc_encrypt_arm32_crypto_start_128_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vld1.8 {q14}, [%[in]]!\n\t" "vldm.32 r12!, {q8-q10}\n\t" @@ -3015,7 +3013,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #4\n\t" "blt L_aes_cbc_encrypt_arm32_crypto_128_start_2_%=\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_128_start_4_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_128_start_4_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -3115,7 +3113,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #2\n\t" "blt L_aes_cbc_encrypt_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_128_start_2_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -3165,7 +3163,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "vld1.8 {q14}, [%[in]]!\n\t" "vst1.8 {q15}, [%[out]]!\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_128_start_1_%=:\n\t" "veor.32 q15, q15, q14\n\t" "aese.8 q15, q0\n\t" "aesmc.8 q15, q15\n\t" @@ -3188,11 +3186,11 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt_AARCH32(const byte* in, byte* out, "aese.8 q15, q9\n\t" "veor.32 q15, q15, q10\n\t" "\n" - "L_aes_cbc_encrypt_arm32_crypto_128_done_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_128_done_%=:\n\t" "vst1.8 {q15}, [%[out]]!\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_cbc_encrypt_arm32_crypto_done_%=: \n\t" + "L_aes_cbc_encrypt_arm32_crypto_done_%=:\n\t" "vst1.32 {q15}, [%[reg]]\n\t" "pop {%[key], %[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3216,15 +3214,15 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, word32 sz, byte* reg, byte* key, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register byte* reg asm ("r3") = (byte*)reg_p; - register byte* key asm ("r12") = (byte*)key_p; - register int nr asm ("lr") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register byte* reg __asm__ ("r3") = (byte*)reg_p; + register byte* key __asm__ ("r12") = (byte*)key_p; + register int nr __asm__ ("lr") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3243,7 +3241,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "cmp %[sz], #1\n\t" "beq L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_2_%=:\n\t" "vld1.8 {q14-q15}, [%[in]]!\n\t" "vmov q11, q13\n\t" "vmov q12, q14\n\t" @@ -3309,7 +3307,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "blt L_aes_cbc_decrypt_blocks_arm32_crypto_192_done_%=\n\t" "bgt L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_2_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_192_start_1_%=:\n\t" "vld1.8 {q14}, [%[in]]!\n\t" "vmov q11, q13\n\t" "vmov q13, q14\n\t" @@ -3344,18 +3342,18 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "veor.32 q14, q14, q11\n\t" "vst1.8 {q14}, [%[out]]!\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_192_done_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_cbc_decrypt_blocks_arm32_crypto_done_%=\n\t" /* AES_CBC_256 */ "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_start_256_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vld1.32 {q8}, [r12]!\n\t" "cmp %[sz], #1\n\t" "beq L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_2_%=:\n\t" "vld1.8 {q14-q15}, [%[in]]!\n\t" "vmov q11, q13\n\t" "vmov q12, q14\n\t" @@ -3431,7 +3429,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "blt L_aes_cbc_decrypt_blocks_arm32_crypto_256_done_%=\n\t" "bgt L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_2_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_256_start_1_%=:\n\t" "vld1.8 {q14}, [%[in]]!\n\t" "vmov q11, q13\n\t" "vmov q13, q14\n\t" @@ -3472,18 +3470,18 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "veor.32 q14, q14, q11\n\t" "vst1.8 {q14}, [%[out]]!\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_256_done_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_cbc_decrypt_blocks_arm32_crypto_done_%=\n\t" /* AES_CBC_128 */ "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_start_128_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vldm.32 r12!, {q8-q10}\n\t" "cmp %[sz], #1\n\t" "beq L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_2_%=:\n\t" "vld1.8 {q14-q15}, [%[in]]!\n\t" "vmov q11, q13\n\t" "vmov q12, q14\n\t" @@ -3536,7 +3534,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "blt L_aes_cbc_decrypt_blocks_arm32_crypto_128_done_%=\n\t" "bgt L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_2_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_128_start_1_%=:\n\t" "vld1.8 {q14}, [%[in]]!\n\t" "vmov q11, q13\n\t" "vmov q13, q14\n\t" @@ -3563,10 +3561,10 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt_AARCH32(const byte* in, byte* out, "veor.32 q14, q14, q11\n\t" "vst1.8 {q14}, [%[out]]!\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_128_done_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_cbc_decrypt_blocks_arm32_crypto_done_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm32_crypto_done_%=:\n\t" "vst1.32 {q13}, [%[reg]]\n\t" "pop {%[key], %[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3593,17 +3591,17 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left, word32 nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register byte* reg asm ("r3") = (byte*)reg_p; - register byte* key asm ("r12") = (byte*)key_p; - register byte* tmp asm ("lr") = (byte*)tmp_p; - register word32* left asm ("r4") = (word32*)left_p; - register word32 nr asm ("r5") = (word32)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register byte* reg __asm__ ("r3") = (byte*)reg_p; + register byte* key __asm__ ("r12") = (byte*)key_p; + register byte* tmp __asm__ ("lr") = (byte*)tmp_p; + register word32* left __asm__ ("r4") = (word32*)left_p; + register word32 nr __asm__ ("r5") = (word32)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3636,7 +3634,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "vmov d2, r5, r6\n\t" "vrev32.8 q1, q1\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_192_start_2_%=:\n\t" "aese.8 q0, q3\n\t" "aesmc.8 q0, q0\n\t" "aese.8 q1, q3\n\t" @@ -3712,7 +3710,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov lr, #0\n\t" "blt L_aes_ctr_encrypt_arm32_crypto_192_done_%=\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_192_start_1_%=:\n\t" "aese.8 q0, q3\n\t" "aesmc.8 q0, q0\n\t" "adds r8, r8, lr\n\t" @@ -3752,7 +3750,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov lr, #1\n\t" "vrev32.8 q0, q2\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_192_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_192_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_ctr_encrypt_arm32_crypto_192_partial_done_%=\n\t" "ldr r4, [sp, #8]\n\t" @@ -3794,7 +3792,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov r5, #16\n\t" "sub r5, r5, %[sz]\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_192_start_byte_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_192_start_byte_%=:\n\t" "ldrb r7, [lr], #1\n\t" "ldrb r8, [%[in]], #1\n\t" "eor r7, r7, r8\n\t" @@ -3804,12 +3802,12 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "vrev32.8 q0, q2\n\t" "str r5, [r4]\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_192_partial_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_ctr_encrypt_arm32_crypto_done_%=\n\t" /* AES_CTR_256 */ "\n" - "L_aes_ctr_encrypt_arm32_crypto_start_256_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vldm.32 r12!, {q11-q13}\n\t" "mov lr, #1\n\t" @@ -3824,7 +3822,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "vmov d2, r5, r6\n\t" "vrev32.8 q1, q1\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_256_start_2_%=:\n\t" "aese.8 q0, q3\n\t" "aesmc.8 q0, q0\n\t" "aese.8 q1, q3\n\t" @@ -3910,7 +3908,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov lr, #0\n\t" "blt L_aes_ctr_encrypt_arm32_crypto_256_done_%=\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_256_start_1_%=:\n\t" "aese.8 q0, q3\n\t" "aesmc.8 q0, q0\n\t" "adds r8, r8, lr\n\t" @@ -3956,7 +3954,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov lr, #1\n\t" "vrev32.8 q0, q2\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_256_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_256_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_ctr_encrypt_arm32_crypto_256_partial_done_%=\n\t" "ldr r4, [sp, #8]\n\t" @@ -4004,7 +4002,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov r5, #16\n\t" "sub r5, r5, %[sz]\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_256_start_byte_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_256_start_byte_%=:\n\t" "ldrb r7, [lr], #1\n\t" "ldrb r8, [%[in]], #1\n\t" "eor r7, r7, r8\n\t" @@ -4014,12 +4012,12 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "vrev32.8 q0, q2\n\t" "str r5, [r4]\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_256_partial_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_ctr_encrypt_arm32_crypto_done_%=\n\t" /* AES_CTR_128 */ "\n" - "L_aes_ctr_encrypt_arm32_crypto_start_128_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vldm.32 r12!, {q11-q13}\n\t" "mov lr, #1\n\t" @@ -4034,7 +4032,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "vmov d2, r5, r6\n\t" "vrev32.8 q1, q1\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_128_start_2_%=:\n\t" "aese.8 q0, q3\n\t" "aesmc.8 q0, q0\n\t" "aese.8 q1, q3\n\t" @@ -4099,7 +4097,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov lr, #0\n\t" "blt L_aes_ctr_encrypt_arm32_crypto_128_done_%=\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_128_start_1_%=:\n\t" "aese.8 q0, q3\n\t" "aesmc.8 q0, q0\n\t" "adds r8, r8, lr\n\t" @@ -4132,7 +4130,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov lr, #1\n\t" "vrev32.8 q0, q2\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_128_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_128_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_ctr_encrypt_arm32_crypto_128_partial_done_%=\n\t" "ldr r4, [sp, #8]\n\t" @@ -4168,7 +4166,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "mov r5, #16\n\t" "sub r5, r5, %[sz]\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_128_start_byte_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_128_start_byte_%=:\n\t" "ldrb r7, [lr], #1\n\t" "ldrb r8, [%[in]], #1\n\t" "eor r7, r7, r8\n\t" @@ -4178,10 +4176,10 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt_AARCH32(const byte* in, byte* out, "vrev32.8 q0, q2\n\t" "str r5, [r4]\n\t" "\n" - "L_aes_ctr_encrypt_arm32_crypto_128_partial_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_ctr_encrypt_arm32_crypto_done_%=: \n\t" + "L_aes_ctr_encrypt_arm32_crypto_done_%=:\n\t" "vst1.32 {q0}, [%[reg]]\n\t" "pop {%[key], %[tmp]}\n\t" "pop {%[left], %[nr]}\n\t" @@ -4208,13 +4206,13 @@ WC_OMIT_FRAME_POINTER void AES_GCM_set_key_AARCH32(const byte* nonce_p, #else WC_OMIT_FRAME_POINTER void AES_GCM_set_key_AARCH32(const byte* nonce, const byte* key, byte* gcm_h, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* nonce asm ("r0") = (const byte*)nonce_p; - register const byte* key asm ("r1") = (const byte*)key_p; - register byte* gcm_h asm ("r2") = (byte*)gcm_h_p; - register int nr asm ("r3") = (int)nr_p; + register const byte* nonce __asm__ ("r0") = (const byte*)nonce_p; + register const byte* key __asm__ ("r1") = (const byte*)key_p; + register byte* gcm_h __asm__ ("r2") = (byte*)gcm_h_p; + register int nr __asm__ ("r3") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4258,7 +4256,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_set_key_AARCH32(const byte* nonce, "aesmc.8 q0, q0\n\t" "aese.8 q0, q2\n\t" "\n" - "L_aes_gcm_set_key_arm32_crypto_round_done_%=: \n\t" + "L_aes_gcm_set_key_arm32_crypto_round_done_%=:\n\t" "vld1.8 {q1}, [%[key]]\n\t" "veor q0, q0, q1\n\t" "vmov.i8 q1, #0x55\n\t" @@ -4295,23 +4293,23 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, word32 sz, const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register const byte* nonce asm ("r3") = (const byte*)nonce_p; - register word32 nonceSz asm ("r12") = (word32)nonceSz_p; - register byte* tag asm ("lr") = (byte*)tag_p; - register word32 tagSz asm ("r4") = (word32)tagSz_p; - register const byte* aad asm ("r5") = (const byte*)aad_p; - register word32 aadSz asm ("r6") = (word32)aadSz_p; - register byte* key asm ("r7") = (byte*)key_p; - register byte* gcm_h asm ("r8") = (byte*)gcm_h_p; - register byte* tmp asm ("r9") = (byte*)tmp_p; - register byte* reg asm ("r10") = (byte*)reg_p; - register int nr asm ("r11") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register const byte* nonce __asm__ ("r3") = (const byte*)nonce_p; + register word32 nonceSz __asm__ ("r12") = (word32)nonceSz_p; + register byte* tag __asm__ ("lr") = (byte*)tag_p; + register word32 tagSz __asm__ ("r4") = (word32)tagSz_p; + register const byte* aad __asm__ ("r5") = (const byte*)aad_p; + register word32 aadSz __asm__ ("r6") = (word32)aadSz_p; + register byte* key __asm__ ("r7") = (byte*)key_p; + register byte* gcm_h __asm__ ("r8") = (byte*)gcm_h_p; + register byte* tmp __asm__ ("r9") = (byte*)tmp_p; + register byte* reg __asm__ ("r10") = (byte*)reg_p; + register int nr __asm__ ("r11") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4332,7 +4330,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "vshr.u64 q13, q13, #56\n\t" "vld1.32 {q8}, [r8]\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_setup_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_setup_done_%=:\n\t" /* Load Nonce */ "cmp r12, #12\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_ghash_nonce_%=\n\t" @@ -4346,12 +4344,12 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "mov r5, #1\n\t" "b L_aes_gcm_encrypt_arm32_crypto_done_nonce_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_ghash_nonce_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_ghash_nonce_%=:\n\t" "lsr r10, r12, #4\n\t" "cmp r10, #0\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_nonce_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_start_1_%=:\n\t" "vld1.32 {q14}, [%[nonce]]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -4382,7 +4380,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "subs r10, r10, #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_nonce_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_done_%=:\n\t" "ands r11, r12, #15\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_nonce_partial_done_%=\n\t" "veor.8 q0, q0, q0\n\t" @@ -4391,21 +4389,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r12, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_nonce_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_start_dw_%=:\n\t" "ldr r8, [%[nonce]], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_nonce_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_nonce_start_byte_%=\n\t" "ldrh r8, [%[nonce]], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_nonce_end_bytes_%=\n\t" "ldrb r8, [%[nonce]], #1\n\t" @@ -4413,7 +4411,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_nonce_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "vld1.32 {q14}, [r9]\n\t" "vmov.i8 q12, #0x55\n\t" @@ -4443,7 +4441,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q6, q6, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_nonce_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_nonce_partial_done_%=:\n\t" "veor.8 q0, q0, q0\n\t" /* nonceSz */ "ldr r12, [sp]\n\t" @@ -4484,7 +4482,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "vmov.32 s27, r5\n\t" "rev r5, r5\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_done_nonce_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_done_nonce_%=:\n\t" "vldm.32 r7!, {q0-q3}\n\t" "vldm.32 r7!, {q7-q13}\n\t" /* nr */ @@ -4499,7 +4497,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "blt L_aes_gcm_encrypt_arm32_crypto_192_done_%=\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_start_2_%=:\n\t" "add r8, r5, #1\n\t" "vmov.8 q4, q6\n\t" "add r5, r5, #2\n\t" @@ -4568,7 +4566,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "bgt L_aes_gcm_encrypt_arm32_crypto_192_start_2_%=\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_start_1_%=:\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" "rev r8, r5\n\t" @@ -4604,7 +4602,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q14, q14, q4\n\t" "vst1.32 {q14}, [%[out]]!\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_192_partial_done_%=\n\t" "veor.8 q14, q14, q14\n\t" @@ -4613,21 +4611,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_start_dw_%=:\n\t" "ldr lr, [%[in]], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [r9], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_192_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_start_byte_%=\n\t" "ldrh lr, [%[in]], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [r9], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_end_bytes_%=\n\t" "ldrb lr, [%[in]], #1\n\t" @@ -4635,7 +4633,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb lr, [r9], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_192_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" @@ -4675,21 +4673,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_out_start_dw_%=:\n\t" "ldr lr, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [%[out]], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_192_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_out_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_out_start_byte_%=\n\t" "ldrh lr, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [%[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_out_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_192_out_end_bytes_%=\n\t" "ldrb lr, [r9], #1\n\t" @@ -4697,9 +4695,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb lr, [%[out]], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_192_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_192_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_192_partial_done_%=:\n\t" /* Finish */ "add r8, %[sz], #15\n\t" "sub r8, r5, r8, lsr #4\n\t" @@ -4736,13 +4734,13 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "b L_aes_gcm_encrypt_arm32_crypto_done_enc_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_start_256_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp r10, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_done_%=\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_start_2_%=:\n\t" "add r8, r5, #1\n\t" "vmov.8 q4, q6\n\t" "add r5, r5, #2\n\t" @@ -4821,7 +4819,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "bgt L_aes_gcm_encrypt_arm32_crypto_256_start_2_%=\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_start_1_%=:\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" "rev r8, r5\n\t" @@ -4863,7 +4861,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q14, q14, q4\n\t" "vst1.32 {q14}, [%[out]]!\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_256_partial_done_%=\n\t" "veor.8 q14, q14, q14\n\t" @@ -4872,21 +4870,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_start_dw_%=:\n\t" "ldr lr, [%[in]], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [r9], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_256_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_start_byte_%=\n\t" "ldrh lr, [%[in]], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [r9], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_end_bytes_%=\n\t" "ldrb lr, [%[in]], #1\n\t" @@ -4894,7 +4892,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb lr, [r9], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_256_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" @@ -4940,21 +4938,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_out_start_dw_%=:\n\t" "ldr lr, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [%[out]], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_256_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_out_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_out_start_byte_%=\n\t" "ldrh lr, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [%[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_out_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_256_out_end_bytes_%=\n\t" "ldrb lr, [r9], #1\n\t" @@ -4962,9 +4960,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb lr, [%[out]], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_256_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_256_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_256_partial_done_%=:\n\t" /* Finish */ "add r8, %[sz], #15\n\t" "sub r8, r5, r8, lsr #4\n\t" @@ -5007,13 +5005,13 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "b L_aes_gcm_encrypt_arm32_crypto_done_enc_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_start_128_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp r10, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_done_%=\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_start_2_%=:\n\t" "add r8, r5, #1\n\t" "vmov.8 q4, q6\n\t" "add r5, r5, #2\n\t" @@ -5071,7 +5069,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "bgt L_aes_gcm_encrypt_arm32_crypto_128_start_2_%=\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_start_1_%=:\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" "rev r8, r5\n\t" @@ -5100,7 +5098,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q14, q14, q4\n\t" "vst1.32 {q14}, [%[out]]!\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_128_partial_done_%=\n\t" "veor.8 q14, q14, q14\n\t" @@ -5109,21 +5107,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_start_dw_%=:\n\t" "ldr lr, [%[in]], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [r9], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_128_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_start_byte_%=\n\t" "ldrh lr, [%[in]], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [r9], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_end_bytes_%=\n\t" "ldrb lr, [%[in]], #1\n\t" @@ -5131,7 +5129,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb lr, [r9], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_128_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" @@ -5164,21 +5162,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_out_start_dw_%=:\n\t" "ldr lr, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [%[out]], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_128_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_out_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_out_start_byte_%=\n\t" "ldrh lr, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [%[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_out_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_128_out_end_bytes_%=\n\t" "ldrb lr, [r9], #1\n\t" @@ -5186,9 +5184,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb lr, [%[out]], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_128_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_128_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_128_partial_done_%=:\n\t" /* Finish */ "add r8, %[sz], #15\n\t" "sub r8, r5, r8, lsr #4\n\t" @@ -5216,7 +5214,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q6, q6, q13\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_done_enc_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_done_enc_%=:\n\t" /* aadSz */ "ldr r6, [sp, #16]\n\t" /* gcm_h */ @@ -5263,7 +5261,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q11, q11, q0\n\t" /* Done */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_h_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_h_done_%=:\n\t" /* aad */ "ldr r5, [sp, #12]\n\t" "lsr r10, r6, #4\n\t" @@ -5273,7 +5271,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r10, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_aad_start_2_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_start_4_%=:\n\t" "vldm r5!, {q0-q2}\n\t" "vmov.i8 q12, #0x55\n\t" "vmov.i8 q5, #51\n\t" @@ -5365,7 +5363,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "blt L_aes_gcm_encrypt_arm32_crypto_aad_done_%=\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_aad_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_start_2_%=:\n\t" "vld1.32 {q14-q15}, [r5]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -5414,7 +5412,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r10, #0\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_aad_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_start_1_%=:\n\t" "vld1.32 {q14}, [r5]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -5443,7 +5441,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_done_%=:\n\t" "ands r11, r6, #15\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_aad_partial_done_%=\n\t" "veor.8 q0, q0, q0\n\t" @@ -5452,21 +5450,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r12, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_aad_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_start_dw_%=:\n\t" "ldr r8, [r5], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_aad_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_aad_start_byte_%=\n\t" "ldrh r8, [r5], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_aad_end_bytes_%=\n\t" "ldrb r8, [r5], #1\n\t" @@ -5474,7 +5472,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_aad_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "vld1.32 {q14}, [r9]\n\t" "vmov.i8 q12, #0x55\n\t" @@ -5504,7 +5502,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_aad_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_aad_partial_done_%=:\n\t" /* out */ "lsr r10, %[sz], #4\n\t" "cmp r10, #1\n\t" @@ -5513,7 +5511,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r10, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_out_start_2_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_start_4_%=:\n\t" "vldm %[out]!, {q0-q2}\n\t" "vmov.i8 q12, #0x55\n\t" "vmov.i8 q5, #51\n\t" @@ -5605,7 +5603,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "blt L_aes_gcm_encrypt_arm32_crypto_out_done_%=\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_out_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_start_2_%=:\n\t" "vld1.32 {q14-q15}, [%[out]]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -5654,7 +5652,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r10, #0\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_out_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_start_1_%=:\n\t" "vld1.32 {q14}, [%[out]]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -5683,7 +5681,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_encrypt_arm32_crypto_out_partial_done_%=\n\t" "veor.8 q0, q0, q0\n\t" @@ -5692,21 +5690,21 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "cmp r12, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_start_dw_%=:\n\t" "ldr r8, [%[out]], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_out_start_byte_%=\n\t" "ldrh r8, [%[out]], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_out_end_bytes_%=\n\t" "ldrb r8, [%[out]], #1\n\t" @@ -5714,7 +5712,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "vld1.32 {q14}, [r9]\n\t" "vmov.i8 q12, #0x55\n\t" @@ -5744,7 +5742,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm32_crypto_out_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_out_partial_done_%=:\n\t" "lsr lr, r6, #29\n\t" "lsl r6, r6, #3\n\t" "rbit lr, lr\n\t" @@ -5792,26 +5790,26 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q7}, [lr]\n\t" "b L_aes_gcm_encrypt_arm32_crypto_done_gcm_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_tag_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_tag_tag_partial_%=:\n\t" "vst1.8 {q7}, [r9]\n\t" "cmp r4, #4\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_sw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_dw_%=:\n\t" "ldr r8, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str r8, [lr], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_dw_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_byte_%=\n\t" "ldrh r8, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh r8, [lr], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_encrypt_arm32_crypto_tag_tag_end_bytes_%=\n\t" "ldrb r8, [r9], #1\n\t" @@ -5819,9 +5817,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt_AARCH32(const byte* in, byte* out, "strb r8, [lr], #1\n\t" "bne L_aes_gcm_encrypt_arm32_crypto_tag_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_tag_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_tag_tag_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_encrypt_arm32_crypto_done_gcm_%=: \n\t" + "L_aes_gcm_encrypt_arm32_crypto_done_gcm_%=:\n\t" "pop {%[nonceSz], %[tag]}\n\t" "pop {%[tagSz], %[aad], %[aadSz], %[key], %[gcm_h], %[tmp], %[reg], %[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -5854,23 +5852,23 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, word32 sz, const byte* nonce, word32 nonceSz, const byte* tag, word32 tagSz, const byte* aad, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register const byte* nonce asm ("r3") = (const byte*)nonce_p; - register word32 nonceSz asm ("r12") = (word32)nonceSz_p; - register const byte* tag asm ("lr") = (const byte*)tag_p; - register word32 tagSz asm ("r4") = (word32)tagSz_p; - register const byte* aad asm ("r5") = (const byte*)aad_p; - register word32 aadSz asm ("r6") = (word32)aadSz_p; - register byte* key asm ("r7") = (byte*)key_p; - register byte* gcm_h asm ("r8") = (byte*)gcm_h_p; - register byte* tmp asm ("r9") = (byte*)tmp_p; - register byte* reg asm ("r10") = (byte*)reg_p; - register int nr asm ("r11") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register const byte* nonce __asm__ ("r3") = (const byte*)nonce_p; + register word32 nonceSz __asm__ ("r12") = (word32)nonceSz_p; + register const byte* tag __asm__ ("lr") = (const byte*)tag_p; + register word32 tagSz __asm__ ("r4") = (word32)tagSz_p; + register const byte* aad __asm__ ("r5") = (const byte*)aad_p; + register word32 aadSz __asm__ ("r6") = (word32)aadSz_p; + register byte* key __asm__ ("r7") = (byte*)key_p; + register byte* gcm_h __asm__ ("r8") = (byte*)gcm_h_p; + register byte* tmp __asm__ ("r9") = (byte*)tmp_p; + register byte* reg __asm__ ("r10") = (byte*)reg_p; + register int nr __asm__ ("r11") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5925,7 +5923,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q11, q11, q0\n\t" /* Done */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_h_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_h_done_%=:\n\t" /* aad */ "ldr r5, [sp, #12]\n\t" "lsr r10, r6, #4\n\t" @@ -5935,7 +5933,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r10, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_aad_start_2_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_start_4_%=:\n\t" "vldm r5!, {q0-q2}\n\t" "vmov.i8 q12, #0x55\n\t" "vmov.i8 q5, #51\n\t" @@ -6027,7 +6025,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "blt L_aes_gcm_decrypt_arm32_crypto_aad_done_%=\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_aad_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_start_2_%=:\n\t" "vld1.32 {q14-q15}, [r5]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -6076,7 +6074,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r10, #0\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_aad_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_start_1_%=:\n\t" "vld1.32 {q14}, [r5]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -6105,7 +6103,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_done_%=:\n\t" "ands r11, r6, #15\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_aad_partial_done_%=\n\t" "veor.8 q0, q0, q0\n\t" @@ -6114,21 +6112,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r12, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_aad_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_start_dw_%=:\n\t" "ldr r8, [r5], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_aad_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_aad_start_byte_%=\n\t" "ldrh r8, [r5], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_aad_end_bytes_%=\n\t" "ldrb r8, [r5], #1\n\t" @@ -6136,7 +6134,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_aad_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "vld1.32 {q14}, [r9]\n\t" "vmov.i8 q12, #0x55\n\t" @@ -6166,7 +6164,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_aad_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_aad_partial_done_%=:\n\t" /* in */ "lsr r10, %[sz], #4\n\t" "cmp r10, #1\n\t" @@ -6175,7 +6173,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r10, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_in_start_2_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_start_4_%=:\n\t" "vldm %[in]!, {q0-q2}\n\t" "vmov.i8 q12, #0x55\n\t" "vmov.i8 q5, #51\n\t" @@ -6267,7 +6265,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "blt L_aes_gcm_decrypt_arm32_crypto_in_done_%=\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_in_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_start_2_%=:\n\t" "vld1.32 {q14-q15}, [%[in]]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -6316,7 +6314,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r10, #0\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_in_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_start_1_%=:\n\t" "vld1.32 {q14}, [%[in]]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -6345,7 +6343,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_in_partial_done_%=\n\t" "veor.8 q0, q0, q0\n\t" @@ -6354,21 +6352,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r12, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_in_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_start_dw_%=:\n\t" "ldr r8, [%[in]], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_in_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_in_start_byte_%=\n\t" "ldrh r8, [%[in]], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_in_end_bytes_%=\n\t" "ldrb r8, [%[in]], #1\n\t" @@ -6376,7 +6374,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_in_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "vld1.32 {q14}, [r9]\n\t" "vmov.i8 q12, #0x55\n\t" @@ -6406,10 +6404,10 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q7, q7, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_in_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_in_partial_done_%=:\n\t" "sub %[in], %[in], %[sz]\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_done_gcm_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_done_gcm_%=:\n\t" /* nonceSz */ "ldr r12, [sp]\n\t" /* Load Nonce */ @@ -6425,12 +6423,12 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "mov r5, #1\n\t" "b L_aes_gcm_decrypt_arm32_crypto_done_nonce_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_ghash_nonce_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_ghash_nonce_%=:\n\t" "lsr r10, r12, #4\n\t" "cmp r10, #0\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_nonce_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_start_1_%=:\n\t" "vld1.32 {q14}, [%[nonce]]!\n\t" "vmov.i8 q12, #0x55\n\t" "vshl.u8 q0, q14, #1\n\t" @@ -6461,7 +6459,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "subs r10, r10, #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_nonce_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_done_%=:\n\t" "ands r11, r12, #15\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_nonce_partial_done_%=\n\t" "veor.8 q0, q0, q0\n\t" @@ -6470,21 +6468,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r12, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_nonce_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_start_dw_%=:\n\t" "ldr r8, [%[nonce]], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_nonce_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_nonce_start_byte_%=\n\t" "ldrh r8, [%[nonce]], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_nonce_end_bytes_%=\n\t" "ldrb r8, [%[nonce]], #1\n\t" @@ -6492,7 +6490,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_nonce_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "vld1.32 {q14}, [r9]\n\t" "vmov.i8 q12, #0x55\n\t" @@ -6522,7 +6520,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q6, q6, q0\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_nonce_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_nonce_partial_done_%=:\n\t" "veor.8 q0, q0, q0\n\t" /* nonceSz */ "ldr r12, [sp]\n\t" @@ -6563,7 +6561,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "vmov.32 s27, r5\n\t" "rev r5, r5\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_done_nonce_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_done_nonce_%=:\n\t" /* reg */ "ldr r9, [sp, #32]\n\t" "vst1.32 {q7}, [r9]\n\t" @@ -6584,7 +6582,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "blt L_aes_gcm_decrypt_arm32_crypto_192_done_%=\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_start_2_%=:\n\t" "add r8, r5, #1\n\t" "vmov.8 q4, q6\n\t" "add r5, r5, #2\n\t" @@ -6653,7 +6651,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "bgt L_aes_gcm_decrypt_arm32_crypto_192_start_2_%=\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_start_1_%=:\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" "rev r8, r5\n\t" @@ -6689,7 +6687,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q14, q14, q4\n\t" "vst1.32 {q14}, [%[out]]!\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_192_partial_done_%=\n\t" "veor.8 q14, q14, q14\n\t" @@ -6698,21 +6696,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_start_dw_%=:\n\t" "ldr lr, [%[in]], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [r9], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_192_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_start_byte_%=\n\t" "ldrh lr, [%[in]], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_end_bytes_%=\n\t" "ldrb lr, [%[in]], #1\n\t" @@ -6720,7 +6718,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb lr, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_192_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" @@ -6760,21 +6758,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_out_start_dw_%=:\n\t" "ldr lr, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [%[out]], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_192_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_out_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_out_start_byte_%=\n\t" "ldrh lr, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [%[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_out_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_192_out_end_bytes_%=\n\t" "ldrb lr, [r9], #1\n\t" @@ -6782,9 +6780,9 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb lr, [%[out]], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_192_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_192_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_192_partial_done_%=:\n\t" /* Finish */ "add r8, %[sz], #15\n\t" "sub r8, r5, r8, lsr #4\n\t" @@ -6821,13 +6819,13 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "b L_aes_gcm_decrypt_arm32_crypto_done_enc_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_start_256_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp r10, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_done_%=\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_start_2_%=:\n\t" "add r8, r5, #1\n\t" "vmov.8 q4, q6\n\t" "add r5, r5, #2\n\t" @@ -6906,7 +6904,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "bgt L_aes_gcm_decrypt_arm32_crypto_256_start_2_%=\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_start_1_%=:\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" "rev r8, r5\n\t" @@ -6948,7 +6946,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q14, q14, q4\n\t" "vst1.32 {q14}, [%[out]]!\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_256_partial_done_%=\n\t" "veor.8 q14, q14, q14\n\t" @@ -6957,21 +6955,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_start_dw_%=:\n\t" "ldr lr, [%[in]], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [r9], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_256_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_start_byte_%=\n\t" "ldrh lr, [%[in]], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_end_bytes_%=\n\t" "ldrb lr, [%[in]], #1\n\t" @@ -6979,7 +6977,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb lr, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_256_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" @@ -7025,21 +7023,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_out_start_dw_%=:\n\t" "ldr lr, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [%[out]], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_256_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_out_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_out_start_byte_%=\n\t" "ldrh lr, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [%[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_out_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_256_out_end_bytes_%=\n\t" "ldrb lr, [r9], #1\n\t" @@ -7047,9 +7045,9 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb lr, [%[out]], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_256_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_256_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_256_partial_done_%=:\n\t" /* Finish */ "add r8, %[sz], #15\n\t" "sub r8, r5, r8, lsr #4\n\t" @@ -7092,13 +7090,13 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "b L_aes_gcm_decrypt_arm32_crypto_done_enc_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_start_128_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp r10, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_done_%=\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_start_2_%=:\n\t" "add r8, r5, #1\n\t" "vmov.8 q4, q6\n\t" "add r5, r5, #2\n\t" @@ -7156,7 +7154,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "bgt L_aes_gcm_decrypt_arm32_crypto_128_start_2_%=\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_start_1_%=:\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" "rev r8, r5\n\t" @@ -7185,7 +7183,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q14, q14, q4\n\t" "vst1.32 {q14}, [%[out]]!\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_done_%=:\n\t" "ands r11, %[sz], #15\n\t" "beq L_aes_gcm_decrypt_arm32_crypto_128_partial_done_%=\n\t" "veor.8 q14, q14, q14\n\t" @@ -7194,21 +7192,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_start_dw_%=:\n\t" "ldr lr, [%[in]], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [r9], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_128_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_start_byte_%=\n\t" "ldrh lr, [%[in]], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_end_bytes_%=\n\t" "ldrb lr, [%[in]], #1\n\t" @@ -7216,7 +7214,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb lr, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_128_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_end_bytes_%=:\n\t" "sub r9, r9, r11\n\t" "add r5, r5, #1\n\t" "vmov.8 q4, q6\n\t" @@ -7249,21 +7247,21 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_out_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_out_start_dw_%=:\n\t" "ldr lr, [r9], #4\n\t" "sub r4, r4, #4\n\t" "str lr, [%[out]], #4\n\t" "cmp r4, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_128_out_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_out_start_sw_%=:\n\t" "cmp r4, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_out_start_byte_%=\n\t" "ldrh lr, [r9], #2\n\t" "sub r4, r4, #2\n\t" "strh lr, [%[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_out_start_byte_%=:\n\t" "cmp r4, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_128_out_end_bytes_%=\n\t" "ldrb lr, [r9], #1\n\t" @@ -7271,9 +7269,9 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb lr, [%[out]], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_128_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_128_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_128_partial_done_%=:\n\t" /* Finish */ "add r8, %[sz], #15\n\t" "sub r8, r5, r8, lsr #4\n\t" @@ -7301,7 +7299,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "veor.8 q6, q6, q13\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_decrypt_arm32_crypto_done_enc_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_done_enc_%=:\n\t" "vmov.i8 q13, #0x87\n\t" "vshr.u64 q13, q13, #56\n\t" /* gcm_h */ @@ -7359,28 +7357,28 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "vld1.8 {q0}, [lr]\n\t" "b L_aes_gcm_decrypt_arm32_crypto_tag_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_part_tag_%=:\n\t" "veor.8 q0, q0, q0\n\t" "mov r12, r4\n\t" "vst1.32 {q0}, [r9]\n\t" "cmp r12, #4\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_sw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_dw_%=:\n\t" "ldr r8, [lr], #4\n\t" "sub r12, r12, #4\n\t" "str r8, [r9], #4\n\t" "cmp r12, #4\n\t" "bge L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_dw_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_sw_%=:\n\t" "cmp r12, #2\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_byte_%=\n\t" "ldrh r8, [lr], #2\n\t" "sub r12, r12, #2\n\t" "strh r8, [r9], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_byte_%=:\n\t" "cmp r12, #1\n\t" "blt L_aes_gcm_decrypt_arm32_crypto_tag_tag_end_bytes_%=\n\t" "ldrb r8, [lr], #1\n\t" @@ -7388,7 +7386,7 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "strb r8, [r9], #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_tag_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_tag_end_bytes_%=:\n\t" "sub r9, r9, r4\n\t" "vld1.32 {q0}, [r9]\n\t" "mov r12, #16\n\t" @@ -7397,14 +7395,14 @@ WC_OMIT_FRAME_POINTER int AES_GCM_decrypt_AARCH32(const byte* in, byte* out, "eor r8, r8, r8\n\t" "add r9, r9, r4\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_calc_tag_byte_%=:\n\t" "strb r8, [r9], #1\n\t" "subs r12, r12, #1\n\t" "bne L_aes_gcm_decrypt_arm32_crypto_tag_calc_tag_byte_%=\n\t" "subs r9, r9, #16\n\t" "vld1.32 {q7}, [r9]\n\t" "\n" - "L_aes_gcm_decrypt_arm32_crypto_tag_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm32_crypto_tag_tag_loaded_%=:\n\t" "vceq.i32 q0, q0, q7\n\t" "vmov r5, s0\n\t" "vmov r8, s1\n\t" @@ -7448,17 +7446,17 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register const byte* i asm ("r3") = (const byte*)i_p; - register byte* key asm ("r12") = (byte*)key_p; - register byte* key2 asm ("lr") = (byte*)key2_p; - register byte* tmp asm ("r4") = (byte*)tmp_p; - register int nr asm ("r5") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register const byte* i __asm__ ("r3") = (const byte*)i_p; + register byte* key __asm__ ("r12") = (byte*)key_p; + register byte* key2 __asm__ ("lr") = (byte*)key2_p; + register byte* tmp __asm__ ("r4") = (byte*)tmp_p; + register int nr __asm__ ("r5") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -7509,7 +7507,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #1\n\t" "blt L_aes_xts_encrypt_arm32_crypto_192_done_%=\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_192_start_1_%=:\n\t" "vld1.8 {q1}, [%[in]]!\n\t" "veor.32 q1, q1, q0\n\t" "aese.8 q1, q2\n\t" @@ -7551,7 +7549,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q1}, [%[out]]!\n\t" "bne L_aes_xts_encrypt_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_192_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_192_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_xts_encrypt_arm32_crypto_192_partial_done_%=\n\t" "sub %[out], %[out], #16\n\t" @@ -7560,7 +7558,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "vst1.32 {q1}, [r4]\n\t" "mov r5, %[sz]\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_192_start_byte_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_192_start_byte_%=:\n\t" "ldrb r8, [r4]\n\t" "ldrb r9, [%[in]], #1\n\t" "strb r8, [%[out]], #1\n\t" @@ -7599,12 +7597,12 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "veor.32 q1, q1, q0\n\t" "vst1.8 {q1}, [%[out]]\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_192_partial_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_xts_encrypt_arm32_crypto_done_%=\n\t" /* AES_XTS_256 */ "\n" - "L_aes_xts_encrypt_arm32_crypto_start_256_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vldm.32 lr!, {q10-q13}\n\t" "aese.8 q0, q2\n\t" @@ -7646,7 +7644,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #1\n\t" "blt L_aes_xts_encrypt_arm32_crypto_256_done_%=\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_256_start_1_%=:\n\t" "vld1.8 {q1}, [%[in]]!\n\t" "veor.32 q1, q1, q0\n\t" "aese.8 q1, q2\n\t" @@ -7696,7 +7694,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q1}, [%[out]]!\n\t" "bne L_aes_xts_encrypt_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_256_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_256_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_xts_encrypt_arm32_crypto_256_partial_done_%=\n\t" "sub %[out], %[out], #16\n\t" @@ -7705,7 +7703,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "vst1.32 {q1}, [r4]\n\t" "mov r5, %[sz]\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_256_start_byte_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_256_start_byte_%=:\n\t" "ldrb r8, [r4]\n\t" "ldrb r9, [%[in]], #1\n\t" "strb r8, [%[out]], #1\n\t" @@ -7751,12 +7749,12 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "veor.32 q1, q1, q0\n\t" "vst1.8 {q1}, [%[out]]\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_256_partial_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_xts_encrypt_arm32_crypto_done_%=\n\t" /* AES_XTS_128 */ "\n" - "L_aes_xts_encrypt_arm32_crypto_start_128_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vldm.32 lr!, {q10-q12}\n\t" "aese.8 q0, q2\n\t" @@ -7787,7 +7785,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "cmp r4, #1\n\t" "blt L_aes_xts_encrypt_arm32_crypto_128_done_%=\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_128_start_1_%=:\n\t" "vld1.8 {q1}, [%[in]]!\n\t" "veor.32 q1, q1, q0\n\t" "aese.8 q1, q2\n\t" @@ -7825,7 +7823,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q1}, [%[out]]!\n\t" "bne L_aes_xts_encrypt_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_128_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_128_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_xts_encrypt_arm32_crypto_128_partial_done_%=\n\t" "sub %[out], %[out], #16\n\t" @@ -7834,7 +7832,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "vst1.32 {q1}, [r4]\n\t" "mov r5, %[sz]\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_128_start_byte_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_128_start_byte_%=:\n\t" "ldrb r8, [r4]\n\t" "ldrb r9, [%[in]], #1\n\t" "strb r8, [%[out]], #1\n\t" @@ -7869,10 +7867,10 @@ WC_OMIT_FRAME_POINTER void AES_XTS_encrypt_AARCH32(const byte* in, byte* out, "veor.32 q1, q1, q0\n\t" "vst1.8 {q1}, [%[out]]\n\t" "\n" - "L_aes_xts_encrypt_arm32_crypto_128_partial_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_xts_encrypt_arm32_crypto_done_%=: \n\t" + "L_aes_xts_encrypt_arm32_crypto_done_%=:\n\t" "pop {%[key], %[key2]}\n\t" "pop {%[tmp], %[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -7899,17 +7897,17 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in_p, #else WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const byte* in asm ("r0") = (const byte*)in_p; - register byte* out asm ("r1") = (byte*)out_p; - register word32 sz asm ("r2") = (word32)sz_p; - register const byte* i asm ("r3") = (const byte*)i_p; - register byte* key asm ("r12") = (byte*)key_p; - register byte* key2 asm ("lr") = (byte*)key2_p; - register byte* tmp asm ("r4") = (byte*)tmp_p; - register int nr asm ("r5") = (int)nr_p; + register const byte* in __asm__ ("r0") = (const byte*)in_p; + register byte* out __asm__ ("r1") = (byte*)out_p; + register word32 sz __asm__ ("r2") = (word32)sz_p; + register const byte* i __asm__ ("r3") = (const byte*)i_p; + register byte* key __asm__ ("r12") = (byte*)key_p; + register byte* key2 __asm__ ("lr") = (byte*)key2_p; + register byte* tmp __asm__ ("r4") = (byte*)tmp_p; + register int nr __asm__ ("r5") = (int)nr_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -7963,7 +7961,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #1\n\t" "blt L_aes_xts_decrypt_arm32_crypto_192_done_%=\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_192_start_1_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_192_start_1_%=:\n\t" "vld1.8 {q0}, [%[in]]!\n\t" "veor.32 q0, q0, q1\n\t" "aesd.8 q0, q2\n\t" @@ -8005,7 +8003,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q0}, [%[out]]!\n\t" "bne L_aes_xts_decrypt_arm32_crypto_192_start_1_%=\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_192_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_192_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_xts_decrypt_arm32_crypto_192_partial_done_%=\n\t" "and r5, lr, r9, asr #31\n\t" @@ -8050,7 +8048,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "add %[out], %[out], #16\n\t" "mov r5, %[sz]\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_192_start_byte_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_192_start_byte_%=:\n\t" "ldrb r8, [r4]\n\t" "ldrb r9, [%[in]], #1\n\t" "strb r8, [%[out]], #1\n\t" @@ -8089,12 +8087,12 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "veor.32 q0, q0, q1\n\t" "vst1.8 {q0}, [%[out]]\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_192_partial_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_xts_decrypt_arm32_crypto_done_%=\n\t" /* AES_XTS_256 */ "\n" - "L_aes_xts_decrypt_arm32_crypto_start_256_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "vldm.32 lr!, {q10-q13}\n\t" "aese.8 q1, q2\n\t" @@ -8136,7 +8134,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #1\n\t" "blt L_aes_xts_decrypt_arm32_crypto_256_done_%=\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_256_start_1_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_256_start_1_%=:\n\t" "vld1.8 {q0}, [%[in]]!\n\t" "veor.32 q0, q0, q1\n\t" "aesd.8 q0, q2\n\t" @@ -8187,7 +8185,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q0}, [%[out]]!\n\t" "bne L_aes_xts_decrypt_arm32_crypto_256_start_1_%=\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_256_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_256_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_xts_decrypt_arm32_crypto_256_partial_done_%=\n\t" "and r5, lr, r9, asr #31\n\t" @@ -8241,7 +8239,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "add %[out], %[out], #16\n\t" "mov r5, %[sz]\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_256_start_byte_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_256_start_byte_%=:\n\t" "ldrb r8, [r4]\n\t" "ldrb r9, [%[in]], #1\n\t" "strb r8, [%[out]], #1\n\t" @@ -8288,12 +8286,12 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "veor.32 q0, q0, q1\n\t" "vst1.8 {q0}, [%[out]]\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_256_partial_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_xts_decrypt_arm32_crypto_done_%=\n\t" /* AES_XTS_128 */ "\n" - "L_aes_xts_decrypt_arm32_crypto_start_128_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "vldm.32 lr!, {q10-q12}\n\t" "aese.8 q1, q2\n\t" @@ -8324,7 +8322,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "cmp r4, #1\n\t" "blt L_aes_xts_decrypt_arm32_crypto_128_done_%=\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_128_start_1_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_128_start_1_%=:\n\t" "vld1.8 {q0}, [%[in]]!\n\t" "veor.32 q0, q0, q1\n\t" "aesd.8 q0, q2\n\t" @@ -8362,7 +8360,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "vst1.8 {q0}, [%[out]]!\n\t" "bne L_aes_xts_decrypt_arm32_crypto_128_start_1_%=\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_128_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_128_done_%=:\n\t" "cmp %[sz], #0\n\t" "beq L_aes_xts_decrypt_arm32_crypto_128_partial_done_%=\n\t" "and r5, lr, r9, asr #31\n\t" @@ -8403,7 +8401,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "add %[out], %[out], #16\n\t" "mov r5, %[sz]\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_128_start_byte_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_128_start_byte_%=:\n\t" "ldrb r8, [r4]\n\t" "ldrb r9, [%[in]], #1\n\t" "strb r8, [%[out]], #1\n\t" @@ -8438,10 +8436,10 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, "veor.32 q0, q0, q1\n\t" "vst1.8 {q0}, [%[out]]\n\t" "\n" - "L_aes_xts_decrypt_arm32_crypto_128_partial_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_xts_decrypt_arm32_crypto_done_%=: \n\t" + "L_aes_xts_decrypt_arm32_crypto_done_%=:\n\t" "pop {%[key], %[key2]}\n\t" "pop {%[tmp], %[nr]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -8464,7 +8462,7 @@ WC_OMIT_FRAME_POINTER void AES_XTS_decrypt_AARCH32(const byte* in, byte* out, #endif /* WOLFSSL_AES_XTS */ #else #ifdef HAVE_AES_DECRYPT -static const word32 L_AES_ARM32_td_data[] = { +XALIGNED(8) static const word32 L_AES_ARM32_td_data[] = { 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, @@ -8535,7 +8533,7 @@ static const word32 L_AES_ARM32_td_data[] = { #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) -static const word32 L_AES_ARM32_te_data[] = { +XALIGNED(8) static const word32 L_AES_ARM32_te_data[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, @@ -8619,13 +8617,13 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p); WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks_p, word32 rounds_p) #else WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; - register word32 rounds asm ("r1") = (word32)rounds_p; - register word32* L_AES_ARM32_te_c asm ("r2") = (word32*)L_AES_ARM32_te; - register word32* L_AES_ARM32_td_c asm ("r3") = (word32*)L_AES_ARM32_td; + register unsigned char* ks __asm__ ("r0") = (unsigned char*)ks_p; + register word32 rounds __asm__ ("r1") = (word32)rounds_p; + register word32* L_AES_ARM32_te_c __asm__ ("r2") = (word32*)L_AES_ARM32_te; + register word32* L_AES_ARM32_td_c __asm__ ("r3") = (word32*)L_AES_ARM32_td; #else register word32* L_AES_ARM32_te_c = (word32*)L_AES_ARM32_te; register word32* L_AES_ARM32_td_c = (word32*)L_AES_ARM32_td; @@ -8637,7 +8635,7 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) "add r10, %[ks], %[rounds], lsl #4\n\t" "mov r11, %[rounds]\n\t" "\n" - "L_AES_invert_key_loop_%=: \n\t" + "L_AES_invert_key_loop_%=:\n\t" "ldm %[ks], {r2, r3, r4, r5}\n\t" "ldm r10, {r6, r7, r8, r9}\n\t" "stm r10, {r2, r3, r4, r5}\n\t" @@ -8649,7 +8647,7 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) "add %[ks], %[ks], #16\n\t" "sub r11, %[rounds], #1\n\t" "\n" - "L_AES_invert_key_mix_loop_%=: \n\t" + "L_AES_invert_key_mix_loop_%=:\n\t" "ldm %[ks], {r2, r3, r4, r5}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -8842,7 +8840,7 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) } #endif /* HAVE_AES_DECRYPT */ -static const word32 L_AES_ARM32_rcon[] = { +XALIGNED(8) static const word32 L_AES_ARM32_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x1b000000, 0x36000000 @@ -8856,14 +8854,15 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key_p, #else WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; - register word32 len asm ("r1") = (word32)len_p; - register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; - register word32* L_AES_ARM32_te_c asm ("r3") = (word32*)L_AES_ARM32_te; - register word32* L_AES_ARM32_rcon_c asm ("r12") = + register const unsigned char* key __asm__ ("r0") = + (const unsigned char*)key_p; + register word32 len __asm__ ("r1") = (word32)len_p; + register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; + register word32* L_AES_ARM32_te_c __asm__ ("r3") = (word32*)L_AES_ARM32_te; + register word32* L_AES_ARM32_rcon_c __asm__ ("r12") = (word32*)&L_AES_ARM32_rcon; #else register word32* L_AES_ARM32_te_c = (word32*)L_AES_ARM32_te; @@ -8945,7 +8944,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "sub %[ks], %[ks], #16\n\t" "mov r12, #6\n\t" "\n" - "L_AES_set_encrypt_key_loop_256_%=: \n\t" + "L_AES_set_encrypt_key_loop_256_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r4, r7, #24\n\t" @@ -9093,7 +9092,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "sub %[ks], %[ks], #16\n\t" "b L_AES_set_encrypt_key_end_%=\n\t" "\n" - "L_AES_set_encrypt_key_start_192_%=: \n\t" + "L_AES_set_encrypt_key_start_192_%=:\n\t" "ldr r4, [%[key]]\n\t" "ldr r5, [%[key], #4]\n\t" "ldr r6, [%[key], #8]\n\t" @@ -9149,7 +9148,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "mov r7, %[len]\n\t" "mov r12, #7\n\t" "\n" - "L_AES_set_encrypt_key_loop_192_%=: \n\t" + "L_AES_set_encrypt_key_loop_192_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r0, r7, #24\n\t" @@ -9248,7 +9247,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "stm %[ks], {r0, r1, r4, r5}\n\t" "b L_AES_set_encrypt_key_end_%=\n\t" "\n" - "L_AES_set_encrypt_key_start_128_%=: \n\t" + "L_AES_set_encrypt_key_start_128_%=:\n\t" "ldr r4, [%[key]]\n\t" "ldr r5, [%[key], #4]\n\t" "ldr r6, [%[key], #8]\n\t" @@ -9283,7 +9282,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "stm %[ks], {r4, r5, r6, r7}\n\t" "mov r12, #10\n\t" "\n" - "L_AES_set_encrypt_key_loop_128_%=: \n\t" + "L_AES_set_encrypt_key_loop_128_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r4, r7, #24\n\t" @@ -9333,7 +9332,7 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, "subs r12, r12, #1\n\t" "bne L_AES_set_encrypt_key_loop_128_%=\n\t" "\n" - "L_AES_set_encrypt_key_end_%=: \n\t" + "L_AES_set_encrypt_key_end_%=:\n\t" "pop {%[L_AES_ARM32_rcon]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), @@ -9359,18 +9358,18 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_block(const word32* te_p, int nr_p, #else WC_OMIT_FRAME_POINTER void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const word32* te asm ("r0") = (const word32*)te_p; - register int nr asm ("r1") = (int)nr_p; - register int len asm ("r2") = (int)len_p; - register const word32* ks asm ("r3") = (const word32*)ks_p; + register const word32* te __asm__ ("r0") = (const word32*)te_p; + register int nr __asm__ ("r1") = (int)nr_p; + register int len __asm__ ("r2") = (int)len_p; + register const word32* ks __asm__ ("r3") = (const word32*)ks_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" - "L_AES_encrypt_block_nr_%=: \n\t" + "L_AES_encrypt_block_nr_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -10030,15 +10029,17 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in_p, #else WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r12") = (int)nr_p; - register word32* L_AES_ARM32_te_ecb_c asm ("lr") = + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; + register int nr __asm__ ("r12") = (int)nr_p; + register word32* L_AES_ARM32_te_ecb_c __asm__ ("lr") = (word32*)L_AES_ARM32_te_ecb; #else register word32* L_AES_ARM32_te_ecb_c = (word32*)L_AES_ARM32_te_ecb; @@ -10055,7 +10056,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "cmp r12, #12\n\t" "beq L_AES_ECB_encrypt_start_block_192_%=\n\t" "\n" - "L_AES_ECB_encrypt_loop_block_256_%=: \n\t" + "L_AES_ECB_encrypt_loop_block_256_%=:\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -10095,7 +10096,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_ECB_encrypt_block_nr_256_%=: \n\t" + "L_AES_ECB_encrypt_block_nr_256_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -10766,9 +10767,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "bne L_AES_ECB_encrypt_loop_block_256_%=\n\t" "b L_AES_ECB_encrypt_end_%=\n\t" "\n" - "L_AES_ECB_encrypt_start_block_192_%=: \n\t" + "L_AES_ECB_encrypt_start_block_192_%=:\n\t" "\n" - "L_AES_ECB_encrypt_loop_block_192_%=: \n\t" + "L_AES_ECB_encrypt_loop_block_192_%=:\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -10808,7 +10809,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_ECB_encrypt_block_nr_192_%=: \n\t" + "L_AES_ECB_encrypt_block_nr_192_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -11479,9 +11480,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "bne L_AES_ECB_encrypt_loop_block_192_%=\n\t" "b L_AES_ECB_encrypt_end_%=\n\t" "\n" - "L_AES_ECB_encrypt_start_block_128_%=: \n\t" + "L_AES_ECB_encrypt_start_block_128_%=:\n\t" "\n" - "L_AES_ECB_encrypt_loop_block_128_%=: \n\t" + "L_AES_ECB_encrypt_loop_block_128_%=:\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -11521,7 +11522,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_ECB_encrypt_block_nr_128_%=: \n\t" + "L_AES_ECB_encrypt_block_nr_128_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -12191,7 +12192,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "add %[out], %[out], #16\n\t" "bne L_AES_ECB_encrypt_loop_block_128_%=\n\t" "\n" - "L_AES_ECB_encrypt_end_%=: \n\t" + "L_AES_ECB_encrypt_end_%=:\n\t" "pop {%[ks]}\n\t" "pop {%[nr], %[L_AES_ARM32_te_ecb]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -12222,16 +12223,18 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in_p, WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r12") = (int)nr_p; - register unsigned char* iv asm ("lr") = (unsigned char*)iv_p; - register word32* L_AES_ARM32_te_cbc_c asm ("r4") = + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; + register int nr __asm__ ("r12") = (int)nr_p; + register unsigned char* iv __asm__ ("lr") = (unsigned char*)iv_p; + register word32* L_AES_ARM32_te_cbc_c __asm__ ("r4") = (word32*)L_AES_ARM32_te_cbc; #else register word32* L_AES_ARM32_te_cbc_c = (word32*)L_AES_ARM32_te_cbc; @@ -12251,7 +12254,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "cmp r8, #12\n\t" "beq L_AES_CBC_encrypt_start_block_192_%=\n\t" "\n" - "L_AES_CBC_encrypt_loop_block_256_%=: \n\t" + "L_AES_CBC_encrypt_loop_block_256_%=:\n\t" "ldr r8, [lr]\n\t" "ldr r9, [lr, #4]\n\t" "ldr r10, [lr, #8]\n\t" @@ -12295,7 +12298,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_CBC_encrypt_block_nr_256_%=: \n\t" + "L_AES_CBC_encrypt_block_nr_256_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -12966,9 +12969,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "bne L_AES_CBC_encrypt_loop_block_256_%=\n\t" "b L_AES_CBC_encrypt_end_%=\n\t" "\n" - "L_AES_CBC_encrypt_start_block_192_%=: \n\t" + "L_AES_CBC_encrypt_start_block_192_%=:\n\t" "\n" - "L_AES_CBC_encrypt_loop_block_192_%=: \n\t" + "L_AES_CBC_encrypt_loop_block_192_%=:\n\t" "ldr r8, [lr]\n\t" "ldr r9, [lr, #4]\n\t" "ldr r10, [lr, #8]\n\t" @@ -13012,7 +13015,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_CBC_encrypt_block_nr_192_%=: \n\t" + "L_AES_CBC_encrypt_block_nr_192_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -13683,9 +13686,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "bne L_AES_CBC_encrypt_loop_block_192_%=\n\t" "b L_AES_CBC_encrypt_end_%=\n\t" "\n" - "L_AES_CBC_encrypt_start_block_128_%=: \n\t" + "L_AES_CBC_encrypt_start_block_128_%=:\n\t" "\n" - "L_AES_CBC_encrypt_loop_block_128_%=: \n\t" + "L_AES_CBC_encrypt_loop_block_128_%=:\n\t" "ldr r8, [lr]\n\t" "ldr r9, [lr, #4]\n\t" "ldr r10, [lr, #8]\n\t" @@ -13729,7 +13732,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_CBC_encrypt_block_nr_128_%=: \n\t" + "L_AES_CBC_encrypt_block_nr_128_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -14399,7 +14402,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "add %[out], %[out], #16\n\t" "bne L_AES_CBC_encrypt_loop_block_128_%=\n\t" "\n" - "L_AES_CBC_encrypt_end_%=: \n\t" + "L_AES_CBC_encrypt_end_%=:\n\t" "pop {%[ks], r9}\n\t" "stm r9, {r4, r5, r6, r7}\n\t" "pop {%[nr], %[iv]}\n\t" @@ -14433,16 +14436,18 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in_p, WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r12") = (int)nr_p; - register unsigned char* ctr asm ("lr") = (unsigned char*)ctr_p; - register word32* L_AES_ARM32_te_ctr_c asm ("r4") = + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; + register int nr __asm__ ("r12") = (int)nr_p; + register unsigned char* ctr __asm__ ("lr") = (unsigned char*)ctr_p; + register word32* L_AES_ARM32_te_ctr_c __asm__ ("r4") = (word32*)L_AES_ARM32_te_ctr; #else register word32* L_AES_ARM32_te_ctr_c = (word32*)L_AES_ARM32_te_ctr; @@ -14486,7 +14491,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "cmp r12, #12\n\t" "beq L_AES_CTR_encrypt_start_block_192_%=\n\t" "\n" - "L_AES_CTR_encrypt_loop_block_256_%=: \n\t" + "L_AES_CTR_encrypt_loop_block_256_%=:\n\t" "push {r1, %[len], lr}\n\t" "ldr lr, [sp, #16]\n\t" "adds r11, r7, #1\n\t" @@ -14505,7 +14510,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_CTR_encrypt_block_nr_256_%=: \n\t" + "L_AES_CTR_encrypt_block_nr_256_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -15186,9 +15191,9 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "bne L_AES_CTR_encrypt_loop_block_256_%=\n\t" "b L_AES_CTR_encrypt_end_%=\n\t" "\n" - "L_AES_CTR_encrypt_start_block_192_%=: \n\t" + "L_AES_CTR_encrypt_start_block_192_%=:\n\t" "\n" - "L_AES_CTR_encrypt_loop_block_192_%=: \n\t" + "L_AES_CTR_encrypt_loop_block_192_%=:\n\t" "push {r1, %[len], lr}\n\t" "ldr lr, [sp, #16]\n\t" "adds r11, r7, #1\n\t" @@ -15207,7 +15212,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_CTR_encrypt_block_nr_192_%=: \n\t" + "L_AES_CTR_encrypt_block_nr_192_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -15888,9 +15893,9 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "bne L_AES_CTR_encrypt_loop_block_192_%=\n\t" "b L_AES_CTR_encrypt_end_%=\n\t" "\n" - "L_AES_CTR_encrypt_start_block_128_%=: \n\t" + "L_AES_CTR_encrypt_start_block_128_%=:\n\t" "\n" - "L_AES_CTR_encrypt_loop_block_128_%=: \n\t" + "L_AES_CTR_encrypt_loop_block_128_%=:\n\t" "push {r1, %[len], lr}\n\t" "ldr lr, [sp, #16]\n\t" "adds r11, r7, #1\n\t" @@ -15909,7 +15914,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_CTR_encrypt_block_nr_128_%=: \n\t" + "L_AES_CTR_encrypt_block_nr_128_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -16589,7 +16594,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "add %[out], %[out], #16\n\t" "bne L_AES_CTR_encrypt_loop_block_128_%=\n\t" "\n" - "L_AES_CTR_encrypt_end_%=: \n\t" + "L_AES_CTR_encrypt_end_%=:\n\t" "pop {%[ks], r8}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "eor r10, r4, r4, ror #16\n\t" @@ -16644,17 +16649,17 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_block(const word32* td_p, int nr_p, #else WC_OMIT_FRAME_POINTER void AES_decrypt_block(const word32* td, int nr, const byte* td4) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const word32* td asm ("r0") = (const word32*)td_p; - register int nr asm ("r1") = (int)nr_p; - register const byte* td4 asm ("r2") = (const byte*)td4_p; + register const word32* td __asm__ ("r0") = (const word32*)td_p; + register int nr __asm__ ("r1") = (int)nr_p; + register const byte* td4 __asm__ ("r2") = (const byte*)td4_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" - "L_AES_decrypt_block_nr_%=: \n\t" + "L_AES_decrypt_block_nr_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -17304,7 +17309,7 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_block(const word32* td, int nr, static const word32* L_AES_ARM32_td_ecb = L_AES_ARM32_td_data; #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_ECB) -static const byte L_AES_ARM32_ecb_td4[] = { +XALIGNED(4) static const word8 L_AES_ARM32_ecb_td4[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, @@ -17348,21 +17353,23 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in_p, #else WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r12") = (int)nr_p; - register word32* L_AES_ARM32_td_ecb_c asm ("lr") = + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; + register int nr __asm__ ("r12") = (int)nr_p; + register word32* L_AES_ARM32_td_ecb_c __asm__ ("lr") = (word32*)L_AES_ARM32_td_ecb; - register byte* L_AES_ARM32_ecb_td4_c asm ("r4") = - (byte*)&L_AES_ARM32_ecb_td4; + register word8* L_AES_ARM32_ecb_td4_c __asm__ ("r4") = + (word8*)&L_AES_ARM32_ecb_td4; #else register word32* L_AES_ARM32_td_ecb_c = (word32*)L_AES_ARM32_td_ecb; - register byte* L_AES_ARM32_ecb_td4_c = (byte*)&L_AES_ARM32_ecb_td4; + register word8* L_AES_ARM32_ecb_td4_c = (word8*)&L_AES_ARM32_ecb_td4; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -17378,7 +17385,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "cmp r8, #12\n\t" "beq L_AES_ECB_decrypt_start_block_192_%=\n\t" "\n" - "L_AES_ECB_decrypt_loop_block_256_%=: \n\t" + "L_AES_ECB_decrypt_loop_block_256_%=:\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -17418,7 +17425,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_ECB_decrypt_block_nr_256_%=: \n\t" + "L_AES_ECB_decrypt_block_nr_256_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -18088,9 +18095,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "bne L_AES_ECB_decrypt_loop_block_256_%=\n\t" "b L_AES_ECB_decrypt_end_%=\n\t" "\n" - "L_AES_ECB_decrypt_start_block_192_%=: \n\t" + "L_AES_ECB_decrypt_start_block_192_%=:\n\t" "\n" - "L_AES_ECB_decrypt_loop_block_192_%=: \n\t" + "L_AES_ECB_decrypt_loop_block_192_%=:\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -18130,7 +18137,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_ECB_decrypt_block_nr_192_%=: \n\t" + "L_AES_ECB_decrypt_block_nr_192_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -18800,9 +18807,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "bne L_AES_ECB_decrypt_loop_block_192_%=\n\t" "b L_AES_ECB_decrypt_end_%=\n\t" "\n" - "L_AES_ECB_decrypt_start_block_128_%=: \n\t" + "L_AES_ECB_decrypt_start_block_128_%=:\n\t" "\n" - "L_AES_ECB_decrypt_loop_block_128_%=: \n\t" + "L_AES_ECB_decrypt_loop_block_128_%=:\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -18842,7 +18849,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_ECB_decrypt_block_nr_128_%=: \n\t" + "L_AES_ECB_decrypt_block_nr_128_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -19511,7 +19518,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, "add %[out], %[out], #16\n\t" "bne L_AES_ECB_decrypt_loop_block_128_%=\n\t" "\n" - "L_AES_ECB_decrypt_end_%=: \n\t" + "L_AES_ECB_decrypt_end_%=:\n\t" "pop {%[nr], %[L_AES_ARM32_td_ecb]}\n\t" "pop {%[L_AES_ARM32_ecb_td4]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -19531,7 +19538,7 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || defined(HAVE_AES_ECB) */ #ifdef HAVE_AES_CBC -static const byte L_AES_ARM32_cbc_td4[] = { +XALIGNED(4) static const word8 L_AES_ARM32_cbc_td4[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, @@ -19577,22 +19584,24 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in_p, WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r12") = (int)nr_p; - register unsigned char* iv asm ("lr") = (unsigned char*)iv_p; - register word32* L_AES_ARM32_td_ecb_c asm ("r4") = + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; + register int nr __asm__ ("r12") = (int)nr_p; + register unsigned char* iv __asm__ ("lr") = (unsigned char*)iv_p; + register word32* L_AES_ARM32_td_ecb_c __asm__ ("r4") = (word32*)L_AES_ARM32_td_ecb; - register byte* L_AES_ARM32_cbc_td4_c asm ("r5") = - (byte*)&L_AES_ARM32_cbc_td4; + register word8* L_AES_ARM32_cbc_td4_c __asm__ ("r5") = + (word8*)&L_AES_ARM32_cbc_td4; #else register word32* L_AES_ARM32_td_ecb_c = (word32*)L_AES_ARM32_td_ecb; - register byte* L_AES_ARM32_cbc_td4_c = (byte*)&L_AES_ARM32_cbc_td4; + register word8* L_AES_ARM32_cbc_td4_c = (word8*)&L_AES_ARM32_cbc_td4; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -19610,7 +19619,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "cmp r8, #12\n\t" "beq L_AES_CBC_decrypt_loop_block_192_%=\n\t" "\n" - "L_AES_CBC_decrypt_loop_block_256_%=: \n\t" + "L_AES_CBC_decrypt_loop_block_256_%=:\n\t" "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" @@ -19663,7 +19672,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_CBC_decrypt_block_nr_256_odd_%=: \n\t" + "L_AES_CBC_decrypt_block_nr_256_odd_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -20389,7 +20398,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_CBC_decrypt_block_nr_256_even_%=: \n\t" + "L_AES_CBC_decrypt_block_nr_256_even_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -21077,7 +21086,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bne L_AES_CBC_decrypt_loop_block_256_%=\n\t" "b L_AES_CBC_decrypt_end_%=\n\t" "\n" - "L_AES_CBC_decrypt_loop_block_192_%=: \n\t" + "L_AES_CBC_decrypt_loop_block_192_%=:\n\t" "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" @@ -21130,7 +21139,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_CBC_decrypt_block_nr_192_odd_%=: \n\t" + "L_AES_CBC_decrypt_block_nr_192_odd_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -21856,7 +21865,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_CBC_decrypt_block_nr_192_even_%=: \n\t" + "L_AES_CBC_decrypt_block_nr_192_even_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -22544,7 +22553,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bne L_AES_CBC_decrypt_loop_block_192_%=\n\t" "b L_AES_CBC_decrypt_end_%=\n\t" "\n" - "L_AES_CBC_decrypt_loop_block_128_%=: \n\t" + "L_AES_CBC_decrypt_loop_block_128_%=:\n\t" "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" @@ -22597,7 +22606,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_CBC_decrypt_block_nr_128_odd_%=: \n\t" + "L_AES_CBC_decrypt_block_nr_128_odd_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -23323,7 +23332,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bl AES_decrypt_block\n\t" #else "\n" - "L_AES_CBC_decrypt_block_nr_128_even_%=: \n\t" + "L_AES_CBC_decrypt_block_nr_128_even_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r7, #8\n\t" @@ -24011,7 +24020,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "bne L_AES_CBC_decrypt_loop_block_128_%=\n\t" "b L_AES_CBC_decrypt_end_%=\n\t" "\n" - "L_AES_CBC_decrypt_end_odd_%=: \n\t" + "L_AES_CBC_decrypt_end_odd_%=:\n\t" "ldr r4, [sp, #4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "ldr r8, [r4, #16]\n\t" @@ -24037,7 +24046,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "strd r10, r11, [r4, #8]\n\t" #endif "\n" - "L_AES_CBC_decrypt_end_%=: \n\t" + "L_AES_CBC_decrypt_end_%=:\n\t" "pop {%[ks]-r4}\n\t" "pop {%[nr], %[iv]}\n\t" "pop {%[L_AES_ARM32_td_ecb], %[L_AES_ARM32_cbc_td4]}\n\t" @@ -24063,7 +24072,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM -static const word32 L_GCM_gmult_len_r[] = { +XALIGNED(8) static const word32 L_GCM_gmult_len_r[] = { 0x00000000, 0x1c200000, 0x38400000, 0x24600000, 0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000, 0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000, @@ -24078,15 +24087,16 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x_p, #else WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register unsigned char* x asm ("r0") = (unsigned char*)x_p; - register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; - register const unsigned char* data asm ("r2") = + register unsigned char* x __asm__ ("r0") = (unsigned char*)x_p; + register const unsigned char** m __asm__ ("r1") = + (const unsigned char**)m_p; + register const unsigned char* data __asm__ ("r2") = (const unsigned char*)data_p; - register unsigned long len asm ("r3") = (unsigned long)len_p; - register word32* L_GCM_gmult_len_r_c asm ("r12") = + register unsigned long len __asm__ ("r3") = (unsigned long)len_p; + register word32* L_GCM_gmult_len_r_c __asm__ ("r12") = (word32*)&L_GCM_gmult_len_r; #else register word32* L_GCM_gmult_len_r_c = (word32*)&L_GCM_gmult_len_r; @@ -24096,7 +24106,7 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, "push {%[L_GCM_gmult_len_r]}\n\t" "mov lr, %[L_GCM_gmult_len_r]\n\t" "\n" - "L_GCM_gmult_len_start_block_%=: \n\t" + "L_GCM_gmult_len_start_block_%=:\n\t" "push {r3}\n\t" "ldr r12, [r0, #12]\n\t" "ldr %[len], [r2, #12]\n\t" @@ -24692,16 +24702,18 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in_p, WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; - register unsigned char* out asm ("r1") = (unsigned char*)out_p; - register unsigned long len asm ("r2") = (unsigned long)len_p; - register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; - register int nr asm ("r12") = (int)nr_p; - register unsigned char* ctr asm ("lr") = (unsigned char*)ctr_p; - register word32* L_AES_ARM32_te_gcm_c asm ("r4") = + register const unsigned char* in __asm__ ("r0") = + (const unsigned char*)in_p; + register unsigned char* out __asm__ ("r1") = (unsigned char*)out_p; + register unsigned long len __asm__ ("r2") = (unsigned long)len_p; + register const unsigned char* ks __asm__ ("r3") = + (const unsigned char*)ks_p; + register int nr __asm__ ("r12") = (int)nr_p; + register unsigned char* ctr __asm__ ("lr") = (unsigned char*)ctr_p; + register word32* L_AES_ARM32_te_gcm_c __asm__ ("r4") = (word32*)L_AES_ARM32_te_gcm; #else register word32* L_AES_ARM32_te_gcm_c = (word32*)L_AES_ARM32_te_gcm; @@ -24745,7 +24757,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "cmp r12, #12\n\t" "beq L_AES_GCM_encrypt_start_block_192_%=\n\t" "\n" - "L_AES_GCM_encrypt_loop_block_256_%=: \n\t" + "L_AES_GCM_encrypt_loop_block_256_%=:\n\t" "push {r1, %[len], lr}\n\t" "ldr lr, [sp, #16]\n\t" "add r7, r7, #1\n\t" @@ -24761,7 +24773,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_GCM_encrypt_block_nr_256_%=: \n\t" + "L_AES_GCM_encrypt_block_nr_256_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -25442,9 +25454,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "bne L_AES_GCM_encrypt_loop_block_256_%=\n\t" "b L_AES_GCM_encrypt_end_%=\n\t" "\n" - "L_AES_GCM_encrypt_start_block_192_%=: \n\t" + "L_AES_GCM_encrypt_start_block_192_%=:\n\t" "\n" - "L_AES_GCM_encrypt_loop_block_192_%=: \n\t" + "L_AES_GCM_encrypt_loop_block_192_%=:\n\t" "push {r1, %[len], lr}\n\t" "ldr lr, [sp, #16]\n\t" "add r7, r7, #1\n\t" @@ -25460,7 +25472,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_GCM_encrypt_block_nr_192_%=: \n\t" + "L_AES_GCM_encrypt_block_nr_192_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -26141,9 +26153,9 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "bne L_AES_GCM_encrypt_loop_block_192_%=\n\t" "b L_AES_GCM_encrypt_end_%=\n\t" "\n" - "L_AES_GCM_encrypt_start_block_128_%=: \n\t" + "L_AES_GCM_encrypt_start_block_128_%=:\n\t" "\n" - "L_AES_GCM_encrypt_loop_block_128_%=: \n\t" + "L_AES_GCM_encrypt_loop_block_128_%=:\n\t" "push {r1, %[len], lr}\n\t" "ldr lr, [sp, #16]\n\t" "add r7, r7, #1\n\t" @@ -26159,7 +26171,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "bl AES_encrypt_block\n\t" #else "\n" - "L_AES_GCM_encrypt_block_nr_128_%=: \n\t" + "L_AES_GCM_encrypt_block_nr_128_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "lsl r8, r5, #8\n\t" @@ -26839,7 +26851,7 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "add %[out], %[out], #16\n\t" "bne L_AES_GCM_encrypt_loop_block_128_%=\n\t" "\n" - "L_AES_GCM_encrypt_end_%=: \n\t" + "L_AES_GCM_encrypt_end_%=:\n\t" "pop {%[ks], r8}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "eor r10, r4, r4, ror #16\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S b/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S index c740134a16e..a342fad9216 100644 --- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S @@ -51,19 +51,22 @@ wc_chacha_setiv: pop {r4, pc} .size wc_chacha_setiv,.-wc_chacha_setiv #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_chacha_arm32_constants, %object .size L_chacha_arm32_constants, 32 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_chacha_arm32_constants: - .word 0x61707865 - .word 0x3120646e - .word 0x79622d36 - .word 0x6b206574 - .word 0x61707865 - .word 0x3320646e - .word 0x79622d32 - .word 0x6b206574 + .long 0x61707865,0x3120646e,0x79622d36,0x6b206574 + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 .text .align 4 .globl wc_chacha_setkey @@ -969,8 +972,8 @@ L_chacha_crypt_bytes_arm32_round_start_128: vext.8 q6, q6, q6, #8 bne L_chacha_crypt_bytes_arm32_round_start_128 # Add back state, XOR in message and store (load next block) - vld1.8 {q8, q9}, [r2]! - vld1.8 {q10, q11}, [r2]! + vld1.8 {q8-q9}, [r2]! + vld1.8 {q10-q11}, [r2]! vadd.i32 q0, q0, q12 vadd.i32 q1, q1, q13 vadd.i32 q2, q2, q14 @@ -979,10 +982,10 @@ L_chacha_crypt_bytes_arm32_round_start_128: veor q1, q1, q9 veor q2, q2, q10 veor q3, q3, q11 - vld1.8 {q8, q9}, [r2]! - vld1.8 {q10, q11}, [r2]! - vst1.8 {q0, q1}, [r1]! - vst1.8 {q2, q3}, [r1]! + vld1.8 {q8-q9}, [r2]! + vld1.8 {q10-q11}, [r2]! + vst1.8 {q0-q1}, [r1]! + vst1.8 {q2-q3}, [r1]! veor q0, q0, q0 mov r12, #1 vmov.i32 d0[0], r12 @@ -995,8 +998,8 @@ L_chacha_crypt_bytes_arm32_round_start_128: veor q5, q5, q9 veor q6, q6, q10 veor q7, q7, q11 - vst1.8 {q4, q5}, [r1]! - vst1.8 {q6, q7}, [r1]! + vst1.8 {q4-q5}, [r1]! + vst1.8 {q6-q7}, [r1]! vadd.i32 q15, q15, q0 sub r3, r3, #0x80 # Done 128-byte block @@ -1075,14 +1078,14 @@ L_chacha_crypt_bytes_arm32_round_64: vadd.i32 q15, q15, q9 blt L_chacha_crypt_bytes_arm32_lt_64 # Encipher 64 bytes - vld1.8 {q4, q5}, [r2]! - vld1.8 {q6, q7}, [r2]! + vld1.8 {q4-q5}, [r2]! + vld1.8 {q6-q7}, [r2]! veor q4, q4, q0 veor q5, q5, q1 veor q6, q6, q2 veor q7, q7, q3 - vst1.8 {q4, q5}, [r1]! - vst1.8 {q6, q7}, [r1]! + vst1.8 {q4-q5}, [r1]! + vst1.8 {q6-q7}, [r1]! # Check for more bytes to be enciphered subs r3, r3, #0x40 bne L_chacha_crypt_bytes_arm32_loop_64 @@ -1097,10 +1100,10 @@ L_chacha_crypt_bytes_arm32_lt_64: # Encipher 32 bytes cmp r3, #32 blt L_chacha_crypt_bytes_arm32_lt_32 - vld1.8 {q4, q5}, [r2]! + vld1.8 {q4-q5}, [r2]! veor q4, q4, q0 veor q5, q5, q1 - vst1.8 {q4, q5}, [r1]! + vst1.8 {q4-q5}, [r1]! subs r3, r3, #32 vmov q0, q2 vmov q1, q3 @@ -1153,19 +1156,22 @@ L_chacha_crypt_bytes_arm32_done_all: vpop {d8-d15} pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size wc_chacha_crypt_bytes,.-wc_chacha_crypt_bytes +#ifndef __APPLE__ .text .type L_chacha_setkey_arm32_constant, %object .size L_chacha_setkey_arm32_constant, 32 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_chacha_setkey_arm32_constant: - .word 0x61707865 - .word 0x3120646e - .word 0x79622d36 - .word 0x6b206574 - .word 0x61707865 - .word 0x3320646e - .word 0x79622d32 - .word 0x6b206574 + .long 0x61707865,0x3120646e,0x79622d36,0x6b206574 + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 .text .align 4 .globl wc_chacha_setkey @@ -1180,7 +1186,7 @@ wc_chacha_setkey: #ifdef BIG_ENDIAN_ORDER vrev32.16 q1, q1 #endif /* BIG_ENDIAN_ORDER */ - vstm r0!, {q0, q1} + vstm r0!, {q0-q1} beq L_chacha_setkey_arm32_done vld1.8 {q1}, [r1] #ifdef BIG_ENDIAN_ORDER diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c index 323e93ff329..47e2562e5f1 100644 --- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -58,12 +56,12 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x_p, const byte* iv_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x, const byte* iv, word32 counter) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register word32* x asm ("r0") = (word32*)x_p; - register const byte* iv asm ("r1") = (const byte*)iv_p; - register word32 counter asm ("r2") = (word32)counter_p; + register word32* x __asm__ ("r0") = (word32*)x_p; + register const byte* iv __asm__ ("r1") = (const byte*)iv_p; + register word32 counter __asm__ ("r2") = (word32)counter_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -90,7 +88,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x, const byte* iv, } #ifdef WOLFSSL_ARMASM_NO_NEON -static const word32 L_chacha_arm32_constants[] = { +XALIGNED(8) static const word32 L_chacha_arm32_constants[] = { 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574, 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, }; @@ -101,13 +99,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x_p, const byte* key_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register word32* x asm ("r0") = (word32*)x_p; - register const byte* key asm ("r1") = (const byte*)key_p; - register word32 keySz asm ("r2") = (word32)keySz_p; - register word32* L_chacha_arm32_constants_c asm ("r3") = + register word32* x __asm__ ("r0") = (word32*)x_p; + register const byte* key __asm__ ("r1") = (const byte*)key_p; + register word32 keySz __asm__ ("r2") = (word32)keySz_p; + register word32* L_chacha_arm32_constants_c __asm__ ("r3") = (word32*)&L_chacha_arm32_constants; #else register word32* L_chacha_arm32_constants_c = @@ -142,7 +140,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "ldr r12, [%[key], #8]\n\t" "ldr lr, [%[key], #12]\n\t" "\n" - "L_chacha_arm32_setkey_same_key_bytes_%=: \n\t" + "L_chacha_arm32_setkey_same_key_bytes_%=:\n\t" "stm %[x], {r4, r5, r12, lr}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), @@ -163,13 +161,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ChaCha* ctx asm ("r0") = (ChaCha*)ctx_p; - register byte* c asm ("r1") = (byte*)c_p; - register const byte* m asm ("r2") = (const byte*)m_p; - register word32 len asm ("r3") = (word32)len_p; + register ChaCha* ctx __asm__ ("r0") = (ChaCha*)ctx_p; + register byte* c __asm__ ("r1") = (byte*)c_p; + register const byte* m __asm__ ("r2") = (const byte*)m_p; + register word32 len __asm__ ("r3") = (word32)len_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -188,7 +186,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "strd %[m], %[len], [sp, #40]\n\t" #endif "\n" - "L_chacha_arm32_crypt_block_%=: \n\t" + "L_chacha_arm32_crypt_block_%=:\n\t" /* Put x[12]..x[15] onto stack. */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "ldr r4, [lr, #48]\n\t" @@ -220,7 +218,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "mov lr, #10\n\t" "str lr, [sp, #48]\n\t" "\n" - "L_chacha_arm32_crypt_loop_%=: \n\t" + "L_chacha_arm32_crypt_loop_%=:\n\t" /* 0, 4, 8, 12 */ /* 1, 5, 9, 13 */ "ldr lr, [sp, #20]\n\t" @@ -449,7 +447,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "bne L_chacha_arm32_crypt_block_%=\n\t" "b L_chacha_arm32_crypt_done_%=\n\t" "\n" - "L_chacha_arm32_crypt_lt_block_%=: \n\t" + "L_chacha_arm32_crypt_lt_block_%=:\n\t" /* Store in over field of ChaCha. */ "ldr lr, [sp, #32]\n\t" "add r12, lr, #0x44\n\t" @@ -467,7 +465,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "str r12, [lr, #64]\n\t" "add lr, lr, #0x44\n\t" "\n" - "L_chacha_arm32_crypt_16byte_loop_%=: \n\t" + "L_chacha_arm32_crypt_16byte_loop_%=:\n\t" "cmp %[len], #16\n\t" "blt L_chacha_arm32_crypt_word_loop_%=\n\t" /* 16 bytes of state XORed into message. */ @@ -490,7 +488,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "add %[c], %[c], #16\n\t" "b L_chacha_arm32_crypt_16byte_loop_%=\n\t" "\n" - "L_chacha_arm32_crypt_word_loop_%=: \n\t" + "L_chacha_arm32_crypt_word_loop_%=:\n\t" "cmp %[len], #4\n\t" "blt L_chacha_arm32_crypt_byte_start_%=\n\t" /* 4 bytes of state XORed into message. */ @@ -505,10 +503,10 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "add %[c], %[c], #4\n\t" "b L_chacha_arm32_crypt_word_loop_%=\n\t" "\n" - "L_chacha_arm32_crypt_byte_start_%=: \n\t" + "L_chacha_arm32_crypt_byte_start_%=:\n\t" "ldr r4, [lr]\n\t" "\n" - "L_chacha_arm32_crypt_byte_loop_%=: \n\t" + "L_chacha_arm32_crypt_byte_loop_%=:\n\t" "ldrb r8, [%[m]]\n\t" "eor r8, r8, r4\n\t" "subs %[len], %[len], #1\n\t" @@ -519,7 +517,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "add %[c], %[c], #1\n\t" "b L_chacha_arm32_crypt_byte_loop_%=\n\t" "\n" - "L_chacha_arm32_crypt_done_%=: \n\t" + "L_chacha_arm32_crypt_done_%=:\n\t" "add sp, sp, #52\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len) @@ -539,18 +537,18 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over_p, byte* output_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* over asm ("r0") = (byte*)over_p; - register byte* output asm ("r1") = (byte*)output_p; - register const byte* input asm ("r2") = (const byte*)input_p; - register word32 len asm ("r3") = (word32)len_p; + register byte* over __asm__ ("r0") = (byte*)over_p; + register byte* output __asm__ ("r1") = (byte*)output_p; + register const byte* input __asm__ ("r2") = (const byte*)input_p; + register word32 len __asm__ ("r3") = (word32)len_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n" - "L_chacha_arm32_over_16byte_loop_%=: \n\t" + "L_chacha_arm32_over_16byte_loop_%=:\n\t" "cmp %[len], #16\n\t" "blt L_chacha_arm32_over_word_loop_%=\n\t" /* 16 bytes of state XORed into message. */ @@ -577,7 +575,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "add %[output], %[output], #16\n\t" "b L_chacha_arm32_over_16byte_loop_%=\n\t" "\n" - "L_chacha_arm32_over_word_loop_%=: \n\t" + "L_chacha_arm32_over_word_loop_%=:\n\t" "cmp %[len], #4\n\t" "blt L_chacha_arm32_over_byte_loop_%=\n\t" /* 4 bytes of state XORed into message. */ @@ -592,7 +590,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "add %[output], %[output], #4\n\t" "b L_chacha_arm32_over_word_loop_%=\n\t" "\n" - "L_chacha_arm32_over_byte_loop_%=: \n\t" + "L_chacha_arm32_over_byte_loop_%=:\n\t" /* 4 bytes of state XORed into message. */ "ldrb r12, [%[over]]\n\t" "ldrb r6, [%[input]]\n\t" @@ -605,7 +603,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "add %[output], %[output], #1\n\t" "b L_chacha_arm32_over_byte_loop_%=\n\t" "\n" - "L_chacha_arm32_over_done_%=: \n\t" + "L_chacha_arm32_over_done_%=:\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), [len] "+r" (len) @@ -627,13 +625,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx_p, byte* c_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ChaCha* ctx asm ("r0") = (ChaCha*)ctx_p; - register byte* c asm ("r1") = (byte*)c_p; - register const byte* m asm ("r2") = (const byte*)m_p; - register word32 len asm ("r3") = (word32)len_p; + register ChaCha* ctx __asm__ ("r0") = (ChaCha*)ctx_p; + register byte* c __asm__ ("r1") = (byte*)c_p; + register const byte* m __asm__ ("r2") = (const byte*)m_p; + register word32 len __asm__ ("r3") = (word32)len_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -644,7 +642,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "blt L_chacha_crypt_bytes_arm32_lt_256_%=\n\t" "str %[ctx], [sp, #28]\n\t" "\n" - "L_chacha_crypt_bytes_arm32_start_256_%=: \n\t" + "L_chacha_crypt_bytes_arm32_start_256_%=:\n\t" "str %[m], [sp, #32]\n\t" "str %[c], [sp, #36]\n\t" "str %[len], [sp, #40]\n\t" @@ -676,7 +674,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, /* Set number of odd+even rounds to perform */ "mov lr, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm32_round_start_256_%=: \n\t" + "L_chacha_crypt_bytes_arm32_round_start_256_%=:\n\t" "subs lr, lr, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -986,7 +984,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "cmp %[len], #0x100\n\t" "bge L_chacha_crypt_bytes_arm32_start_256_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_lt_256_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_256_%=:\n\t" "cmp %[len], #0x80\n\t" "blt L_chacha_crypt_bytes_arm32_lt_128_%=\n\t" /* Move state into vector registers */ @@ -1006,7 +1004,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, /* Set number of odd+even rounds to perform */ "mov lr, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm32_round_start_128_%=: \n\t" + "L_chacha_crypt_bytes_arm32_round_start_128_%=:\n\t" "subs lr, lr, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -1124,7 +1122,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "sub %[len], %[len], #0x80\n\t" /* Done 128-byte block */ "\n" - "L_chacha_crypt_bytes_arm32_lt_128_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_128_%=:\n\t" "cmp %[len], #0\n\t" "beq L_chacha_crypt_bytes_arm32_done_all_%=\n\t" "mov r12, #1\n\t" @@ -1133,7 +1131,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vmov d18[0], r12\n\t" "mov r12, #0x40\n\t" "\n" - "L_chacha_crypt_bytes_arm32_loop_64_%=: \n\t" + "L_chacha_crypt_bytes_arm32_loop_64_%=:\n\t" /* Move state into vector registers */ "vmov q0, q12\n\t" "vmov q1, q13\n\t" @@ -1142,7 +1140,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, /* Set number of odd+even rounds to perform */ "mov lr, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm32_round_64_%=: \n\t" + "L_chacha_crypt_bytes_arm32_round_64_%=:\n\t" "subs lr, lr, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -1214,7 +1212,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "bne L_chacha_crypt_bytes_arm32_loop_64_%=\n\t" "b L_chacha_crypt_bytes_arm32_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_lt_64_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_64_%=:\n\t" /* Calculate bytes left in block not used */ "sub r12, r12, %[len]\n\t" /* Store encipher block in over for further operations and left */ @@ -1233,7 +1231,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vmov q1, q3\n\t" "beq L_chacha_crypt_bytes_arm32_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_lt_32_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_32_%=:\n\t" "cmp %[len], #16\n\t" "blt L_chacha_crypt_bytes_arm32_lt_16_%=\n\t" /* Encipher 16 bytes */ @@ -1244,7 +1242,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vmov q0, q1\n\t" "beq L_chacha_crypt_bytes_arm32_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_lt_16_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_16_%=:\n\t" "cmp %[len], #8\n\t" "blt L_chacha_crypt_bytes_arm32_lt_8_%=\n\t" /* Encipher 8 bytes */ @@ -1255,7 +1253,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vmov d0, d1\n\t" "beq L_chacha_crypt_bytes_arm32_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_lt_8_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_8_%=:\n\t" "cmp %[len], #4\n\t" "blt L_chacha_crypt_bytes_arm32_lt_4_%=\n\t" /* Encipher 8 bytes */ @@ -1267,10 +1265,10 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vshr.u64 d0, d0, #32\n\t" "beq L_chacha_crypt_bytes_arm32_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_lt_4_%=: \n\t" + "L_chacha_crypt_bytes_arm32_lt_4_%=:\n\t" "vmov r12, s0\n\t" "\n" - "L_chacha_crypt_bytes_arm32loop_lt_4_%=: \n\t" + "L_chacha_crypt_bytes_arm32loop_lt_4_%=:\n\t" /* Encipher 1 byte at a time */ "ldrb r4, [%[m]], #1\n\t" "eor r4, r4, r12\n\t" @@ -1279,9 +1277,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "lsr r12, r12, #8\n\t" "bgt L_chacha_crypt_bytes_arm32loop_lt_4_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm32_done_%=: \n\t" + "L_chacha_crypt_bytes_arm32_done_%=:\n\t" "\n" - "L_chacha_crypt_bytes_arm32_done_all_%=: \n\t" + "L_chacha_crypt_bytes_arm32_done_all_%=:\n\t" "vstm.32 %[ctx], {q12-q15}\n\t" "add sp, sp, #44\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -1297,7 +1295,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, ); } -static const word32 L_chacha_setkey_arm32_constant[] = { +XALIGNED(8) static const word32 L_chacha_setkey_arm32_constant[] = { 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574, 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, }; @@ -1308,13 +1306,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x_p, const byte* key_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register word32* x asm ("r0") = (word32*)x_p; - register const byte* key asm ("r1") = (const byte*)key_p; - register word32 keySz asm ("r2") = (word32)keySz_p; - register word32* L_chacha_setkey_arm32_constant_c asm ("r3") = + register word32* x __asm__ ("r0") = (word32*)x_p; + register const byte* key __asm__ ("r1") = (const byte*)key_p; + register word32 keySz __asm__ ("r2") = (word32)keySz_p; + register word32* L_chacha_setkey_arm32_constant_c __asm__ ("r3") = (word32*)&L_chacha_setkey_arm32_constant; #else register word32* L_chacha_setkey_arm32_constant_c = @@ -1338,7 +1336,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "vrev32.16 q1, q1\n\t" #endif /* BIG_ENDIAN_ORDER */ "\n" - "L_chacha_setkey_arm32_done_%=: \n\t" + "L_chacha_setkey_arm32_done_%=:\n\t" "vstm %[x], {q1}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), @@ -1359,14 +1357,14 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over_p, byte* output_p, #else WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* over asm ("r0") = (byte*)over_p; - register byte* output asm ("r1") = (byte*)output_p; - register const byte* input asm ("r2") = (const byte*)input_p; - register word32 len asm ("r3") = (word32)len_p; - register word32* L_chacha_setkey_arm32_constant_c asm ("r12") = + register byte* over __asm__ ("r0") = (byte*)over_p; + register byte* output __asm__ ("r1") = (byte*)output_p; + register const byte* input __asm__ ("r2") = (const byte*)input_p; + register word32 len __asm__ ("r3") = (word32)len_p; + register word32* L_chacha_setkey_arm32_constant_c __asm__ ("r12") = (word32*)&L_chacha_setkey_arm32_constant; #else register word32* L_chacha_setkey_arm32_constant_c = @@ -1376,7 +1374,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, __asm__ __volatile__ ( "push {%[L_chacha_setkey_arm32_constant]}\n\t" "\n" - "L_chacha_use_over_arm32_16byte_loop_%=: \n\t" + "L_chacha_use_over_arm32_16byte_loop_%=:\n\t" "cmp %[len], #16\n\t" "blt L_chacha_use_over_arm32_word_loop_%=\n\t" /* 16 bytes of state XORed into message. */ @@ -1388,7 +1386,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "beq L_chacha_use_over_arm32_done_%=\n\t" "b L_chacha_use_over_arm32_16byte_loop_%=\n\t" "\n" - "L_chacha_use_over_arm32_word_loop_%=: \n\t" + "L_chacha_use_over_arm32_word_loop_%=:\n\t" "cmp %[len], #4\n\t" "blt L_chacha_use_over_arm32_byte_loop_%=\n\t" /* 4 bytes of state XORed into message. */ @@ -1400,7 +1398,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "beq L_chacha_use_over_arm32_done_%=\n\t" "b L_chacha_use_over_arm32_word_loop_%=\n\t" "\n" - "L_chacha_use_over_arm32_byte_loop_%=: \n\t" + "L_chacha_use_over_arm32_byte_loop_%=:\n\t" /* 1 bytes of state XORed into message. */ "ldrb r12, [%[over]], #1\n\t" "ldrb lr, [%[input]], #1\n\t" @@ -1410,7 +1408,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, "beq L_chacha_use_over_arm32_done_%=\n\t" "b L_chacha_use_over_arm32_byte_loop_%=\n\t" "\n" - "L_chacha_use_over_arm32_done_%=: \n\t" + "L_chacha_use_over_arm32_done_%=:\n\t" "pop {%[L_chacha_setkey_arm32_constant]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/wolfcrypt/src/port/arm/armv8-32-curve25519.S index 2e333d16387..e307cb9fb4d 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -2737,12 +2737,12 @@ fe_mul_op: lsl r0, r0, #1 orr r0, r0, r10, lsr #31 mul r11, r0, lr - pop {r0, r1, r2} + pop {r0-r2} mov lr, #38 umaal r0, r11, r12, lr umaal r1, r11, r4, lr umaal r2, r11, r5, lr - pop {r3, r4, r5} + pop {r3-r5} umaal r3, r11, r6, lr umaal r4, r11, r7, lr umaal r5, r11, r8, lr @@ -3147,18 +3147,18 @@ fe_sq_op: lsl r0, r0, #1 orr r0, r0, r7, lsr #31 mul lr, r0, r6 - pop {r0, r1} + pop {r0-r1} mov r6, #38 umaal r0, lr, r12, r6 umaal r1, lr, r11, r6 mov r12, r3 mov r11, r4 - pop {r2, r3, r4} + pop {r2-r4} umaal r2, lr, r10, r6 umaal r3, lr, r12, r6 umaal r4, lr, r11, r6 mov r12, r6 - pop {r5, r6} + pop {r5-r6} umaal r5, lr, r8, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) bic r7, r7, #0x80000000 @@ -4563,18 +4563,18 @@ fe_sq2: lsl r0, r0, #1 orr r0, r0, r7, lsr #31 mul lr, r0, r6 - pop {r0, r1} + pop {r0-r1} mov r6, #38 umaal r0, lr, r12, r6 umaal r1, lr, r11, r6 mov r12, r3 mov r11, r4 - pop {r2, r3, r4} + pop {r2-r4} umaal r2, lr, r10, r6 umaal r3, lr, r12, r6 umaal r4, lr, r11, r6 mov r12, r6 - pop {r5, r6} + pop {r5-r6} umaal r5, lr, r8, r12 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) bic r7, r7, #0x80000000 diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 7ed31b978c2..c981871e4bd 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -64,7 +62,7 @@ WC_OMIT_FRAME_POINTER void fe_init() #else WC_OMIT_FRAME_POINTER void fe_init() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -86,7 +84,7 @@ void fe_add_sub_op(void); WC_OMIT_FRAME_POINTER void fe_add_sub_op() #else WC_OMIT_FRAME_POINTER void fe_add_sub_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -305,7 +303,7 @@ void fe_sub_op(void); WC_OMIT_FRAME_POINTER void fe_sub_op() #else WC_OMIT_FRAME_POINTER void fe_sub_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -357,12 +355,12 @@ WC_OMIT_FRAME_POINTER void fe_sub_op() WC_OMIT_FRAME_POINTER void fe_sub(fe r_p, const fe a_p, const fe b_p) #else WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -384,7 +382,7 @@ void fe_add_op(void); WC_OMIT_FRAME_POINTER void fe_add_op() #else WC_OMIT_FRAME_POINTER void fe_add_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -437,12 +435,12 @@ WC_OMIT_FRAME_POINTER void fe_add_op() WC_OMIT_FRAME_POINTER void fe_add(fe r_p, const fe a_p, const fe b_p) #else WC_OMIT_FRAME_POINTER void fe_add(fe r, const fe a, const fe b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -464,11 +462,12 @@ WC_OMIT_FRAME_POINTER void fe_add(fe r, const fe a, const fe b) WC_OMIT_FRAME_POINTER void fe_frombytes(fe out_p, const unsigned char* in_p) #else WC_OMIT_FRAME_POINTER void fe_frombytes(fe out, const unsigned char* in) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* out asm ("r0") = (sword32*)out_p; - register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; + register sword32* out __asm__ ("r0") = (sword32*)out_p; + register const unsigned char* in __asm__ ("r1") = + (const unsigned char*)in_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -508,11 +507,11 @@ WC_OMIT_FRAME_POINTER void fe_frombytes(fe out, const unsigned char* in) WC_OMIT_FRAME_POINTER void fe_tobytes(unsigned char* out_p, const fe n_p) #else WC_OMIT_FRAME_POINTER void fe_tobytes(unsigned char* out, const fe n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register unsigned char* out asm ("r0") = (unsigned char*)out_p; - register const sword32* n asm ("r1") = (const sword32*)n_p; + register unsigned char* out __asm__ ("r0") = (unsigned char*)out_p; + register const sword32* n __asm__ ("r1") = (const sword32*)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -563,10 +562,10 @@ WC_OMIT_FRAME_POINTER void fe_tobytes(unsigned char* out, const fe n) WC_OMIT_FRAME_POINTER void fe_1(fe n_p) #else WC_OMIT_FRAME_POINTER void fe_1(fe n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* n asm ("r0") = (sword32*)n_p; + register sword32* n __asm__ ("r0") = (sword32*)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -595,10 +594,10 @@ WC_OMIT_FRAME_POINTER void fe_1(fe n) WC_OMIT_FRAME_POINTER void fe_0(fe n_p) #else WC_OMIT_FRAME_POINTER void fe_0(fe n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* n asm ("r0") = (sword32*)n_p; + register sword32* n __asm__ ("r0") = (sword32*)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -627,11 +626,11 @@ WC_OMIT_FRAME_POINTER void fe_0(fe n) WC_OMIT_FRAME_POINTER void fe_copy(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_copy(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -697,11 +696,11 @@ WC_OMIT_FRAME_POINTER void fe_copy(fe r, const fe a) WC_OMIT_FRAME_POINTER void fe_neg(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_neg(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -735,10 +734,10 @@ WC_OMIT_FRAME_POINTER void fe_neg(fe r, const fe a) WC_OMIT_FRAME_POINTER int fe_isnonzero(const fe a_p) #else WC_OMIT_FRAME_POINTER int fe_isnonzero(const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sword32* a asm ("r0") = (const sword32*)a_p; + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -790,10 +789,10 @@ WC_OMIT_FRAME_POINTER int fe_isnonzero(const fe a) WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a_p) #else WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sword32* a asm ("r0") = (const sword32*)a_p; + register const sword32* a __asm__ ("r0") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -831,12 +830,12 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r_p, const fe* base_p, signed char b_p) #else WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register fe* r asm ("r0") = (fe*)r_p; - register const fe* base asm ("r1") = (const fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; + register fe* r __asm__ ("r0") = (fe*)r_p; + register const fe* base __asm__ ("r1") = (const fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2377,12 +2376,12 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r_p, const fe* base_p, signed char b_p) #else WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register fe* r asm ("r0") = (fe*)r_p; - register const fe* base asm ("r1") = (const fe*)base_p; - register signed char b asm ("r2") = (signed char)b_p; + register fe* r __asm__ ("r0") = (fe*)r_p; + register const fe* base __asm__ ("r1") = (const fe*)base_p; + register signed char b __asm__ ("r2") = (signed char)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2514,7 +2513,7 @@ void fe_mul_op(void); WC_OMIT_FRAME_POINTER void fe_mul_op() #else WC_OMIT_FRAME_POINTER void fe_mul_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -2909,7 +2908,7 @@ void fe_mul_op(void); WC_OMIT_FRAME_POINTER void fe_mul_op() #else WC_OMIT_FRAME_POINTER void fe_mul_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -3062,12 +3061,12 @@ WC_OMIT_FRAME_POINTER void fe_mul_op() WC_OMIT_FRAME_POINTER void fe_mul(fe r_p, const fe a_p, const fe b_p) #else WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; - register const sword32* b asm ("r2") = (const sword32*)b_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; + register const sword32* b __asm__ ("r2") = (const sword32*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3090,7 +3089,7 @@ void fe_sq_op(void); WC_OMIT_FRAME_POINTER void fe_sq_op() #else WC_OMIT_FRAME_POINTER void fe_sq_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -3378,7 +3377,7 @@ void fe_sq_op(void); WC_OMIT_FRAME_POINTER void fe_sq_op() #else WC_OMIT_FRAME_POINTER void fe_sq_op() -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -3517,11 +3516,11 @@ WC_OMIT_FRAME_POINTER void fe_sq_op() WC_OMIT_FRAME_POINTER void fe_sq(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_sq(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3544,11 +3543,11 @@ WC_OMIT_FRAME_POINTER void fe_sq(fe r, const fe a) WC_OMIT_FRAME_POINTER void fe_mul121666(fe r_p, fe a_p) #else WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3628,11 +3627,11 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) WC_OMIT_FRAME_POINTER void fe_mul121666(fe r_p, fe a_p) #else WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register sword32* a asm ("r1") = (sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register sword32* a __asm__ ("r1") = (sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3701,12 +3700,12 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #else WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; + register byte* r __asm__ ("r0") = (byte*)r_p; + register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3739,9 +3738,9 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "mov %[a], #28\n\t" "str %[a], [sp, #176]\n\t" "\n" - "L_curve25519_words_%=: \n\t" + "L_curve25519_words_%=:\n\t" "\n" - "L_curve25519_bits_%=: \n\t" + "L_curve25519_bits_%=:\n\t" "ldr %[n], [sp, #164]\n\t" "ldr %[a], [%[n], r2]\n\t" "ldr %[n], [sp, #180]\n\t" @@ -3957,7 +3956,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #4\n\t" "\n" - "L_curve25519_inv_1_%=: \n\t" + "L_curve25519_inv_1_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -3974,7 +3973,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #9\n\t" "\n" - "L_curve25519_inv_2_%=: \n\t" + "L_curve25519_inv_2_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -3991,7 +3990,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #19\n\t" "\n" - "L_curve25519_inv_3_%=: \n\t" + "L_curve25519_inv_3_%=:\n\t" "add r1, sp, #0x80\n\t" "add r0, sp, #0x80\n\t" "push {r12}\n\t" @@ -4005,7 +4004,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_mul_op\n\t" "mov r12, #10\n\t" "\n" - "L_curve25519_inv_4_%=: \n\t" + "L_curve25519_inv_4_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4022,7 +4021,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #49\n\t" "\n" - "L_curve25519_inv_5_%=: \n\t" + "L_curve25519_inv_5_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4039,7 +4038,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #0x63\n\t" "\n" - "L_curve25519_inv_6_%=: \n\t" + "L_curve25519_inv_6_%=:\n\t" "add r1, sp, #0x80\n\t" "add r0, sp, #0x80\n\t" "push {r12}\n\t" @@ -4053,7 +4052,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_mul_op\n\t" "mov r12, #50\n\t" "\n" - "L_curve25519_inv_7_%=: \n\t" + "L_curve25519_inv_7_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4067,7 +4066,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_mul_op\n\t" "mov r12, #5\n\t" "\n" - "L_curve25519_inv_8_%=: \n\t" + "L_curve25519_inv_8_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4104,12 +4103,12 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #else WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* r asm ("r0") = (byte*)r_p; - register const byte* n asm ("r1") = (const byte*)n_p; - register const byte* a asm ("r2") = (const byte*)a_p; + register byte* r __asm__ ("r0") = (byte*)r_p; + register const byte* n __asm__ ("r1") = (const byte*)n_p; + register const byte* a __asm__ ("r2") = (const byte*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4144,7 +4143,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "stm r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "mov %[a], #0xfe\n\t" "\n" - "L_curve25519_bits_%=: \n\t" + "L_curve25519_bits_%=:\n\t" "str %[a], [sp, #168]\n\t" "ldr %[n], [sp, #160]\n\t" "and r4, %[a], #31\n\t" @@ -4264,7 +4263,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #4\n\t" "\n" - "L_curve25519_inv_1_%=: \n\t" + "L_curve25519_inv_1_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4281,7 +4280,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #9\n\t" "\n" - "L_curve25519_inv_2_%=: \n\t" + "L_curve25519_inv_2_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4298,7 +4297,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #19\n\t" "\n" - "L_curve25519_inv_3_%=: \n\t" + "L_curve25519_inv_3_%=:\n\t" "add r1, sp, #0x80\n\t" "add r0, sp, #0x80\n\t" "push {r12}\n\t" @@ -4312,7 +4311,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_mul_op\n\t" "mov r12, #10\n\t" "\n" - "L_curve25519_inv_4_%=: \n\t" + "L_curve25519_inv_4_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4329,7 +4328,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #49\n\t" "\n" - "L_curve25519_inv_5_%=: \n\t" + "L_curve25519_inv_5_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4346,7 +4345,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_sq_op\n\t" "mov r12, #0x63\n\t" "\n" - "L_curve25519_inv_6_%=: \n\t" + "L_curve25519_inv_6_%=:\n\t" "add r1, sp, #0x80\n\t" "add r0, sp, #0x80\n\t" "push {r12}\n\t" @@ -4360,7 +4359,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_mul_op\n\t" "mov r12, #50\n\t" "\n" - "L_curve25519_inv_7_%=: \n\t" + "L_curve25519_inv_7_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4374,7 +4373,7 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "bl fe_mul_op\n\t" "mov r12, #5\n\t" "\n" - "L_curve25519_inv_8_%=: \n\t" + "L_curve25519_inv_8_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4431,11 +4430,11 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) WC_OMIT_FRAME_POINTER void fe_invert(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4472,7 +4471,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #4\n\t" "\n" - "L_fe_invert1_%=: \n\t" + "L_fe_invert1_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4489,7 +4488,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #9\n\t" "\n" - "L_fe_invert2_%=: \n\t" + "L_fe_invert2_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4506,7 +4505,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #19\n\t" "\n" - "L_fe_invert3_%=: \n\t" + "L_fe_invert3_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4520,7 +4519,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_mul_op\n\t" "mov r12, #10\n\t" "\n" - "L_fe_invert4_%=: \n\t" + "L_fe_invert4_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4537,7 +4536,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #49\n\t" "\n" - "L_fe_invert5_%=: \n\t" + "L_fe_invert5_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4554,7 +4553,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #0x63\n\t" "\n" - "L_fe_invert6_%=: \n\t" + "L_fe_invert6_%=:\n\t" "add r1, sp, #0x60\n\t" "add r0, sp, #0x60\n\t" "push {r12}\n\t" @@ -4568,7 +4567,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_mul_op\n\t" "mov r12, #50\n\t" "\n" - "L_fe_invert7_%=: \n\t" + "L_fe_invert7_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -4582,7 +4581,7 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "bl fe_mul_op\n\t" "mov r12, #5\n\t" "\n" - "L_fe_invert8_%=: \n\t" + "L_fe_invert8_%=:\n\t" "add r1, sp, #32\n\t" "add r0, sp, #32\n\t" "push {r12}\n\t" @@ -4614,11 +4613,11 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) WC_OMIT_FRAME_POINTER void fe_sq2(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4945,11 +4944,11 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) WC_OMIT_FRAME_POINTER void fe_sq2(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5135,11 +5134,11 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) WC_OMIT_FRAME_POINTER void fe_pow22523(fe r_p, const fe a_p) #else WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword32* r asm ("r0") = (sword32*)r_p; - register const sword32* a asm ("r1") = (const sword32*)a_p; + register sword32* r __asm__ ("r0") = (sword32*)r_p; + register const sword32* a __asm__ ("r1") = (const sword32*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5176,7 +5175,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #4\n\t" "\n" - "L_fe_pow22523_1_%=: \n\t" + "L_fe_pow22523_1_%=:\n\t" "add r1, sp, #32\n\t" "add r0, sp, #32\n\t" "push {r12}\n\t" @@ -5193,7 +5192,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #9\n\t" "\n" - "L_fe_pow22523_2_%=: \n\t" + "L_fe_pow22523_2_%=:\n\t" "add r1, sp, #32\n\t" "add r0, sp, #32\n\t" "push {r12}\n\t" @@ -5210,7 +5209,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #19\n\t" "\n" - "L_fe_pow22523_3_%=: \n\t" + "L_fe_pow22523_3_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -5224,7 +5223,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_mul_op\n\t" "mov r12, #10\n\t" "\n" - "L_fe_pow22523_4_%=: \n\t" + "L_fe_pow22523_4_%=:\n\t" "add r1, sp, #32\n\t" "add r0, sp, #32\n\t" "push {r12}\n\t" @@ -5241,7 +5240,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #49\n\t" "\n" - "L_fe_pow22523_5_%=: \n\t" + "L_fe_pow22523_5_%=:\n\t" "add r1, sp, #32\n\t" "add r0, sp, #32\n\t" "push {r12}\n\t" @@ -5258,7 +5257,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_sq_op\n\t" "mov r12, #0x63\n\t" "\n" - "L_fe_pow22523_6_%=: \n\t" + "L_fe_pow22523_6_%=:\n\t" "add r1, sp, #0x40\n\t" "add r0, sp, #0x40\n\t" "push {r12}\n\t" @@ -5272,7 +5271,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_mul_op\n\t" "mov r12, #50\n\t" "\n" - "L_fe_pow22523_7_%=: \n\t" + "L_fe_pow22523_7_%=:\n\t" "add r1, sp, #32\n\t" "add r0, sp, #32\n\t" "push {r12}\n\t" @@ -5286,7 +5285,7 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "bl fe_mul_op\n\t" "mov r12, #2\n\t" "\n" - "L_fe_pow22523_8_%=: \n\t" + "L_fe_pow22523_8_%=:\n\t" "mov r1, sp\n\t" "mov r0, sp\n\t" "push {r12}\n\t" @@ -5317,11 +5316,11 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) #else WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; + register ge_p2 * r __asm__ ("r0") = (ge_p2 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5359,11 +5358,11 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) #else WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p3 * r asm ("r0") = (ge_p3 *)r_p; - register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; + register ge_p3 * r __asm__ ("r0") = (ge_p3 *)r_p; + register const ge_p1p1 * p __asm__ ("r1") = (const ge_p1p1 *)p_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5406,11 +5405,11 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) WC_OMIT_FRAME_POINTER void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) #else WC_OMIT_FRAME_POINTER void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p2 * p __asm__ ("r1") = (const ge_p2 *)p_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5467,12 +5466,12 @@ WC_OMIT_FRAME_POINTER void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, #else WC_OMIT_FRAME_POINTER void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5567,12 +5566,12 @@ WC_OMIT_FRAME_POINTER void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, #else WC_OMIT_FRAME_POINTER void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q __asm__ ("r2") = (const ge_precomp *)q_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5668,12 +5667,12 @@ WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, #else WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5769,12 +5768,12 @@ WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, #else WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; - register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; - register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; + register ge_p1p1 * r __asm__ ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p __asm__ ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q __asm__ ("r2") = (const ge_cached*)q_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5871,10 +5870,10 @@ WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r, const ge_p3 * p, WC_OMIT_FRAME_POINTER void sc_reduce(byte* s_p) #else WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* s asm ("r0") = (byte*)s_p; + register byte* s __asm__ ("r0") = (byte*)s_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -6671,10 +6670,10 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) WC_OMIT_FRAME_POINTER void sc_reduce(byte* s_p) #else WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* s asm ("r0") = (byte*)s_p; + register byte* s __asm__ ("r0") = (byte*)s_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -7346,13 +7345,13 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s_p, const byte* a_p, #else WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -8508,13 +8507,13 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s_p, const byte* a_p, #else WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register byte* s asm ("r0") = (byte*)s_p; - register const byte* a asm ("r1") = (const byte*)a_p; - register const byte* b asm ("r2") = (const byte*)b_p; - register const byte* c asm ("r3") = (const byte*)c_p; + register byte* s __asm__ ("r0") = (byte*)s_p; + register const byte* a __asm__ ("r1") = (const byte*)a_p; + register const byte* b __asm__ ("r2") = (const byte*)b_p; + register const byte* c __asm__ ("r3") = (const byte*)c_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S index 90a275c2326..638a0310d12 100644 --- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S @@ -31,139 +31,36 @@ #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) #ifndef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_WC_MLKEM +#ifndef __APPLE__ .text .type L_mlkem_arm32_ntt_zetas, %object .size L_mlkem_arm32_ntt_zetas, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 4-byte aligned, 32-bit aligned +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_mlkem_arm32_ntt_zetas: - .short 0x8ed - .short 0xa0b - .short 0xb9a - .short 0x714 - .short 0x5d5 - .short 0x58e - .short 0x11f - .short 0xca - .short 0xc56 - .short 0x26e - .short 0x629 - .short 0xb6 - .short 0x3c2 - .short 0x84f - .short 0x73f - .short 0x5bc - .short 0x23d - .short 0x7d4 - .short 0x108 - .short 0x17f - .short 0x9c4 - .short 0x5b2 - .short 0x6bf - .short 0xc7f - .short 0xa58 - .short 0x3f9 - .short 0x2dc - .short 0x260 - .short 0x6fb - .short 0x19b - .short 0xc34 - .short 0x6de - .short 0x4c7 - .short 0x28c - .short 0xad9 - .short 0x3f7 - .short 0x7f4 - .short 0x5d3 - .short 0xbe7 - .short 0x6f9 - .short 0x204 - .short 0xcf9 - .short 0xbc1 - .short 0xa67 - .short 0x6af - .short 0x877 - .short 0x7e - .short 0x5bd - .short 0x9ac - .short 0xca7 - .short 0xbf2 - .short 0x33e - .short 0x6b - .short 0x774 - .short 0xc0a - .short 0x94a - .short 0xb73 - .short 0x3c1 - .short 0x71d - .short 0xa2c - .short 0x1c0 - .short 0x8d8 - .short 0x2a5 - .short 0x806 - .short 0x8b2 - .short 0x1ae - .short 0x22b - .short 0x34b - .short 0x81e - .short 0x367 - .short 0x60e - .short 0x69 - .short 0x1a6 - .short 0x24b - .short 0xb1 - .short 0xc16 - .short 0xbde - .short 0xb35 - .short 0x626 - .short 0x675 - .short 0xc0b - .short 0x30a - .short 0x487 - .short 0xc6e - .short 0x9f8 - .short 0x5cb - .short 0xaa7 - .short 0x45f - .short 0x6cb - .short 0x284 - .short 0x999 - .short 0x15d - .short 0x1a2 - .short 0x149 - .short 0xc65 - .short 0xcb6 - .short 0x331 - .short 0x449 - .short 0x25b - .short 0x262 - .short 0x52a - .short 0x7fc - .short 0x748 - .short 0x180 - .short 0x842 - .short 0xc79 - .short 0x4c2 - .short 0x7ca - .short 0x997 - .short 0xdc - .short 0x85e - .short 0x686 - .short 0x860 - .short 0x707 - .short 0x803 - .short 0x31a - .short 0x71b - .short 0x9ab - .short 0x99b - .short 0x1de - .short 0xc95 - .short 0xbcd - .short 0x3e4 - .short 0x3df - .short 0x3be - .short 0x74d - .short 0x5f2 - .short 0x65c + .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca + .short 0x0c56,0x026e,0x0629,0x00b6,0x03c2,0x084f,0x073f,0x05bc + .short 0x023d,0x07d4,0x0108,0x017f,0x09c4,0x05b2,0x06bf,0x0c7f + .short 0x0a58,0x03f9,0x02dc,0x0260,0x06fb,0x019b,0x0c34,0x06de + .short 0x04c7,0x028c,0x0ad9,0x03f7,0x07f4,0x05d3,0x0be7,0x06f9 + .short 0x0204,0x0cf9,0x0bc1,0x0a67,0x06af,0x0877,0x007e,0x05bd + .short 0x09ac,0x0ca7,0x0bf2,0x033e,0x006b,0x0774,0x0c0a,0x094a + .short 0x0b73,0x03c1,0x071d,0x0a2c,0x01c0,0x08d8,0x02a5,0x0806 + .short 0x08b2,0x01ae,0x022b,0x034b,0x081e,0x0367,0x060e,0x0069 + .short 0x01a6,0x024b,0x00b1,0x0c16,0x0bde,0x0b35,0x0626,0x0675 + .short 0x0c0b,0x030a,0x0487,0x0c6e,0x09f8,0x05cb,0x0aa7,0x045f + .short 0x06cb,0x0284,0x0999,0x015d,0x01a2,0x0149,0x0c65,0x0cb6 + .short 0x0331,0x0449,0x025b,0x0262,0x052a,0x07fc,0x0748,0x0180 + .short 0x0842,0x0c79,0x04c2,0x07ca,0x0997,0x00dc,0x085e,0x0686 + .short 0x0860,0x0707,0x0803,0x031a,0x071b,0x09ab,0x099b,0x01de + .short 0x0c95,0x0bcd,0x03e4,0x03df,0x03be,0x074d,0x05f2,0x065c .text .align 4 .globl mlkem_arm32_ntt @@ -3155,139 +3052,36 @@ L_mlkem_arm32_ntt_loop_567: add sp, sp, #8 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size mlkem_arm32_ntt,.-mlkem_arm32_ntt +#ifndef __APPLE__ .text .type L_mlkem_invntt_zetas_inv, %object .size L_mlkem_invntt_zetas_inv, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 4-byte aligned, 32-bit aligned +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_mlkem_invntt_zetas_inv: - .short 0x6a5 - .short 0x70f - .short 0x5b4 - .short 0x943 - .short 0x922 - .short 0x91d - .short 0x134 - .short 0x6c - .short 0xb23 - .short 0x366 - .short 0x356 - .short 0x5e6 - .short 0x9e7 - .short 0x4fe - .short 0x5fa - .short 0x4a1 - .short 0x67b - .short 0x4a3 - .short 0xc25 - .short 0x36a - .short 0x537 - .short 0x83f - .short 0x88 - .short 0x4bf - .short 0xb81 - .short 0x5b9 - .short 0x505 - .short 0x7d7 - .short 0xa9f - .short 0xaa6 - .short 0x8b8 - .short 0x9d0 - .short 0x4b - .short 0x9c - .short 0xbb8 - .short 0xb5f - .short 0xba4 - .short 0x368 - .short 0xa7d - .short 0x636 - .short 0x8a2 - .short 0x25a - .short 0x736 - .short 0x309 - .short 0x93 - .short 0x87a - .short 0x9f7 - .short 0xf6 - .short 0x68c - .short 0x6db - .short 0x1cc - .short 0x123 - .short 0xeb - .short 0xc50 - .short 0xab6 - .short 0xb5b - .short 0xc98 - .short 0x6f3 - .short 0x99a - .short 0x4e3 - .short 0x9b6 - .short 0xad6 - .short 0xb53 - .short 0x44f - .short 0x4fb - .short 0xa5c - .short 0x429 - .short 0xb41 - .short 0x2d5 - .short 0x5e4 - .short 0x940 - .short 0x18e - .short 0x3b7 - .short 0xf7 - .short 0x58d - .short 0xc96 - .short 0x9c3 - .short 0x10f - .short 0x5a - .short 0x355 - .short 0x744 - .short 0xc83 - .short 0x48a - .short 0x652 - .short 0x29a - .short 0x140 - .short 0x8 - .short 0xafd - .short 0x608 - .short 0x11a - .short 0x72e - .short 0x50d - .short 0x90a - .short 0x228 - .short 0xa75 - .short 0x83a - .short 0x623 - .short 0xcd - .short 0xb66 - .short 0x606 - .short 0xaa1 - .short 0xa25 - .short 0x908 - .short 0x2a9 - .short 0x82 - .short 0x642 - .short 0x74f - .short 0x33d - .short 0xb82 - .short 0xbf9 - .short 0x52d - .short 0xac4 - .short 0x745 - .short 0x5c2 - .short 0x4b2 - .short 0x93f - .short 0xc4b - .short 0x6d8 - .short 0xa93 - .short 0xab - .short 0xc37 - .short 0xbe2 - .short 0x773 - .short 0x72c - .short 0x5ed - .short 0x167 - .short 0x2f6 - .short 0x5a1 + .short 0x06a5,0x070f,0x05b4,0x0943,0x0922,0x091d,0x0134,0x006c + .short 0x0b23,0x0366,0x0356,0x05e6,0x09e7,0x04fe,0x05fa,0x04a1 + .short 0x067b,0x04a3,0x0c25,0x036a,0x0537,0x083f,0x0088,0x04bf + .short 0x0b81,0x05b9,0x0505,0x07d7,0x0a9f,0x0aa6,0x08b8,0x09d0 + .short 0x004b,0x009c,0x0bb8,0x0b5f,0x0ba4,0x0368,0x0a7d,0x0636 + .short 0x08a2,0x025a,0x0736,0x0309,0x0093,0x087a,0x09f7,0x00f6 + .short 0x068c,0x06db,0x01cc,0x0123,0x00eb,0x0c50,0x0ab6,0x0b5b + .short 0x0c98,0x06f3,0x099a,0x04e3,0x09b6,0x0ad6,0x0b53,0x044f + .short 0x04fb,0x0a5c,0x0429,0x0b41,0x02d5,0x05e4,0x0940,0x018e + .short 0x03b7,0x00f7,0x058d,0x0c96,0x09c3,0x010f,0x005a,0x0355 + .short 0x0744,0x0c83,0x048a,0x0652,0x029a,0x0140,0x0008,0x0afd + .short 0x0608,0x011a,0x072e,0x050d,0x090a,0x0228,0x0a75,0x083a + .short 0x0623,0x00cd,0x0b66,0x0606,0x0aa1,0x0a25,0x0908,0x02a9 + .short 0x0082,0x0642,0x074f,0x033d,0x0b82,0x0bf9,0x052d,0x0ac4 + .short 0x0745,0x05c2,0x04b2,0x093f,0x0c4b,0x06d8,0x0a93,0x00ab + .short 0x0c37,0x0be2,0x0773,0x072c,0x05ed,0x0167,0x02f6,0x05a1 .text .align 4 .globl mlkem_arm32_invntt @@ -7673,139 +7467,36 @@ L_mlkem_invntt_loop_321: add sp, sp, #8 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size mlkem_arm32_invntt,.-mlkem_arm32_invntt +#ifndef __APPLE__ .text .type L_mlkem_basemul_mont_zetas, %object .size L_mlkem_basemul_mont_zetas, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 4-byte aligned, 32-bit aligned +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_mlkem_basemul_mont_zetas: - .short 0x8ed - .short 0xa0b - .short 0xb9a - .short 0x714 - .short 0x5d5 - .short 0x58e - .short 0x11f - .short 0xca - .short 0xc56 - .short 0x26e - .short 0x629 - .short 0xb6 - .short 0x3c2 - .short 0x84f - .short 0x73f - .short 0x5bc - .short 0x23d - .short 0x7d4 - .short 0x108 - .short 0x17f - .short 0x9c4 - .short 0x5b2 - .short 0x6bf - .short 0xc7f - .short 0xa58 - .short 0x3f9 - .short 0x2dc - .short 0x260 - .short 0x6fb - .short 0x19b - .short 0xc34 - .short 0x6de - .short 0x4c7 - .short 0x28c - .short 0xad9 - .short 0x3f7 - .short 0x7f4 - .short 0x5d3 - .short 0xbe7 - .short 0x6f9 - .short 0x204 - .short 0xcf9 - .short 0xbc1 - .short 0xa67 - .short 0x6af - .short 0x877 - .short 0x7e - .short 0x5bd - .short 0x9ac - .short 0xca7 - .short 0xbf2 - .short 0x33e - .short 0x6b - .short 0x774 - .short 0xc0a - .short 0x94a - .short 0xb73 - .short 0x3c1 - .short 0x71d - .short 0xa2c - .short 0x1c0 - .short 0x8d8 - .short 0x2a5 - .short 0x806 - .short 0x8b2 - .short 0x1ae - .short 0x22b - .short 0x34b - .short 0x81e - .short 0x367 - .short 0x60e - .short 0x69 - .short 0x1a6 - .short 0x24b - .short 0xb1 - .short 0xc16 - .short 0xbde - .short 0xb35 - .short 0x626 - .short 0x675 - .short 0xc0b - .short 0x30a - .short 0x487 - .short 0xc6e - .short 0x9f8 - .short 0x5cb - .short 0xaa7 - .short 0x45f - .short 0x6cb - .short 0x284 - .short 0x999 - .short 0x15d - .short 0x1a2 - .short 0x149 - .short 0xc65 - .short 0xcb6 - .short 0x331 - .short 0x449 - .short 0x25b - .short 0x262 - .short 0x52a - .short 0x7fc - .short 0x748 - .short 0x180 - .short 0x842 - .short 0xc79 - .short 0x4c2 - .short 0x7ca - .short 0x997 - .short 0xdc - .short 0x85e - .short 0x686 - .short 0x860 - .short 0x707 - .short 0x803 - .short 0x31a - .short 0x71b - .short 0x9ab - .short 0x99b - .short 0x1de - .short 0xc95 - .short 0xbcd - .short 0x3e4 - .short 0x3df - .short 0x3be - .short 0x74d - .short 0x5f2 - .short 0x65c + .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca + .short 0x0c56,0x026e,0x0629,0x00b6,0x03c2,0x084f,0x073f,0x05bc + .short 0x023d,0x07d4,0x0108,0x017f,0x09c4,0x05b2,0x06bf,0x0c7f + .short 0x0a58,0x03f9,0x02dc,0x0260,0x06fb,0x019b,0x0c34,0x06de + .short 0x04c7,0x028c,0x0ad9,0x03f7,0x07f4,0x05d3,0x0be7,0x06f9 + .short 0x0204,0x0cf9,0x0bc1,0x0a67,0x06af,0x0877,0x007e,0x05bd + .short 0x09ac,0x0ca7,0x0bf2,0x033e,0x006b,0x0774,0x0c0a,0x094a + .short 0x0b73,0x03c1,0x071d,0x0a2c,0x01c0,0x08d8,0x02a5,0x0806 + .short 0x08b2,0x01ae,0x022b,0x034b,0x081e,0x0367,0x060e,0x0069 + .short 0x01a6,0x024b,0x00b1,0x0c16,0x0bde,0x0b35,0x0626,0x0675 + .short 0x0c0b,0x030a,0x0487,0x0c6e,0x09f8,0x05cb,0x0aa7,0x045f + .short 0x06cb,0x0284,0x0999,0x015d,0x01a2,0x0149,0x0c65,0x0cb6 + .short 0x0331,0x0449,0x025b,0x0262,0x052a,0x07fc,0x0748,0x0180 + .short 0x0842,0x0c79,0x04c2,0x07ca,0x0997,0x00dc,0x085e,0x0686 + .short 0x0860,0x0707,0x0803,0x031a,0x071b,0x09ab,0x099b,0x01de + .short 0x0c95,0x0bcd,0x03e4,0x03df,0x03be,0x074d,0x05f2,0x065c .text .align 4 .globl mlkem_arm32_basemul_mont diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c index 721fc2b3ba2..686fd9f8efd 100644 --- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -52,50 +50,34 @@ #include #ifdef WOLFSSL_WC_MLKEM -static const word16 L_mlkem_arm32_ntt_zetas[] = { - 0x08ed, 0x0a0b, 0x0b9a, 0x0714, - 0x05d5, 0x058e, 0x011f, 0x00ca, - 0x0c56, 0x026e, 0x0629, 0x00b6, - 0x03c2, 0x084f, 0x073f, 0x05bc, - 0x023d, 0x07d4, 0x0108, 0x017f, - 0x09c4, 0x05b2, 0x06bf, 0x0c7f, - 0x0a58, 0x03f9, 0x02dc, 0x0260, - 0x06fb, 0x019b, 0x0c34, 0x06de, - 0x04c7, 0x028c, 0x0ad9, 0x03f7, - 0x07f4, 0x05d3, 0x0be7, 0x06f9, - 0x0204, 0x0cf9, 0x0bc1, 0x0a67, - 0x06af, 0x0877, 0x007e, 0x05bd, - 0x09ac, 0x0ca7, 0x0bf2, 0x033e, - 0x006b, 0x0774, 0x0c0a, 0x094a, - 0x0b73, 0x03c1, 0x071d, 0x0a2c, - 0x01c0, 0x08d8, 0x02a5, 0x0806, - 0x08b2, 0x01ae, 0x022b, 0x034b, - 0x081e, 0x0367, 0x060e, 0x0069, - 0x01a6, 0x024b, 0x00b1, 0x0c16, - 0x0bde, 0x0b35, 0x0626, 0x0675, - 0x0c0b, 0x030a, 0x0487, 0x0c6e, - 0x09f8, 0x05cb, 0x0aa7, 0x045f, - 0x06cb, 0x0284, 0x0999, 0x015d, - 0x01a2, 0x0149, 0x0c65, 0x0cb6, - 0x0331, 0x0449, 0x025b, 0x0262, - 0x052a, 0x07fc, 0x0748, 0x0180, - 0x0842, 0x0c79, 0x04c2, 0x07ca, - 0x0997, 0x00dc, 0x085e, 0x0686, - 0x0860, 0x0707, 0x0803, 0x031a, - 0x071b, 0x09ab, 0x099b, 0x01de, - 0x0c95, 0x0bcd, 0x03e4, 0x03df, - 0x03be, 0x074d, 0x05f2, 0x065c, +XALIGNED(4) static const word16 L_mlkem_arm32_ntt_zetas[] = { + 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, + 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, + 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, + 0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de, + 0x04c7, 0x028c, 0x0ad9, 0x03f7, 0x07f4, 0x05d3, 0x0be7, 0x06f9, + 0x0204, 0x0cf9, 0x0bc1, 0x0a67, 0x06af, 0x0877, 0x007e, 0x05bd, + 0x09ac, 0x0ca7, 0x0bf2, 0x033e, 0x006b, 0x0774, 0x0c0a, 0x094a, + 0x0b73, 0x03c1, 0x071d, 0x0a2c, 0x01c0, 0x08d8, 0x02a5, 0x0806, + 0x08b2, 0x01ae, 0x022b, 0x034b, 0x081e, 0x0367, 0x060e, 0x0069, + 0x01a6, 0x024b, 0x00b1, 0x0c16, 0x0bde, 0x0b35, 0x0626, 0x0675, + 0x0c0b, 0x030a, 0x0487, 0x0c6e, 0x09f8, 0x05cb, 0x0aa7, 0x045f, + 0x06cb, 0x0284, 0x0999, 0x015d, 0x01a2, 0x0149, 0x0c65, 0x0cb6, + 0x0331, 0x0449, 0x025b, 0x0262, 0x052a, 0x07fc, 0x0748, 0x0180, + 0x0842, 0x0c79, 0x04c2, 0x07ca, 0x0997, 0x00dc, 0x085e, 0x0686, + 0x0860, 0x0707, 0x0803, 0x031a, 0x071b, 0x09ab, 0x099b, 0x01de, + 0x0c95, 0x0bcd, 0x03e4, 0x03df, 0x03be, 0x074d, 0x05f2, 0x065c, }; #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r_p) #else WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword16* r asm ("r0") = (sword16*)r_p; - register word16* L_mlkem_arm32_ntt_zetas_c asm ("r1") = + register sword16* r __asm__ ("r0") = (sword16*)r_p; + register word16* L_mlkem_arm32_ntt_zetas_c __asm__ ("r1") = (word16*)&L_mlkem_arm32_ntt_zetas; #else register word16* L_mlkem_arm32_ntt_zetas_c = @@ -121,7 +103,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r2, #16\n\t" "\n" - "L_mlkem_arm32_ntt_loop_123_%=: \n\t" + "L_mlkem_arm32_ntt_loop_123_%=:\n\t" "str r2, [sp]\n\t" "ldrh r11, [r1, #2]\n\t" "ldr r2, [%[r]]\n\t" @@ -1238,13 +1220,13 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) "sub %[r], %[r], #0x40\n\t" "mov r3, #0\n\t" "\n" - "L_mlkem_arm32_ntt_loop_4_j_%=: \n\t" + "L_mlkem_arm32_ntt_loop_4_j_%=:\n\t" "str r3, [sp, #4]\n\t" "add r11, r1, r3, lsr #4\n\t" "mov r2, #4\n\t" "ldr r11, [r11, #16]\n\t" "\n" - "L_mlkem_arm32_ntt_loop_4_i_%=: \n\t" + "L_mlkem_arm32_ntt_loop_4_i_%=:\n\t" "str r2, [sp]\n\t" "ldr r2, [%[r]]\n\t" "ldr r3, [%[r], #16]\n\t" @@ -1639,7 +1621,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) "sub %[r], %[r], #0x200\n\t" "mov r3, #0\n\t" "\n" - "L_mlkem_arm32_ntt_loop_567_%=: \n\t" + "L_mlkem_arm32_ntt_loop_567_%=:\n\t" "add r11, r1, r3, lsr #3\n\t" "str r3, [sp, #4]\n\t" "ldrh r11, [r11, #32]\n\t" @@ -3104,50 +3086,34 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) ); } -static const word16 L_mlkem_invntt_zetas_inv[] = { - 0x06a5, 0x070f, 0x05b4, 0x0943, - 0x0922, 0x091d, 0x0134, 0x006c, - 0x0b23, 0x0366, 0x0356, 0x05e6, - 0x09e7, 0x04fe, 0x05fa, 0x04a1, - 0x067b, 0x04a3, 0x0c25, 0x036a, - 0x0537, 0x083f, 0x0088, 0x04bf, - 0x0b81, 0x05b9, 0x0505, 0x07d7, - 0x0a9f, 0x0aa6, 0x08b8, 0x09d0, - 0x004b, 0x009c, 0x0bb8, 0x0b5f, - 0x0ba4, 0x0368, 0x0a7d, 0x0636, - 0x08a2, 0x025a, 0x0736, 0x0309, - 0x0093, 0x087a, 0x09f7, 0x00f6, - 0x068c, 0x06db, 0x01cc, 0x0123, - 0x00eb, 0x0c50, 0x0ab6, 0x0b5b, - 0x0c98, 0x06f3, 0x099a, 0x04e3, - 0x09b6, 0x0ad6, 0x0b53, 0x044f, - 0x04fb, 0x0a5c, 0x0429, 0x0b41, - 0x02d5, 0x05e4, 0x0940, 0x018e, - 0x03b7, 0x00f7, 0x058d, 0x0c96, - 0x09c3, 0x010f, 0x005a, 0x0355, - 0x0744, 0x0c83, 0x048a, 0x0652, - 0x029a, 0x0140, 0x0008, 0x0afd, - 0x0608, 0x011a, 0x072e, 0x050d, - 0x090a, 0x0228, 0x0a75, 0x083a, - 0x0623, 0x00cd, 0x0b66, 0x0606, - 0x0aa1, 0x0a25, 0x0908, 0x02a9, - 0x0082, 0x0642, 0x074f, 0x033d, - 0x0b82, 0x0bf9, 0x052d, 0x0ac4, - 0x0745, 0x05c2, 0x04b2, 0x093f, - 0x0c4b, 0x06d8, 0x0a93, 0x00ab, - 0x0c37, 0x0be2, 0x0773, 0x072c, - 0x05ed, 0x0167, 0x02f6, 0x05a1, +XALIGNED(4) static const word16 L_mlkem_invntt_zetas_inv[] = { + 0x06a5, 0x070f, 0x05b4, 0x0943, 0x0922, 0x091d, 0x0134, 0x006c, + 0x0b23, 0x0366, 0x0356, 0x05e6, 0x09e7, 0x04fe, 0x05fa, 0x04a1, + 0x067b, 0x04a3, 0x0c25, 0x036a, 0x0537, 0x083f, 0x0088, 0x04bf, + 0x0b81, 0x05b9, 0x0505, 0x07d7, 0x0a9f, 0x0aa6, 0x08b8, 0x09d0, + 0x004b, 0x009c, 0x0bb8, 0x0b5f, 0x0ba4, 0x0368, 0x0a7d, 0x0636, + 0x08a2, 0x025a, 0x0736, 0x0309, 0x0093, 0x087a, 0x09f7, 0x00f6, + 0x068c, 0x06db, 0x01cc, 0x0123, 0x00eb, 0x0c50, 0x0ab6, 0x0b5b, + 0x0c98, 0x06f3, 0x099a, 0x04e3, 0x09b6, 0x0ad6, 0x0b53, 0x044f, + 0x04fb, 0x0a5c, 0x0429, 0x0b41, 0x02d5, 0x05e4, 0x0940, 0x018e, + 0x03b7, 0x00f7, 0x058d, 0x0c96, 0x09c3, 0x010f, 0x005a, 0x0355, + 0x0744, 0x0c83, 0x048a, 0x0652, 0x029a, 0x0140, 0x0008, 0x0afd, + 0x0608, 0x011a, 0x072e, 0x050d, 0x090a, 0x0228, 0x0a75, 0x083a, + 0x0623, 0x00cd, 0x0b66, 0x0606, 0x0aa1, 0x0a25, 0x0908, 0x02a9, + 0x0082, 0x0642, 0x074f, 0x033d, 0x0b82, 0x0bf9, 0x052d, 0x0ac4, + 0x0745, 0x05c2, 0x04b2, 0x093f, 0x0c4b, 0x06d8, 0x0a93, 0x00ab, + 0x0c37, 0x0be2, 0x0773, 0x072c, 0x05ed, 0x0167, 0x02f6, 0x05a1, }; #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r_p) #else WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword16* r asm ("r0") = (sword16*)r_p; - register word16* L_mlkem_invntt_zetas_inv_c asm ("r1") = + register sword16* r __asm__ ("r0") = (sword16*)r_p; + register word16* L_mlkem_invntt_zetas_inv_c __asm__ ("r1") = (word16*)&L_mlkem_invntt_zetas_inv; #else register word16* L_mlkem_invntt_zetas_inv_c = @@ -3173,7 +3139,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r3, #0\n\t" "\n" - "L_mlkem_invntt_loop_765_%=: \n\t" + "L_mlkem_invntt_loop_765_%=:\n\t" "add r11, r1, r3, lsr #1\n\t" "str r3, [sp, #4]\n\t" "ldr r2, [%[r]]\n\t" @@ -4774,13 +4740,13 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) "sub %[r], %[r], #0x200\n\t" "mov r3, #0\n\t" "\n" - "L_mlkem_invntt_loop_4_j_%=: \n\t" + "L_mlkem_invntt_loop_4_j_%=:\n\t" "str r3, [sp, #4]\n\t" "add r11, r1, r3, lsr #4\n\t" "mov r2, #4\n\t" "ldr r11, [r11, #224]\n\t" "\n" - "L_mlkem_invntt_loop_4_i_%=: \n\t" + "L_mlkem_invntt_loop_4_i_%=:\n\t" "str r2, [sp]\n\t" "ldr r2, [%[r]]\n\t" "ldr r3, [%[r], #16]\n\t" @@ -5279,7 +5245,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) "sub %[r], %[r], #0x200\n\t" "mov r2, #16\n\t" "\n" - "L_mlkem_invntt_loop_321_%=: \n\t" + "L_mlkem_invntt_loop_321_%=:\n\t" "str r2, [sp]\n\t" "ldrh r11, [r1, #2]\n\t" "ldr r2, [%[r]]\n\t" @@ -7550,39 +7516,23 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) ); } -static const word16 L_mlkem_basemul_mont_zetas[] = { - 0x08ed, 0x0a0b, 0x0b9a, 0x0714, - 0x05d5, 0x058e, 0x011f, 0x00ca, - 0x0c56, 0x026e, 0x0629, 0x00b6, - 0x03c2, 0x084f, 0x073f, 0x05bc, - 0x023d, 0x07d4, 0x0108, 0x017f, - 0x09c4, 0x05b2, 0x06bf, 0x0c7f, - 0x0a58, 0x03f9, 0x02dc, 0x0260, - 0x06fb, 0x019b, 0x0c34, 0x06de, - 0x04c7, 0x028c, 0x0ad9, 0x03f7, - 0x07f4, 0x05d3, 0x0be7, 0x06f9, - 0x0204, 0x0cf9, 0x0bc1, 0x0a67, - 0x06af, 0x0877, 0x007e, 0x05bd, - 0x09ac, 0x0ca7, 0x0bf2, 0x033e, - 0x006b, 0x0774, 0x0c0a, 0x094a, - 0x0b73, 0x03c1, 0x071d, 0x0a2c, - 0x01c0, 0x08d8, 0x02a5, 0x0806, - 0x08b2, 0x01ae, 0x022b, 0x034b, - 0x081e, 0x0367, 0x060e, 0x0069, - 0x01a6, 0x024b, 0x00b1, 0x0c16, - 0x0bde, 0x0b35, 0x0626, 0x0675, - 0x0c0b, 0x030a, 0x0487, 0x0c6e, - 0x09f8, 0x05cb, 0x0aa7, 0x045f, - 0x06cb, 0x0284, 0x0999, 0x015d, - 0x01a2, 0x0149, 0x0c65, 0x0cb6, - 0x0331, 0x0449, 0x025b, 0x0262, - 0x052a, 0x07fc, 0x0748, 0x0180, - 0x0842, 0x0c79, 0x04c2, 0x07ca, - 0x0997, 0x00dc, 0x085e, 0x0686, - 0x0860, 0x0707, 0x0803, 0x031a, - 0x071b, 0x09ab, 0x099b, 0x01de, - 0x0c95, 0x0bcd, 0x03e4, 0x03df, - 0x03be, 0x074d, 0x05f2, 0x065c, +XALIGNED(4) static const word16 L_mlkem_basemul_mont_zetas[] = { + 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, + 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, + 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, + 0x0a58, 0x03f9, 0x02dc, 0x0260, 0x06fb, 0x019b, 0x0c34, 0x06de, + 0x04c7, 0x028c, 0x0ad9, 0x03f7, 0x07f4, 0x05d3, 0x0be7, 0x06f9, + 0x0204, 0x0cf9, 0x0bc1, 0x0a67, 0x06af, 0x0877, 0x007e, 0x05bd, + 0x09ac, 0x0ca7, 0x0bf2, 0x033e, 0x006b, 0x0774, 0x0c0a, 0x094a, + 0x0b73, 0x03c1, 0x071d, 0x0a2c, 0x01c0, 0x08d8, 0x02a5, 0x0806, + 0x08b2, 0x01ae, 0x022b, 0x034b, 0x081e, 0x0367, 0x060e, 0x0069, + 0x01a6, 0x024b, 0x00b1, 0x0c16, 0x0bde, 0x0b35, 0x0626, 0x0675, + 0x0c0b, 0x030a, 0x0487, 0x0c6e, 0x09f8, 0x05cb, 0x0aa7, 0x045f, + 0x06cb, 0x0284, 0x0999, 0x015d, 0x01a2, 0x0149, 0x0c65, 0x0cb6, + 0x0331, 0x0449, 0x025b, 0x0262, 0x052a, 0x07fc, 0x0748, 0x0180, + 0x0842, 0x0c79, 0x04c2, 0x07ca, 0x0997, 0x00dc, 0x085e, 0x0686, + 0x0860, 0x0707, 0x0803, 0x031a, 0x071b, 0x09ab, 0x099b, 0x01de, + 0x0c95, 0x0bcd, 0x03e4, 0x03df, 0x03be, 0x074d, 0x05f2, 0x065c, }; #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -7591,13 +7541,13 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r_p, #else WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r, const sword16* a, const sword16* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword16* r asm ("r0") = (sword16*)r_p; - register const sword16* a asm ("r1") = (const sword16*)a_p; - register const sword16* b asm ("r2") = (const sword16*)b_p; - register word16* L_mlkem_basemul_mont_zetas_c asm ("r3") = + register sword16* r __asm__ ("r0") = (sword16*)r_p; + register const sword16* a __asm__ ("r1") = (const sword16*)a_p; + register const sword16* b __asm__ ("r2") = (const sword16*)b_p; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") = (word16*)&L_mlkem_basemul_mont_zetas; #else register word16* L_mlkem_basemul_mont_zetas_c = @@ -7623,7 +7573,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r, #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r8, #0\n\t" "\n" - "L_mlkem_basemul_mont_loop_%=: \n\t" + "L_mlkem_basemul_mont_loop_%=:\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "ldr lr, [r3, r8]\n\t" @@ -7896,13 +7846,13 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r_p, #else WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword16* r asm ("r0") = (sword16*)r_p; - register const sword16* a asm ("r1") = (const sword16*)a_p; - register const sword16* b asm ("r2") = (const sword16*)b_p; - register word16* L_mlkem_basemul_mont_zetas_c asm ("r3") = + register sword16* r __asm__ ("r0") = (sword16*)r_p; + register const sword16* a __asm__ ("r1") = (const sword16*)a_p; + register const sword16* b __asm__ ("r2") = (const sword16*)b_p; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") = (word16*)&L_mlkem_basemul_mont_zetas; #else register word16* L_mlkem_basemul_mont_zetas_c = @@ -7928,7 +7878,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r, #endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r8, #0\n\t" "\n" - "L_mlkem_arm32_basemul_mont_add_loop_%=: \n\t" + "L_mlkem_arm32_basemul_mont_add_loop_%=:\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "ldr lr, [r3, r8]\n\t" @@ -8233,11 +8183,11 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r, WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p_p) #else WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword16* p asm ("r0") = (sword16*)p_p; - register word16* L_mlkem_basemul_mont_zetas_c asm ("r1") = + register sword16* p __asm__ ("r0") = (sword16*)p_p; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r1") = (word16*)&L_mlkem_basemul_mont_zetas; #else register word16* L_mlkem_basemul_mont_zetas_c = @@ -8273,7 +8223,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p) #endif "mov r1, #0x100\n\t" "\n" - "L_mlkem_arm32_csubq_loop_%=: \n\t" + "L_mlkem_arm32_csubq_loop_%=:\n\t" "ldm %[p], {r2, r3, r4, r5}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r2, r2, lr\n\t" @@ -8429,14 +8379,14 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p_p, #else WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, unsigned int len, const byte* r, unsigned int rLen) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sword16* p asm ("r0") = (sword16*)p_p; - register unsigned int len asm ("r1") = (unsigned int)len_p; - register const byte* r asm ("r2") = (const byte*)r_p; - register unsigned int rLen asm ("r3") = (unsigned int)rLen_p; - register word16* L_mlkem_basemul_mont_zetas_c asm ("r12") = + register sword16* p __asm__ ("r0") = (sword16*)p_p; + register unsigned int len __asm__ ("r1") = (unsigned int)len_p; + register const byte* r __asm__ ("r2") = (const byte*)r_p; + register unsigned int rLen __asm__ ("r3") = (unsigned int)rLen_p; + register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r12") = (word16*)&L_mlkem_basemul_mont_zetas; #else register word16* L_mlkem_basemul_mont_zetas_c = @@ -8453,7 +8403,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, #endif "mov r12, #0\n\t" "\n" - "L_mlkem_arm32_rej_uniform_loop_no_fail_%=: \n\t" + "L_mlkem_arm32_rej_uniform_loop_no_fail_%=:\n\t" "cmp %[len], #8\n\t" "blt L_mlkem_arm32_rej_uniform_done_no_fail_%=\n\t" "ldm %[r]!, {r4, r5, r6}\n\t" @@ -8562,11 +8512,11 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "bne L_mlkem_arm32_rej_uniform_loop_no_fail_%=\n\t" "b L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_done_no_fail_%=: \n\t" + "L_mlkem_arm32_rej_uniform_done_no_fail_%=:\n\t" "cmp %[len], #0\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_loop_%=: \n\t" + "L_mlkem_arm32_rej_uniform_loop_%=:\n\t" "ldm %[r]!, {r4, r5, r6}\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r4, #20\n\t" @@ -8581,7 +8531,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_0_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_0_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r4, #8\n\t" "lsr r7, r7, #20\n\t" @@ -8595,7 +8545,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_1_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_1_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r7, r4, #24\n\t" #else @@ -8616,7 +8566,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_2_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_2_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r5, #16\n\t" "lsr r7, r7, #20\n\t" @@ -8630,7 +8580,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_3_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_3_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r5, #4\n\t" "lsr r7, r7, #20\n\t" @@ -8644,7 +8594,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_4_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_4_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r7, r5, #28\n\t" #else @@ -8665,7 +8615,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_5_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_5_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsl r7, r6, #12\n\t" "lsr r7, r7, #20\n\t" @@ -8679,7 +8629,7 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_6_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_6_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "lsr r7, r6, #20\n\t" #else @@ -8692,11 +8642,11 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_arm32_rej_uniform(sword16* p, "add r12, r12, #2\n\t" "beq L_mlkem_arm32_rej_uniform_done_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_fail_7_%=: \n\t" + "L_mlkem_arm32_rej_uniform_fail_7_%=:\n\t" "subs %[rLen], %[rLen], #12\n\t" "bgt L_mlkem_arm32_rej_uniform_loop_%=\n\t" "\n" - "L_mlkem_arm32_rej_uniform_done_%=: \n\t" + "L_mlkem_arm32_rej_uniform_done_%=:\n\t" "lsr r0, r12, #1\n\t" "pop {%[L_mlkem_basemul_mont_zetas]}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S index bcc3305e137..f67031475bd 100644 --- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm.S @@ -246,15 +246,21 @@ L_poly1305_arm32_16_done: add sp, sp, #28 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size poly1305_arm32_blocks_16,.-poly1305_arm32_blocks_16 +#ifndef __APPLE__ .text .type L_poly1305_arm32_clamp, %object .size L_poly1305_arm32_clamp, 16 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_poly1305_arm32_clamp: - .word 0xfffffff - .word 0xffffffc - .word 0xffffffc - .word 0xffffffc + .long 0x0fffffff,0x0ffffffc,0x0ffffffc,0x0ffffffc .text .align 4 .globl poly1305_set_key @@ -822,7 +828,7 @@ L_poly1305_arm32_blocks_start_1: adcs r9, r9, r4 adcs r10, r10, r5 adc r11, r11, r12 - push {r0, r1} + push {r0-r1} add r1, r0, #0 add lr, r0, #16 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -996,7 +1002,7 @@ L_poly1305_arm32_blocks_start_1: adcs r9, r9, r5 adcs r10, r10, r6 adc r11, r11, r12 - pop {r0, r1} + pop {r0-r1} pop {r2} add r12, r0, #16 stm r12, {r7, r8, r9, r10, r11} @@ -1004,15 +1010,21 @@ L_poly1305_arm32_blocks_done: vpop {d8-d15} pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size poly1305_arm32_blocks,.-poly1305_arm32_blocks +#ifndef __APPLE__ .text .type L_poly1305_arm32_clamp, %object .size L_poly1305_arm32_clamp, 16 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_poly1305_arm32_clamp: - .word 0xfffffff - .word 0xffffffc - .word 0xffffffc - .word 0xffffffc + .long 0x0fffffff,0x0ffffffc,0x0ffffffc,0x0ffffffc .text .align 4 .globl poly1305_set_key @@ -1059,7 +1071,7 @@ poly1305_set_key: vmov.i32 s5, r9 vmov.i32 s7, r10 vmov.i32 s9, r11 - push {r0, r1} + push {r0-r1} #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) # Square r umull r1, r6, r2, r3 @@ -1158,7 +1170,7 @@ poly1305_set_key: vmov.i32 s4, r4 vmov.i32 s6, r5 vmov.i32 s8, r10 - pop {r0, r1} + pop {r0-r1} add lr, r0, #0x7c vstm.32 lr, {d0-d4} # Multiply r^2, r by r^2 diff --git a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c index 7a8a1e06111..c71c3acf8c4 100644 --- a/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-poly1305-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -59,13 +57,13 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx_p, #else WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len, int notLast) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register const byte* m asm ("r1") = (const byte*)m_p; - register word32 len asm ("r2") = (word32)len_p; - register int notLast asm ("r3") = (int)notLast_p; + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register const byte* m __asm__ ("r1") = (const byte*)m_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register int notLast __asm__ ("r3") = (int)notLast_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78,7 +76,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, "add lr, %[ctx], #16\n\t" "ldm lr, {r4, r5, r6, r7, r8}\n\t" "\n" - "L_poly1305_arm32_16_loop_%=: \n\t" + "L_poly1305_arm32_16_loop_%=:\n\t" /* Add m to h */ "ldr %[m], [sp, #16]\n\t" "ldr %[len], [%[m]]\n\t" @@ -275,7 +273,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, "bgt L_poly1305_arm32_16_loop_%=\n\t" "stm lr, {r4, r5, r6, r7, r8}\n\t" "\n" - "L_poly1305_arm32_16_done_%=: \n\t" + "L_poly1305_arm32_16_done_%=:\n\t" "add sp, sp, #28\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), @@ -291,7 +289,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, ); } -static const word32 L_poly1305_arm32_clamp[] = { +XALIGNED(8) static const word32 L_poly1305_arm32_clamp[] = { 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc, }; @@ -299,12 +297,12 @@ static const word32 L_poly1305_arm32_clamp[] = { WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx_p, const byte* key_p) #else WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register const byte* key asm ("r1") = (const byte*)key_p; - register word32* L_poly1305_arm32_clamp_c asm ("r2") = + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register const byte* key __asm__ ("r1") = (const byte*)key_p; + register word32* L_poly1305_arm32_clamp_c __asm__ ("r2") = (word32*)&L_poly1305_arm32_clamp; #else register word32* L_poly1305_arm32_clamp_c = @@ -360,11 +358,11 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx_p, byte* mac_p) #else WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register byte* mac asm ("r1") = (byte*)mac_p; + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register byte* mac __asm__ ("r1") = (byte*)mac_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -430,13 +428,13 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx_p, #else WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, const byte* m, word32 len, int notLast) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register const byte* m asm ("r1") = (const byte*)m_p; - register word32 len asm ("r2") = (word32)len_p; - register int notLast asm ("r3") = (int)notLast_p; + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register const byte* m __asm__ ("r1") = (const byte*)m_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register int notLast __asm__ ("r3") = (int)notLast_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -449,7 +447,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, "add lr, %[ctx], #16\n\t" "ldm lr, {r4, r5, r6, r7, r8}\n\t" "\n" - "L_poly1305_arm32_16_loop_%=: \n\t" + "L_poly1305_arm32_16_loop_%=:\n\t" /* Add m to h */ "ldr %[m], [sp, #16]\n\t" "ldr %[len], [%[m]]\n\t" @@ -646,7 +644,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks_16(Poly1305* ctx, "bgt L_poly1305_arm32_16_loop_%=\n\t" "stm lr, {r4, r5, r6, r7, r8}\n\t" "\n" - "L_poly1305_arm32_16_done_%=: \n\t" + "L_poly1305_arm32_16_done_%=:\n\t" "add sp, sp, #28\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), @@ -668,12 +666,12 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx_p, #else WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register const unsigned char* m asm ("r1") = (const unsigned char*)m_p; - register size_t bytes asm ("r2") = (size_t)bytes_p; + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register const unsigned char* m __asm__ ("r1") = (const unsigned char*)m_p; + register size_t bytes __asm__ ("r2") = (size_t)bytes_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -683,7 +681,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "ldm r12, {r7, r8, r9, r10, r11}\n\t" "b L_poly1305_arm32_blocks_start_1_%=\n\t" "\n" - "L_poly1305_arm32_blocks_begin_neon_%=: \n\t" + "L_poly1305_arm32_blocks_begin_neon_%=:\n\t" "vmov.i16 q15, #0xffff\n\t" "vshr.u64 q15, q15, #38\n\t" "vld1.64 {d0-d2}, [r12]\n\t" @@ -711,11 +709,11 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "vadd.u32 d9, d9, d24\n\t" "b L_poly1305_arm32_blocks_start_2_%=\n\t" "\n" - "L_poly1305_arm32_blocks_begin_4_%=: \n\t" + "L_poly1305_arm32_blocks_begin_4_%=:\n\t" "add r3, %[ctx], #0xa4\n\t" "vldm.32 r3, {d26-d30}\n\t" "\n" - "L_poly1305_arm32_blocks_start_4_%=: \n\t" + "L_poly1305_arm32_blocks_start_4_%=:\n\t" "sub %[bytes], #0x40\n\t" "vld4.32 {d10-d13}, [%[m]]!\n\t" "vshl.u32 d6, d27, #2\n\t" @@ -837,7 +835,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "cmp %[bytes], #32\n\t" "blt L_poly1305_arm32_blocks_done_neon_%=\n\t" "\n" - "L_poly1305_arm32_blocks_start_2_%=: \n\t" + "L_poly1305_arm32_blocks_start_2_%=:\n\t" "sub %[bytes], #32\n\t" "vld4.32 {d10-d13}, [%[m]]!\n\t" "vshr.u32 d14, d13, #8\n\t" @@ -901,7 +899,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "vsra.u64 d1, d0, #26\n\t" "vand.u64 d0, d0, d31\n\t" "\n" - "L_poly1305_arm32_blocks_done_neon_%=: \n\t" + "L_poly1305_arm32_blocks_done_neon_%=:\n\t" "cmp %[bytes], #16\n\t" "beq L_poly1305_arm32_blocks_begin_1_%=\n\t" "add r12, %[ctx], #16\n\t" @@ -914,7 +912,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "vst1.64 {d0-d2}, [r12]\n\t" "b L_poly1305_arm32_blocks_done_%=\n\t" "\n" - "L_poly1305_arm32_blocks_begin_1_%=: \n\t" + "L_poly1305_arm32_blocks_begin_1_%=:\n\t" "vsli.u64 d0, d1, #26\n\t" "vsli.u64 d0, d2, #52\n\t" "vshr.u64 d1, d2, #12\n\t" @@ -925,7 +923,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "vmov r9, r10, d1\n\t" "vmov r11, d2[0]\n\t" "\n" - "L_poly1305_arm32_blocks_start_1_%=: \n\t" + "L_poly1305_arm32_blocks_start_1_%=:\n\t" "mov r12, #1\n\t" "push {r2}\n\t" /* Load message */ @@ -1118,7 +1116,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, "add r12, %[ctx], #16\n\t" "stm r12, {r7, r8, r9, r10, r11}\n\t" "\n" - "L_poly1305_arm32_blocks_done_%=: \n\t" + "L_poly1305_arm32_blocks_done_%=:\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [m] "+r" (m), [bytes] "+r" (bytes) : @@ -1134,7 +1132,7 @@ WC_OMIT_FRAME_POINTER void poly1305_arm32_blocks(Poly1305* ctx, ); } -static const word32 L_poly1305_arm32_clamp[] = { +XALIGNED(8) static const word32 L_poly1305_arm32_clamp[] = { 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc, }; @@ -1142,12 +1140,12 @@ static const word32 L_poly1305_arm32_clamp[] = { WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx_p, const byte* key_p) #else WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register const byte* key asm ("r1") = (const byte*)key_p; - register word32* L_poly1305_arm32_clamp_c asm ("r2") = + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register const byte* key __asm__ ("r1") = (const byte*)key_p; + register word32* L_poly1305_arm32_clamp_c __asm__ ("r2") = (word32*)&L_poly1305_arm32_clamp; #else register word32* L_poly1305_arm32_clamp_c = @@ -1383,11 +1381,11 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx_p, byte* mac_p) #else WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register Poly1305* ctx asm ("r0") = (Poly1305*)ctx_p; - register byte* mac asm ("r1") = (byte*)mac_p; + register Poly1305* ctx __asm__ ("r0") = (Poly1305*)ctx_p; + register byte* mac __asm__ ("r1") = (byte*)mac_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S index 510f6016c28..e0883cc7deb 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S @@ -32,75 +32,36 @@ #ifndef WOLFSSL_ARMASM_INLINE #ifndef NO_SHA256 #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_SHA256_transform_len_k, %object .size L_SHA256_transform_len_k, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_SHA256_transform_len_k: - .word 0x428a2f98 - .word 0x71374491 - .word 0xb5c0fbcf - .word 0xe9b5dba5 - .word 0x3956c25b - .word 0x59f111f1 - .word 0x923f82a4 - .word 0xab1c5ed5 - .word 0xd807aa98 - .word 0x12835b01 - .word 0x243185be - .word 0x550c7dc3 - .word 0x72be5d74 - .word 0x80deb1fe - .word 0x9bdc06a7 - .word 0xc19bf174 - .word 0xe49b69c1 - .word 0xefbe4786 - .word 0xfc19dc6 - .word 0x240ca1cc - .word 0x2de92c6f - .word 0x4a7484aa - .word 0x5cb0a9dc - .word 0x76f988da - .word 0x983e5152 - .word 0xa831c66d - .word 0xb00327c8 - .word 0xbf597fc7 - .word 0xc6e00bf3 - .word 0xd5a79147 - .word 0x6ca6351 - .word 0x14292967 - .word 0x27b70a85 - .word 0x2e1b2138 - .word 0x4d2c6dfc - .word 0x53380d13 - .word 0x650a7354 - .word 0x766a0abb - .word 0x81c2c92e - .word 0x92722c85 - .word 0xa2bfe8a1 - .word 0xa81a664b - .word 0xc24b8b70 - .word 0xc76c51a3 - .word 0xd192e819 - .word 0xd6990624 - .word 0xf40e3585 - .word 0x106aa070 - .word 0x19a4c116 - .word 0x1e376c08 - .word 0x2748774c - .word 0x34b0bcb5 - .word 0x391c0cb3 - .word 0x4ed8aa4a - .word 0x5b9cca4f - .word 0x682e6ff3 - .word 0x748f82ee - .word 0x78a5636f - .word 0x84c87814 - .word 0x8cc70208 - .word 0x90befffa - .word 0xa4506ceb - .word 0xbef9a3f7 - .word 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .text .align 4 .globl Transform_Sha256_Len_base @@ -2568,75 +2529,36 @@ L_SHA256_transform_len_blk_end_15: .size Transform_Sha256_Len_base,.-Transform_Sha256_Len_base #else #ifdef WOLFSSL_ARMASM_NO_HW_CRYPTO +#ifndef __APPLE__ .text .type L_SHA256_transform_neon_len_k, %object .size L_SHA256_transform_neon_len_k, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_SHA256_transform_neon_len_k: - .word 0x428a2f98 - .word 0x71374491 - .word 0xb5c0fbcf - .word 0xe9b5dba5 - .word 0x3956c25b - .word 0x59f111f1 - .word 0x923f82a4 - .word 0xab1c5ed5 - .word 0xd807aa98 - .word 0x12835b01 - .word 0x243185be - .word 0x550c7dc3 - .word 0x72be5d74 - .word 0x80deb1fe - .word 0x9bdc06a7 - .word 0xc19bf174 - .word 0xe49b69c1 - .word 0xefbe4786 - .word 0xfc19dc6 - .word 0x240ca1cc - .word 0x2de92c6f - .word 0x4a7484aa - .word 0x5cb0a9dc - .word 0x76f988da - .word 0x983e5152 - .word 0xa831c66d - .word 0xb00327c8 - .word 0xbf597fc7 - .word 0xc6e00bf3 - .word 0xd5a79147 - .word 0x6ca6351 - .word 0x14292967 - .word 0x27b70a85 - .word 0x2e1b2138 - .word 0x4d2c6dfc - .word 0x53380d13 - .word 0x650a7354 - .word 0x766a0abb - .word 0x81c2c92e - .word 0x92722c85 - .word 0xa2bfe8a1 - .word 0xa81a664b - .word 0xc24b8b70 - .word 0xc76c51a3 - .word 0xd192e819 - .word 0xd6990624 - .word 0xf40e3585 - .word 0x106aa070 - .word 0x19a4c116 - .word 0x1e376c08 - .word 0x2748774c - .word 0x34b0bcb5 - .word 0x391c0cb3 - .word 0x4ed8aa4a - .word 0x5b9cca4f - .word 0x682e6ff3 - .word 0x748f82ee - .word 0x78a5636f - .word 0x84c87814 - .word 0x8cc70208 - .word 0x90befffa - .word 0xa4506ceb - .word 0xbef9a3f7 - .word 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .text .align 4 .fpu neon @@ -3667,75 +3589,36 @@ L_SHA256_transform_neon_len_start: pop {r4, r5, r6, r7, r8, r9, r10, pc} .size Transform_Sha256_Len_neon,.-Transform_Sha256_Len_neon #else +#ifndef __APPLE__ .text .type L_SHA256_trans_crypto_len_k, %object .size L_SHA256_trans_crypto_len_k, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_SHA256_trans_crypto_len_k: - .word 0x428a2f98 - .word 0x71374491 - .word 0xb5c0fbcf - .word 0xe9b5dba5 - .word 0x3956c25b - .word 0x59f111f1 - .word 0x923f82a4 - .word 0xab1c5ed5 - .word 0xd807aa98 - .word 0x12835b01 - .word 0x243185be - .word 0x550c7dc3 - .word 0x72be5d74 - .word 0x80deb1fe - .word 0x9bdc06a7 - .word 0xc19bf174 - .word 0xe49b69c1 - .word 0xefbe4786 - .word 0xfc19dc6 - .word 0x240ca1cc - .word 0x2de92c6f - .word 0x4a7484aa - .word 0x5cb0a9dc - .word 0x76f988da - .word 0x983e5152 - .word 0xa831c66d - .word 0xb00327c8 - .word 0xbf597fc7 - .word 0xc6e00bf3 - .word 0xd5a79147 - .word 0x6ca6351 - .word 0x14292967 - .word 0x27b70a85 - .word 0x2e1b2138 - .word 0x4d2c6dfc - .word 0x53380d13 - .word 0x650a7354 - .word 0x766a0abb - .word 0x81c2c92e - .word 0x92722c85 - .word 0xa2bfe8a1 - .word 0xa81a664b - .word 0xc24b8b70 - .word 0xc76c51a3 - .word 0xd192e819 - .word 0xd6990624 - .word 0xf40e3585 - .word 0x106aa070 - .word 0x19a4c116 - .word 0x1e376c08 - .word 0x2748774c - .word 0x34b0bcb5 - .word 0x391c0cb3 - .word 0x4ed8aa4a - .word 0x5b9cca4f - .word 0x682e6ff3 - .word 0x748f82ee - .word 0x78a5636f - .word 0x84c87814 - .word 0x8cc70208 - .word 0x90befffa - .word 0xa4506ceb - .word 0xbef9a3f7 - .word 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .text .align 4 .globl Transform_Sha256_Len_crypto @@ -3745,14 +3628,14 @@ Transform_Sha256_Len_crypto: adr r3, L_SHA256_trans_crypto_len_k # Load K into vector registers vldm r3!, {q8-q11} - vldm r3!, {q12, q13} + vldm r3!, {q12-q13} # Load digest into working vars - vldm r0, {q0, q1} + vldm r0, {q0-q1} # Start of loop processing a block L_sha256_len_crypto_begin: # Load W - vld1.8 {q4, q5}, [r1]! - vld1.8 {q6, q7}, [r1]! + vld1.8 {q4-q5}, [r1]! + vld1.8 {q6-q7}, [r1]! vrev32.8 q4, q4 vrev32.8 q5, q5 vrev32.8 q6, q6 @@ -3882,7 +3765,7 @@ L_sha256_len_crypto_begin: sub r3, r3, #0xa0 bne L_sha256_len_crypto_begin # Store digest back - vst1.8 {q0, q1}, [r0] + vst1.8 {q0-q1}, [r0] vpop {d8-d15} bx lr .size Transform_Sha256_Len_crypto,.-Transform_Sha256_Len_crypto diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index 2a8c9299e45..e6cdbe2d202 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -53,7 +51,7 @@ #ifndef NO_SHA256 #ifdef WOLFSSL_ARMASM_NO_NEON -static const word32 L_SHA256_transform_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -80,13 +78,13 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256_p, #else WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register word32* L_SHA256_transform_len_k_c asm ("r3") = + register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register word32* L_SHA256_transform_len_k_c __asm__ ("r3") = (word32*)&L_SHA256_transform_len_k; #else register word32* L_SHA256_transform_len_k_c = @@ -146,7 +144,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, #endif /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load, Reverse and Store W - 64 bytes */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "ldr r4, [%[data]]\n\t" @@ -364,7 +362,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "mov r3, #3\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_fast_%=: \n\t" + "L_SHA256_transform_len_start_fast_%=:\n\t" /* Round 0 */ "ldr r5, [%[sha256], #16]\n\t" "ldr r6, [%[sha256], #20]\n\t" @@ -1636,7 +1634,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "mov r3, #4\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_small_%=: \n\t" + "L_SHA256_transform_len_start_small_%=:\n\t" "sub r3, r3, #1\n\t" /* Round 0 */ "ldr r5, [%[sha256], #16]\n\t" @@ -1688,7 +1686,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp]\n\t" "\n" - "L_SHA256_transform_len_blk_end_0_%=: \n\t" + "L_SHA256_transform_len_blk_end_0_%=:\n\t" /* Round 1 */ "ldr r5, [%[sha256], #12]\n\t" "ldr r6, [%[sha256], #16]\n\t" @@ -1739,7 +1737,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #4]\n\t" "\n" - "L_SHA256_transform_len_blk_end_1_%=: \n\t" + "L_SHA256_transform_len_blk_end_1_%=:\n\t" /* Round 2 */ "ldr r5, [%[sha256], #8]\n\t" "ldr r6, [%[sha256], #12]\n\t" @@ -1790,7 +1788,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #8]\n\t" "\n" - "L_SHA256_transform_len_blk_end_2_%=: \n\t" + "L_SHA256_transform_len_blk_end_2_%=:\n\t" /* Round 3 */ "ldr r5, [%[sha256], #4]\n\t" "ldr r6, [%[sha256], #8]\n\t" @@ -1841,7 +1839,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #12]\n\t" "\n" - "L_SHA256_transform_len_blk_end_3_%=: \n\t" + "L_SHA256_transform_len_blk_end_3_%=:\n\t" /* Round 4 */ "ldr r5, [%[sha256]]\n\t" "ldr r6, [%[sha256], #4]\n\t" @@ -1892,7 +1890,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #16]\n\t" "\n" - "L_SHA256_transform_len_blk_end_4_%=: \n\t" + "L_SHA256_transform_len_blk_end_4_%=:\n\t" /* Round 5 */ "ldr r5, [%[sha256], #28]\n\t" "ldr r6, [%[sha256]]\n\t" @@ -1943,7 +1941,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #20]\n\t" "\n" - "L_SHA256_transform_len_blk_end_5_%=: \n\t" + "L_SHA256_transform_len_blk_end_5_%=:\n\t" /* Round 6 */ "ldr r5, [%[sha256], #24]\n\t" "ldr r6, [%[sha256], #28]\n\t" @@ -1994,7 +1992,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #24]\n\t" "\n" - "L_SHA256_transform_len_blk_end_6_%=: \n\t" + "L_SHA256_transform_len_blk_end_6_%=:\n\t" /* Round 7 */ "ldr r5, [%[sha256], #20]\n\t" "ldr r6, [%[sha256], #24]\n\t" @@ -2045,7 +2043,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #28]\n\t" "\n" - "L_SHA256_transform_len_blk_end_7_%=: \n\t" + "L_SHA256_transform_len_blk_end_7_%=:\n\t" /* Round 8 */ "ldr r5, [%[sha256], #16]\n\t" "ldr r6, [%[sha256], #20]\n\t" @@ -2096,7 +2094,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #32]\n\t" "\n" - "L_SHA256_transform_len_blk_end_8_%=: \n\t" + "L_SHA256_transform_len_blk_end_8_%=:\n\t" /* Round 9 */ "ldr r5, [%[sha256], #12]\n\t" "ldr r6, [%[sha256], #16]\n\t" @@ -2147,7 +2145,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #36]\n\t" "\n" - "L_SHA256_transform_len_blk_end_9_%=: \n\t" + "L_SHA256_transform_len_blk_end_9_%=:\n\t" /* Round 10 */ "ldr r5, [%[sha256], #8]\n\t" "ldr r6, [%[sha256], #12]\n\t" @@ -2198,7 +2196,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #40]\n\t" "\n" - "L_SHA256_transform_len_blk_end_10_%=: \n\t" + "L_SHA256_transform_len_blk_end_10_%=:\n\t" /* Round 11 */ "ldr r5, [%[sha256], #4]\n\t" "ldr r6, [%[sha256], #8]\n\t" @@ -2249,7 +2247,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #44]\n\t" "\n" - "L_SHA256_transform_len_blk_end_11_%=: \n\t" + "L_SHA256_transform_len_blk_end_11_%=:\n\t" /* Round 12 */ "ldr r5, [%[sha256]]\n\t" "ldr r6, [%[sha256], #4]\n\t" @@ -2300,7 +2298,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #48]\n\t" "\n" - "L_SHA256_transform_len_blk_end_12_%=: \n\t" + "L_SHA256_transform_len_blk_end_12_%=:\n\t" /* Round 13 */ "ldr r5, [%[sha256], #28]\n\t" "ldr r6, [%[sha256]]\n\t" @@ -2351,7 +2349,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #52]\n\t" "\n" - "L_SHA256_transform_len_blk_end_13_%=: \n\t" + "L_SHA256_transform_len_blk_end_13_%=:\n\t" /* Round 14 */ "ldr r5, [%[sha256], #24]\n\t" "ldr r6, [%[sha256], #28]\n\t" @@ -2402,7 +2400,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #56]\n\t" "\n" - "L_SHA256_transform_len_blk_end_14_%=: \n\t" + "L_SHA256_transform_len_blk_end_14_%=:\n\t" /* Round 15 */ "ldr r5, [%[sha256], #20]\n\t" "ldr r6, [%[sha256], #24]\n\t" @@ -2453,7 +2451,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "add r9, r9, r4\n\t" "str r9, [sp, #60]\n\t" "\n" - "L_SHA256_transform_len_blk_end_15_%=: \n\t" + "L_SHA256_transform_len_blk_end_15_%=:\n\t" "cmp r3, #0\n\t" "add r12, r12, #0x40\n\t" "bne L_SHA256_transform_len_start_small_%=\n\t" @@ -2586,7 +2584,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, #else #ifdef WOLFSSL_ARMASM_NO_HW_CRYPTO -static const word32 L_SHA256_transform_neon_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_neon_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -2613,13 +2611,13 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_neon(wc_Sha256* sha256_p, #else WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_neon(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register word32* L_SHA256_transform_neon_len_k_c asm ("r3") = + register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register word32* L_SHA256_transform_neon_len_k_c __asm__ ("r3") = (word32*)&L_SHA256_transform_neon_len_k; #else register word32* L_SHA256_transform_neon_len_k_c = @@ -2661,7 +2659,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_neon(wc_Sha256* sha256, #endif /* Start of loop processing a block */ "\n" - "L_SHA256_transform_neon_len_begin_%=: \n\t" + "L_SHA256_transform_neon_len_begin_%=:\n\t" /* Load W */ "vld1.8 {d0-d3}, [%[data]]!\n\t" "vld1.8 {d4-d7}, [%[data]]!\n\t" @@ -2684,7 +2682,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_neon(wc_Sha256* sha256, "mov lr, #3\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_neon_len_start_%=: \n\t" + "L_SHA256_transform_neon_len_start_%=:\n\t" /* Round 0 */ "vmov.32 r10, d0[0]\n\t" "ror %[sha256], r6, #6\n\t" @@ -3663,7 +3661,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_neon(wc_Sha256* sha256, } #else -static const word32 L_SHA256_trans_crypto_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_trans_crypto_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -3690,13 +3688,13 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_crypto(wc_Sha256* sha256_p, #else WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_crypto(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register word32* L_SHA256_trans_crypto_len_k_c asm ("r3") = + register wc_Sha256* sha256 __asm__ ("r0") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register word32* L_SHA256_trans_crypto_len_k_c __asm__ ("r3") = (word32*)&L_SHA256_trans_crypto_len_k; #else register word32* L_SHA256_trans_crypto_len_k_c = @@ -3712,7 +3710,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_crypto(wc_Sha256* sha256, "vldm %[sha256], {q0-q1}\n\t" /* Start of loop processing a block */ "\n" - "L_sha256_len_crypto_begin_%=: \n\t" + "L_sha256_len_crypto_begin_%=:\n\t" /* Load W */ "vld1.8 {q4-q5}, [%[data]]!\n\t" "vld1.8 {q6-q7}, [%[data]]!\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S index 701adb24136..6d2f0172994 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S @@ -32,59 +32,32 @@ #ifndef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_SHA3 #ifndef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_sha3_arm32_neon_rt, %object .size L_sha3_arm32_neon_rt, 192 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned +#ifndef __APPLE__ .align 4 +#else + .p2align 4 +#endif /* __APPLE__ */ L_sha3_arm32_neon_rt: - .word 0x1 - .word 0x0 - .word 0x8082 - .word 0x0 - .word 0x808a - .word 0x80000000 - .word 0x80008000 - .word 0x80000000 - .word 0x808b - .word 0x0 - .word 0x80000001 - .word 0x0 - .word 0x80008081 - .word 0x80000000 - .word 0x8009 - .word 0x80000000 - .word 0x8a - .word 0x0 - .word 0x88 - .word 0x0 - .word 0x80008009 - .word 0x0 - .word 0x8000000a - .word 0x0 - .word 0x8000808b - .word 0x0 - .word 0x8b - .word 0x80000000 - .word 0x8089 - .word 0x80000000 - .word 0x8003 - .word 0x80000000 - .word 0x8002 - .word 0x80000000 - .word 0x80 - .word 0x80000000 - .word 0x800a - .word 0x0 - .word 0x8000000a - .word 0x80000000 - .word 0x80008081 - .word 0x80000000 - .word 0x8080 - .word 0x80000000 - .word 0x80000001 - .word 0x0 - .word 0x80008008 - .word 0x80000000 + .quad 0x0000000000000001,0x0000000000008082 + .quad 0x800000000000808a,0x8000000080008000 + .quad 0x000000000000808b,0x0000000080000001 + .quad 0x8000000080008081,0x8000000000008009 + .quad 0x000000000000008a,0x0000000000000088 + .quad 0x0000000080008009,0x000000008000000a + .quad 0x000000008000808b,0x800000000000008b + .quad 0x8000000000008089,0x8000000000008003 + .quad 0x8000000000008002,0x8000000000000080 + .quad 0x000000000000800a,0x800000008000000a + .quad 0x8000000080008081,0x8000000000008080 + .quad 0x0000000080000001,0x8000000080008008 .text .align 4 .globl BlockSha3 @@ -125,7 +98,7 @@ L_sha3_arm32_neon_begin: veor d28, d28, d22 veor d29, d29, d23 veor d25, d25, d24 - vst1.8 {d25, d26}, [r3] + vst1.8 {d25-d26}, [r3] # Calc t[0..4] and XOR into s[i*5..i*5+4] # t[0] vshr.u64 d30, d27, #63 @@ -353,59 +326,32 @@ L_sha3_arm32_neon_begin: .size BlockSha3,.-BlockSha3 #endif /* WOLFSSL_ARMASM_NO_NEON */ #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_sha3_arm32_rt, %object .size L_sha3_arm32_rt, 192 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned +#ifndef __APPLE__ .align 4 +#else + .p2align 4 +#endif /* __APPLE__ */ L_sha3_arm32_rt: - .word 0x1 - .word 0x0 - .word 0x8082 - .word 0x0 - .word 0x808a - .word 0x80000000 - .word 0x80008000 - .word 0x80000000 - .word 0x808b - .word 0x0 - .word 0x80000001 - .word 0x0 - .word 0x80008081 - .word 0x80000000 - .word 0x8009 - .word 0x80000000 - .word 0x8a - .word 0x0 - .word 0x88 - .word 0x0 - .word 0x80008009 - .word 0x0 - .word 0x8000000a - .word 0x0 - .word 0x8000808b - .word 0x0 - .word 0x8b - .word 0x80000000 - .word 0x8089 - .word 0x80000000 - .word 0x8003 - .word 0x80000000 - .word 0x8002 - .word 0x80000000 - .word 0x80 - .word 0x80000000 - .word 0x800a - .word 0x0 - .word 0x8000000a - .word 0x80000000 - .word 0x80008081 - .word 0x80000000 - .word 0x8080 - .word 0x80000000 - .word 0x80000001 - .word 0x0 - .word 0x80008008 - .word 0x80000000 + .quad 0x0000000000000001,0x0000000000008082 + .quad 0x800000000000808a,0x8000000080008000 + .quad 0x000000000000808b,0x0000000080000001 + .quad 0x8000000080008081,0x8000000000008009 + .quad 0x000000000000008a,0x0000000000000088 + .quad 0x0000000080008009,0x000000008000000a + .quad 0x000000008000808b,0x800000000000008b + .quad 0x8000000000008089,0x8000000000008003 + .quad 0x8000000000008002,0x8000000000000080 + .quad 0x000000000000800a,0x800000008000000a + .quad 0x8000000080008081,0x8000000000008080 + .quad 0x0000000080000001,0x8000000080008008 .text .align 4 .globl BlockSha3 diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c index dd191f26e1f..4889b239f0b 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -51,7 +49,7 @@ #ifdef WOLFSSL_SHA3 #ifndef WOLFSSL_ARMASM_NO_NEON -static const word64 L_sha3_arm32_neon_rt[] = { +XALIGNED(16) static const word64 L_sha3_arm32_neon_rt[] = { 0x0000000000000001UL, 0x0000000000008082UL, 0x800000000000808aUL, 0x8000000080008000UL, 0x000000000000808bUL, 0x0000000080000001UL, @@ -72,11 +70,11 @@ static const word64 L_sha3_arm32_neon_rt[] = { WC_OMIT_FRAME_POINTER void BlockSha3(word64* state_p) #else WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register word64* state asm ("r0") = (word64*)state_p; - register word64* L_sha3_arm32_neon_rt_c asm ("r1") = + register word64* state __asm__ ("r0") = (word64*)state_p; + register word64* L_sha3_arm32_neon_rt_c __asm__ ("r1") = (word64*)&L_sha3_arm32_neon_rt; #else register word64* L_sha3_arm32_neon_rt_c = (word64*)&L_sha3_arm32_neon_rt; @@ -96,7 +94,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "vld1.8 {d24}, [%[state]]\n\t" "sub %[state], %[state], #0xc0\n\t" "\n" - "L_sha3_arm32_neon_begin_%=: \n\t" + "L_sha3_arm32_neon_begin_%=:\n\t" /* Calc b[0..4] */ "veor d26, d0, d5\n\t" "veor d27, d1, d6\n\t" @@ -359,7 +357,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) #endif /* WOLFSSL_ARMASM_NO_NEON */ #ifdef WOLFSSL_ARMASM_NO_NEON -static const word64 L_sha3_arm32_rt[] = { +XALIGNED(16) static const word64 L_sha3_arm32_rt[] = { 0x0000000000000001UL, 0x0000000000008082UL, 0x800000000000808aUL, 0x8000000080008000UL, 0x000000000000808bUL, 0x0000000080000001UL, @@ -380,11 +378,12 @@ static const word64 L_sha3_arm32_rt[] = { WC_OMIT_FRAME_POINTER void BlockSha3(word64* state_p) #else WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register word64* state asm ("r0") = (word64*)state_p; - register word64* L_sha3_arm32_rt_c asm ("r1") = (word64*)&L_sha3_arm32_rt; + register word64* state __asm__ ("r0") = (word64*)state_p; + register word64* L_sha3_arm32_rt_c __asm__ ("r1") = + (word64*)&L_sha3_arm32_rt; #else register word64* L_sha3_arm32_rt_c = (word64*)&L_sha3_arm32_rt; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ @@ -394,7 +393,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "mov r1, %[L_sha3_arm32_rt]\n\t" "mov r2, #12\n\t" "\n" - "L_sha3_arm32_begin_%=: \n\t" + "L_sha3_arm32_begin_%=:\n\t" "str r2, [sp, #200]\n\t" /* Round even */ /* Calc b[4] */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index 9624aa8b49d..60a7d731362 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -32,171 +32,60 @@ #ifndef WOLFSSL_ARMASM_INLINE #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_SHA512_transform_len_k, %object .size L_SHA512_transform_len_k, 640 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned +#ifndef __APPLE__ .align 4 +#else + .p2align 4 +#endif /* __APPLE__ */ L_SHA512_transform_len_k: - .word 0xd728ae22 - .word 0x428a2f98 - .word 0x23ef65cd - .word 0x71374491 - .word 0xec4d3b2f - .word 0xb5c0fbcf - .word 0x8189dbbc - .word 0xe9b5dba5 - .word 0xf348b538 - .word 0x3956c25b - .word 0xb605d019 - .word 0x59f111f1 - .word 0xaf194f9b - .word 0x923f82a4 - .word 0xda6d8118 - .word 0xab1c5ed5 - .word 0xa3030242 - .word 0xd807aa98 - .word 0x45706fbe - .word 0x12835b01 - .word 0x4ee4b28c - .word 0x243185be - .word 0xd5ffb4e2 - .word 0x550c7dc3 - .word 0xf27b896f - .word 0x72be5d74 - .word 0x3b1696b1 - .word 0x80deb1fe - .word 0x25c71235 - .word 0x9bdc06a7 - .word 0xcf692694 - .word 0xc19bf174 - .word 0x9ef14ad2 - .word 0xe49b69c1 - .word 0x384f25e3 - .word 0xefbe4786 - .word 0x8b8cd5b5 - .word 0xfc19dc6 - .word 0x77ac9c65 - .word 0x240ca1cc - .word 0x592b0275 - .word 0x2de92c6f - .word 0x6ea6e483 - .word 0x4a7484aa - .word 0xbd41fbd4 - .word 0x5cb0a9dc - .word 0x831153b5 - .word 0x76f988da - .word 0xee66dfab - .word 0x983e5152 - .word 0x2db43210 - .word 0xa831c66d - .word 0x98fb213f - .word 0xb00327c8 - .word 0xbeef0ee4 - .word 0xbf597fc7 - .word 0x3da88fc2 - .word 0xc6e00bf3 - .word 0x930aa725 - .word 0xd5a79147 - .word 0xe003826f - .word 0x6ca6351 - .word 0xa0e6e70 - .word 0x14292967 - .word 0x46d22ffc - .word 0x27b70a85 - .word 0x5c26c926 - .word 0x2e1b2138 - .word 0x5ac42aed - .word 0x4d2c6dfc - .word 0x9d95b3df - .word 0x53380d13 - .word 0x8baf63de - .word 0x650a7354 - .word 0x3c77b2a8 - .word 0x766a0abb - .word 0x47edaee6 - .word 0x81c2c92e - .word 0x1482353b - .word 0x92722c85 - .word 0x4cf10364 - .word 0xa2bfe8a1 - .word 0xbc423001 - .word 0xa81a664b - .word 0xd0f89791 - .word 0xc24b8b70 - .word 0x654be30 - .word 0xc76c51a3 - .word 0xd6ef5218 - .word 0xd192e819 - .word 0x5565a910 - .word 0xd6990624 - .word 0x5771202a - .word 0xf40e3585 - .word 0x32bbd1b8 - .word 0x106aa070 - .word 0xb8d2d0c8 - .word 0x19a4c116 - .word 0x5141ab53 - .word 0x1e376c08 - .word 0xdf8eeb99 - .word 0x2748774c - .word 0xe19b48a8 - .word 0x34b0bcb5 - .word 0xc5c95a63 - .word 0x391c0cb3 - .word 0xe3418acb - .word 0x4ed8aa4a - .word 0x7763e373 - .word 0x5b9cca4f - .word 0xd6b2b8a3 - .word 0x682e6ff3 - .word 0x5defb2fc - .word 0x748f82ee - .word 0x43172f60 - .word 0x78a5636f - .word 0xa1f0ab72 - .word 0x84c87814 - .word 0x1a6439ec - .word 0x8cc70208 - .word 0x23631e28 - .word 0x90befffa - .word 0xde82bde9 - .word 0xa4506ceb - .word 0xb2c67915 - .word 0xbef9a3f7 - .word 0xe372532b - .word 0xc67178f2 - .word 0xea26619c - .word 0xca273ece - .word 0x21c0c207 - .word 0xd186b8c7 - .word 0xcde0eb1e - .word 0xeada7dd6 - .word 0xee6ed178 - .word 0xf57d4f7f - .word 0x72176fba - .word 0x6f067aa - .word 0xa2c898a6 - .word 0xa637dc5 - .word 0xbef90dae - .word 0x113f9804 - .word 0x131c471b - .word 0x1b710b35 - .word 0x23047d84 - .word 0x28db77f5 - .word 0x40c72493 - .word 0x32caab7b - .word 0x15c9bebc - .word 0x3c9ebe0a - .word 0x9c100d4c - .word 0x431d67c4 - .word 0xcb3e42b6 - .word 0x4cc5d4be - .word 0xfc657e2a - .word 0x597f299c - .word 0x3ad6faec - .word 0x5fcb6fab - .word 0x4a475817 - .word 0x6c44198c + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .text .align 4 .globl Transform_Sha512_Len_base @@ -7618,171 +7507,60 @@ L_SHA512_transform_len_start: .size Transform_Sha512_Len_base,.-Transform_Sha512_Len_base #endif /* WOLFSSL_ARMASM_NO_NEON */ #ifndef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_SHA512_transform_neon_len_k, %object .size L_SHA512_transform_neon_len_k, 640 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned +#ifndef __APPLE__ .align 4 +#else + .p2align 4 +#endif /* __APPLE__ */ L_SHA512_transform_neon_len_k: - .word 0xd728ae22 - .word 0x428a2f98 - .word 0x23ef65cd - .word 0x71374491 - .word 0xec4d3b2f - .word 0xb5c0fbcf - .word 0x8189dbbc - .word 0xe9b5dba5 - .word 0xf348b538 - .word 0x3956c25b - .word 0xb605d019 - .word 0x59f111f1 - .word 0xaf194f9b - .word 0x923f82a4 - .word 0xda6d8118 - .word 0xab1c5ed5 - .word 0xa3030242 - .word 0xd807aa98 - .word 0x45706fbe - .word 0x12835b01 - .word 0x4ee4b28c - .word 0x243185be - .word 0xd5ffb4e2 - .word 0x550c7dc3 - .word 0xf27b896f - .word 0x72be5d74 - .word 0x3b1696b1 - .word 0x80deb1fe - .word 0x25c71235 - .word 0x9bdc06a7 - .word 0xcf692694 - .word 0xc19bf174 - .word 0x9ef14ad2 - .word 0xe49b69c1 - .word 0x384f25e3 - .word 0xefbe4786 - .word 0x8b8cd5b5 - .word 0xfc19dc6 - .word 0x77ac9c65 - .word 0x240ca1cc - .word 0x592b0275 - .word 0x2de92c6f - .word 0x6ea6e483 - .word 0x4a7484aa - .word 0xbd41fbd4 - .word 0x5cb0a9dc - .word 0x831153b5 - .word 0x76f988da - .word 0xee66dfab - .word 0x983e5152 - .word 0x2db43210 - .word 0xa831c66d - .word 0x98fb213f - .word 0xb00327c8 - .word 0xbeef0ee4 - .word 0xbf597fc7 - .word 0x3da88fc2 - .word 0xc6e00bf3 - .word 0x930aa725 - .word 0xd5a79147 - .word 0xe003826f - .word 0x6ca6351 - .word 0xa0e6e70 - .word 0x14292967 - .word 0x46d22ffc - .word 0x27b70a85 - .word 0x5c26c926 - .word 0x2e1b2138 - .word 0x5ac42aed - .word 0x4d2c6dfc - .word 0x9d95b3df - .word 0x53380d13 - .word 0x8baf63de - .word 0x650a7354 - .word 0x3c77b2a8 - .word 0x766a0abb - .word 0x47edaee6 - .word 0x81c2c92e - .word 0x1482353b - .word 0x92722c85 - .word 0x4cf10364 - .word 0xa2bfe8a1 - .word 0xbc423001 - .word 0xa81a664b - .word 0xd0f89791 - .word 0xc24b8b70 - .word 0x654be30 - .word 0xc76c51a3 - .word 0xd6ef5218 - .word 0xd192e819 - .word 0x5565a910 - .word 0xd6990624 - .word 0x5771202a - .word 0xf40e3585 - .word 0x32bbd1b8 - .word 0x106aa070 - .word 0xb8d2d0c8 - .word 0x19a4c116 - .word 0x5141ab53 - .word 0x1e376c08 - .word 0xdf8eeb99 - .word 0x2748774c - .word 0xe19b48a8 - .word 0x34b0bcb5 - .word 0xc5c95a63 - .word 0x391c0cb3 - .word 0xe3418acb - .word 0x4ed8aa4a - .word 0x7763e373 - .word 0x5b9cca4f - .word 0xd6b2b8a3 - .word 0x682e6ff3 - .word 0x5defb2fc - .word 0x748f82ee - .word 0x43172f60 - .word 0x78a5636f - .word 0xa1f0ab72 - .word 0x84c87814 - .word 0x1a6439ec - .word 0x8cc70208 - .word 0x23631e28 - .word 0x90befffa - .word 0xde82bde9 - .word 0xa4506ceb - .word 0xb2c67915 - .word 0xbef9a3f7 - .word 0xe372532b - .word 0xc67178f2 - .word 0xea26619c - .word 0xca273ece - .word 0x21c0c207 - .word 0xd186b8c7 - .word 0xcde0eb1e - .word 0xeada7dd6 - .word 0xee6ed178 - .word 0xf57d4f7f - .word 0x72176fba - .word 0x6f067aa - .word 0xa2c898a6 - .word 0xa637dc5 - .word 0xbef90dae - .word 0x113f9804 - .word 0x131c471b - .word 0x1b710b35 - .word 0x23047d84 - .word 0x28db77f5 - .word 0x40c72493 - .word 0x32caab7b - .word 0x15c9bebc - .word 0x3c9ebe0a - .word 0x9c100d4c - .word 0x431d67c4 - .word 0xcb3e42b6 - .word 0x4cc5d4be - .word 0xfc657e2a - .word 0x597f299c - .word 0x3ad6faec - .word 0x5fcb6fab - .word 0x4a475817 - .word 0x6c44198c + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .text .align 4 .fpu neon @@ -7796,10 +7574,10 @@ Transform_Sha512_Len_neon: # Start of loop processing a block L_SHA512_transform_neon_len_begin: # Load W - vld1.8 {q8, q9}, [r1]! - vld1.8 {q10, q11}, [r1]! - vld1.8 {q12, q13}, [r1]! - vld1.8 {q14, q15}, [r1]! + vld1.8 {q8-q9}, [r1]! + vld1.8 {q10-q11}, [r1]! + vld1.8 {q12-q13}, [r1]! + vld1.8 {q14-q15}, [r1]! #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT vrev64.8 q8, q8 vrev64.8 q9, q9 diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index 6be8ccb8cd1..48710dc3cf6 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -30,8 +30,6 @@ #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && !defined(WOLFSSL_ARMASM_THUMB2) -#include -#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -53,7 +51,7 @@ #include #ifdef WOLFSSL_ARMASM_NO_NEON -static const word64 L_SHA512_transform_len_k[] = { +XALIGNED(16) static const word64 L_SHA512_transform_len_k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, @@ -104,13 +102,13 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512_p, #else WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register word64* L_SHA512_transform_len_k_c asm ("r3") = + register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register word64* L_SHA512_transform_len_k_c __asm__ ("r3") = (word64*)&L_SHA512_transform_len_k; #else register word64* L_SHA512_transform_len_k_c = @@ -218,7 +216,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, #endif /* Start of loop processing a block */ "\n" - "L_SHA512_transform_len_begin_%=: \n\t" + "L_SHA512_transform_len_begin_%=:\n\t" /* Load, Reverse and Store W - 64 bytes */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) "ldr r4, [%[data]]\n\t" @@ -529,7 +527,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, "mov r12, #4\n\t" /* Start of 16 rounds */ "\n" - "L_SHA512_transform_len_start_%=: \n\t" + "L_SHA512_transform_len_start_%=:\n\t" /* Round 0 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "ldr r4, [%[sha512], #32]\n\t" @@ -7549,7 +7547,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, #include #ifndef WOLFSSL_ARMASM_NO_NEON -static const word64 L_SHA512_transform_neon_len_k[] = { +XALIGNED(16) static const word64 L_SHA512_transform_neon_len_k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, @@ -7600,13 +7598,13 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_neon(wc_Sha512* sha512_p, #else WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; - register const byte* data asm ("r1") = (const byte*)data_p; - register word32 len asm ("r2") = (word32)len_p; - register word64* L_SHA512_transform_neon_len_k_c asm ("r3") = + register wc_Sha512* sha512 __asm__ ("r0") = (wc_Sha512*)sha512_p; + register const byte* data __asm__ ("r1") = (const byte*)data_p; + register word32 len __asm__ ("r2") = (word32)len_p; + register word64* L_SHA512_transform_neon_len_k_c __asm__ ("r3") = (word64*)&L_SHA512_transform_neon_len_k; #else register word64* L_SHA512_transform_neon_len_k_c = @@ -7619,7 +7617,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_neon(wc_Sha512* sha512, "vldm.64 %[sha512], {d0-d7}\n\t" /* Start of loop processing a block */ "\n" - "L_SHA512_transform_neon_len_begin_%=: \n\t" + "L_SHA512_transform_neon_len_begin_%=:\n\t" /* Load W */ "vld1.8 {q8-q9}, [%[data]]!\n\t" "vld1.8 {q10-q11}, [%[data]]!\n\t" @@ -7655,7 +7653,7 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_neon(wc_Sha512* sha512, "mov r12, #4\n\t" /* Start of 16 rounds */ "\n" - "L_SHA512_transform_neon_len_start_%=: \n\t" + "L_SHA512_transform_neon_len_start_%=:\n\t" /* Round 0 */ "vld1.64 {d12}, [r3:64]!\n\t" "vshl.u64 d8, d4, #50\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-aes-asm.S b/wolfcrypt/src/port/arm/armv8-aes-asm.S index cf46a0790ba..fa48e67b178 100644 --- a/wolfcrypt/src/port/arm/armv8-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-aes-asm.S @@ -43123,16 +43123,17 @@ L_aes_xts_decrypt_arm64_crypto_done: defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) #ifndef __APPLE__ .text - .type L_AES_ARM64_NEON_te, %object .section .rodata + .type L_AES_ARM64_NEON_te, %object .size L_AES_ARM64_NEON_te, 256 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_NEON_te: .byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 @@ -43169,16 +43170,17 @@ L_AES_ARM64_NEON_te: .byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 #ifndef __APPLE__ .text - .type L_AES_ARM64_NEON_shift_rows_shuffle, %object .section .rodata + .type L_AES_ARM64_NEON_shift_rows_shuffle, %object .size L_AES_ARM64_NEON_shift_rows_shuffle, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_NEON_shift_rows_shuffle: .byte 0x0c,0x09,0x06,0x03,0x00,0x0d,0x0a,0x07 @@ -43249,28 +43251,22 @@ L_AES_invert_key_NEON_mix_loop: #endif /* HAVE_AES_DECRYPT */ #ifndef __APPLE__ .text - .type L_AES_ARM64_NEON_rcon, %object .section .rodata + .type L_AES_ARM64_NEON_rcon, %object .size L_AES_ARM64_NEON_rcon, 40 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_NEON_rcon: - .word 0x01000000 - .word 0x02000000 - .word 0x04000000 - .word 0x08000000 - .word 0x10000000 - .word 0x20000000 - .word 0x40000000 - .word 0x80000000 - .word 0x1b000000 - .word 0x36000000 + .long 0x01000000,0x02000000,0x04000000,0x08000000 + .long 0x10000000,0x20000000,0x40000000,0x80000000 + .long 0x1b000000,0x36000000 #ifndef __APPLE__ .text .globl AES_set_encrypt_key_NEON @@ -45307,16 +45303,17 @@ L_AES_CTR_encrypt_NEON_data_done: defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) #ifndef __APPLE__ .text - .type L_AES_ARM64_NEON_td, %object .section .rodata + .type L_AES_ARM64_NEON_td, %object .size L_AES_ARM64_NEON_td, 256 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_NEON_td: .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 @@ -45353,16 +45350,17 @@ L_AES_ARM64_NEON_td: .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d #ifndef __APPLE__ .text - .type L_AES_ARM64_NEON_shift_rows_invshuffle, %object .section .rodata + .type L_AES_ARM64_NEON_shift_rows_invshuffle, %object .size L_AES_ARM64_NEON_shift_rows_invshuffle, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_NEON_shift_rows_invshuffle: .byte 0x04,0x09,0x0e,0x03,0x08,0x0d,0x02,0x07 @@ -51289,548 +51287,166 @@ L_AES_XTS_decrypt_NEON_data_done: #ifdef HAVE_AES_DECRYPT #ifndef __APPLE__ .text - .type L_AES_ARM64_td, %object .section .rodata + .type L_AES_ARM64_td, %object .size L_AES_ARM64_td, 1024 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_td: - .word 0x5051f4a7 - .word 0x537e4165 - .word 0xc31a17a4 - .word 0x963a275e - .word 0xcb3bab6b - .word 0xf11f9d45 - .word 0xabacfa58 - .word 0x934be303 - .word 0x552030fa - .word 0xf6ad766d - .word 0x9188cc76 - .word 0x25f5024c - .word 0xfc4fe5d7 - .word 0xd7c52acb - .word 0x80263544 - .word 0x8fb562a3 - .word 0x49deb15a - .word 0x6725ba1b - .word 0x9845ea0e - .word 0xe15dfec0 - .word 0x02c32f75 - .word 0x12814cf0 - .word 0xa38d4697 - .word 0xc66bd3f9 - .word 0xe7038f5f - .word 0x9515929c - .word 0xebbf6d7a - .word 0xda955259 - .word 0x2dd4be83 - .word 0xd3587421 - .word 0x2949e069 - .word 0x448ec9c8 - .word 0x6a75c289 - .word 0x78f48e79 - .word 0x6b99583e - .word 0xdd27b971 - .word 0xb6bee14f - .word 0x17f088ad - .word 0x66c920ac - .word 0xb47dce3a - .word 0x1863df4a - .word 0x82e51a31 - .word 0x60975133 - .word 0x4562537f - .word 0xe0b16477 - .word 0x84bb6bae - .word 0x1cfe81a0 - .word 0x94f9082b - .word 0x58704868 - .word 0x198f45fd - .word 0x8794de6c - .word 0xb7527bf8 - .word 0x23ab73d3 - .word 0xe2724b02 - .word 0x57e31f8f - .word 0x2a6655ab - .word 0x07b2eb28 - .word 0x032fb5c2 - .word 0x9a86c57b - .word 0xa5d33708 - .word 0xf2302887 - .word 0xb223bfa5 - .word 0xba02036a - .word 0x5ced1682 - .word 0x2b8acf1c - .word 0x92a779b4 - .word 0xf0f307f2 - .word 0xa14e69e2 - .word 0xcd65daf4 - .word 0xd50605be - .word 0x1fd13462 - .word 0x8ac4a6fe - .word 0x9d342e53 - .word 0xa0a2f355 - .word 0x32058ae1 - .word 0x75a4f6eb - .word 0x390b83ec - .word 0xaa4060ef - .word 0x065e719f - .word 0x51bd6e10 - .word 0xf93e218a - .word 0x3d96dd06 - .word 0xaedd3e05 - .word 0x464de6bd - .word 0xb591548d - .word 0x0571c45d - .word 0x6f0406d4 - .word 0xff605015 - .word 0x241998fb - .word 0x97d6bde9 - .word 0xcc894043 - .word 0x7767d99e - .word 0xbdb0e842 - .word 0x8807898b - .word 0x38e7195b - .word 0xdb79c8ee - .word 0x47a17c0a - .word 0xe97c420f - .word 0xc9f8841e - .word 0x00000000 - .word 0x83098086 - .word 0x48322bed - .word 0xac1e1170 - .word 0x4e6c5a72 - .word 0xfbfd0eff - .word 0x560f8538 - .word 0x1e3daed5 - .word 0x27362d39 - .word 0x640a0fd9 - .word 0x21685ca6 - .word 0xd19b5b54 - .word 0x3a24362e - .word 0xb10c0a67 - .word 0x0f9357e7 - .word 0xd2b4ee96 - .word 0x9e1b9b91 - .word 0x4f80c0c5 - .word 0xa261dc20 - .word 0x695a774b - .word 0x161c121a - .word 0x0ae293ba - .word 0xe5c0a02a - .word 0x433c22e0 - .word 0x1d121b17 - .word 0x0b0e090d - .word 0xadf28bc7 - .word 0xb92db6a8 - .word 0xc8141ea9 - .word 0x8557f119 - .word 0x4caf7507 - .word 0xbbee99dd - .word 0xfda37f60 - .word 0x9ff70126 - .word 0xbc5c72f5 - .word 0xc544663b - .word 0x345bfb7e - .word 0x768b4329 - .word 0xdccb23c6 - .word 0x68b6edfc - .word 0x63b8e4f1 - .word 0xcad731dc - .word 0x10426385 - .word 0x40139722 - .word 0x2084c611 - .word 0x7d854a24 - .word 0xf8d2bb3d - .word 0x11aef932 - .word 0x6dc729a1 - .word 0x4b1d9e2f - .word 0xf3dcb230 - .word 0xec0d8652 - .word 0xd077c1e3 - .word 0x6c2bb316 - .word 0x99a970b9 - .word 0xfa119448 - .word 0x2247e964 - .word 0xc4a8fc8c - .word 0x1aa0f03f - .word 0xd8567d2c - .word 0xef223390 - .word 0xc787494e - .word 0xc1d938d1 - .word 0xfe8ccaa2 - .word 0x3698d40b - .word 0xcfa6f581 - .word 0x28a57ade - .word 0x26dab78e - .word 0xa43fadbf - .word 0xe42c3a9d - .word 0x0d507892 - .word 0x9b6a5fcc - .word 0x62547e46 - .word 0xc2f68d13 - .word 0xe890d8b8 - .word 0x5e2e39f7 - .word 0xf582c3af - .word 0xbe9f5d80 - .word 0x7c69d093 - .word 0xa96fd52d - .word 0xb3cf2512 - .word 0x3bc8ac99 - .word 0xa710187d - .word 0x6ee89c63 - .word 0x7bdb3bbb - .word 0x09cd2678 - .word 0xf46e5918 - .word 0x01ec9ab7 - .word 0xa8834f9a - .word 0x65e6956e - .word 0x7eaaffe6 - .word 0x0821bccf - .word 0xe6ef15e8 - .word 0xd9bae79b - .word 0xce4a6f36 - .word 0xd4ea9f09 - .word 0xd629b07c - .word 0xaf31a4b2 - .word 0x312a3f23 - .word 0x30c6a594 - .word 0xc035a266 - .word 0x37744ebc - .word 0xa6fc82ca - .word 0xb0e090d0 - .word 0x1533a7d8 - .word 0x4af10498 - .word 0xf741ecda - .word 0x0e7fcd50 - .word 0x2f1791f6 - .word 0x8d764dd6 - .word 0x4d43efb0 - .word 0x54ccaa4d - .word 0xdfe49604 - .word 0xe39ed1b5 - .word 0x1b4c6a88 - .word 0xb8c12c1f - .word 0x7f466551 - .word 0x049d5eea - .word 0x5d018c35 - .word 0x73fa8774 - .word 0x2efb0b41 - .word 0x5ab3671d - .word 0x5292dbd2 - .word 0x33e91056 - .word 0x136dd647 - .word 0x8c9ad761 - .word 0x7a37a10c - .word 0x8e59f814 - .word 0x89eb133c - .word 0xeecea927 - .word 0x35b761c9 - .word 0xede11ce5 - .word 0x3c7a47b1 - .word 0x599cd2df - .word 0x3f55f273 - .word 0x791814ce - .word 0xbf73c737 - .word 0xea53f7cd - .word 0x5b5ffdaa - .word 0x14df3d6f - .word 0x867844db - .word 0x81caaff3 - .word 0x3eb968c4 - .word 0x2c382434 - .word 0x5fc2a340 - .word 0x72161dc3 - .word 0x0cbce225 - .word 0x8b283c49 - .word 0x41ff0d95 - .word 0x7139a801 - .word 0xde080cb3 - .word 0x9cd8b4e4 - .word 0x906456c1 - .word 0x617bcb84 - .word 0x70d532b6 - .word 0x74486c5c - .word 0x42d0b857 + .long 0x5051f4a7,0x537e4165,0xc31a17a4,0x963a275e + .long 0xcb3bab6b,0xf11f9d45,0xabacfa58,0x934be303 + .long 0x552030fa,0xf6ad766d,0x9188cc76,0x25f5024c + .long 0xfc4fe5d7,0xd7c52acb,0x80263544,0x8fb562a3 + .long 0x49deb15a,0x6725ba1b,0x9845ea0e,0xe15dfec0 + .long 0x02c32f75,0x12814cf0,0xa38d4697,0xc66bd3f9 + .long 0xe7038f5f,0x9515929c,0xebbf6d7a,0xda955259 + .long 0x2dd4be83,0xd3587421,0x2949e069,0x448ec9c8 + .long 0x6a75c289,0x78f48e79,0x6b99583e,0xdd27b971 + .long 0xb6bee14f,0x17f088ad,0x66c920ac,0xb47dce3a + .long 0x1863df4a,0x82e51a31,0x60975133,0x4562537f + .long 0xe0b16477,0x84bb6bae,0x1cfe81a0,0x94f9082b + .long 0x58704868,0x198f45fd,0x8794de6c,0xb7527bf8 + .long 0x23ab73d3,0xe2724b02,0x57e31f8f,0x2a6655ab + .long 0x07b2eb28,0x032fb5c2,0x9a86c57b,0xa5d33708 + .long 0xf2302887,0xb223bfa5,0xba02036a,0x5ced1682 + .long 0x2b8acf1c,0x92a779b4,0xf0f307f2,0xa14e69e2 + .long 0xcd65daf4,0xd50605be,0x1fd13462,0x8ac4a6fe + .long 0x9d342e53,0xa0a2f355,0x32058ae1,0x75a4f6eb + .long 0x390b83ec,0xaa4060ef,0x065e719f,0x51bd6e10 + .long 0xf93e218a,0x3d96dd06,0xaedd3e05,0x464de6bd + .long 0xb591548d,0x0571c45d,0x6f0406d4,0xff605015 + .long 0x241998fb,0x97d6bde9,0xcc894043,0x7767d99e + .long 0xbdb0e842,0x8807898b,0x38e7195b,0xdb79c8ee + .long 0x47a17c0a,0xe97c420f,0xc9f8841e,0x00000000 + .long 0x83098086,0x48322bed,0xac1e1170,0x4e6c5a72 + .long 0xfbfd0eff,0x560f8538,0x1e3daed5,0x27362d39 + .long 0x640a0fd9,0x21685ca6,0xd19b5b54,0x3a24362e + .long 0xb10c0a67,0x0f9357e7,0xd2b4ee96,0x9e1b9b91 + .long 0x4f80c0c5,0xa261dc20,0x695a774b,0x161c121a + .long 0x0ae293ba,0xe5c0a02a,0x433c22e0,0x1d121b17 + .long 0x0b0e090d,0xadf28bc7,0xb92db6a8,0xc8141ea9 + .long 0x8557f119,0x4caf7507,0xbbee99dd,0xfda37f60 + .long 0x9ff70126,0xbc5c72f5,0xc544663b,0x345bfb7e + .long 0x768b4329,0xdccb23c6,0x68b6edfc,0x63b8e4f1 + .long 0xcad731dc,0x10426385,0x40139722,0x2084c611 + .long 0x7d854a24,0xf8d2bb3d,0x11aef932,0x6dc729a1 + .long 0x4b1d9e2f,0xf3dcb230,0xec0d8652,0xd077c1e3 + .long 0x6c2bb316,0x99a970b9,0xfa119448,0x2247e964 + .long 0xc4a8fc8c,0x1aa0f03f,0xd8567d2c,0xef223390 + .long 0xc787494e,0xc1d938d1,0xfe8ccaa2,0x3698d40b + .long 0xcfa6f581,0x28a57ade,0x26dab78e,0xa43fadbf + .long 0xe42c3a9d,0x0d507892,0x9b6a5fcc,0x62547e46 + .long 0xc2f68d13,0xe890d8b8,0x5e2e39f7,0xf582c3af + .long 0xbe9f5d80,0x7c69d093,0xa96fd52d,0xb3cf2512 + .long 0x3bc8ac99,0xa710187d,0x6ee89c63,0x7bdb3bbb + .long 0x09cd2678,0xf46e5918,0x01ec9ab7,0xa8834f9a + .long 0x65e6956e,0x7eaaffe6,0x0821bccf,0xe6ef15e8 + .long 0xd9bae79b,0xce4a6f36,0xd4ea9f09,0xd629b07c + .long 0xaf31a4b2,0x312a3f23,0x30c6a594,0xc035a266 + .long 0x37744ebc,0xa6fc82ca,0xb0e090d0,0x1533a7d8 + .long 0x4af10498,0xf741ecda,0x0e7fcd50,0x2f1791f6 + .long 0x8d764dd6,0x4d43efb0,0x54ccaa4d,0xdfe49604 + .long 0xe39ed1b5,0x1b4c6a88,0xb8c12c1f,0x7f466551 + .long 0x049d5eea,0x5d018c35,0x73fa8774,0x2efb0b41 + .long 0x5ab3671d,0x5292dbd2,0x33e91056,0x136dd647 + .long 0x8c9ad761,0x7a37a10c,0x8e59f814,0x89eb133c + .long 0xeecea927,0x35b761c9,0xede11ce5,0x3c7a47b1 + .long 0x599cd2df,0x3f55f273,0x791814ce,0xbf73c737 + .long 0xea53f7cd,0x5b5ffdaa,0x14df3d6f,0x867844db + .long 0x81caaff3,0x3eb968c4,0x2c382434,0x5fc2a340 + .long 0x72161dc3,0x0cbce225,0x8b283c49,0x41ff0d95 + .long 0x7139a801,0xde080cb3,0x9cd8b4e4,0x906456c1 + .long 0x617bcb84,0x70d532b6,0x74486c5c,0x42d0b857 #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) #ifndef __APPLE__ .text - .type L_AES_ARM64_te, %object .section .rodata + .type L_AES_ARM64_te, %object .size L_AES_ARM64_te, 1024 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_te: - .word 0xa5c66363 - .word 0x84f87c7c - .word 0x99ee7777 - .word 0x8df67b7b - .word 0x0dfff2f2 - .word 0xbdd66b6b - .word 0xb1de6f6f - .word 0x5491c5c5 - .word 0x50603030 - .word 0x03020101 - .word 0xa9ce6767 - .word 0x7d562b2b - .word 0x19e7fefe - .word 0x62b5d7d7 - .word 0xe64dabab - .word 0x9aec7676 - .word 0x458fcaca - .word 0x9d1f8282 - .word 0x4089c9c9 - .word 0x87fa7d7d - .word 0x15effafa - .word 0xebb25959 - .word 0xc98e4747 - .word 0x0bfbf0f0 - .word 0xec41adad - .word 0x67b3d4d4 - .word 0xfd5fa2a2 - .word 0xea45afaf - .word 0xbf239c9c - .word 0xf753a4a4 - .word 0x96e47272 - .word 0x5b9bc0c0 - .word 0xc275b7b7 - .word 0x1ce1fdfd - .word 0xae3d9393 - .word 0x6a4c2626 - .word 0x5a6c3636 - .word 0x417e3f3f - .word 0x02f5f7f7 - .word 0x4f83cccc - .word 0x5c683434 - .word 0xf451a5a5 - .word 0x34d1e5e5 - .word 0x08f9f1f1 - .word 0x93e27171 - .word 0x73abd8d8 - .word 0x53623131 - .word 0x3f2a1515 - .word 0x0c080404 - .word 0x5295c7c7 - .word 0x65462323 - .word 0x5e9dc3c3 - .word 0x28301818 - .word 0xa1379696 - .word 0x0f0a0505 - .word 0xb52f9a9a - .word 0x090e0707 - .word 0x36241212 - .word 0x9b1b8080 - .word 0x3ddfe2e2 - .word 0x26cdebeb - .word 0x694e2727 - .word 0xcd7fb2b2 - .word 0x9fea7575 - .word 0x1b120909 - .word 0x9e1d8383 - .word 0x74582c2c - .word 0x2e341a1a - .word 0x2d361b1b - .word 0xb2dc6e6e - .word 0xeeb45a5a - .word 0xfb5ba0a0 - .word 0xf6a45252 - .word 0x4d763b3b - .word 0x61b7d6d6 - .word 0xce7db3b3 - .word 0x7b522929 - .word 0x3edde3e3 - .word 0x715e2f2f - .word 0x97138484 - .word 0xf5a65353 - .word 0x68b9d1d1 - .word 0x00000000 - .word 0x2cc1eded - .word 0x60402020 - .word 0x1fe3fcfc - .word 0xc879b1b1 - .word 0xedb65b5b - .word 0xbed46a6a - .word 0x468dcbcb - .word 0xd967bebe - .word 0x4b723939 - .word 0xde944a4a - .word 0xd4984c4c - .word 0xe8b05858 - .word 0x4a85cfcf - .word 0x6bbbd0d0 - .word 0x2ac5efef - .word 0xe54faaaa - .word 0x16edfbfb - .word 0xc5864343 - .word 0xd79a4d4d - .word 0x55663333 - .word 0x94118585 - .word 0xcf8a4545 - .word 0x10e9f9f9 - .word 0x06040202 - .word 0x81fe7f7f - .word 0xf0a05050 - .word 0x44783c3c - .word 0xba259f9f - .word 0xe34ba8a8 - .word 0xf3a25151 - .word 0xfe5da3a3 - .word 0xc0804040 - .word 0x8a058f8f - .word 0xad3f9292 - .word 0xbc219d9d - .word 0x48703838 - .word 0x04f1f5f5 - .word 0xdf63bcbc - .word 0xc177b6b6 - .word 0x75afdada - .word 0x63422121 - .word 0x30201010 - .word 0x1ae5ffff - .word 0x0efdf3f3 - .word 0x6dbfd2d2 - .word 0x4c81cdcd - .word 0x14180c0c - .word 0x35261313 - .word 0x2fc3ecec - .word 0xe1be5f5f - .word 0xa2359797 - .word 0xcc884444 - .word 0x392e1717 - .word 0x5793c4c4 - .word 0xf255a7a7 - .word 0x82fc7e7e - .word 0x477a3d3d - .word 0xacc86464 - .word 0xe7ba5d5d - .word 0x2b321919 - .word 0x95e67373 - .word 0xa0c06060 - .word 0x98198181 - .word 0xd19e4f4f - .word 0x7fa3dcdc - .word 0x66442222 - .word 0x7e542a2a - .word 0xab3b9090 - .word 0x830b8888 - .word 0xca8c4646 - .word 0x29c7eeee - .word 0xd36bb8b8 - .word 0x3c281414 - .word 0x79a7dede - .word 0xe2bc5e5e - .word 0x1d160b0b - .word 0x76addbdb - .word 0x3bdbe0e0 - .word 0x56643232 - .word 0x4e743a3a - .word 0x1e140a0a - .word 0xdb924949 - .word 0x0a0c0606 - .word 0x6c482424 - .word 0xe4b85c5c - .word 0x5d9fc2c2 - .word 0x6ebdd3d3 - .word 0xef43acac - .word 0xa6c46262 - .word 0xa8399191 - .word 0xa4319595 - .word 0x37d3e4e4 - .word 0x8bf27979 - .word 0x32d5e7e7 - .word 0x438bc8c8 - .word 0x596e3737 - .word 0xb7da6d6d - .word 0x8c018d8d - .word 0x64b1d5d5 - .word 0xd29c4e4e - .word 0xe049a9a9 - .word 0xb4d86c6c - .word 0xfaac5656 - .word 0x07f3f4f4 - .word 0x25cfeaea - .word 0xafca6565 - .word 0x8ef47a7a - .word 0xe947aeae - .word 0x18100808 - .word 0xd56fbaba - .word 0x88f07878 - .word 0x6f4a2525 - .word 0x725c2e2e - .word 0x24381c1c - .word 0xf157a6a6 - .word 0xc773b4b4 - .word 0x5197c6c6 - .word 0x23cbe8e8 - .word 0x7ca1dddd - .word 0x9ce87474 - .word 0x213e1f1f - .word 0xdd964b4b - .word 0xdc61bdbd - .word 0x860d8b8b - .word 0x850f8a8a - .word 0x90e07070 - .word 0x427c3e3e - .word 0xc471b5b5 - .word 0xaacc6666 - .word 0xd8904848 - .word 0x05060303 - .word 0x01f7f6f6 - .word 0x121c0e0e - .word 0xa3c26161 - .word 0x5f6a3535 - .word 0xf9ae5757 - .word 0xd069b9b9 - .word 0x91178686 - .word 0x5899c1c1 - .word 0x273a1d1d - .word 0xb9279e9e - .word 0x38d9e1e1 - .word 0x13ebf8f8 - .word 0xb32b9898 - .word 0x33221111 - .word 0xbbd26969 - .word 0x70a9d9d9 - .word 0x89078e8e - .word 0xa7339494 - .word 0xb62d9b9b - .word 0x223c1e1e - .word 0x92158787 - .word 0x20c9e9e9 - .word 0x4987cece - .word 0xffaa5555 - .word 0x78502828 - .word 0x7aa5dfdf - .word 0x8f038c8c - .word 0xf859a1a1 - .word 0x80098989 - .word 0x171a0d0d - .word 0xda65bfbf - .word 0x31d7e6e6 - .word 0xc6844242 - .word 0xb8d06868 - .word 0xc3824141 - .word 0xb0299999 - .word 0x775a2d2d - .word 0x111e0f0f - .word 0xcb7bb0b0 - .word 0xfca85454 - .word 0xd66dbbbb - .word 0x3a2c1616 + .long 0xa5c66363,0x84f87c7c,0x99ee7777,0x8df67b7b + .long 0x0dfff2f2,0xbdd66b6b,0xb1de6f6f,0x5491c5c5 + .long 0x50603030,0x03020101,0xa9ce6767,0x7d562b2b + .long 0x19e7fefe,0x62b5d7d7,0xe64dabab,0x9aec7676 + .long 0x458fcaca,0x9d1f8282,0x4089c9c9,0x87fa7d7d + .long 0x15effafa,0xebb25959,0xc98e4747,0x0bfbf0f0 + .long 0xec41adad,0x67b3d4d4,0xfd5fa2a2,0xea45afaf + .long 0xbf239c9c,0xf753a4a4,0x96e47272,0x5b9bc0c0 + .long 0xc275b7b7,0x1ce1fdfd,0xae3d9393,0x6a4c2626 + .long 0x5a6c3636,0x417e3f3f,0x02f5f7f7,0x4f83cccc + .long 0x5c683434,0xf451a5a5,0x34d1e5e5,0x08f9f1f1 + .long 0x93e27171,0x73abd8d8,0x53623131,0x3f2a1515 + .long 0x0c080404,0x5295c7c7,0x65462323,0x5e9dc3c3 + .long 0x28301818,0xa1379696,0x0f0a0505,0xb52f9a9a + .long 0x090e0707,0x36241212,0x9b1b8080,0x3ddfe2e2 + .long 0x26cdebeb,0x694e2727,0xcd7fb2b2,0x9fea7575 + .long 0x1b120909,0x9e1d8383,0x74582c2c,0x2e341a1a + .long 0x2d361b1b,0xb2dc6e6e,0xeeb45a5a,0xfb5ba0a0 + .long 0xf6a45252,0x4d763b3b,0x61b7d6d6,0xce7db3b3 + .long 0x7b522929,0x3edde3e3,0x715e2f2f,0x97138484 + .long 0xf5a65353,0x68b9d1d1,0x00000000,0x2cc1eded + .long 0x60402020,0x1fe3fcfc,0xc879b1b1,0xedb65b5b + .long 0xbed46a6a,0x468dcbcb,0xd967bebe,0x4b723939 + .long 0xde944a4a,0xd4984c4c,0xe8b05858,0x4a85cfcf + .long 0x6bbbd0d0,0x2ac5efef,0xe54faaaa,0x16edfbfb + .long 0xc5864343,0xd79a4d4d,0x55663333,0x94118585 + .long 0xcf8a4545,0x10e9f9f9,0x06040202,0x81fe7f7f + .long 0xf0a05050,0x44783c3c,0xba259f9f,0xe34ba8a8 + .long 0xf3a25151,0xfe5da3a3,0xc0804040,0x8a058f8f + .long 0xad3f9292,0xbc219d9d,0x48703838,0x04f1f5f5 + .long 0xdf63bcbc,0xc177b6b6,0x75afdada,0x63422121 + .long 0x30201010,0x1ae5ffff,0x0efdf3f3,0x6dbfd2d2 + .long 0x4c81cdcd,0x14180c0c,0x35261313,0x2fc3ecec + .long 0xe1be5f5f,0xa2359797,0xcc884444,0x392e1717 + .long 0x5793c4c4,0xf255a7a7,0x82fc7e7e,0x477a3d3d + .long 0xacc86464,0xe7ba5d5d,0x2b321919,0x95e67373 + .long 0xa0c06060,0x98198181,0xd19e4f4f,0x7fa3dcdc + .long 0x66442222,0x7e542a2a,0xab3b9090,0x830b8888 + .long 0xca8c4646,0x29c7eeee,0xd36bb8b8,0x3c281414 + .long 0x79a7dede,0xe2bc5e5e,0x1d160b0b,0x76addbdb + .long 0x3bdbe0e0,0x56643232,0x4e743a3a,0x1e140a0a + .long 0xdb924949,0x0a0c0606,0x6c482424,0xe4b85c5c + .long 0x5d9fc2c2,0x6ebdd3d3,0xef43acac,0xa6c46262 + .long 0xa8399191,0xa4319595,0x37d3e4e4,0x8bf27979 + .long 0x32d5e7e7,0x438bc8c8,0x596e3737,0xb7da6d6d + .long 0x8c018d8d,0x64b1d5d5,0xd29c4e4e,0xe049a9a9 + .long 0xb4d86c6c,0xfaac5656,0x07f3f4f4,0x25cfeaea + .long 0xafca6565,0x8ef47a7a,0xe947aeae,0x18100808 + .long 0xd56fbaba,0x88f07878,0x6f4a2525,0x725c2e2e + .long 0x24381c1c,0xf157a6a6,0xc773b4b4,0x5197c6c6 + .long 0x23cbe8e8,0x7ca1dddd,0x9ce87474,0x213e1f1f + .long 0xdd964b4b,0xdc61bdbd,0x860d8b8b,0x850f8a8a + .long 0x90e07070,0x427c3e3e,0xc471b5b5,0xaacc6666 + .long 0xd8904848,0x05060303,0x01f7f6f6,0x121c0e0e + .long 0xa3c26161,0x5f6a3535,0xf9ae5757,0xd069b9b9 + .long 0x91178686,0x5899c1c1,0x273a1d1d,0xb9279e9e + .long 0x38d9e1e1,0x13ebf8f8,0xb32b9898,0x33221111 + .long 0xbbd26969,0x70a9d9d9,0x89078e8e,0xa7339494 + .long 0xb62d9b9b,0x223c1e1e,0x92158787,0x20c9e9e9 + .long 0x4987cece,0xffaa5555,0x78502828,0x7aa5dfdf + .long 0x8f038c8c,0xf859a1a1,0x80098989,0x171a0d0d + .long 0xda65bfbf,0x31d7e6e6,0xc6844242,0xb8d06868 + .long 0xc3824141,0xb0299999,0x775a2d2d,0x111e0f0f + .long 0xcb7bb0b0,0xfca85454,0xd66dbbbb,0x3a2c1616 #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT @@ -51969,28 +51585,22 @@ L_AES_invert_key_mix_loop: #endif /* HAVE_AES_DECRYPT */ #ifndef __APPLE__ .text - .type L_AES_ARM64_rcon, %object .section .rodata + .type L_AES_ARM64_rcon, %object .size L_AES_ARM64_rcon, 40 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_rcon: - .word 0x01000000 - .word 0x02000000 - .word 0x04000000 - .word 0x08000000 - .word 0x10000000 - .word 0x20000000 - .word 0x40000000 - .word 0x80000000 - .word 0x1b000000 - .word 0x36000000 + .long 0x01000000,0x02000000,0x04000000,0x08000000 + .long 0x10000000,0x20000000,0x40000000,0x80000000 + .long 0x1b000000,0x36000000 #ifndef __APPLE__ .text .globl AES_set_encrypt_key @@ -53270,16 +52880,17 @@ L_AES_CTR_encrypt_loop_nr: defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) #ifndef __APPLE__ .text - .type L_AES_ARM64_td4, %object .section .rodata + .type L_AES_ARM64_td4, %object .size L_AES_ARM64_td4, 256 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_AES_ARM64_td4: .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 @@ -54230,50 +53841,27 @@ L_AES_CBC_decrypt_end_dec: #ifdef HAVE_AESGCM #ifndef __APPLE__ .text - .type L_GCM_gmult_len_r, %object .section .rodata + .type L_GCM_gmult_len_r, %object .size L_GCM_gmult_len_r, 128 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_GCM_gmult_len_r: - .word 0x00000000 - .word 0x1c200000 - .word 0x38400000 - .word 0x24600000 - .word 0x70800000 - .word 0x6ca00000 - .word 0x48c00000 - .word 0x54e00000 - .word 0xe1000000 - .word 0xfd200000 - .word 0xd9400000 - .word 0xc5600000 - .word 0x91800000 - .word 0x8da00000 - .word 0xa9c00000 - .word 0xb5e00000 - .word 0x00000000 - .word 0x01c20000 - .word 0x03840000 - .word 0x02460000 - .word 0x07080000 - .word 0x06ca0000 - .word 0x048c0000 - .word 0x054e0000 - .word 0x0e100000 - .word 0x0fd20000 - .word 0x0d940000 - .word 0x0c560000 - .word 0x09180000 - .word 0x08da0000 - .word 0x0a9c0000 - .word 0x0b5e0000 + .long 0x00000000,0x1c200000,0x38400000,0x24600000 + .long 0x70800000,0x6ca00000,0x48c00000,0x54e00000 + .long 0xe1000000,0xfd200000,0xd9400000,0xc5600000 + .long 0x91800000,0x8da00000,0xa9c00000,0xb5e00000 + .long 0x00000000,0x01c20000,0x03840000,0x02460000 + .long 0x07080000,0x06ca0000,0x048c0000,0x054e0000 + .long 0x0e100000,0x0fd20000,0x0d940000,0x0c560000 + .long 0x09180000,0x08da0000,0x0a9c0000,0x0b5e0000 #ifndef __APPLE__ .text .globl GCM_gmult_len diff --git a/wolfcrypt/src/port/arm/armv8-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-aes-asm_c.c index 26f11a70b71..7c6e43e9729 100644 --- a/wolfcrypt/src/port/arm/armv8-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-aes-asm_c.c @@ -207,7 +207,7 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir) "stur q0, [%x[key], #96]\n\t" "b L_aes_set_key_arm64_crypto_done_%=\n\t" "\n" - "L_aes_set_key_arm64_crypto_start_256_%=: \n\t" + "L_aes_set_key_arm64_crypto_start_256_%=:\n\t" "ldr x4, [%x[userKey]], #8\n\t" "ldr x6, [%x[userKey]], #8\n\t" "ldr x8, [%x[userKey]], #8\n\t" @@ -410,7 +410,7 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir) "stur q0, [%x[key], #112]\n\t" "b L_aes_set_key_arm64_crypto_done_%=\n\t" "\n" - "L_aes_set_key_arm64_crypto_start_128_%=: \n\t" + "L_aes_set_key_arm64_crypto_start_128_%=:\n\t" "ldr x4, [%x[userKey]], #8\n\t" "ldr x6, [%x[userKey]], #8\n\t" "stp x4, x6, [%x[key]], #16\n\t" @@ -573,7 +573,7 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir) "aesimc v0.16b, v0.16b\n\t" "stur q0, [%x[key], #80]\n\t" "\n" - "L_aes_set_key_arm64_crypto_done_%=: \n\t" + "L_aes_set_key_arm64_crypto_done_%=:\n\t" : [keylen] "+r" (keylen), [key] "+r" (key), [dir] "+r" (dir) : [userKey] "r" (userKey) : "memory", "cc", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", @@ -581,7 +581,9 @@ void AES_set_key_AARCH64(const byte* userKey, int keylen, byte* key, int dir) ); } -#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ + defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_CBC) void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) { __asm__ __volatile__ ( @@ -623,7 +625,7 @@ void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "\n" - "L_aes_encrypt_arm64_crypto_round_done_%=: \n\t" + "L_aes_encrypt_arm64_crypto_round_done_%=:\n\t" "ld1 {v1.2d}, [%x[key]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "st1 {v0.16b}, [%x[outBlock]]\n\t" @@ -633,8 +635,11 @@ void AES_encrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) ); } -#endif /* defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) */ -#if !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#endif /* defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || + * defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || + * defined(HAVE_AES_CBC) */ +#if !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || \ + defined(WOLFSSL_AES_COUNTER) #ifdef HAVE_AES_DECRYPT void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) { @@ -677,7 +682,7 @@ void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) "aesimc v0.16b, v0.16b\n\t" "aesd v0.16b, v2.16b\n\t" "\n" - "L_aes_decrypt_arm64_crypto_round_done_%=: \n\t" + "L_aes_decrypt_arm64_crypto_round_done_%=:\n\t" "ld1 {v1.2d}, [%x[key]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "st1 {v0.16b}, [%x[outBlock]]\n\t" @@ -688,7 +693,8 @@ void AES_decrypt_AARCH64(const byte* inBlock, byte* outBlock, byte* key, int nr) } #endif /* HAVE_AES_DECRYPT */ -#endif /* !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) */ +#endif /* !defined(WC_AES_BITSLICED) || defined(WOLFSSL_AES_DIRECT) || + * defined(WOLFSSL_AES_COUNTER) */ #ifdef HAVE_AES_ECB void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, int nr) @@ -709,7 +715,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" @@ -910,7 +916,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.ge L_aes_encrypt_blocks_arm64_crypto_192_start_8_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_192_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" @@ -1013,7 +1019,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_192_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -1068,7 +1074,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_192_start_1_%=:\n\t" "cbz %w[sz], L_aes_encrypt_blocks_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v0.16b, v16.16b\n\t" @@ -1097,12 +1103,12 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "eor v0.16b, v0.16b, v28.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_192_done_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_encrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_256 */ "\n" - "L_aes_encrypt_blocks_arm64_crypto_start_256_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v27.2d, v28.2d, v29.2d, v30.2d}, [%x[key]], #0x40\n\t" "cmp %w[sz], #1\n\t" @@ -1110,7 +1116,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" @@ -1343,7 +1349,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.ge L_aes_encrypt_blocks_arm64_crypto_256_start_8_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_256_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" @@ -1462,7 +1468,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_256_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -1525,7 +1531,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_256_start_1_%=:\n\t" "cbz %w[sz], L_aes_encrypt_blocks_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v0.16b, v16.16b\n\t" @@ -1558,19 +1564,19 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "eor v0.16b, v0.16b, v30.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_256_done_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_encrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_128 */ "\n" - "L_aes_encrypt_blocks_arm64_crypto_start_128_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp %w[sz], #1\n\t" "b.eq L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aese v0.16b, v16.16b\n\t" @@ -1739,7 +1745,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.ge L_aes_encrypt_blocks_arm64_crypto_128_start_8_%=\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_128_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" @@ -1826,7 +1832,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_128_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -1873,7 +1879,7 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_128_start_1_%=:\n\t" "cbz %w[sz], L_aes_encrypt_blocks_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aese v0.16b, v16.16b\n\t" @@ -1898,10 +1904,10 @@ void AES_encrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "eor v0.16b, v0.16b, v26.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_encrypt_blocks_arm64_crypto_128_done_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_encrypt_blocks_arm64_crypto_done_%=: \n\t" + "L_aes_encrypt_blocks_arm64_crypto_done_%=:\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [nr] "+r" (nr) : [in] "r" (in) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", @@ -1930,7 +1936,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" @@ -2131,7 +2137,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.ge L_aes_decrypt_blocks_arm64_crypto_192_start_8_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_192_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" @@ -2234,7 +2240,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_192_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -2289,7 +2295,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_192_start_1_%=:\n\t" "cbz %w[sz], L_aes_decrypt_blocks_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aesd v0.16b, v16.16b\n\t" @@ -2318,12 +2324,12 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "eor v0.16b, v0.16b, v28.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_192_done_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_decrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_256 */ "\n" - "L_aes_decrypt_blocks_arm64_crypto_start_256_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v27.2d, v28.2d, v29.2d, v30.2d}, [%x[key]], #0x40\n\t" "cmp %w[sz], #1\n\t" @@ -2331,7 +2337,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" @@ -2564,7 +2570,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.ge L_aes_decrypt_blocks_arm64_crypto_256_start_8_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_256_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" @@ -2683,7 +2689,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_256_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -2746,7 +2752,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_256_start_1_%=:\n\t" "cbz %w[sz], L_aes_decrypt_blocks_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aesd v0.16b, v16.16b\n\t" @@ -2779,19 +2785,19 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "eor v0.16b, v0.16b, v30.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_256_done_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_decrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_ECB_128 */ "\n" - "L_aes_decrypt_blocks_arm64_crypto_start_128_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp %w[sz], #1\n\t" "b.eq L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "cmp %w[sz], #8\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "aesd v0.16b, v16.16b\n\t" @@ -2960,7 +2966,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "cmp %w[sz], #8\n\t" "b.ge L_aes_decrypt_blocks_arm64_crypto_128_start_8_%=\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_128_start_4_%=:\n\t" "cmp %w[sz], #4\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" @@ -3047,7 +3053,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #4\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_128_start_2_%=:\n\t" "cmp %w[sz], #2\n\t" "b.lt L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -3094,7 +3100,7 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "sub %w[sz], %w[sz], #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_128_start_1_%=:\n\t" "cbz %w[sz], L_aes_decrypt_blocks_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "aesd v0.16b, v16.16b\n\t" @@ -3119,10 +3125,10 @@ void AES_decrypt_blocks_AARCH64(const byte* in, byte* out, word32 sz, byte* key, "eor v0.16b, v0.16b, v26.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_decrypt_blocks_arm64_crypto_128_done_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_decrypt_blocks_arm64_crypto_done_%=: \n\t" + "L_aes_decrypt_blocks_arm64_crypto_done_%=:\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [nr] "+r" (nr) : [in] "r" (in) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", @@ -3149,7 +3155,7 @@ void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, #ifndef NO_AES_192 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "\n" - "L_aes_cbc_encrypt_arm64_crypto_loop_192_%=: \n\t" + "L_aes_cbc_encrypt_arm64_crypto_loop_192_%=:\n\t" "ld1 {v28.2d}, [%x[key]]\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "subs %w[sz], %w[sz], #1\n\t" @@ -3184,12 +3190,12 @@ void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t" /* AES_CBC_256 */ "\n" - "L_aes_cbc_encrypt_arm64_crypto_start_256_%=: \n\t" + "L_aes_cbc_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key]], #32\n\t" "\n" - "L_aes_cbc_encrypt_arm64_crypto_loop_256_%=: \n\t" + "L_aes_cbc_encrypt_arm64_crypto_loop_256_%=:\n\t" "ld1 {v30.2d}, [%x[key]]\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "subs %w[sz], %w[sz], #1\n\t" @@ -3228,11 +3234,11 @@ void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b L_aes_cbc_encrypt_arm64_crypto_done_%=\n\t" /* AES_CBC_128 */ "\n" - "L_aes_cbc_encrypt_arm64_crypto_start_128_%=: \n\t" + "L_aes_cbc_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v24.2d, v25.2d}, [%x[key]], #32\n\t" "\n" - "L_aes_cbc_encrypt_arm64_crypto_loop_128_%=: \n\t" + "L_aes_cbc_encrypt_arm64_crypto_loop_128_%=:\n\t" "ld1 {v26.2d}, [%x[key]]\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "subs %w[sz], %w[sz], #1\n\t" @@ -3261,7 +3267,7 @@ void AES_CBC_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.ne L_aes_cbc_encrypt_arm64_crypto_loop_128_%=\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_cbc_encrypt_arm64_crypto_done_%=: \n\t" + "L_aes_cbc_encrypt_arm64_crypto_done_%=:\n\t" "st1 {v0.2d}, [%x[reg]]\n\t" : [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg), [key] "+r" (key), [nr] "+r" (nr) @@ -3290,7 +3296,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "cmp %w[sz], #10\n\t" "b.le L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=:\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "sub %w[sz], %w[sz], #1\n\t" "mov v2.16b, v1.16b\n\t" @@ -3325,7 +3331,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_long_%=\n\t" "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_192_start_1_%=:\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "sub %w[sz], %w[sz], #1\n\t" "eor v2.16b, v0.16b, v28.16b\n\t" @@ -3361,7 +3367,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_CBC_256 */ "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_start_256_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key]], #32\n\t" @@ -3369,7 +3375,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "cmp %w[sz], #5\n\t" "b.le L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=:\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "sub %w[sz], %w[sz], #1\n\t" "mov v2.16b, v1.16b\n\t" @@ -3408,7 +3414,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_long_%=\n\t" "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_256_start_1_%=:\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "sub %w[sz], %w[sz], #1\n\t" "eor v2.16b, v0.16b, v30.16b\n\t" @@ -3448,14 +3454,14 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t" /* AES_CBC_128 */ "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_start_128_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v24.2d, v25.2d}, [%x[key]], #32\n\t" "ld1 {v26.2d}, [%x[key]]\n\t" "cmp %w[sz], #24\n\t" "b.le L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=:\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "sub %w[sz], %w[sz], #1\n\t" "mov v2.16b, v1.16b\n\t" @@ -3486,7 +3492,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_long_%=\n\t" "b L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=\n\t" "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=:\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "sub %w[sz], %w[sz], #1\n\t" "eor v2.16b, v0.16b, v26.16b\n\t" @@ -3516,7 +3522,7 @@ void AES_CBC_decrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.ge L_aes_cbc_decrypt_blocks_arm64_crypto_128_start_1_%=\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=: \n\t" + "L_aes_cbc_decrypt_blocks_arm64_crypto_done_%=:\n\t" "st1 {v0.2d}, [%x[reg]]\n\t" : [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg), [key] "+r" (key), [nr] "+r" (nr) @@ -3562,7 +3568,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "adds x15, x9, #3\n\t" "adc x16, x10, xzr\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=:\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t" "mov v17.d[0], x12\n\t" @@ -3811,7 +3817,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "cmp w8, #8\n\t" "b.ge L_aes_ctr_encrypt_arm64_crypto_192_start_8_%=\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_start_4_%=:\n\t" "cmp w8, #4\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" @@ -3938,7 +3944,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=\n\t" "ld1 {v24.16b, v25.16b}, [%x[in]], #32\n\t" @@ -4010,7 +4016,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_start_1_%=:\n\t" "cbz w8, L_aes_ctr_encrypt_arm64_crypto_192_done_%=\n\t" "ld1 {v24.16b}, [%x[in]], #16\n\t" "rev64 v16.16b, v16.16b\n\t" @@ -4043,7 +4049,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "adc x10, x10, xzr\n\t" "st1 {v24.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_done_%=:\n\t" "cbz %w[sz], L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" @@ -4078,7 +4084,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov w13, #16\n\t" "sub w13, w13, %w[sz]\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "ldrb w11, [%x[tmp]], #1\n\t" "ldrb w12, [%x[in]], #1\n\t" "eor w11, w11, w12\n\t" @@ -4087,12 +4093,12 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.gt L_aes_ctr_encrypt_arm64_crypto_192_start_byte_%=\n\t" "str w13, [%x[left]]\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_ctr_encrypt_arm64_crypto_done_%=\n\t" /* AES_CTR_256 */ "\n" - "L_aes_ctr_encrypt_arm64_crypto_start_256_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" "ld1 {v12.2d, v13.2d}, [%x[key]], #32\n\t" @@ -4108,7 +4114,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "adds x15, x9, #3\n\t" "adc x16, x10, xzr\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=:\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t" "mov v17.d[0], x12\n\t" @@ -4389,7 +4395,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "cmp w8, #8\n\t" "b.ge L_aes_ctr_encrypt_arm64_crypto_256_start_8_%=\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_start_4_%=:\n\t" "cmp w8, #4\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" @@ -4532,7 +4538,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=\n\t" "ld1 {v24.16b, v25.16b}, [%x[in]], #32\n\t" @@ -4612,7 +4618,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_start_1_%=:\n\t" "cbz w8, L_aes_ctr_encrypt_arm64_crypto_256_done_%=\n\t" "ld1 {v24.16b}, [%x[in]], #16\n\t" "rev64 v16.16b, v16.16b\n\t" @@ -4649,7 +4655,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "adc x10, x10, xzr\n\t" "st1 {v24.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_done_%=:\n\t" "cbz %w[sz], L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" @@ -4688,7 +4694,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov w13, #16\n\t" "sub w13, w13, %w[sz]\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "ldrb w11, [%x[tmp]], #1\n\t" "ldrb w12, [%x[in]], #1\n\t" "eor w11, w11, w12\n\t" @@ -4697,12 +4703,12 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.gt L_aes_ctr_encrypt_arm64_crypto_256_start_byte_%=\n\t" "str w13, [%x[left]]\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_ctr_encrypt_arm64_crypto_done_%=\n\t" /* AES_CTR_128 */ "\n" - "L_aes_ctr_encrypt_arm64_crypto_start_128_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" "ld1 {v10.2d}, [%x[key]]\n\t" @@ -4717,7 +4723,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "adds x15, x9, #3\n\t" "adc x16, x10, xzr\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=:\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" "ld1 {v28.16b, v29.16b, v30.16b, v31.16b}, [%x[in]], #0x40\n\t" "mov v17.d[0], x12\n\t" @@ -4934,7 +4940,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "cmp w8, #8\n\t" "b.ge L_aes_ctr_encrypt_arm64_crypto_128_start_8_%=\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_start_4_%=:\n\t" "cmp w8, #4\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[in]], #0x40\n\t" @@ -5045,7 +5051,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=\n\t" "ld1 {v24.16b, v25.16b}, [%x[in]], #32\n\t" @@ -5109,7 +5115,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_start_1_%=:\n\t" "cbz w8, L_aes_ctr_encrypt_arm64_crypto_128_done_%=\n\t" "ld1 {v24.16b}, [%x[in]], #16\n\t" "rev64 v16.16b, v16.16b\n\t" @@ -5138,7 +5144,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "adc x10, x10, xzr\n\t" "st1 {v24.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_done_%=:\n\t" "cbz %w[sz], L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=\n\t" "mov v16.d[0], x10\n\t" "mov v16.d[1], x9\n\t" @@ -5169,7 +5175,7 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "mov w13, #16\n\t" "sub w13, w13, %w[sz]\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "ldrb w11, [%x[tmp]], #1\n\t" "ldrb w12, [%x[in]], #1\n\t" "eor w11, w11, w12\n\t" @@ -5178,10 +5184,10 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "b.gt L_aes_ctr_encrypt_arm64_crypto_128_start_byte_%=\n\t" "str w13, [%x[left]]\n\t" "\n" - "L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_ctr_encrypt_arm64_crypto_done_%=: \n\t" + "L_aes_ctr_encrypt_arm64_crypto_done_%=:\n\t" "rev x11, x10\n\t" "rev x12, x9\n\t" "stp x11, x12, [%x[reg]]\n\t" @@ -5242,7 +5248,7 @@ void AES_GCM_set_key_AARCH64(const byte* nonce, const byte* key, byte* gcm_h, "aesmc v0.16b, v0.16b\n\t" "aese v0.16b, v2.16b\n\t" "\n" - "L_aes_gcm_set_key_arm64_crypto_round_done_%=: \n\t" + "L_aes_gcm_set_key_arm64_crypto_round_done_%=:\n\t" "ld1 {v1.2d}, [%x[key]]\n\t" "eor v0.16b, v0.16b, v1.16b\n\t" "rbit v0.16b, v0.16b\n\t" @@ -5372,7 +5378,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_h_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" @@ -5381,7 +5387,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" @@ -5488,7 +5494,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #16\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -5548,7 +5554,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -5584,10 +5590,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.gt L_aes_gcm_encrypt_arm64_crypto_aad_start_2_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_aad_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" @@ -5610,7 +5616,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -5622,28 +5628,28 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -5665,7 +5671,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=\n\t" @@ -5677,12 +5683,12 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov w15, #1\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" @@ -5705,7 +5711,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_nonce_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -5717,28 +5723,28 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -5760,7 +5766,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" @@ -5785,7 +5791,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" @@ -5796,7 +5802,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -6057,7 +6063,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -6413,7 +6419,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -6510,7 +6516,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -6645,7 +6651,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -6819,7 +6825,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_192_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -6875,7 +6881,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -6969,7 +6975,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -7020,7 +7026,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" @@ -7032,28 +7038,28 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -7093,32 +7099,32 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=\n\t" @@ -7143,7 +7149,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -7197,7 +7203,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=\n\t" @@ -7205,38 +7211,38 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_start_256_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -7531,7 +7537,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -7921,7 +7927,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -8018,7 +8024,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -8170,7 +8176,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -8361,7 +8367,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_256_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -8417,7 +8423,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -8520,7 +8526,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -8577,7 +8583,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" @@ -8589,28 +8595,28 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -8656,32 +8662,32 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=\n\t" @@ -8706,7 +8712,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -8768,7 +8774,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=\n\t" @@ -8776,38 +8782,38 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_start_128_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -9034,7 +9040,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -9356,7 +9362,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -9453,7 +9459,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" @@ -9572,7 +9578,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -9730,7 +9736,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_128_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -9786,7 +9792,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -9872,7 +9878,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -9919,7 +9925,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" @@ -9931,28 +9937,28 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -9988,32 +9994,32 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=\n\t" @@ -10038,7 +10044,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -10088,7 +10094,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=\n\t" @@ -10096,31 +10102,31 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), @@ -10256,7 +10262,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_h_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" @@ -10265,7 +10271,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" @@ -10372,7 +10378,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -10432,7 +10438,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -10468,10 +10474,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.gt L_aes_gcm_decrypt_arm64_crypto_aad_start_2_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_aad_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" @@ -10494,7 +10500,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -10506,28 +10512,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -10549,7 +10555,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=\n\t" @@ -10561,12 +10567,12 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov w15, #1\n\t" "b L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" @@ -10589,7 +10595,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_nonce_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -10601,28 +10607,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -10644,7 +10650,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" @@ -10669,7 +10675,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" @@ -10680,7 +10686,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -10941,7 +10947,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -11297,7 +11303,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -11394,7 +11400,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -11529,7 +11535,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -11703,7 +11709,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_192_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -11759,7 +11765,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -11853,7 +11859,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -11905,7 +11911,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" @@ -11917,28 +11923,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -11996,30 +12002,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -12073,7 +12079,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" @@ -12083,28 +12089,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" @@ -12112,14 +12118,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" @@ -12132,12 +12138,12 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_start_256_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -12432,7 +12438,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -12822,7 +12828,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -12919,7 +12925,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -13071,7 +13077,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -13262,7 +13268,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_256_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -13318,7 +13324,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -13421,7 +13427,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -13478,7 +13484,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" @@ -13490,28 +13496,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -13575,30 +13581,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -13660,7 +13666,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" @@ -13670,28 +13676,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" @@ -13699,14 +13705,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" @@ -13719,12 +13725,12 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_start_128_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -13951,7 +13957,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -14273,7 +14279,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -14370,7 +14376,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" @@ -14489,7 +14495,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -14647,7 +14653,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_128_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -14703,7 +14709,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -14789,7 +14795,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -14836,7 +14842,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" @@ -14848,28 +14854,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -14923,30 +14929,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -14996,7 +15002,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" @@ -15006,28 +15012,28 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" @@ -15035,14 +15041,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" @@ -15053,7 +15059,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), @@ -15187,7 +15193,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" @@ -15196,7 +15202,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" @@ -15295,7 +15301,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #16\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -15351,7 +15357,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -15385,10 +15391,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.gt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" @@ -15410,7 +15416,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -15422,28 +15428,28 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -15464,7 +15470,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=\n\t" @@ -15476,12 +15482,12 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov w15, #1\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" @@ -15503,7 +15509,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_1_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -15515,28 +15521,28 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -15557,7 +15563,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" @@ -15581,7 +15587,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" @@ -15592,7 +15598,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -15853,7 +15859,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -16201,7 +16207,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -16290,7 +16296,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -16425,7 +16431,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -16595,7 +16601,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -16647,7 +16653,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -16739,7 +16745,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -16789,7 +16795,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" @@ -16801,28 +16807,28 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -16862,32 +16868,32 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=\n\t" @@ -16911,7 +16917,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -16964,7 +16970,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" @@ -16972,38 +16978,38 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -17298,7 +17304,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -17680,7 +17686,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -17769,7 +17775,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -17921,7 +17927,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -18108,7 +18114,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -18160,7 +18166,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -18261,7 +18267,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -18317,7 +18323,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" @@ -18329,28 +18335,28 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -18396,32 +18402,32 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=\n\t" @@ -18445,7 +18451,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -18506,7 +18512,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" @@ -18514,38 +18520,38 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -18772,7 +18778,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -19086,7 +19092,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -19175,7 +19181,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" @@ -19294,7 +19300,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -19448,7 +19454,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -19500,7 +19506,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_2_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -19584,7 +19590,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -19630,7 +19636,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" @@ -19642,28 +19648,28 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v16.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -19699,32 +19705,32 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=:\n\t" "mov x17, #16\n\t" "sub x17, x17, x14\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" "strb wzr, [x11], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=\n\t" @@ -19748,7 +19754,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -19797,7 +19803,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v26.16b}, [%x[tag]]\n\t" "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=:\n\t" "st1 {v26.16b}, [x11]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" @@ -19805,31 +19811,31 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" "ldr w16, [x11], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "ldrh w16, [x11], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" "ldrb w16, [x11], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), @@ -19962,7 +19968,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w14, w8, #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" @@ -19971,7 +19977,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_8_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" @@ -20070,7 +20076,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #16\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_4_%=:\n\t" "ld1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[aad]], #0x40\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -20126,7 +20132,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=:\n\t" "ld1 {v18.16b, v19.16b}, [%x[aad]], #32\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" @@ -20160,10 +20166,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.gt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_2_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=:\n\t" "ld1 {v18.16b}, [%x[aad]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" @@ -20185,7 +20191,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=:\n\t" "and w14, w8, #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -20197,28 +20203,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -20239,7 +20245,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=:\n\t" /* Load Nonce */ "cmp %w[nonceSz], #12\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=\n\t" @@ -20251,12 +20257,12 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov w15, #1\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_ghash_nonce_%=:\n\t" "eor v13.16b, v13.16b, v13.16b\n\t" "lsr w14, %w[nonceSz], #4\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=:\n\t" "ld1 {v18.16b}, [%x[nonce]], #16\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" @@ -20278,7 +20284,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "subs w14, w14, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_1_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_done_%=:\n\t" "and w24, %w[nonceSz], #15\n\t" "cbz x24, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" @@ -20290,28 +20296,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w20, w20, #8\n\t" "str x19, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" "str w19, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" "strh w19, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" "strb w19, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" "sub x11, x11, x24\n\t" "ld1 {v18.2d}, [x11]\n\t" "rbit v18.16b, v18.16b\n\t" @@ -20332,7 +20338,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v13.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" "lsl x24, %x[nonceSz], #3\n\t" "mov v28.d[0], x14\n\t" @@ -20356,7 +20362,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov w15, v13.s[3]\n\t" "rev w15, w15\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" "st1 {v13.2d}, [x12]\n\t" "lsr w14, %w[sz], #4\n\t" "cmp w13, #12\n\t" @@ -20367,7 +20373,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -20628,7 +20634,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -20976,7 +20982,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -21065,7 +21071,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -21200,7 +21206,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -21370,7 +21376,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -21422,7 +21428,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -21514,7 +21520,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -21565,7 +21571,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" @@ -21577,28 +21583,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -21655,30 +21661,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -21731,7 +21737,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" @@ -21741,28 +21747,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" @@ -21770,14 +21776,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" @@ -21790,12 +21796,12 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -22090,7 +22096,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -22472,7 +22478,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -22561,7 +22567,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" @@ -22713,7 +22719,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -22900,7 +22906,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -22952,7 +22958,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -23053,7 +23059,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -23109,7 +23115,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" @@ -23121,28 +23127,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -23205,30 +23211,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -23289,7 +23295,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" @@ -23299,28 +23305,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" @@ -23328,14 +23334,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" @@ -23348,12 +23354,12 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w14, #32\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -23580,7 +23586,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [x9]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -23894,7 +23900,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cmp w14, #8\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -23983,7 +23989,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" @@ -24102,7 +24108,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w19, w15, #2\n\t" @@ -24256,7 +24262,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -24308,7 +24314,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_2_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w20, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w15, w15, #2\n\t" @@ -24392,7 +24398,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -24438,7 +24444,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=:\n\t" "ands w14, %w[sz], #15\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" @@ -24450,28 +24456,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x19, x19, #8\n\t" "str x17, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" "sub x11, x11, x14\n\t" "ld1 {v15.2d}, [x11]\n\t" "add w15, w15, #1\n\t" @@ -24524,30 +24530,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" "ldr w17, [x11], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "ldrh w17, [x11], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" "ldrb w17, [x11], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" "ld1 {v14.2d}, [x12]\n\t" "lsl x8, x8, #3\n\t" "rbit x8, x8\n\t" @@ -24596,7 +24602,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v28.16b}, [%x[tag]]\n\t" "b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=:\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" "st1 {v28.2d}, [x11]\n\t" @@ -24606,28 +24612,28 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x17, x17, #8\n\t" "str x16, [x11], #8\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" "str w16, [x11], #4\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" "strh w16, [x11], #2\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" "strb w16, [x11], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=:\n\t" "sub x11, x11, %x[tagSz]\n\t" "ld1 {v28.2d}, [x11]\n\t" "mov x17, #16\n\t" @@ -24635,14 +24641,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "sub x17, x17, %x[tagSz]\n\t" "add x11, x11, %x[tagSz]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=:\n\t" "strb wzr, [x11], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=\n\t" "subs x11, x11, #16\n\t" "ld1 {v26.2d}, [x11]\n\t" "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" "mov x16, v28.d[0]\n\t" "mov x17, v28.d[1]\n\t" @@ -24653,7 +24659,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "and %x[in], %x[in], x19\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=:\n\t" "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), @@ -24690,12 +24696,12 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "mov w8, #1\n\t" "b L_aes_gcm_init_arm64_crypto_done_nonce_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_ghash_nonce_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_ghash_nonce_%=:\n\t" "eor v4.16b, v4.16b, v4.16b\n\t" "lsr w7, %w[nonceSz], #4\n\t" "cbz w7, L_aes_gcm_init_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_start_1_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_start_1_%=:\n\t" "ld1 {v0.16b}, [%x[nonce]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v0.16b\n\t" @@ -24718,7 +24724,7 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "subs w7, w7, #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_start_1_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_done_%=:\n\t" "and w13, %w[nonceSz], #15\n\t" "cbz x13, L_aes_gcm_init_arm64_crypto_partial_done_%=\n\t" "eor v7.16b, v7.16b, v7.16b\n\t" @@ -24730,28 +24736,28 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "sub w12, w12, #8\n\t" "str x11, [%x[initCtr]], #8\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_start_dw_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_start_dw_%=:\n\t" "cmp w12, #4\n\t" "b.lt L_aes_gcm_init_arm64_crypto_start_sw_%=\n\t" "ldr w11, [%x[nonce]], #4\n\t" "sub w12, w12, #4\n\t" "str w11, [%x[initCtr]], #4\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_start_sw_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_start_sw_%=:\n\t" "cmp w12, #2\n\t" "b.lt L_aes_gcm_init_arm64_crypto_start_byte_%=\n\t" "ldrh w11, [%x[nonce]], #2\n\t" "sub w12, w12, #2\n\t" "strh w11, [%x[initCtr]], #2\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_start_byte_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_start_byte_%=:\n\t" "cbz w12, L_aes_gcm_init_arm64_crypto_end_bytes_%=\n\t" "ldrb w11, [%x[nonce]], #1\n\t" "subs w12, w12, #1\n\t" "strb w11, [%x[initCtr]], #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_start_byte_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_end_bytes_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_end_bytes_%=:\n\t" "sub %x[initCtr], %x[initCtr], x13\n\t" "ld1 {v0.2d}, [%x[initCtr]]\n\t" "rbit v0.16b, v0.16b\n\t" @@ -24773,7 +24779,7 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "eor v4.16b, v7.16b, v9.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_init_arm64_crypto_partial_done_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" "lsl x13, %x[nonceSz], #3\n\t" "mov v7.d[0], x7\n\t" @@ -24798,7 +24804,7 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "mov w8, v4.s[3]\n\t" "rev w8, w8\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_done_nonce_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_done_nonce_%=:\n\t" "st1 {v4.2d}, [%x[counter]]\n\t" "ld1 {v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t" "aese v4.16b, v7.16b\n\t" @@ -24837,7 +24843,7 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_round_done_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_round_done_%=:\n\t" "ld1 {v7.2d}, [%x[key]]\n\t" "eor v4.16b, v4.16b, v7.16b\n\t" "st1 {v4.2d}, [%x[initCtr]]\n\t" @@ -24981,7 +24987,7 @@ void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, "eor v19.16b, v10.16b, v11.16b\n\t" /* Done */ "\n" - "L_aes_gcm_aad_update_arm64_crypto_h_done_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_h_done_%=:\n\t" "lsr %w[abytes], %w[abytes], #4\n\t" "cmp %w[abytes], #4\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t" @@ -24990,7 +24996,7 @@ void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, "cmp %w[abytes], #0x40\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_4_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_start_8_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" @@ -25097,7 +25103,7 @@ void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, "cmp %w[abytes], #16\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_start_4_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" @@ -25157,7 +25163,7 @@ void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, "b.lt L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "b.eq L_aes_gcm_aad_update_arm64_crypto_start_1_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_start_2_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_start_2_%=:\n\t" "ld1 {v0.16b, v1.16b}, [%x[aadt]], #32\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" @@ -25193,10 +25199,10 @@ void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, "b.gt L_aes_gcm_aad_update_arm64_crypto_start_2_%=\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_start_1_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_start_1_%=:\n\t" "cbz %w[abytes], L_aes_gcm_aad_update_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_both_1_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_both_1_%=:\n\t" "ld1 {v0.16b}, [%x[aadt]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v20.16b, v0.16b\n\t" @@ -25219,7 +25225,7 @@ void AES_GCM_aad_update_AARCH64(const byte* aadt, word32 abytes, byte* tag, "subs %w[abytes], %w[abytes], #1\n\t" "b.ne L_aes_gcm_aad_update_arm64_crypto_both_1_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_done_%=:\n\t" "st1 {v20.2d}, [%x[tag]]\n\t" : [abytes] "+r" (abytes), [tag] "+r" (tag), [gcm_h] "+r" (gcm_h) : [aadt] "r" (aadt) @@ -25278,7 +25284,7 @@ void AES_GCM_encrypt_block_AARCH64(const byte* key, int nr, byte* out, "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "\n" - "L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=: \n\t" + "L_aes_gcm_encrypt_block_arm64_crypto_round_done_%=:\n\t" "ld1 {v0.2d}, [%x[key]]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v5.16b\n\t" @@ -25392,7 +25398,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_start_128_%=\n\t" @@ -25402,7 +25408,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -25663,7 +25669,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -26019,7 +26025,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_192_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -26116,7 +26122,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -26251,7 +26257,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_both_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -26425,7 +26431,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_192_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_end_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -26481,7 +26487,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -26575,7 +26581,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -26626,17 +26632,17 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_start_256_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -26931,7 +26937,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_both_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -27321,7 +27327,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_256_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_end_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -27418,7 +27424,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -27570,7 +27576,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_both_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -27761,7 +27767,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_256_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_end_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -27817,7 +27823,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_encrypt_update_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -27920,7 +27926,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -27977,17 +27983,17 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_start_128_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -28214,7 +28220,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_both_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -28536,7 +28542,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_128_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_end_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -28633,7 +28639,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" @@ -28752,7 +28758,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_both_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -28910,7 +28916,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_128_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_end_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -28966,7 +28972,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_encrypt_update_arm64_crypto_128_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -29052,7 +29058,7 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -29099,10 +29105,10 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" @@ -29156,7 +29162,7 @@ void AES_GCM_encrypt_final_AARCH64(byte* tag, byte* authTag, word32 tbytes, "st1 {v5.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_encrypt_final_arm64_crypto_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_tag_partial_%=:\n\t" "st1 {v5.16b}, [%x[tag]]\n\t" "cmp %w[tbytes], #8\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_tag_start_dw_%=\n\t" @@ -29164,30 +29170,30 @@ void AES_GCM_encrypt_final_AARCH64(byte* tag, byte* authTag, word32 tbytes, "sub %w[tbytes], %w[tbytes], #8\n\t" "str x8, [%x[authTag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_dw_%=:\n\t" "cmp %w[tbytes], #4\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_tag_start_sw_%=\n\t" "ldr w8, [%x[tag]], #4\n\t" "sub %w[tbytes], %w[tbytes], #4\n\t" "str w8, [%x[authTag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_sw_%=:\n\t" "cmp %w[tbytes], #2\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "ldrh w8, [%x[tag]], #2\n\t" "sub %w[tbytes], %w[tbytes], #2\n\t" "strh w8, [%x[authTag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=:\n\t" "cbz %w[tbytes], L_aes_gcm_encrypt_final_arm64_crypto_tag_end_bytes_%=\n\t" "ldrb w8, [%x[tag]], #1\n\t" "subs %w[tbytes], %w[tbytes], #1\n\t" "strb w8, [%x[authTag]], #1\n\t" "b.ne L_aes_gcm_encrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_tag_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_done_%=:\n\t" : [tag] "+r" (tag), [authTag] "+r" (authTag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr) @@ -29300,7 +29306,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_h_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_start_128_%=\n\t" @@ -29310,7 +29316,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_start_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -29571,7 +29577,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_both_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -29927,7 +29933,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_192_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_end_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -30024,7 +30030,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -30159,7 +30165,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_both_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -30333,7 +30339,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_192_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_end_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -30389,7 +30395,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_decrypt_update_arm64_crypto_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -30483,7 +30489,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -30535,17 +30541,17 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_decrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_start_256_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_start_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -30840,7 +30846,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_both_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -31230,7 +31236,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_256_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_end_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -31327,7 +31333,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -31479,7 +31485,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_both_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -31670,7 +31676,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_256_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_end_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -31726,7 +31732,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_decrypt_update_arm64_crypto_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -31829,7 +31835,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -31886,17 +31892,17 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_decrypt_update_arm64_crypto_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_start_128_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_start_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -32123,7 +32129,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_both_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -32445,7 +32451,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_128_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_end_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -32542,7 +32548,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" @@ -32661,7 +32667,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_both_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -32819,7 +32825,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_128_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_end_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -32875,7 +32881,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_decrypt_update_arm64_crypto_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -32961,7 +32967,7 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -33008,10 +33014,10 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" @@ -33068,7 +33074,7 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "ld1 {v0.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_part_tag_%=:\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" @@ -33078,28 +33084,28 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "sub x10, x10, #8\n\t" "str x9, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_dw_%=:\n\t" "cmp x10, #4\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_tag_start_sw_%=\n\t" "ldr w9, [%x[authTag]], #4\n\t" "sub x10, x10, #4\n\t" "str w9, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_sw_%=:\n\t" "cmp x10, #2\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "ldrh w9, [%x[authTag]], #2\n\t" "sub x10, x10, #2\n\t" "strh w9, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=:\n\t" "cbz x10, L_aes_gcm_decrypt_final_arm64_crypto_tag_end_bytes_%=\n\t" "ldrb w9, [%x[authTag]], #1\n\t" "subs x10, x10, #1\n\t" "strb w9, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_tag_end_bytes_%=:\n\t" "sub %x[tag], %x[tag], %x[tbytes]\n\t" "ld1 {v0.2d}, [%x[tag]]\n\t" "mov x10, #16\n\t" @@ -33107,14 +33113,14 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "sub x10, x10, %x[tbytes]\n\t" "add %x[tag], %x[tag], %x[tbytes]\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_calc_tag_byte_%=:\n\t" "strb wzr, [%x[tag]], #1\n\t" "subs x10, x10, #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_calc_tag_byte_%=\n\t" "subs %x[tag], %x[tag], #16\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=:\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "mov x9, v0.d[0]\n\t" "mov x10, v0.d[1]\n\t" @@ -33154,12 +33160,12 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "mov w8, #1\n\t" "b L_aes_gcm_init_arm64_crypto_eor3_done_nonce_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_ghash_nonce_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_ghash_nonce_%=:\n\t" "eor v4.16b, v4.16b, v4.16b\n\t" "lsr w7, %w[nonceSz], #4\n\t" "cbz w7, L_aes_gcm_init_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_start_1_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_start_1_%=:\n\t" "ld1 {v0.16b}, [%x[nonce]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v4.16b, v0.16b\n\t" @@ -33181,7 +33187,7 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "subs w7, w7, #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_eor3_start_1_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_done_%=:\n\t" "and w13, %w[nonceSz], #15\n\t" "cbz x13, L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=\n\t" "eor v7.16b, v7.16b, v7.16b\n\t" @@ -33193,28 +33199,28 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "sub w12, w12, #8\n\t" "str x11, [%x[initCtr]], #8\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_start_dw_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_start_dw_%=:\n\t" "cmp w12, #4\n\t" "b.lt L_aes_gcm_init_arm64_crypto_eor3_start_sw_%=\n\t" "ldr w11, [%x[nonce]], #4\n\t" "sub w12, w12, #4\n\t" "str w11, [%x[initCtr]], #4\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_start_sw_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_start_sw_%=:\n\t" "cmp w12, #2\n\t" "b.lt L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=\n\t" "ldrh w11, [%x[nonce]], #2\n\t" "sub w12, w12, #2\n\t" "strh w11, [%x[initCtr]], #2\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=:\n\t" "cbz w12, L_aes_gcm_init_arm64_crypto_eor3_end_bytes_%=\n\t" "ldrb w11, [%x[nonce]], #1\n\t" "subs w12, w12, #1\n\t" "strb w11, [%x[initCtr]], #1\n\t" "b.ne L_aes_gcm_init_arm64_crypto_eor3_start_byte_%=\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_end_bytes_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_end_bytes_%=:\n\t" "sub %x[initCtr], %x[initCtr], x13\n\t" "ld1 {v0.2d}, [%x[initCtr]]\n\t" "rbit v0.16b, v0.16b\n\t" @@ -33235,7 +33241,7 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "eor v4.16b, v7.16b, v9.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" "lsl x13, %x[nonceSz], #3\n\t" "mov v7.d[0], x7\n\t" @@ -33259,7 +33265,7 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "mov w8, v4.s[3]\n\t" "rev w8, w8\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_done_nonce_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_done_nonce_%=:\n\t" "st1 {v4.2d}, [%x[counter]]\n\t" "ld1 {v7.2d, v8.2d, v9.2d, v10.2d}, [%x[key]], #0x40\n\t" "aese v4.16b, v7.16b\n\t" @@ -33298,7 +33304,7 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "aesmc v4.16b, v4.16b\n\t" "aese v4.16b, v8.16b\n\t" "\n" - "L_aes_gcm_init_arm64_crypto_eor3_round_done_%=: \n\t" + "L_aes_gcm_init_arm64_crypto_eor3_round_done_%=:\n\t" "ld1 {v7.2d}, [%x[key]]\n\t" "eor v4.16b, v4.16b, v7.16b\n\t" "st1 {v4.2d}, [%x[initCtr]]\n\t" @@ -33438,7 +33444,7 @@ void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, "eor v19.16b, v10.16b, v11.16b\n\t" /* Done */ "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_h_done_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_h_done_%=:\n\t" "lsr %w[abytes], %w[abytes], #4\n\t" "cmp %w[abytes], #4\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=\n\t" @@ -33447,7 +33453,7 @@ void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, "cmp %w[abytes], #0x40\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_4_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_start_8_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_start_8_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" @@ -33546,7 +33552,7 @@ void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, "cmp %w[abytes], #16\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_start_4_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[aadt]], #0x40\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" @@ -33602,7 +33608,7 @@ void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "b.eq L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=:\n\t" "ld1 {v0.16b, v1.16b}, [%x[aadt]], #32\n\t" "rbit v0.16b, v0.16b\n\t" "rbit v1.16b, v1.16b\n\t" @@ -33636,10 +33642,10 @@ void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, "b.gt L_aes_gcm_aad_update_arm64_crypto_eor3_start_2_%=\n\t" "b.lt L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_start_1_%=:\n\t" "cbz %w[abytes], L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_both_1_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_both_1_%=:\n\t" "ld1 {v0.16b}, [%x[aadt]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v3.16b, v20.16b, v0.16b\n\t" @@ -33661,7 +33667,7 @@ void AES_GCM_aad_update_AARCH64_EOR3(const byte* aadt, word32 abytes, byte* tag, "subs %w[abytes], %w[abytes], #1\n\t" "b.ne L_aes_gcm_aad_update_arm64_crypto_eor3_both_1_%=\n\t" "\n" - "L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_aad_update_arm64_crypto_eor3_done_%=:\n\t" "st1 {v20.2d}, [%x[tag]]\n\t" : [abytes] "+r" (abytes), [tag] "+r" (tag), [gcm_h] "+r" (gcm_h) : [aadt] "r" (aadt) @@ -33720,7 +33726,7 @@ void AES_GCM_encrypt_block_AARCH64_EOR3(const byte* key, int nr, byte* out, "aesmc v5.16b, v5.16b\n\t" "aese v5.16b, v1.16b\n\t" "\n" - "L_aes_gcm_encrypt_block_arm64_crypto_eor3_round_done_%=: \n\t" + "L_aes_gcm_encrypt_block_arm64_crypto_eor3_round_done_%=:\n\t" "ld1 {v0.2d}, [%x[key]]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" "eor v4.16b, v4.16b, v5.16b\n\t" @@ -33831,7 +33837,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_h_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_128_%=\n\t" @@ -33841,7 +33847,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -34102,7 +34108,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -34450,7 +34456,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -34539,7 +34545,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -34674,7 +34680,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -34844,7 +34850,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -34896,7 +34902,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_2_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -34988,7 +34994,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_1_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -35038,17 +35044,17 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_256_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -35343,7 +35349,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -35725,7 +35731,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -35814,7 +35820,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -35966,7 +35972,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -36153,7 +36159,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -36205,7 +36211,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_2_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -36306,7 +36312,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_1_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -36362,17 +36368,17 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_128_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -36599,7 +36605,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -36913,7 +36919,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_8_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -37002,7 +37008,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" @@ -37121,7 +37127,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -37275,7 +37281,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_4_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -37327,7 +37333,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_2_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -37411,7 +37417,7 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_1_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -37457,10 +37463,10 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_encrypt_update_arm64_crypto_eor3_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" @@ -37513,7 +37519,7 @@ void AES_GCM_encrypt_final_AARCH64_EOR3(byte* tag, byte* authTag, word32 tbytes, "st1 {v5.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_encrypt_final_arm64_crypto_eor3_done_%=\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_partial_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_partial_%=:\n\t" "st1 {v5.16b}, [%x[tag]]\n\t" "cmp %w[tbytes], #8\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_dw_%=\n\t" @@ -37521,30 +37527,30 @@ void AES_GCM_encrypt_final_AARCH64_EOR3(byte* tag, byte* authTag, word32 tbytes, "sub %w[tbytes], %w[tbytes], #8\n\t" "str x8, [%x[authTag]], #8\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_dw_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_dw_%=:\n\t" "cmp %w[tbytes], #4\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_sw_%=\n\t" "ldr w8, [%x[tag]], #4\n\t" "sub %w[tbytes], %w[tbytes], #4\n\t" "str w8, [%x[authTag]], #4\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_sw_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_sw_%=:\n\t" "cmp %w[tbytes], #2\n\t" "b.lt L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "ldrh w8, [%x[tag]], #2\n\t" "sub %w[tbytes], %w[tbytes], #2\n\t" "strh w8, [%x[authTag]], #2\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=:\n\t" "cbz %w[tbytes], L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_end_bytes_%=\n\t" "ldrb w8, [%x[tag]], #1\n\t" "subs %w[tbytes], %w[tbytes], #1\n\t" "strb w8, [%x[authTag]], #1\n\t" "b.ne L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_end_bytes_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_eor3_tag_end_bytes_%=:\n\t" "\n" - "L_aes_gcm_encrypt_final_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_encrypt_final_arm64_crypto_eor3_done_%=:\n\t" : [tag] "+r" (tag), [authTag] "+r" (authTag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr) @@ -37654,7 +37660,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v7.16b, v30.16b, v31.16b\n\t" /* Done */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_h_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_h_done_%=:\n\t" "lsr w8, %w[nbytes], #4\n\t" "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_128_%=\n\t" @@ -37664,7 +37670,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -37925,7 +37931,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -38273,7 +38279,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -38362,7 +38368,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -38497,7 +38503,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -38667,7 +38673,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -38719,7 +38725,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_2_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -38811,7 +38817,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_1_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_start_1_%=:\n\t" "ld1 {v15.16b}, [%x[in]], #16\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -38862,17 +38868,17 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v14.16b, v14.16b, v15.16b\n\t" "st1 {v14.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_192_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_256 */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_256_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_256_%=:\n\t" #ifndef NO_AES_256 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -39167,7 +39173,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -39549,7 +39555,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -39638,7 +39644,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" @@ -39790,7 +39796,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -39977,7 +39983,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -40029,7 +40035,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_2_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -40130,7 +40136,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_1_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -40186,17 +40192,17 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_256_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=\n\t" /* AES_GCM_128 */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_128_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_start_128_%=:\n\t" #ifndef NO_AES_128 "cmp w8, #32\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -40423,7 +40429,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_8_%=:\n\t" "ldr q12, [%x[key]]\n\t" "add w17, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" @@ -40737,7 +40743,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "cmp w8, #8\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_8_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_8_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_8_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -40826,7 +40832,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_4_%=:\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" @@ -40945,7 +40951,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_4_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w12, w9, #2\n\t" @@ -41099,7 +41105,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "b.ge L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_both_4_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_4_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_end_4_%=:\n\t" "rbit v18.16b, v18.16b\n\t" "rbit v19.16b, v19.16b\n\t" "rbit v20.16b, v20.16b\n\t" @@ -41151,7 +41157,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "b.eq L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_1_%=\n\t" "b.lt L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_2_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_2_%=:\n\t" "add w13, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w9, w9, #2\n\t" @@ -41235,7 +41241,7 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, /* Done GHASH */ "cbz w8, L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=\n\t" "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_1_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_start_1_%=:\n\t" "add w9, w9, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w10, w9\n\t" @@ -41281,10 +41287,10 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_128_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=: \n\t" + "L_aes_gcm_decrypt_update_arm64_crypto_eor3_done_%=:\n\t" "rev w9, w9\n\t" "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" @@ -41340,7 +41346,7 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "ld1 {v0.16b}, [%x[authTag]]\n\t" "b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag_%=:\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" @@ -41350,28 +41356,28 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "sub x10, x10, #8\n\t" "str x9, [%x[tag]], #8\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_dw_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_dw_%=:\n\t" "cmp x10, #4\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_sw_%=\n\t" "ldr w9, [%x[authTag]], #4\n\t" "sub x10, x10, #4\n\t" "str w9, [%x[tag]], #4\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_sw_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_sw_%=:\n\t" "cmp x10, #2\n\t" "b.lt L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "ldrh w9, [%x[authTag]], #2\n\t" "sub x10, x10, #2\n\t" "strh w9, [%x[tag]], #2\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=:\n\t" "cbz x10, L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_end_bytes_%=\n\t" "ldrb w9, [%x[authTag]], #1\n\t" "subs x10, x10, #1\n\t" "strb w9, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_start_byte_%=\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_end_bytes_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_end_bytes_%=:\n\t" "sub %x[tag], %x[tag], %x[tbytes]\n\t" "ld1 {v0.2d}, [%x[tag]]\n\t" "mov x10, #16\n\t" @@ -41379,14 +41385,14 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "sub x10, x10, %x[tbytes]\n\t" "add %x[tag], %x[tag], %x[tbytes]\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_calc_tag_byte_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_calc_tag_byte_%=:\n\t" "strb wzr, [%x[tag]], #1\n\t" "subs x10, x10, #1\n\t" "b.ne L_aes_gcm_decrypt_final_arm64_crypto_eor3_calc_tag_byte_%=\n\t" "subs %x[tag], %x[tag], #16\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "\n" - "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=: \n\t" + "L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=:\n\t" "eor v0.16b, v0.16b, v5.16b\n\t" "mov x9, v0.d[0]\n\t" "mov x10, v0.d[1]\n\t" @@ -41472,7 +41478,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_192_start_2_%=\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" @@ -41603,7 +41609,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.ge L_aes_xts_encrypt_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_192_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -41672,7 +41678,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_192_start_1_%=:\n\t" "cbz w8, L_aes_xts_encrypt_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -41708,14 +41714,14 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_192_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_192_done_%=:\n\t" "cbz %w[sz], L_aes_xts_encrypt_arm64_crypto_192_partial_done_%=\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w9, %w[sz]\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_192_start_byte_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" @@ -41754,12 +41760,12 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_192_partial_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_xts_encrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_256 */ "\n" - "L_aes_xts_encrypt_arm64_crypto_start_256_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key2]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key2]], #32\n\t" @@ -41811,7 +41817,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_256_start_2_%=\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" @@ -41958,7 +41964,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.ge L_aes_xts_encrypt_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_256_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -42035,7 +42041,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_256_start_1_%=:\n\t" "cbz w8, L_aes_xts_encrypt_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -42075,14 +42081,14 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_256_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_256_done_%=:\n\t" "cbz %w[sz], L_aes_xts_encrypt_arm64_crypto_256_partial_done_%=\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w9, %w[sz]\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_256_start_byte_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" @@ -42125,12 +42131,12 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_256_partial_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_xts_encrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_128 */ "\n" - "L_aes_xts_encrypt_arm64_crypto_start_128_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v24.2d, v25.2d}, [%x[key2]], #32\n\t" "ld1 {v26.2d}, [%x[key2]]\n\t" @@ -42172,7 +42178,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_128_start_2_%=\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" @@ -42287,7 +42293,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.ge L_aes_xts_encrypt_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_128_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_encrypt_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -42348,7 +42354,7 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_128_start_1_%=:\n\t" "cbz w8, L_aes_xts_encrypt_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -42380,14 +42386,14 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_128_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_128_done_%=:\n\t" "cbz %w[sz], L_aes_xts_encrypt_arm64_crypto_128_partial_done_%=\n\t" "sub %x[out], %x[out], #16\n\t" "ld1 {v0.16b}, [%x[out]], #16\n\t" "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w9, %w[sz]\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_128_start_byte_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" @@ -42422,10 +42428,10 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_aes_xts_encrypt_arm64_crypto_128_partial_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_xts_encrypt_arm64_crypto_done_%=: \n\t" + "L_aes_xts_encrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) @@ -42501,7 +42507,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_192_start_2_%=\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_192_start_4_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_192_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" @@ -42632,7 +42638,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.ge L_aes_xts_decrypt_arm64_crypto_192_start_4_%=\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_192_start_2_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_192_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_192_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -42701,7 +42707,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_192_start_1_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_192_start_1_%=:\n\t" "cbz w8, L_aes_xts_decrypt_arm64_crypto_192_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -42737,7 +42743,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_192_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_192_done_%=:\n\t" "cbz %w[sz], L_aes_xts_decrypt_arm64_crypto_192_partial_done_%=\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" @@ -42775,7 +42781,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "add %x[out], %x[out], #16\n\t" "mov w9, %w[sz]\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_192_start_byte_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_192_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" @@ -42814,12 +42820,12 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_192_partial_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_192_partial_done_%=:\n\t" #endif /* !NO_AES_192 */ "b L_aes_xts_decrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_256 */ "\n" - "L_aes_xts_decrypt_arm64_crypto_start_256_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_start_256_%=:\n\t" #ifndef NO_AES_256 "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[key2]], #0x40\n\t" "ld1 {v28.2d, v29.2d}, [%x[key2]], #32\n\t" @@ -42871,7 +42877,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_256_start_2_%=\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_256_start_4_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_256_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" @@ -43018,7 +43024,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.ge L_aes_xts_decrypt_arm64_crypto_256_start_4_%=\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_256_start_2_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_256_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_256_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -43095,7 +43101,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_256_start_1_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_256_start_1_%=:\n\t" "cbz w8, L_aes_xts_decrypt_arm64_crypto_256_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -43135,7 +43141,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_256_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_256_done_%=:\n\t" "cbz %w[sz], L_aes_xts_decrypt_arm64_crypto_256_partial_done_%=\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" @@ -43177,7 +43183,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "add %x[out], %x[out], #16\n\t" "mov w9, %w[sz]\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_256_start_byte_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_256_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" @@ -43220,12 +43226,12 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_256_partial_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_256_partial_done_%=:\n\t" #endif /* !NO_AES_256 */ "b L_aes_xts_decrypt_arm64_crypto_done_%=\n\t" /* AES_XTS_128 */ "\n" - "L_aes_xts_decrypt_arm64_crypto_start_128_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_start_128_%=:\n\t" #ifndef NO_AES_128 "ld1 {v24.2d, v25.2d}, [%x[key2]], #32\n\t" "ld1 {v26.2d}, [%x[key2]]\n\t" @@ -43267,7 +43273,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_128_start_2_%=\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_128_start_4_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_128_start_4_%=:\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "mov v5.d[0], x12\n\t" "mov v5.d[1], x13\n\t" @@ -43382,7 +43388,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cmp w8, #4\n\t" "b.ge L_aes_xts_decrypt_arm64_crypto_128_start_4_%=\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_128_start_2_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_128_start_2_%=:\n\t" "cmp w8, #2\n\t" "b.lt L_aes_xts_decrypt_arm64_crypto_128_start_1_%=\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" @@ -43443,7 +43449,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "sub w8, w8, #2\n\t" "st1 {v0.16b, v1.16b}, [%x[out]], #32\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_128_start_1_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_128_start_1_%=:\n\t" "cbz w8, L_aes_xts_decrypt_arm64_crypto_128_done_%=\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" "eor v0.16b, v0.16b, v4.16b\n\t" @@ -43475,7 +43481,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v4.d[1], x11\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_128_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_128_done_%=:\n\t" "cbz %w[sz], L_aes_xts_decrypt_arm64_crypto_128_partial_done_%=\n\t" "and x9, x19, x11, asr 63\n\t" "extr x13, x11, x10, #63\n\t" @@ -43509,7 +43515,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "add %x[out], %x[out], #16\n\t" "mov w9, %w[sz]\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_128_start_byte_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_128_start_byte_%=:\n\t" "ldrb w12, [%x[tmp]]\n\t" "ldrb w13, [%x[in]], #1\n\t" "strb w12, [%x[out]], #1\n\t" @@ -43544,10 +43550,10 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_aes_xts_decrypt_arm64_crypto_128_partial_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_128_partial_done_%=:\n\t" #endif /* !NO_AES_128 */ "\n" - "L_aes_xts_decrypt_arm64_crypto_done_%=: \n\t" + "L_aes_xts_decrypt_arm64_crypto_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) @@ -43566,7 +43572,7 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) -static const word8 L_AES_ARM64_NEON_te[] = { +XALIGNED(4) static const word8 L_AES_ARM64_NEON_te[] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, @@ -43601,7 +43607,7 @@ static const word8 L_AES_ARM64_NEON_te[] = { 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, }; -static const word8 L_AES_ARM64_NEON_shift_rows_shuffle[] = { +XALIGNED(4) static const word8 L_AES_ARM64_NEON_shift_rows_shuffle[] = { 0x0c, 0x09, 0x06, 0x03, 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b, 0x08, 0x05, 0x02, 0x0f, }; @@ -43617,7 +43623,7 @@ void AES_invert_key_NEON(unsigned char* ks, word32 rounds) "mov x2, %x[ks]\n\t" "mov w4, %w[rounds]\n\t" "\n" - "L_AES_invert_key_NEON_loop_%=: \n\t" + "L_AES_invert_key_NEON_loop_%=:\n\t" "ld1 {v0.2d}, [x2]\n\t" "ld1 {v1.2d}, [x3]\n\t" "st1 {v0.2d}, [x3]\n\t" @@ -43629,7 +43635,7 @@ void AES_invert_key_NEON(unsigned char* ks, word32 rounds) "add x2, %x[ks], #16\n\t" "sub w4, %w[rounds], #1\n\t" "\n" - "L_AES_invert_key_NEON_mix_loop_%=: \n\t" + "L_AES_invert_key_NEON_mix_loop_%=:\n\t" "ld1 {v0.2d}, [x2]\n\t" "sshr v5.16b, v0.16b, #7\n\t" "ushr v6.16b, v0.16b, #6\n\t" @@ -43667,10 +43673,10 @@ void AES_invert_key_NEON(unsigned char* ks, word32 rounds) } #endif /* HAVE_AES_DECRYPT */ -static const word32 L_AES_ARM64_NEON_rcon[] = { +XALIGNED(8) static const word32 L_AES_ARM64_NEON_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, + 0x1b000000, 0x36000000 }; void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, @@ -43702,7 +43708,7 @@ void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, "st1 {v1.2d}, [%x[ks]], #16\n\t" "mov x3, #6\n\t" "\n" - "L_AES_set_encrypt_key_NEON_loop_256_%=: \n\t" + "L_AES_set_encrypt_key_NEON_loop_256_%=:\n\t" "eor v22.16b, v1.16b, v2.16b\n\t" "eor v23.16b, v1.16b, v3.16b\n\t" "eor v24.16b, v1.16b, v4.16b\n\t" @@ -43781,7 +43787,7 @@ void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, "st1 {v0.2d}, [%x[ks]], #16\n\t" "b L_AES_set_encrypt_key_NEON_end_%=\n\t" "\n" - "L_AES_set_encrypt_key_NEON_start_192_%=: \n\t" + "L_AES_set_encrypt_key_NEON_start_192_%=:\n\t" "ld1 {v0.16b}, [%x[key]], #16\n\t" "ld1 {v1.8b}, [%x[key]]\n\t" "rev32 v0.16b, v0.16b\n\t" @@ -43791,7 +43797,7 @@ void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, "ext v1.16b, v1.16b, v1.16b, #8\n\t" "mov x3, #7\n\t" "\n" - "L_AES_set_encrypt_key_NEON_loop_192_%=: \n\t" + "L_AES_set_encrypt_key_NEON_loop_192_%=:\n\t" "eor v22.16b, v1.16b, v2.16b\n\t" "eor v23.16b, v1.16b, v3.16b\n\t" "eor v24.16b, v1.16b, v4.16b\n\t" @@ -43855,13 +43861,13 @@ void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, "st1 {v0.2d}, [%x[ks]], #16\n\t" "b L_AES_set_encrypt_key_NEON_end_%=\n\t" "\n" - "L_AES_set_encrypt_key_NEON_start_128_%=: \n\t" + "L_AES_set_encrypt_key_NEON_start_128_%=:\n\t" "ld1 {v0.16b}, [%x[key]]\n\t" "rev32 v0.16b, v0.16b\n\t" "st1 {v0.2d}, [%x[ks]], #16\n\t" "mov x3, #10\n\t" "\n" - "L_AES_set_encrypt_key_NEON_loop_128_%=: \n\t" + "L_AES_set_encrypt_key_NEON_loop_128_%=:\n\t" "eor v22.16b, v0.16b, v2.16b\n\t" "eor v23.16b, v0.16b, v3.16b\n\t" "eor v24.16b, v0.16b, v4.16b\n\t" @@ -43891,7 +43897,7 @@ void AES_set_encrypt_key_NEON(const unsigned char* key, word32 len, "subs x3, x3, #1\n\t" "b.ne L_AES_set_encrypt_key_NEON_loop_128_%=\n\t" "\n" - "L_AES_set_encrypt_key_NEON_end_%=: \n\t" + "L_AES_set_encrypt_key_NEON_end_%=:\n\t" : [len] "+r" (len), [ks] "+r" (ks) : [key] "r" (key), [rcon] "r" (rcon), [te] "r" (te) : "memory", "cc", "x3", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", @@ -43918,7 +43924,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.lt L_AES_ECB_encrypt_NEON_start_2_%=\n\t" "\n" - "L_AES_ECB_encrypt_NEON_loop_4_%=: \n\t" + "L_AES_ECB_encrypt_NEON_loop_4_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.2d}, [x8], #16\n\t" @@ -43933,7 +43939,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v3.16b, v3.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" - "L_AES_ECB_encrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_ECB_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" @@ -44324,7 +44330,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.ge L_AES_ECB_encrypt_NEON_loop_4_%=\n\t" "\n" - "L_AES_ECB_encrypt_NEON_start_2_%=: \n\t" + "L_AES_ECB_encrypt_NEON_start_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -44333,7 +44339,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "b.eq L_AES_ECB_encrypt_NEON_start_1_%=\n\t" "b.lt L_AES_ECB_encrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_ECB_encrypt_NEON_loop_2_%=: \n\t" + "L_AES_ECB_encrypt_NEON_loop_2_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "ld1 {v4.2d}, [x8], #16\n\t" @@ -44344,7 +44350,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v1.16b, v1.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" - "L_AES_ECB_encrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_ECB_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" @@ -44544,7 +44550,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0\n\t" "b.eq L_AES_ECB_encrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_ECB_encrypt_NEON_start_1_%=: \n\t" + "L_AES_ECB_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b}, [%x[in]], #16\n\t" @@ -44554,7 +44560,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" - "L_AES_ECB_encrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_ECB_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -44655,7 +44661,7 @@ void AES_ECB_encrypt_NEON(const unsigned char* in, unsigned char* out, "rev32 v0.16b, v0.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_AES_ECB_encrypt_NEON_data_done_%=: \n\t" + "L_AES_ECB_encrypt_NEON_data_done_%=:\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr) : [in] "r" (in), [ks] "r" (ks), [te] "r" (te), [shuffle] "r" (shuffle) : "memory", "cc", "x7", "x8", "v0", "v1", "v2", "v3", "v4", "v5", "v6", @@ -44687,7 +44693,7 @@ void AES_CBC_encrypt_NEON(const unsigned char* in, unsigned char* out, "ld1 {v0.2d}, [%x[iv]]\n\t" "ld1 {v26.2d}, [%[shuffle]]\n\t" "\n" - "L_AES_CBC_encrypt_NEON_loop_block_%=: \n\t" + "L_AES_CBC_encrypt_NEON_loop_block_%=:\n\t" "add x9, %x[ks], #16\n\t" "ld1 {v1.16b}, [%x[in]], #16\n\t" "ld1 {v2.16b}, [%x[ks]]\n\t" @@ -44697,7 +44703,7 @@ void AES_CBC_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v0.16b, v0.16b, v2.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CBC_encrypt_NEON_loop_nr_%=: \n\t" + "L_AES_CBC_encrypt_NEON_loop_nr_%=:\n\t" "eor v2.16b, v0.16b, v6.16b\n\t" "eor v3.16b, v0.16b, v7.16b\n\t" "eor v4.16b, v0.16b, v8.16b\n\t" @@ -44831,7 +44837,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.lt L_AES_CTR_encrypt_NEON_start_2_%=\n\t" "\n" - "L_AES_CTR_encrypt_NEON_loop_4_%=: \n\t" + "L_AES_CTR_encrypt_NEON_loop_4_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.2d}, [x9], #16\n\t" "mov v8.d[1], x10\n\t" @@ -44869,7 +44875,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "rev32 v8.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CTR_encrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_CTR_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" @@ -45269,7 +45275,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "rev64 v2.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "\n" - "L_AES_CTR_encrypt_NEON_start_2_%=: \n\t" + "L_AES_CTR_encrypt_NEON_start_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -45278,7 +45284,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "b.eq L_AES_CTR_encrypt_NEON_start_1_%=\n\t" "b.lt L_AES_CTR_encrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_CTR_encrypt_NEON_loop_2_%=: \n\t" + "L_AES_CTR_encrypt_NEON_loop_2_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.2d}, [x9], #16\n\t" /* Round: 0 - XOR in key schedule */ @@ -45298,7 +45304,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "rev32 v2.16b, v2.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CTR_encrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_CTR_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" @@ -45501,7 +45507,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0\n\t" "b.eq L_AES_CTR_encrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_CTR_encrypt_NEON_start_1_%=: \n\t" + "L_AES_CTR_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.2d}, [x9], #16\n\t" @@ -45509,7 +45515,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v0.16b, v2.16b, v4.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CTR_encrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_CTR_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -45618,7 +45624,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, "rev64 v2.16b, v2.16b\n\t" "rev32 v2.16b, v2.16b\n\t" "\n" - "L_AES_CTR_encrypt_NEON_data_done_%=: \n\t" + "L_AES_CTR_encrypt_NEON_data_done_%=:\n\t" "rev32 v2.16b, v2.16b\n\t" "st1 {v2.2d}, [%x[ctr]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [ctr] "+r" (ctr) @@ -45634,7 +45640,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out, #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) -static const word8 L_AES_ARM64_NEON_td[] = { +XALIGNED(4) static const word8 L_AES_ARM64_NEON_td[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, @@ -45669,12 +45675,13 @@ static const word8 L_AES_ARM64_NEON_td[] = { 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, }; -static const word8 L_AES_ARM64_NEON_shift_rows_invshuffle[] = { +XALIGNED(4) static const word8 L_AES_ARM64_NEON_shift_rows_invshuffle[] = { 0x04, 0x09, 0x0e, 0x03, 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b, 0x00, 0x05, 0x0a, 0x0f, }; -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_ECB) +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_ECB) void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, @@ -45690,7 +45697,7 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.lt L_AES_ECB_decrypt_NEON_start_2_%=\n\t" "\n" - "L_AES_ECB_decrypt_NEON_loop_4_%=: \n\t" + "L_AES_ECB_decrypt_NEON_loop_4_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.2d}, [x8], #16\n\t" @@ -45705,7 +45712,7 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v3.16b, v3.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" - "L_AES_ECB_decrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_ECB_decrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" @@ -46243,12 +46250,12 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.ge L_AES_ECB_decrypt_NEON_loop_4_%=\n\t" "\n" - "L_AES_ECB_decrypt_NEON_start_2_%=: \n\t" + "L_AES_ECB_decrypt_NEON_start_2_%=:\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_ECB_decrypt_NEON_start_1_%=\n\t" "b.lt L_AES_ECB_decrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_ECB_decrypt_NEON_loop_2_%=: \n\t" + "L_AES_ECB_decrypt_NEON_loop_2_%=:\n\t" "mov x8, %x[ks]\n\t" "ld1 {v0.16b, v1.16b}, [%x[in]], #32\n\t" "ld1 {v4.2d}, [x8], #16\n\t" @@ -46259,7 +46266,7 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v1.16b, v1.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" - "L_AES_ECB_decrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_ECB_decrypt_NEON_loop_nr_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -46546,7 +46553,7 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0\n\t" "b.eq L_AES_ECB_decrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_ECB_decrypt_NEON_start_1_%=: \n\t" + "L_AES_ECB_decrypt_NEON_start_1_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -46560,7 +46567,7 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w7, %w[nr], #2\n\t" "\n" - "L_AES_ECB_decrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_ECB_decrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -46697,7 +46704,7 @@ void AES_ECB_decrypt_NEON(const unsigned char* in, unsigned char* out, "rev32 v0.16b, v0.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_AES_ECB_decrypt_NEON_data_done_%=: \n\t" + "L_AES_ECB_decrypt_NEON_data_done_%=:\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr) : [in] "r" (in), [ks] "r" (ks), [td] "r" (td), [invshuffle] "r" (invshuffle) @@ -46729,7 +46736,7 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.lt L_AES_CBC_decrypt_NEON_start_2_%=\n\t" "\n" - "L_AES_CBC_decrypt_NEON_loop_4_%=: \n\t" + "L_AES_CBC_decrypt_NEON_loop_4_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%x[in]], #0x40\n\t" "st1 {v3.2d, v4.2d, v5.2d, v6.2d}, [x10]\n\t" @@ -46746,7 +46753,7 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v7.16b, v7.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CBC_decrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_CBC_decrypt_NEON_loop_nr_4_%=:\n\t" "tbl v8.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v4.16b\n\t" "tbl v9.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v5.16b\n\t" "tbl v10.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v6.16b\n\t" @@ -47290,12 +47297,12 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0x40\n\t" "b.ge L_AES_CBC_decrypt_NEON_loop_4_%=\n\t" "\n" - "L_AES_CBC_decrypt_NEON_start_2_%=: \n\t" + "L_AES_CBC_decrypt_NEON_start_2_%=:\n\t" "cmp %x[len], #16\n\t" "b.eq L_AES_CBC_decrypt_NEON_start_1_%=\n\t" "b.lt L_AES_CBC_decrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_CBC_decrypt_NEON_loop_2_%=: \n\t" + "L_AES_CBC_decrypt_NEON_loop_2_%=:\n\t" "mov x9, %x[ks]\n\t" "ld1 {v4.16b, v5.16b}, [%x[in]], #32\n\t" "st1 {v3.2d, v4.2d, v5.2d}, [x10]\n\t" @@ -47307,7 +47314,7 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v5.16b, v5.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CBC_decrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_CBC_decrypt_NEON_loop_nr_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -47599,7 +47606,7 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0\n\t" "b.eq L_AES_CBC_decrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_CBC_decrypt_NEON_start_1_%=: \n\t" + "L_AES_CBC_decrypt_NEON_start_1_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -47615,7 +47622,7 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v4.16b, v4.16b, v8.16b\n\t" "sub w8, %w[nr], #2\n\t" "\n" - "L_AES_CBC_decrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_CBC_decrypt_NEON_loop_nr_1_%=:\n\t" "eor v0.16b, v4.16b, v12.16b\n\t" "eor v1.16b, v4.16b, v13.16b\n\t" "eor v2.16b, v4.16b, v14.16b\n\t" @@ -47754,7 +47761,7 @@ void AES_CBC_decrypt_NEON(const unsigned char* in, unsigned char* out, "eor v4.16b, v4.16b, v10.16b\n\t" "st1 {v4.16b}, [%x[out]], #16\n\t" "\n" - "L_AES_CBC_decrypt_NEON_data_done_%=: \n\t" + "L_AES_CBC_decrypt_NEON_data_done_%=:\n\t" "st1 {v3.2d}, [%x[iv]]\n\t" "ldp x29, x30, [sp], #0x60\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [iv] "+r" (iv) @@ -47788,7 +47795,7 @@ void GCM_gmult_len_NEON(unsigned char* x, const unsigned char* h, "ushr v13.16b, v10.16b, #4\n\t" "eor v14.16b, v12.16b, v13.16b\n\t" "\n" - "L_GCM_gmult_len_NEON_start_block_%=: \n\t" + "L_GCM_gmult_len_NEON_start_block_%=:\n\t" "ld1 {v0.16b}, [%x[data]], #16\n\t" "rbit v0.16b, v0.16b\n\t" "eor v18.16b, v18.16b, v0.16b\n\t" @@ -48122,7 +48129,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "mov x7, v2.d[0]\n\t" "mov x8, v2.d[1]\n\t" "\n" - "L_AES_GCM_encrypt_NEON_loop_4_%=: \n\t" + "L_AES_GCM_encrypt_NEON_loop_4_%=:\n\t" "mov x12, %x[ks]\n\t" "ld1 {v4.2d}, [x12], #16\n\t" "mov v8.d[0], x7\n\t" @@ -48142,7 +48149,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v3.16b, v8.16b, v4.16b\n\t" "sub w11, %w[nr], #2\n\t" "\n" - "L_AES_GCM_encrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_GCM_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" @@ -48541,7 +48548,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "mov v2.d[1], x8\n\t" "mov v2.s[3], w6\n\t" "\n" - "L_AES_GCM_encrypt_NEON_start_2_%=: \n\t" + "L_AES_GCM_encrypt_NEON_start_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -48550,7 +48557,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "b.eq L_AES_GCM_encrypt_NEON_start_1_%=\n\t" "b.lt L_AES_GCM_encrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_GCM_encrypt_NEON_loop_2_%=: \n\t" + "L_AES_GCM_encrypt_NEON_loop_2_%=:\n\t" "mov x12, %x[ks]\n\t" "ld1 {v4.2d}, [x12], #16\n\t" /* Round: 0 - XOR in key schedule */ @@ -48562,7 +48569,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v1.16b, v2.16b, v4.16b\n\t" "sub w11, %w[nr], #2\n\t" "\n" - "L_AES_GCM_encrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_GCM_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" @@ -48765,7 +48772,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "cmp %x[len], #0\n\t" "b.eq L_AES_GCM_encrypt_NEON_data_done_%=\n\t" "\n" - "L_AES_GCM_encrypt_NEON_start_1_%=: \n\t" + "L_AES_GCM_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov x12, %x[ks]\n\t" "add w6, w6, #1\n\t" @@ -48775,7 +48782,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v0.16b, v2.16b, v4.16b\n\t" "sub w11, %w[nr], #2\n\t" "\n" - "L_AES_GCM_encrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_GCM_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -48878,7 +48885,7 @@ void AES_GCM_encrypt_NEON(const unsigned char* in, unsigned char* out, "eor v0.16b, v0.16b, v4.16b\n\t" "st1 {v0.16b}, [%x[out]], #16\n\t" "\n" - "L_AES_GCM_encrypt_NEON_data_done_%=: \n\t" + "L_AES_GCM_encrypt_NEON_data_done_%=:\n\t" "rev32 v2.16b, v2.16b\n\t" "st1 {v2.2d}, [%x[ctr]]\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [ctr] "+r" (ctr) @@ -48919,7 +48926,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v2.16b, v2.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_NEON_loop_nr_tweak_%=: \n\t" + "L_AES_XTS_encrypt_NEON_loop_nr_tweak_%=:\n\t" "eor v8.16b, v2.16b, v12.16b\n\t" "eor v9.16b, v2.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" @@ -49023,7 +49030,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "cmp %w[sz], #0x40\n\t" "b.lt L_AES_XTS_encrypt_NEON_start_2_%=\n\t" "\n" - "L_AES_XTS_encrypt_NEON_loop_4_%=: \n\t" + "L_AES_XTS_encrypt_NEON_loop_4_%=:\n\t" "mov x22, %x[key]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b}, [x22], #16\n\t" @@ -49058,7 +49065,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v3.16b, v3.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_XTS_encrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" @@ -49468,7 +49475,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "\n" - "L_AES_XTS_encrypt_NEON_start_2_%=: \n\t" + "L_AES_XTS_encrypt_NEON_start_2_%=:\n\t" "cmp %w[sz], #32\n\t" "b.lt L_AES_XTS_encrypt_NEON_start_1_%=\n\t" "mov x22, %x[key]\n\t" @@ -49492,7 +49499,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v1.16b, v1.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_XTS_encrypt_NEON_loop_nr_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v1.16b, v12.16b\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" @@ -49695,7 +49702,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor x8, x16, x10, lsl 1\n\t" "sub %w[sz], %w[sz], #32\n\t" "\n" - "L_AES_XTS_encrypt_NEON_start_1_%=: \n\t" + "L_AES_XTS_encrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[shuffle]]\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" @@ -49709,7 +49716,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_XTS_encrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -49816,7 +49823,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "extr x9, x9, x8, #63\n\t" "eor x8, x16, x8, lsl 1\n\t" "\n" - "L_AES_XTS_encrypt_NEON_start_partial_%=: \n\t" + "L_AES_XTS_encrypt_NEON_start_partial_%=:\n\t" "cbz %w[sz], L_AES_XTS_encrypt_NEON_data_done_%=\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" @@ -49826,7 +49833,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "st1 {v0.2d}, [%x[tmp]]\n\t" "mov w16, %w[sz]\n\t" "\n" - "L_AES_XTS_encrypt_NEON_start_byte_%=: \n\t" + "L_AES_XTS_encrypt_NEON_start_byte_%=:\n\t" "ldrb w10, [%x[tmp]]\n\t" "ldrb w11, [%x[in]], #1\n\t" "strb w10, [%x[out]], #1\n\t" @@ -49843,7 +49850,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w21, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_NEON_loop_nr_partial_%=: \n\t" + "L_AES_XTS_encrypt_NEON_loop_nr_partial_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -49945,7 +49952,7 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v2.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_AES_XTS_encrypt_NEON_data_done_%=: \n\t" + "L_AES_XTS_encrypt_NEON_data_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) @@ -49991,7 +49998,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v2.16b, v2.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_nr_tweak_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_nr_tweak_%=:\n\t" "eor v8.16b, v2.16b, v12.16b\n\t" "eor v9.16b, v2.16b, v13.16b\n\t" "eor v10.16b, v2.16b, v14.16b\n\t" @@ -50100,7 +50107,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "cmp %w[sz], #0x40\n\t" "b.lt L_AES_XTS_decrypt_NEON_start_2_%=\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_4_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_4_%=:\n\t" "mov x25, %x[key]\n\t" "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[in]], #0x40\n\t" "ld1 {v4.16b}, [x25], #16\n\t" @@ -50135,7 +50142,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v3.16b, v3.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_nr_4_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_nr_4_%=:\n\t" "tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b\n\t" "tbl v5.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v1.16b\n\t" "tbl v6.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v2.16b\n\t" @@ -50692,7 +50699,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "movi v14.16b, #0xc0\n\t" "movi v15.16b, #27\n\t" "\n" - "L_AES_XTS_decrypt_NEON_start_2_%=: \n\t" + "L_AES_XTS_decrypt_NEON_start_2_%=:\n\t" "cmp %w[sz], #32\n\t" "b.lt L_AES_XTS_decrypt_NEON_start_1_%=\n\t" "mov x25, %x[key]\n\t" @@ -50716,7 +50723,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v1.16b, v1.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_nr_2_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_nr_2_%=:\n\t" "movi v12.16b, #0x40\n\t" "movi v13.16b, #0x80\n\t" "movi v14.16b, #0xc0\n\t" @@ -51006,7 +51013,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor x8, x16, x10, lsl 1\n\t" "sub %w[sz], %w[sz], #32\n\t" "\n" - "L_AES_XTS_decrypt_NEON_start_1_%=: \n\t" + "L_AES_XTS_decrypt_NEON_start_1_%=:\n\t" "ld1 {v3.2d}, [%[invshuffle]]\n\t" "mov v2.d[0], x8\n\t" "mov v2.d[1], x9\n\t" @@ -51020,7 +51027,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_nr_1_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_nr_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -51163,7 +51170,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "extr x9, x9, x8, #63\n\t" "eor x8, x16, x8, lsl 1\n\t" "\n" - "L_AES_XTS_decrypt_NEON_start_partial_%=: \n\t" + "L_AES_XTS_decrypt_NEON_start_partial_%=:\n\t" "mov %w[sz], w19\n\t" "cbz %w[sz], L_AES_XTS_decrypt_NEON_data_done_%=\n\t" "mov v2.d[0], x8\n\t" @@ -51181,7 +51188,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_nr_partial_1_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_nr_partial_1_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -51321,7 +51328,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "add %x[out], %x[out], #16\n\t" "mov w16, %w[sz]\n\t" "\n" - "L_AES_XTS_decrypt_NEON_start_byte_%=: \n\t" + "L_AES_XTS_decrypt_NEON_start_byte_%=:\n\t" "ldrb w10, [%x[tmp]]\n\t" "ldrb w11, [%x[in]], #1\n\t" "strb w10, [%x[out]], #1\n\t" @@ -51339,7 +51346,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v4.16b\n\t" "sub w24, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_NEON_loop_nr_partial_2_%=: \n\t" + "L_AES_XTS_decrypt_NEON_loop_nr_partial_2_%=:\n\t" "eor v8.16b, v0.16b, v12.16b\n\t" "eor v9.16b, v0.16b, v13.16b\n\t" "eor v10.16b, v0.16b, v14.16b\n\t" @@ -51477,7 +51484,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "eor v0.16b, v0.16b, v2.16b\n\t" "st1 {v0.16b}, [%x[out]]\n\t" "\n" - "L_AES_XTS_decrypt_NEON_data_done_%=: \n\t" + "L_AES_XTS_decrypt_NEON_data_done_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) @@ -51496,7 +51503,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, #endif /* !WOLFSSL_ARMASM_NO_NEON */ #ifndef WOLFSSL_ARMASM_NEON_NO_TABLE_LOOKUP #ifdef HAVE_AES_DECRYPT -static const word32 L_AES_ARM64_td[] = { +XALIGNED(8) static const word32 L_AES_ARM64_td[] = { 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, @@ -51567,7 +51574,7 @@ static const word32 L_AES_ARM64_td[] = { #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) -static const word32 L_AES_ARM64_te[] = { +XALIGNED(8) static const word32 L_AES_ARM64_te[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, @@ -51646,7 +51653,7 @@ void AES_invert_key(unsigned char* ks, word32 rounds) "add x12, %x[ks], %x[rounds], lsl 4\n\t" "mov w13, %w[rounds]\n\t" "\n" - "L_AES_invert_key_loop_%=: \n\t" + "L_AES_invert_key_loop_%=:\n\t" "ldp w4, w5, [%x[ks]]\n\t" "ldnp w6, w7, [%x[ks], #8]\n\t" "ldp w8, w9, [x12]\n\t" @@ -51662,7 +51669,7 @@ void AES_invert_key(unsigned char* ks, word32 rounds) "add %x[ks], %x[ks], #16\n\t" "sub w13, %w[rounds], #1\n\t" "\n" - "L_AES_invert_key_mix_loop_%=: \n\t" + "L_AES_invert_key_mix_loop_%=:\n\t" "ldp w4, w5, [%x[ks]]\n\t" "ldnp w6, w7, [%x[ks], #8]\n\t" "ubfx w8, w4, #0, #8\n\t" @@ -51755,10 +51762,10 @@ void AES_invert_key(unsigned char* ks, word32 rounds) } #endif /* HAVE_AES_DECRYPT */ -static const word32 L_AES_ARM64_rcon[] = { +XALIGNED(8) static const word32 L_AES_ARM64_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, + 0x1b000000, 0x36000000 }; void AES_set_encrypt_key(const unsigned char* key, word32 len, @@ -51796,7 +51803,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, "sub %x[ks], %x[ks], #16\n\t" "mov x4, #6\n\t" "\n" - "L_AES_set_encrypt_key_loop_256_%=: \n\t" + "L_AES_set_encrypt_key_loop_256_%=:\n\t" "ubfx w6, w9, #0, #8\n\t" "ubfx w7, w9, #8, #8\n\t" "ubfx w8, w9, #16, #8\n\t" @@ -51881,7 +51888,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, "sub %x[ks], %x[ks], #16\n\t" "b L_AES_set_encrypt_key_end_%=\n\t" "\n" - "L_AES_set_encrypt_key_start_192_%=: \n\t" + "L_AES_set_encrypt_key_start_192_%=:\n\t" "ldr w6, [%x[key]]\n\t" "ldr w7, [%x[key], #4]\n\t" "ldr w8, [%x[key], #8]\n\t" @@ -51899,7 +51906,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, "stnp w10, w11, [%x[ks], #16]\n\t" "mov x4, #7\n\t" "\n" - "L_AES_set_encrypt_key_loop_192_%=: \n\t" + "L_AES_set_encrypt_key_loop_192_%=:\n\t" "ubfx w6, w11, #0, #8\n\t" "ubfx w7, w11, #8, #8\n\t" "ubfx w8, w11, #16, #8\n\t" @@ -51959,7 +51966,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, "stnp w8, w9, [%x[ks], #8]\n\t" "b L_AES_set_encrypt_key_end_%=\n\t" "\n" - "L_AES_set_encrypt_key_start_128_%=: \n\t" + "L_AES_set_encrypt_key_start_128_%=:\n\t" "ldr w6, [%x[key]]\n\t" "ldr w7, [%x[key], #4]\n\t" "ldr w8, [%x[key], #8]\n\t" @@ -51972,7 +51979,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, "stnp w8, w9, [%x[ks], #8]\n\t" "mov x4, #10\n\t" "\n" - "L_AES_set_encrypt_key_loop_128_%=: \n\t" + "L_AES_set_encrypt_key_loop_128_%=:\n\t" "ubfx w6, w9, #0, #8\n\t" "ubfx w7, w9, #8, #8\n\t" "ubfx w8, w9, #16, #8\n\t" @@ -52001,7 +52008,7 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len, "subs x4, x4, #1\n\t" "b.ne L_AES_set_encrypt_key_loop_128_%=\n\t" "\n" - "L_AES_set_encrypt_key_end_%=: \n\t" + "L_AES_set_encrypt_key_end_%=:\n\t" : [len] "+r" (len), [ks] "+r" (ks) : [key] "r" (key), [rcon] "r" (rcon), [te] "r" (te) : "memory", "cc", "x3", "x4", "x6", "x7", "x8", "x9", "x10", "x11" @@ -52019,7 +52026,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( "\n" - "L_AES_ECB_encrypt_loop_block_128_%=: \n\t" + "L_AES_ECB_encrypt_loop_block_128_%=:\n\t" "mov x17, %x[ks]\n\t" "ldr x6, [%x[in]]\n\t" "ldr x7, [%x[in], #8]\n\t" @@ -52031,7 +52038,7 @@ void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, "eor x7, x7, x11\n\t" "sub w16, %w[nr], #2\n\t" "\n" - "L_AES_ECB_encrypt_loop_nr_%=: \n\t" + "L_AES_ECB_encrypt_loop_nr_%=:\n\t" "ubfx x10, x6, #48, #8\n\t" "ubfx x13, x6, #24, #8\n\t" "ubfx x14, x7, #8, #8\n\t" @@ -52341,7 +52348,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, __asm__ __volatile__ ( "ldp x7, x8, [%x[iv]]\n\t" "\n" - "L_AES_CBC_encrypt_loop_block_%=: \n\t" + "L_AES_CBC_encrypt_loop_block_%=:\n\t" "mov x19, %x[ks]\n\t" "ldr x11, [%x[in]]\n\t" "ldr x12, [%x[in], #8]\n\t" @@ -52355,7 +52362,7 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, "eor x8, x8, x12\n\t" "sub w17, %w[nr], #2\n\t" "\n" - "L_AES_CBC_encrypt_loop_nr_%=: \n\t" + "L_AES_CBC_encrypt_loop_nr_%=:\n\t" "ubfx x11, x7, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x15, x8, #8, #8\n\t" @@ -52667,7 +52674,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, "rev32 x15, x15\n\t" "rev32 x16, x16\n\t" "\n" - "L_AES_CTR_encrypt_loop_block_128_%=: \n\t" + "L_AES_CTR_encrypt_loop_block_128_%=:\n\t" "mov x21, %x[ks]\n\t" "ldp x11, x12, [x21], #16\n\t" /* Round: 0 - XOR in key schedule */ @@ -52675,7 +52682,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, "eor x8, x16, x12\n\t" "sub w20, %w[nr], #2\n\t" "\n" - "L_AES_CTR_encrypt_loop_nr_%=: \n\t" + "L_AES_CTR_encrypt_loop_nr_%=:\n\t" "ubfx x11, x7, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x17, x8, #8, #8\n\t" @@ -52990,7 +52997,7 @@ void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) -static const word8 L_AES_ARM64_td4[] = { +XALIGNED(4) static const word8 L_AES_ARM64_td4[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, @@ -53025,7 +53032,8 @@ static const word8 L_AES_ARM64_td4[] = { 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, }; -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_ECB) +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_ECB) void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, @@ -53035,7 +53043,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, const word8* td4 = L_AES_ARM64_td4; __asm__ __volatile__ ( "\n" - "L_AES_ECB_decrypt_loop_block_%=: \n\t" + "L_AES_ECB_decrypt_loop_block_%=:\n\t" "mov x19, %x[ks]\n\t" "ldr x7, [%x[in]]\n\t" "ldr x8, [%x[in], #8]\n\t" @@ -53047,7 +53055,7 @@ void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, "eor x8, x8, x12\n\t" "sub w17, %w[nr], #2\n\t" "\n" - "L_AES_ECB_decrypt_loop_nr_%=: \n\t" + "L_AES_ECB_decrypt_loop_nr_%=:\n\t" "ubfx x11, x8, #48, #8\n\t" "ubfx x14, x7, #24, #8\n\t" "ubfx x15, x8, #8, #8\n\t" @@ -53328,7 +53336,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, const word32* td = L_AES_ARM64_td; __asm__ __volatile__ ( "\n" - "L_AES_CBC_decrypt_loop_block_%=: \n\t" + "L_AES_CBC_decrypt_loop_block_%=:\n\t" "mov x20, %x[ks]\n\t" "ldr x8, [%x[in]]\n\t" "ldr x9, [%x[in], #8]\n\t" @@ -53341,7 +53349,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, "eor x9, x9, x13\n\t" "sub w19, %w[nr], #2\n\t" "\n" - "L_AES_CBC_decrypt_loop_nr_even_%=: \n\t" + "L_AES_CBC_decrypt_loop_nr_even_%=:\n\t" "ubfx x12, x9, #48, #8\n\t" "ubfx x15, x8, #24, #8\n\t" "ubfx x16, x9, #8, #8\n\t" @@ -53619,7 +53627,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, "eor x9, x9, x13\n\t" "sub w19, %w[nr], #2\n\t" "\n" - "L_AES_CBC_decrypt_loop_nr_odd_%=: \n\t" + "L_AES_CBC_decrypt_loop_nr_odd_%=:\n\t" "ubfx x12, x9, #48, #8\n\t" "ubfx x15, x8, #24, #8\n\t" "ubfx x16, x9, #8, #8\n\t" @@ -53887,11 +53895,11 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, "b.ne L_AES_CBC_decrypt_loop_block_%=\n\t" "b L_AES_CBC_decrypt_end_dec_%=\n\t" "\n" - "L_AES_CBC_decrypt_end_dec_odd_%=: \n\t" + "L_AES_CBC_decrypt_end_dec_odd_%=:\n\t" "ldnp x12, x13, [%x[iv], #16]\n\t" "stp x12, x13, [%x[iv]]\n\t" "\n" - "L_AES_CBC_decrypt_end_dec_%=: \n\t" + "L_AES_CBC_decrypt_end_dec_%=:\n\t" : [out] "+r" (out), [len] "+r" (len), [nr] "+r" (nr), [iv] "+r" (iv) : [in] "r" (in), [ks] "r" (ks), [td4] "r" (td4), [td] "r" (td) : "memory", "cc", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", @@ -53904,7 +53912,7 @@ void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM -static const word32 L_GCM_gmult_len_r[] = { +XALIGNED(8) static const word32 L_GCM_gmult_len_r[] = { 0x00000000, 0x1c200000, 0x38400000, 0x24600000, 0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000, 0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000, @@ -53923,7 +53931,7 @@ void GCM_gmult_len(unsigned char* x, const unsigned char** m, const word32* r = L_GCM_gmult_len_r; __asm__ __volatile__ ( "\n" - "L_GCM_gmult_len_start_block_%=: \n\t" + "L_GCM_gmult_len_start_block_%=:\n\t" "ldp x4, x5, [%x[x]]\n\t" "ldp x6, x7, [%x[data]]\n\t" "eor x4, x4, x6\n\t" @@ -54340,7 +54348,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, "rev32 x16, x16\n\t" "rev32 x17, x17\n\t" "\n" - "L_AES_GCM_encrypt_loop_block_%=: \n\t" + "L_AES_GCM_encrypt_loop_block_%=:\n\t" "mov x21, %x[ks]\n\t" "lsr x9, x17, #32\n\t" "ldp x10, x11, [x21], #16\n\t" @@ -54351,7 +54359,7 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, "eor x7, x17, x11\n\t" "sub w20, %w[nr], #2\n\t" "\n" - "L_AES_GCM_encrypt_loop_nr_%=: \n\t" + "L_AES_GCM_encrypt_loop_nr_%=:\n\t" "ubfx x10, x6, #48, #8\n\t" "ubfx x13, x6, #24, #8\n\t" "ubfx x14, x7, #8, #8\n\t" @@ -54676,7 +54684,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x22, x22, x15\n\t" "sub w25, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_loop_nr_tweak_%=: \n\t" + "L_AES_XTS_encrypt_loop_nr_tweak_%=:\n\t" "ubfx x14, x21, #48, #8\n\t" "ubfx x17, x21, #24, #8\n\t" "ubfx x19, x22, #8, #8\n\t" @@ -54962,7 +54970,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "rev32 x21, x21\n\t" "rev32 x22, x22\n\t" "\n" - "L_AES_XTS_encrypt_loop_block_%=: \n\t" + "L_AES_XTS_encrypt_loop_block_%=:\n\t" "mov x26, %x[key]\n\t" "ldp x10, x11, [%x[in]]\n\t" "ldp x14, x15, [x26], #16\n\t" @@ -54975,7 +54983,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x11, x11, x15\n\t" "sub w25, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_loop_nr_%=: \n\t" + "L_AES_XTS_encrypt_loop_nr_%=:\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx x17, x10, #24, #8\n\t" "ubfx x19, x11, #8, #8\n\t" @@ -55278,7 +55286,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "stp x10, x11, [%x[tmp]]\n\t" "mov w14, %w[sz]\n\t" "\n" - "L_AES_XTS_encrypt_start_byte_%=: \n\t" + "L_AES_XTS_encrypt_start_byte_%=:\n\t" "ldrb w19, [%x[tmp]]\n\t" "ldrb w20, [%x[in]], #1\n\t" "strb w19, [%x[out]], #1\n\t" @@ -55299,7 +55307,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x11, x11, x15\n\t" "sub w25, %w[nr], #2\n\t" "\n" - "L_AES_XTS_encrypt_loop_nr_partial_%=: \n\t" + "L_AES_XTS_encrypt_loop_nr_partial_%=:\n\t" "ubfx x14, x10, #48, #8\n\t" "ubfx x17, x10, #24, #8\n\t" "ubfx x19, x11, #8, #8\n\t" @@ -55588,7 +55596,7 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x11, x11, x22\n\t" "stp x10, x11, [%x[out]]\n\t" "\n" - "L_AES_XTS_encrypt_done_data_%=: \n\t" + "L_AES_XTS_encrypt_done_data_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) @@ -55623,7 +55631,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x24, x24, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_loop_nr_tweak_%=: \n\t" + "L_AES_XTS_decrypt_loop_nr_tweak_%=:\n\t" "ubfx x16, x23, #48, #8\n\t" "ubfx x20, x23, #24, #8\n\t" "ubfx x21, x24, #8, #8\n\t" @@ -55911,7 +55919,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "cmp %w[sz], #16\n\t" "b.lt L_AES_XTS_decrypt_start_partail_%=\n\t" "\n" - "L_AES_XTS_decrypt_loop_block_%=: \n\t" + "L_AES_XTS_decrypt_loop_block_%=:\n\t" "mov x28, %x[key]\n\t" "ldp x12, x13, [%x[in]]\n\t" "ldp x16, x17, [x28], #16\n\t" @@ -55924,7 +55932,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x13, x13, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_loop_nr_%=: \n\t" + "L_AES_XTS_decrypt_loop_nr_%=:\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" @@ -56194,7 +56202,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "b.ge L_AES_XTS_decrypt_loop_block_%=\n\t" "cbz %w[sz], L_AES_XTS_decrypt_done_data_%=\n\t" "\n" - "L_AES_XTS_decrypt_start_partail_%=: \n\t" + "L_AES_XTS_decrypt_start_partail_%=:\n\t" "and x21, x11, x24, asr 63\n\t" "extr x26, x24, x23, #63\n\t" "eor x25, x21, x23, lsl 1\n\t" @@ -56210,7 +56218,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x13, x13, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_loop_nr_partial_1_%=: \n\t" + "L_AES_XTS_decrypt_loop_nr_partial_1_%=:\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" @@ -56473,7 +56481,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "add %x[out], %x[out], #16\n\t" "mov w16, %w[sz]\n\t" "\n" - "L_AES_XTS_decrypt_start_byte_%=: \n\t" + "L_AES_XTS_decrypt_start_byte_%=:\n\t" "ldrb w21, [%x[tmp]]\n\t" "ldrb w22, [%x[in]], #1\n\t" "strb w21, [%x[out]], #1\n\t" @@ -56495,7 +56503,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x13, x13, x17\n\t" "sub w27, %w[nr], #2\n\t" "\n" - "L_AES_XTS_decrypt_loop_nr_partial_2_%=: \n\t" + "L_AES_XTS_decrypt_loop_nr_partial_2_%=:\n\t" "ubfx x16, x13, #48, #8\n\t" "ubfx x20, x12, #24, #8\n\t" "ubfx x21, x13, #8, #8\n\t" @@ -56756,7 +56764,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "eor x13, x13, x24\n\t" "stp x12, x13, [%x[out]]\n\t" "\n" - "L_AES_XTS_decrypt_done_data_%=: \n\t" + "L_AES_XTS_decrypt_done_data_%=:\n\t" "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) diff --git a/wolfcrypt/src/port/arm/armv8-chacha-asm.S b/wolfcrypt/src/port/arm/armv8-chacha-asm.S index 1bc3a294e8d..93e9d8e635c 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha-asm.S +++ b/wolfcrypt/src/port/arm/armv8-chacha-asm.S @@ -32,40 +32,36 @@ #ifdef HAVE_CHACHA #ifndef __APPLE__ .text - .type L_chacha20_arm64_ctr, %object .section .rodata + .type L_chacha20_arm64_ctr, %object .size L_chacha20_arm64_ctr, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_chacha20_arm64_ctr: - .word 0x00000000 - .word 0x00000001 - .word 0x00000002 - .word 0x00000003 + .long 0x00000000,0x00000001,0x00000002,0x00000003 #ifndef __APPLE__ .text - .type L_chacha20_arm64_rol8, %object .section .rodata + .type L_chacha20_arm64_rol8, %object .size L_chacha20_arm64_rol8, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_chacha20_arm64_rol8: - .word 0x02010003 - .word 0x06050407 - .word 0x0a09080b - .word 0x0e0d0c0f + .long 0x02010003,0x06050407,0x0a09080b,0x0e0d0c0f #ifndef WOLFSSL_ARMASM_NO_NEON #ifndef __APPLE__ .text @@ -1009,26 +1005,21 @@ _wc_chacha_setiv: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_chacha_setkey_arm64_constant, %object .section .rodata + .type L_chacha_setkey_arm64_constant, %object .size L_chacha_setkey_arm64_constant, 32 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_chacha_setkey_arm64_constant: - .word 0x61707865 - .word 0x3120646e - .word 0x79622d36 - .word 0x6b206574 - .word 0x61707865 - .word 0x3320646e - .word 0x79622d32 - .word 0x6b206574 + .long 0x61707865,0x3120646e,0x79622d36,0x6b206574 + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 #ifndef __APPLE__ .text .globl wc_chacha_setkey diff --git a/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c index e9720680f27..e440bdee643 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c @@ -33,11 +33,11 @@ #ifdef HAVE_CHACHA #include -static const word32 L_chacha20_arm64_ctr[] = { +XALIGNED(8) static const word32 L_chacha20_arm64_ctr[] = { 0x00000000, 0x00000001, 0x00000002, 0x00000003, }; -static const word32 L_chacha20_arm64_rol8[] = { +XALIGNED(8) static const word32 L_chacha20_arm64_rol8[] = { 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f, }; @@ -62,7 +62,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "b.lt L_chacha_crypt_bytes_arm64_lt_320_%=\n\t" "mov w25, #4\n\t" "\n" - "L_chacha_crypt_bytes_arm64_loop_320_%=: \n\t" + "L_chacha_crypt_bytes_arm64_loop_320_%=:\n\t" /* Move state into regular register */ "mov x8, v16.d[0]\n\t" "mov x10, v16.d[1]\n\t" @@ -104,7 +104,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) /* Set number of odd+even rounds to perform */ "mov x26, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm64_round_start_320_%=: \n\t" + "L_chacha_crypt_bytes_arm64_round_start_320_%=:\n\t" "subs x26, x26, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -436,7 +436,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "b.ge L_chacha_crypt_bytes_arm64_loop_320_%=\n\t" /* Done doing 320 bytes at a time */ "\n" - "L_chacha_crypt_bytes_arm64_lt_320_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_320_%=:\n\t" "cmp %w[len], #0x100\n\t" "b.lt L_chacha_crypt_bytes_arm64_lt_256_%=\n\t" /* Move state into vector registers */ @@ -461,7 +461,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) /* Set number of odd+even rounds to perform */ "mov x26, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm64_round_start_256_%=: \n\t" + "L_chacha_crypt_bytes_arm64_round_start_256_%=:\n\t" "subs x26, x26, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -669,7 +669,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "add v19.4s, v19.4s, v29.4s\n\t" /* Done 256-byte block */ "\n" - "L_chacha_crypt_bytes_arm64_lt_256_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_256_%=:\n\t" "cmp %w[len], #0x80\n\t" "b.lt L_chacha_crypt_bytes_arm64_lt_128_%=\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%x[m]], #0x40\n\t" @@ -687,7 +687,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) /* Set number of odd+even rounds to perform */ "mov x26, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm64_round_start_128_%=: \n\t" + "L_chacha_crypt_bytes_arm64_round_start_128_%=:\n\t" "subs x26, x26, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -793,12 +793,12 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "sub %w[len], %w[len], #0x80\n\t" /* Done 128-byte block */ "\n" - "L_chacha_crypt_bytes_arm64_lt_128_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_128_%=:\n\t" "cmp %w[len], #0\n\t" "b.eq L_chacha_crypt_bytes_arm64_done_all_%=\n\t" "mov %w[rol8], #0x40\n\t" "\n" - "L_chacha_crypt_bytes_arm64_loop_64_%=: \n\t" + "L_chacha_crypt_bytes_arm64_loop_64_%=:\n\t" /* Move state into vector registers */ "mov v0.16b, v16.16b\n\t" "mov v1.16b, v17.16b\n\t" @@ -807,7 +807,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) /* Set number of odd+even rounds to perform */ "mov x26, #10\n\t" "\n" - "L_chacha_crypt_bytes_arm64_round_64_%=: \n\t" + "L_chacha_crypt_bytes_arm64_round_64_%=:\n\t" "subs x26, x26, #1\n\t" /* Round odd */ /* a += b; d ^= a; d <<<= 16; */ @@ -875,7 +875,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "b.ne L_chacha_crypt_bytes_arm64_loop_64_%=\n\t" "b L_chacha_crypt_bytes_arm64_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm64_lt_64_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_64_%=:\n\t" /* Calculate bytes left in block not used */ "sub %w[rol8], %w[rol8], %w[len]\n\t" /* Store encipher block in over for further operations and left */ @@ -893,7 +893,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "mov v1.16b, v3.16b\n\t" "b.eq L_chacha_crypt_bytes_arm64_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm64_lt_32_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_32_%=:\n\t" "cmp %w[len], #16\n\t" "b.lt L_chacha_crypt_bytes_arm64_lt_16_%=\n\t" /* Encipher 16 bytes */ @@ -904,7 +904,7 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "mov v0.16b, v1.16b\n\t" "b.eq L_chacha_crypt_bytes_arm64_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm64_lt_16_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_16_%=:\n\t" "cmp %w[len], #8\n\t" "b.lt L_chacha_crypt_bytes_arm64_lt_8_%=\n\t" /* Encipher 8 bytes */ @@ -915,10 +915,10 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "mov v0.d[0], v0.d[1]\n\t" "b.eq L_chacha_crypt_bytes_arm64_done_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm64_lt_8_%=: \n\t" + "L_chacha_crypt_bytes_arm64_lt_8_%=:\n\t" "mov %[rol8], v0.d[0]\n\t" "\n" - "L_chacha_crypt_bytes_arm64_loop_lt_8_%=: \n\t" + "L_chacha_crypt_bytes_arm64_loop_lt_8_%=:\n\t" /* Encipher 1 byte at a time */ "ldrb %w[ctr], [%x[m]], #1\n\t" "eor %w[ctr], %w[ctr], %w[rol8]\n\t" @@ -927,9 +927,9 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "lsr %[rol8], %[rol8], #8\n\t" "b.gt L_chacha_crypt_bytes_arm64_loop_lt_8_%=\n\t" "\n" - "L_chacha_crypt_bytes_arm64_done_%=: \n\t" + "L_chacha_crypt_bytes_arm64_done_%=:\n\t" "\n" - "L_chacha_crypt_bytes_arm64_done_all_%=: \n\t" + "L_chacha_crypt_bytes_arm64_done_all_%=:\n\t" "st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%x[ctx]]\n\t" : [ctx] "+r" (ctx), [c] "+r" (c), [len] "+r" (len) : [m] "r" (m), [rol8] "r" (rol8), [ctr] "r" (ctr) @@ -956,7 +956,7 @@ void wc_chacha_setiv(word32* x, const byte* iv, word32 counter) ); } -static const word32 L_chacha_setkey_arm64_constant[] = { +XALIGNED(8) static const word32 L_chacha_setkey_arm64_constant[] = { 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574, 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, }; @@ -981,7 +981,7 @@ void wc_chacha_setkey(word32* x, const byte* key, word32 keySz) "rev32 v1.8h, v1.8h\n\t" #endif /* BIG_ENDIAN_ORDER */ "\n" - "L_chacha_setkey_arm64_done_%=: \n\t" + "L_chacha_setkey_arm64_done_%=:\n\t" "st1 {v1.4s}, [%x[x]]\n\t" : [x] "+r" (x), [keySz] "+r" (keySz) : [key] "r" (key), [constant] "r" (constant) @@ -993,7 +993,7 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) { __asm__ __volatile__ ( "\n" - "L_chacha_use_over_arm64_16byte_loop_%=: \n\t" + "L_chacha_use_over_arm64_16byte_loop_%=:\n\t" "cmp %w[len], #16\n\t" "b.lt L_chacha_use_over_arm64_word_loop_%=\n\t" /* 16 bytes of state XORed into message. */ @@ -1005,7 +1005,7 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) "b.eq L_chacha_use_over_arm64_done_%=\n\t" "b L_chacha_use_over_arm64_16byte_loop_%=\n\t" "\n" - "L_chacha_use_over_arm64_word_loop_%=: \n\t" + "L_chacha_use_over_arm64_word_loop_%=:\n\t" "cmp %w[len], #4\n\t" "b.lt L_chacha_use_over_arm64_byte_loop_%=\n\t" /* 4 bytes of state XORed into message. */ @@ -1017,7 +1017,7 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) "b.eq L_chacha_use_over_arm64_done_%=\n\t" "b L_chacha_use_over_arm64_word_loop_%=\n\t" "\n" - "L_chacha_use_over_arm64_byte_loop_%=: \n\t" + "L_chacha_use_over_arm64_byte_loop_%=:\n\t" /* 1 bytes of state XORed into message. */ "ldrb w4, [%x[over]], #1\n\t" "ldrb w5, [%x[input]], #1\n\t" @@ -1027,7 +1027,7 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len) "b.eq L_chacha_use_over_arm64_done_%=\n\t" "b L_chacha_use_over_arm64_byte_loop_%=\n\t" "\n" - "L_chacha_use_over_arm64_done_%=: \n\t" + "L_chacha_use_over_arm64_done_%=:\n\t" : [over] "+r" (over), [output] "+r" (output), [len] "+r" (len) : [input] "r" (input) : "memory", "cc", "x4", "x5", "v0", "v1" diff --git a/wolfcrypt/src/port/arm/armv8-curve25519.S b/wolfcrypt/src/port/arm/armv8-curve25519.S index fd7c30f3474..6f88bd5e19f 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-curve25519.S @@ -1941,20 +1941,21 @@ L_fe_invert8: #if !defined(HAVE_ED25519) && !defined(WOLFSSL_CURVE25519_USE_ED25519) #ifndef __APPLE__ .text - .type L_curve25519_base_x2, %object .section .rodata + .type L_curve25519_base_x2, %object .size L_curve25519_base_x2, 32 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ - .align 5 + .align 4 #else - .p2align 5 + .p2align 4 #endif /* __APPLE__ */ L_curve25519_base_x2: -.xword 0x5cae469cdd684efb, 0x8f3f5ced1e350b5c -.xword 0xd9750c687d157114, 0x20d342d51873f1b7 + .quad 0x5cae469cdd684efb,0x8f3f5ced1e350b5c + .quad 0xd9750c687d157114,0x20d342d51873f1b7 #ifndef __APPLE__ .text .globl curve25519_base diff --git a/wolfcrypt/src/port/arm/armv8-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-curve25519_c.c index 6867ed3ca75..b8515b00549 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-curve25519_c.c @@ -501,7 +501,7 @@ void fe_invert_nct(fe r, const fe a) "sub x23, x24, x23\n\t" "b L_fe_invert_nct_num_bits_init_v_3_%=\n\t" "\n" - "L_fe_invert_nct_num_bits_init_v_0_%=: \n\t" + "L_fe_invert_nct_num_bits_init_v_0_%=:\n\t" "cmp x8, #0\n\t" "b.eq L_fe_invert_nct_num_bits_init_v_1_%=\n\t" "mov x24, #0xc0\n\t" @@ -509,7 +509,7 @@ void fe_invert_nct(fe r, const fe a) "sub x23, x24, x23\n\t" "b L_fe_invert_nct_num_bits_init_v_3_%=\n\t" "\n" - "L_fe_invert_nct_num_bits_init_v_1_%=: \n\t" + "L_fe_invert_nct_num_bits_init_v_1_%=:\n\t" "cmp x7, #0\n\t" "b.eq L_fe_invert_nct_num_bits_init_v_2_%=\n\t" "mov x24, #0x80\n\t" @@ -517,16 +517,16 @@ void fe_invert_nct(fe r, const fe a) "sub x23, x24, x23\n\t" "b L_fe_invert_nct_num_bits_init_v_3_%=\n\t" "\n" - "L_fe_invert_nct_num_bits_init_v_2_%=: \n\t" + "L_fe_invert_nct_num_bits_init_v_2_%=:\n\t" "mov x24, #0x40\n\t" "clz x23, x6\n\t" "sub x23, x24, x23\n\t" "\n" - "L_fe_invert_nct_num_bits_init_v_3_%=: \n\t" + "L_fe_invert_nct_num_bits_init_v_3_%=:\n\t" "tst x6, #1\n\t" "b.ne L_fe_invert_nct_loop_%=\n\t" "\n" - "L_fe_invert_nct_even_init_v_0_%=: \n\t" + "L_fe_invert_nct_even_init_v_0_%=:\n\t" "extr x6, x7, x6, #1\n\t" "extr x7, x8, x7, #1\n\t" "extr x8, x9, x8, #1\n\t" @@ -540,7 +540,7 @@ void fe_invert_nct(fe r, const fe a) "adcs x17, x17, x21\n\t" "cset x24, cs\n\t" "\n" - "L_fe_invert_nct_even_init_v_1_%=: \n\t" + "L_fe_invert_nct_even_init_v_1_%=:\n\t" "extr x14, x15, x14, #1\n\t" "extr x15, x16, x15, #1\n\t" "extr x16, x17, x16, #1\n\t" @@ -548,7 +548,7 @@ void fe_invert_nct(fe r, const fe a) "tst x6, #1\n\t" "b.eq L_fe_invert_nct_even_init_v_0_%=\n\t" "\n" - "L_fe_invert_nct_loop_%=: \n\t" + "L_fe_invert_nct_loop_%=:\n\t" "cmp x22, #1\n\t" "b.eq L_fe_invert_nct_u_done_%=\n\t" "cmp x23, #1\n\t" @@ -568,7 +568,7 @@ void fe_invert_nct(fe r, const fe a) "cmp x2, x6\n\t" "bcc L_fe_invert_nct_v_larger_%=\n\t" "\n" - "L_fe_invert_nct_u_larger_%=: \n\t" + "L_fe_invert_nct_u_larger_%=:\n\t" "subs x2, x2, x6\n\t" "sbcs x3, x3, x7\n\t" "sbcs x4, x4, x8\n\t" @@ -583,7 +583,7 @@ void fe_invert_nct(fe r, const fe a) "adcs x12, x12, x20\n\t" "adc x13, x13, x21\n\t" "\n" - "L_fe_invert_nct_sub_uv_%=: \n\t" + "L_fe_invert_nct_sub_uv_%=:\n\t" "cmp x5, #0\n\t" "b.eq L_fe_invert_nct_nct_num_bits_u_0_%=\n\t" "mov x24, #0x100\n\t" @@ -591,7 +591,7 @@ void fe_invert_nct(fe r, const fe a) "sub x22, x24, x22\n\t" "b L_fe_invert_nct_nct_num_bits_u_3_%=\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_u_0_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_u_0_%=:\n\t" "cmp x4, #0\n\t" "b.eq L_fe_invert_nct_nct_num_bits_u_1_%=\n\t" "mov x24, #0xc0\n\t" @@ -599,7 +599,7 @@ void fe_invert_nct(fe r, const fe a) "sub x22, x24, x22\n\t" "b L_fe_invert_nct_nct_num_bits_u_3_%=\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_u_1_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_u_1_%=:\n\t" "cmp x3, #0\n\t" "b.eq L_fe_invert_nct_nct_num_bits_u_2_%=\n\t" "mov x24, #0x80\n\t" @@ -607,14 +607,14 @@ void fe_invert_nct(fe r, const fe a) "sub x22, x24, x22\n\t" "b L_fe_invert_nct_nct_num_bits_u_3_%=\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_u_2_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_u_2_%=:\n\t" "mov x24, #0x40\n\t" "clz x22, x2\n\t" "sub x22, x24, x22\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_u_3_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_u_3_%=:\n\t" "\n" - "L_fe_invert_nct_even_u_0_%=: \n\t" + "L_fe_invert_nct_even_u_0_%=:\n\t" "extr x2, x3, x2, #1\n\t" "extr x3, x4, x3, #1\n\t" "extr x4, x5, x4, #1\n\t" @@ -628,7 +628,7 @@ void fe_invert_nct(fe r, const fe a) "adcs x13, x13, x21\n\t" "cset x24, cs\n\t" "\n" - "L_fe_invert_nct_even_u_1_%=: \n\t" + "L_fe_invert_nct_even_u_1_%=:\n\t" "extr x10, x11, x10, #1\n\t" "extr x11, x12, x11, #1\n\t" "extr x12, x13, x12, #1\n\t" @@ -637,7 +637,7 @@ void fe_invert_nct(fe r, const fe a) "b.eq L_fe_invert_nct_even_u_0_%=\n\t" "b L_fe_invert_nct_loop_%=\n\t" "\n" - "L_fe_invert_nct_v_larger_%=: \n\t" + "L_fe_invert_nct_v_larger_%=:\n\t" "subs x6, x6, x2\n\t" "sbcs x7, x7, x3\n\t" "sbcs x8, x8, x4\n\t" @@ -652,7 +652,7 @@ void fe_invert_nct(fe r, const fe a) "adcs x16, x16, x20\n\t" "adc x17, x17, x21\n\t" "\n" - "L_fe_invert_nct_sub_vu_%=: \n\t" + "L_fe_invert_nct_sub_vu_%=:\n\t" "cmp x9, #0\n\t" "b.eq L_fe_invert_nct_nct_num_bits_v_0_%=\n\t" "mov x24, #0x100\n\t" @@ -660,7 +660,7 @@ void fe_invert_nct(fe r, const fe a) "sub x23, x24, x23\n\t" "b L_fe_invert_nct_nct_num_bits_v_3_%=\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_v_0_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_v_0_%=:\n\t" "cmp x8, #0\n\t" "b.eq L_fe_invert_nct_nct_num_bits_v_1_%=\n\t" "mov x24, #0xc0\n\t" @@ -668,7 +668,7 @@ void fe_invert_nct(fe r, const fe a) "sub x23, x24, x23\n\t" "b L_fe_invert_nct_nct_num_bits_v_3_%=\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_v_1_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_v_1_%=:\n\t" "cmp x7, #0\n\t" "b.eq L_fe_invert_nct_nct_num_bits_v_2_%=\n\t" "mov x24, #0x80\n\t" @@ -676,14 +676,14 @@ void fe_invert_nct(fe r, const fe a) "sub x23, x24, x23\n\t" "b L_fe_invert_nct_nct_num_bits_v_3_%=\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_v_2_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_v_2_%=:\n\t" "mov x24, #0x40\n\t" "clz x23, x6\n\t" "sub x23, x24, x23\n\t" "\n" - "L_fe_invert_nct_nct_num_bits_v_3_%=: \n\t" + "L_fe_invert_nct_nct_num_bits_v_3_%=:\n\t" "\n" - "L_fe_invert_nct_even_v_0_%=: \n\t" + "L_fe_invert_nct_even_v_0_%=:\n\t" "extr x6, x7, x6, #1\n\t" "extr x7, x8, x7, #1\n\t" "extr x8, x9, x8, #1\n\t" @@ -697,7 +697,7 @@ void fe_invert_nct(fe r, const fe a) "adcs x17, x17, x21\n\t" "cset x24, cs\n\t" "\n" - "L_fe_invert_nct_even_v_1_%=: \n\t" + "L_fe_invert_nct_even_v_1_%=:\n\t" "extr x14, x15, x14, #1\n\t" "extr x15, x16, x15, #1\n\t" "extr x16, x17, x16, #1\n\t" @@ -706,20 +706,20 @@ void fe_invert_nct(fe r, const fe a) "b.eq L_fe_invert_nct_even_v_0_%=\n\t" "b L_fe_invert_nct_loop_%=\n\t" "\n" - "L_fe_invert_nct_u_done_%=: \n\t" + "L_fe_invert_nct_u_done_%=:\n\t" "str x10, [%x[r]]\n\t" "str x11, [%x[r], #8]\n\t" "str x12, [%x[r], #16]\n\t" "str x13, [%x[r], #24]\n\t" "b L_fe_invert_nct_done_%=\n\t" "\n" - "L_fe_invert_nct_v_done_%=: \n\t" + "L_fe_invert_nct_v_done_%=:\n\t" "str x14, [%x[r]]\n\t" "str x15, [%x[r], #8]\n\t" "str x16, [%x[r], #16]\n\t" "str x17, [%x[r], #24]\n\t" "\n" - "L_fe_invert_nct_done_%=: \n\t" + "L_fe_invert_nct_done_%=:\n\t" : [r] "+r" (r) : [a] "r" (a) : "memory", "cc", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", @@ -1041,7 +1041,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_invert1_%=: \n\t" + "L_fe_invert1_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1142,7 +1142,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_invert2_%=: \n\t" + "L_fe_invert2_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1243,7 +1243,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_fe_invert3_%=: \n\t" + "L_fe_invert3_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1344,7 +1344,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_fe_invert4_%=: \n\t" + "L_fe_invert4_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1443,7 +1443,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_invert5_%=: \n\t" + "L_fe_invert5_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1544,7 +1544,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_fe_invert6_%=: \n\t" + "L_fe_invert6_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1645,7 +1645,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_fe_invert7_%=: \n\t" + "L_fe_invert7_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1744,7 +1744,7 @@ void fe_invert(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_invert8_%=: \n\t" + "L_fe_invert8_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -1847,9 +1847,9 @@ void fe_invert(fe r, const fe a) } #if !defined(HAVE_ED25519) && !defined(WOLFSSL_CURVE25519_USE_ED25519) -static const word64 L_curve25519_base_x2[] = { - 0x5cae469cdd684efb, 0x8f3f5ced1e350b5c, - 0xd9750c687d157114, 0x20d342d51873f1b7, +XALIGNED(16) static const word64 L_curve25519_base_x2[] = { + 0x5cae469cdd684efbUL, 0x8f3f5ced1e350b5cUL, + 0xd9750c687d157114UL, 0x20d342d51873f1b7UL, }; int curve25519_base(byte* r, const byte* n) @@ -1876,7 +1876,7 @@ int curve25519_base(byte* r, const byte* n) "mov x23, %x[r]\n\t" "mov x24, #0xfd\n\t" "\n" - "L_curve25519_base_bits_%=: \n\t" + "L_curve25519_base_bits_%=:\n\t" "lsr x3, x24, #6\n\t" "and x4, x24, #63\n\t" "ldr x5, [%x[n], x3, LSL 3]\n\t" @@ -2885,7 +2885,7 @@ int curve25519_base(byte* r, const byte* n) "csel x17, x13, x9, ne\n\t" "csel x13, x9, x13, ne\n\t" "\n" - "L_curve25519_base_3_%=: \n\t" + "L_curve25519_base_3_%=:\n\t" /* Add */ "adds x6, x10, x25\n\t" "adcs x7, x11, x26\n\t" @@ -3427,7 +3427,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_base_inv_1_%=: \n\t" + "L_curve25519_base_inv_1_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -3528,7 +3528,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_base_inv_2_%=: \n\t" + "L_curve25519_base_inv_2_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -3629,7 +3629,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_base_inv_3_%=: \n\t" + "L_curve25519_base_inv_3_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -3730,7 +3730,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_base_inv_4_%=: \n\t" + "L_curve25519_base_inv_4_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -3829,7 +3829,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_base_inv_5_%=: \n\t" + "L_curve25519_base_inv_5_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -3930,7 +3930,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_base_inv_6_%=: \n\t" + "L_curve25519_base_inv_6_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -4031,7 +4031,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_base_inv_7_%=: \n\t" + "L_curve25519_base_inv_7_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -4130,7 +4130,7 @@ int curve25519_base(byte* r, const byte* n) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_base_inv_8_%=: \n\t" + "L_curve25519_base_inv_8_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -4394,7 +4394,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "stp xzr, xzr, [x29, #32]\n\t" "mov x24, #0xfe\n\t" "\n" - "L_curve25519_bits_%=: \n\t" + "L_curve25519_bits_%=:\n\t" "lsr x3, x24, #6\n\t" "and x4, x24, #63\n\t" "ldr x5, [%x[n], x3, LSL 3]\n\t" @@ -5492,7 +5492,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "csel x17, x13, x9, ne\n\t" "csel x13, x9, x13, ne\n\t" "\n" - "L_curve25519_3_%=: \n\t" + "L_curve25519_3_%=:\n\t" /* Add */ "adds x6, x10, x25\n\t" "adcs x7, x11, x26\n\t" @@ -6034,7 +6034,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_inv_1_%=: \n\t" + "L_curve25519_inv_1_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6135,7 +6135,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_inv_2_%=: \n\t" + "L_curve25519_inv_2_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6236,7 +6236,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_inv_3_%=: \n\t" + "L_curve25519_inv_3_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6337,7 +6337,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_inv_4_%=: \n\t" + "L_curve25519_inv_4_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6436,7 +6436,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_inv_5_%=: \n\t" + "L_curve25519_inv_5_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6537,7 +6537,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_inv_6_%=: \n\t" + "L_curve25519_inv_6_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6638,7 +6638,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #112]\n\t" "ldp x8, x9, [x29, #128]\n\t" "\n" - "L_curve25519_inv_7_%=: \n\t" + "L_curve25519_inv_7_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -6737,7 +6737,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "ldp x6, x7, [x29, #80]\n\t" "ldp x8, x9, [x29, #96]\n\t" "\n" - "L_curve25519_inv_8_%=: \n\t" + "L_curve25519_inv_8_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7057,7 +7057,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #16]\n\t" "ldp x8, x9, [x29, #32]\n\t" "\n" - "L_fe_pow22523_1_%=: \n\t" + "L_fe_pow22523_1_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7160,7 +7160,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #16]\n\t" "ldp x8, x9, [x29, #32]\n\t" "\n" - "L_fe_pow22523_2_%=: \n\t" + "L_fe_pow22523_2_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7261,7 +7261,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_pow22523_3_%=: \n\t" + "L_fe_pow22523_3_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7362,7 +7362,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_pow22523_4_%=: \n\t" + "L_fe_pow22523_4_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7461,7 +7461,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #16]\n\t" "ldp x8, x9, [x29, #32]\n\t" "\n" - "L_fe_pow22523_5_%=: \n\t" + "L_fe_pow22523_5_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7562,7 +7562,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_pow22523_6_%=: \n\t" + "L_fe_pow22523_6_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" @@ -7663,7 +7663,7 @@ void fe_pow22523(fe r, const fe a) "ldp x6, x7, [x29, #48]\n\t" "ldp x8, x9, [x29, #64]\n\t" "\n" - "L_fe_pow22523_7_%=: \n\t" + "L_fe_pow22523_7_%=:\n\t" /* Square */ /* A[0] * A[1] */ "umulh x12, x6, x7\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm.S b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S index 5b7df728433..566e10fcdaf 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm.S @@ -31,32 +31,34 @@ #ifndef WOLFSSL_ARMASM_INLINE #ifndef __APPLE__ .text - .type L_mlkem_aarch64_consts, %object .section .rodata + .type L_mlkem_aarch64_consts, %object .size L_mlkem_aarch64_consts, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_consts: .short 0x0d01,0xf301,0x4ebf,0x0549,0x5049,0x0000,0x0000,0x0000 #ifdef WOLFSSL_WC_MLKEM #ifndef __APPLE__ .text - .type L_mlkem_aarch64_zetas, %object .section .rodata + .type L_mlkem_aarch64_zetas, %object .size L_mlkem_aarch64_zetas, 576 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_zetas: .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca @@ -97,16 +99,17 @@ L_mlkem_aarch64_zetas: .short 0x03be,0x03be,0x074d,0x074d,0x05f2,0x05f2,0x065c,0x065c #ifndef __APPLE__ .text - .type L_mlkem_aarch64_zetas_qinv, %object .section .rodata + .type L_mlkem_aarch64_zetas_qinv, %object .size L_mlkem_aarch64_zetas_qinv, 576 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_zetas_qinv: .short 0xffed,0x7b0b,0x399a,0x0314,0x34d5,0xcf8e,0x6e1f,0xbeca @@ -1441,16 +1444,17 @@ _mlkem_ntt: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_mlkem_aarch64_zetas_inv, %object .section .rodata + .type L_mlkem_aarch64_zetas_inv, %object .size L_mlkem_aarch64_zetas_inv, 576 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_zetas_inv: .short 0x06a5,0x06a5,0x070f,0x070f,0x05b4,0x05b4,0x0943,0x0943 @@ -1491,16 +1495,17 @@ L_mlkem_aarch64_zetas_inv: .short 0x0c37,0x0be2,0x0773,0x072c,0x05ed,0x0167,0x02f6,0x05a1 #ifndef __APPLE__ .text - .type L_mlkem_aarch64_zetas_inv_qinv, %object .section .rodata + .type L_mlkem_aarch64_zetas_inv_qinv, %object .size L_mlkem_aarch64_zetas_inv_qinv, 576 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_zetas_inv_qinv: .short 0xa5a5,0xa5a5,0x440f,0x440f,0xe1b4,0xe1b4,0xa243,0xa243 @@ -5481,16 +5486,17 @@ _mlkem_invntt_sqrdmlsh: #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ #ifndef __APPLE__ .text - .type L_mlkem_aarch64_zetas_mul, %object .section .rodata + .type L_mlkem_aarch64_zetas_mul, %object .size L_mlkem_aarch64_zetas_mul, 256 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_zetas_mul: .short 0x08b2,0xf74e,0x01ae,0xfe52,0x022b,0xfdd5,0x034b,0xfcb5 @@ -6955,16 +6961,17 @@ _mlkem_basemul_mont_add: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_mlkem_aarch64_q, %object .section .rodata + .type L_mlkem_aarch64_q, %object .size L_mlkem_aarch64_q, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_aarch64_q: .short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 @@ -8160,46 +8167,49 @@ _mlkem_to_mont_sqrdmlsh: #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ #ifndef __APPLE__ .text - .type L_mlkem_to_msg_low, %object .section .rodata + .type L_mlkem_to_msg_low, %object .size L_mlkem_to_msg_low, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_to_msg_low: .short 0x0373,0x0373,0x0373,0x0373,0x0373,0x0373,0x0373,0x0373 #ifndef __APPLE__ .text - .type L_mlkem_to_msg_high, %object .section .rodata + .type L_mlkem_to_msg_high, %object .size L_mlkem_to_msg_high, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_to_msg_high: .short 0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0,0x09c0 #ifndef __APPLE__ .text - .type L_mlkem_to_msg_bits, %object .section .rodata + .type L_mlkem_to_msg_bits, %object .size L_mlkem_to_msg_bits, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_to_msg_bits: .short 0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080 @@ -8456,31 +8466,33 @@ _mlkem_to_msg_neon: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_mlkem_from_msg_q1half, %object .section .rodata + .type L_mlkem_from_msg_q1half, %object .size L_mlkem_from_msg_q1half, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_from_msg_q1half: .short 0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681 #ifndef __APPLE__ .text - .type L_mlkem_from_msg_bits, %object .section .rodata + .type L_mlkem_from_msg_bits, %object .size L_mlkem_from_msg_bits, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_from_msg_bits: .byte 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80 @@ -8941,46 +8953,49 @@ L_mlkem_aarch64_cmp_neon_done: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_mlkem_rej_uniform_mask, %object .section .rodata + .type L_mlkem_rej_uniform_mask, %object .size L_mlkem_rej_uniform_mask, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_rej_uniform_mask: .short 0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff,0x0fff #ifndef __APPLE__ .text - .type L_mlkem_rej_uniform_bits, %object .section .rodata + .type L_mlkem_rej_uniform_bits, %object .size L_mlkem_rej_uniform_bits, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 2 + .align 3 #else - .p2align 2 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_rej_uniform_bits: .short 0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080 #ifndef __APPLE__ .text - .type L_mlkem_rej_uniform_indices, %object .section .rodata + .type L_mlkem_rej_uniform_indices, %object .size L_mlkem_rej_uniform_indices, 4096 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ - .align 1 + .align 3 #else - .p2align 1 + .p2align 3 #endif /* __APPLE__ */ L_mlkem_rej_uniform_indices: .byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff @@ -9688,42 +9703,31 @@ L_mlkem_rej_uniform_done: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_sha3_aarch64_r, %object .section .rodata + .type L_sha3_aarch64_r, %object .size L_sha3_aarch64_r, 192 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ - .align 3 + .align 4 #else - .p2align 3 + .p2align 4 #endif /* __APPLE__ */ L_sha3_aarch64_r: - .xword 0x0000000000000001 - .xword 0x0000000000008082 - .xword 0x800000000000808a - .xword 0x8000000080008000 - .xword 0x000000000000808b - .xword 0x0000000080000001 - .xword 0x8000000080008081 - .xword 0x8000000000008009 - .xword 0x000000000000008a - .xword 0x0000000000000088 - .xword 0x0000000080008009 - .xword 0x000000008000000a - .xword 0x000000008000808b - .xword 0x800000000000008b - .xword 0x8000000000008089 - .xword 0x8000000000008003 - .xword 0x8000000000008002 - .xword 0x8000000000000080 - .xword 0x000000000000800a - .xword 0x800000008000000a - .xword 0x8000000080008081 - .xword 0x8000000000008080 - .xword 0x0000000080000001 - .xword 0x8000000080008008 + .quad 0x0000000000000001,0x0000000000008082 + .quad 0x800000000000808a,0x8000000080008000 + .quad 0x000000000000808b,0x0000000080000001 + .quad 0x8000000080008081,0x8000000000008009 + .quad 0x000000000000008a,0x0000000000000088 + .quad 0x0000000080008009,0x000000008000000a + .quad 0x000000008000808b,0x800000000000008b + .quad 0x8000000000008089,0x8000000000008003 + .quad 0x8000000000008002,0x8000000000000080 + .quad 0x000000000000800a,0x800000008000000a + .quad 0x8000000080008081,0x8000000000008080 + .quad 0x0000000080000001,0x8000000080008008 #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 #ifndef __APPLE__ .text diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c index 9e5780815f9..6f7ba392a24 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c @@ -30,14 +30,14 @@ #ifdef WOLFSSL_ARMASM #ifdef __aarch64__ #ifdef WOLFSSL_ARMASM_INLINE -static const word16 L_mlkem_aarch64_consts[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_consts[] = { 0x0d01, 0xf301, 0x4ebf, 0x0549, 0x5049, 0x0000, 0x0000, 0x0000, }; #include #ifdef WOLFSSL_WC_MLKEM -static const word16 L_mlkem_aarch64_zetas[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, @@ -76,7 +76,7 @@ static const word16 L_mlkem_aarch64_zetas[] = { 0x03be, 0x03be, 0x074d, 0x074d, 0x05f2, 0x05f2, 0x065c, 0x065c, }; -static const word16 L_mlkem_aarch64_zetas_qinv[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_qinv[] = { 0xffed, 0x7b0b, 0x399a, 0x0314, 0x34d5, 0xcf8e, 0x6e1f, 0xbeca, 0xae56, 0x6c6e, 0xf129, 0xc2b6, 0x29c2, 0x054f, 0xd43f, 0x79bc, 0xe93d, 0x43d4, 0x9908, 0x8e7f, 0x15c4, 0xfbb2, 0x53bf, 0x997f, @@ -1376,7 +1376,7 @@ void mlkem_ntt(sword16* r) ); } -static const word16 L_mlkem_aarch64_zetas_inv[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_inv[] = { 0x06a5, 0x06a5, 0x070f, 0x070f, 0x05b4, 0x05b4, 0x0943, 0x0943, 0x0922, 0x0922, 0x091d, 0x091d, 0x0134, 0x0134, 0x006c, 0x006c, 0x0b23, 0x0b23, 0x0366, 0x0366, 0x0356, 0x0356, 0x05e6, 0x05e6, @@ -1415,7 +1415,7 @@ static const word16 L_mlkem_aarch64_zetas_inv[] = { 0x0c37, 0x0be2, 0x0773, 0x072c, 0x05ed, 0x0167, 0x02f6, 0x05a1, }; -static const word16 L_mlkem_aarch64_zetas_inv_qinv[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_inv_qinv[] = { 0xa5a5, 0xa5a5, 0x440f, 0x440f, 0xe1b4, 0xe1b4, 0xa243, 0xa243, 0x4f22, 0x4f22, 0x901d, 0x901d, 0x5d34, 0x5d34, 0x846c, 0x846c, 0x4423, 0x4423, 0xd566, 0xd566, 0xa556, 0xa556, 0x57e6, 0x57e6, @@ -5295,7 +5295,7 @@ void mlkem_invntt_sqrdmlsh(sword16* r) } #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ -static const word16 L_mlkem_aarch64_zetas_mul[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_zetas_mul[] = { 0x08b2, 0xf74e, 0x01ae, 0xfe52, 0x022b, 0xfdd5, 0x034b, 0xfcb5, 0x081e, 0xf7e2, 0x0367, 0xfc99, 0x060e, 0xf9f2, 0x0069, 0xff97, 0x01a6, 0xfe5a, 0x024b, 0xfdb5, 0x00b1, 0xff4f, 0x0c16, 0xf3ea, @@ -6703,7 +6703,7 @@ void mlkem_basemul_mont_add(sword16* r, const sword16* a, const sword16* b) ); } -static const word16 L_mlkem_aarch64_q[] = { +XALIGNED(4) static const word16 L_mlkem_aarch64_q[] = { 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, 0x0d01, }; @@ -7761,15 +7761,15 @@ void mlkem_to_mont_sqrdmlsh(sword16* p) } #endif /* WOLFSSL_AARCH64_NO_SQRDMLSH */ -static const word16 L_mlkem_to_msg_low[] = { +XALIGNED(4) static const word16 L_mlkem_to_msg_low[] = { 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, 0x0373, }; -static const word16 L_mlkem_to_msg_high[] = { +XALIGNED(4) static const word16 L_mlkem_to_msg_high[] = { 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, 0x09c0, }; -static const word16 L_mlkem_to_msg_bits[] = { +XALIGNED(4) static const word16 L_mlkem_to_msg_bits[] = { 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, }; @@ -7990,11 +7990,11 @@ void mlkem_to_msg_neon(byte* msg, sword16* p) ); } -static const word16 L_mlkem_from_msg_q1half[] = { +XALIGNED(4) static const word16 L_mlkem_from_msg_q1half[] = { 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, 0x0681, }; -static const word8 L_mlkem_from_msg_bits[] = { +XALIGNED(4) static const word8 L_mlkem_from_msg_bits[] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, }; @@ -8400,7 +8400,7 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz) "orr v8.16b, v8.16b, v0.16b\n\t" "orr v9.16b, v9.16b, v1.16b\n\t" "\n" - "L_mlkem_aarch64_cmp_neon_done_%=: \n\t" + "L_mlkem_aarch64_cmp_neon_done_%=:\n\t" "orr v8.16b, v8.16b, v9.16b\n\t" "orr v10.16b, v10.16b, v11.16b\n\t" "orr v8.16b, v8.16b, v10.16b\n\t" @@ -8417,15 +8417,15 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz) return (word32)(size_t)a; } -static const word16 L_mlkem_rej_uniform_mask[] = { +XALIGNED(4) static const word16 L_mlkem_rej_uniform_mask[] = { 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, }; -static const word16 L_mlkem_rej_uniform_bits[] = { +XALIGNED(4) static const word16 L_mlkem_rej_uniform_bits[] = { 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, }; -static const word8 L_mlkem_rej_uniform_indices[] = { +XALIGNED(4) static const word8 L_mlkem_rej_uniform_indices[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -8963,7 +8963,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "subs wzr, %w[len], #16\n\t" "b.lt L_mlkem_rej_uniform_loop_4_%=\n\t" "\n" - "L_mlkem_rej_uniform_loop_16_%=: \n\t" + "L_mlkem_rej_uniform_loop_16_%=:\n\t" "ld3 {v4.8b, v5.8b, v6.8b}, [%x[r]], #24\n\t" "zip1 v4.16b, v4.16b, v1.16b\n\t" "zip1 v5.16b, v5.16b, v1.16b\n\t" @@ -9010,7 +9010,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "b.lt L_mlkem_rej_uniform_loop_4_%=\n\t" "b L_mlkem_rej_uniform_loop_16_%=\n\t" "\n" - "L_mlkem_rej_uniform_loop_4_%=: \n\t" + "L_mlkem_rej_uniform_loop_4_%=:\n\t" "subs w10, %w[len], w12\n\t" "b.eq L_mlkem_rej_uniform_done_%=\n\t" "subs x10, x10, #4\n\t" @@ -9047,7 +9047,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "b.eq L_mlkem_rej_uniform_done_%=\n\t" "b L_mlkem_rej_uniform_loop_4_%=\n\t" "\n" - "L_mlkem_rej_uniform_loop_lt_4_%=: \n\t" + "L_mlkem_rej_uniform_loop_lt_4_%=:\n\t" "ldr %[mask], [%x[r]], #6\n\t" "lsr %[q], %[mask], #12\n\t" "lsr %[bits], %[mask], #24\n\t" @@ -9088,7 +9088,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "b.eq L_mlkem_rej_uniform_done_%=\n\t" "b L_mlkem_rej_uniform_loop_lt_4_%=\n\t" "\n" - "L_mlkem_rej_uniform_done_%=: \n\t" + "L_mlkem_rej_uniform_done_%=:\n\t" "mov x0, x12\n\t" : [p] "+r" (p), [len] "+r" (len), [rLen] "+r" (rLen) : [r] "r" (r), [mask] "r" (mask), [q] "r" (q), [bits] "r" (bits), @@ -9100,19 +9100,19 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, return (word32)(size_t)p; } -static const word64 L_sha3_aarch64_r[] = { - 0x0000000000000001, 0x0000000000008082, - 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, - 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, - 0x8000000000008089, 0x8000000000008003, - 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, - 0x8000000080008081, 0x8000000000008080, - 0x0000000080000001, 0x8000000080008008, +XALIGNED(16) static const word64 L_sha3_aarch64_r[] = { + 0x0000000000000001UL, 0x0000000000008082UL, + 0x800000000000808aUL, 0x8000000080008000UL, + 0x000000000000808bUL, 0x0000000080000001UL, + 0x8000000080008081UL, 0x8000000000008009UL, + 0x000000000000008aUL, 0x0000000000000088UL, + 0x0000000080008009UL, 0x000000008000000aUL, + 0x000000008000808bUL, 0x800000000000008bUL, + 0x8000000000008089UL, 0x8000000000008003UL, + 0x8000000000008002UL, 0x8000000000000080UL, + 0x000000000000800aUL, 0x800000008000000aUL, + 0x8000000080008081UL, 0x8000000000008080UL, + 0x0000000080000001UL, 0x8000000080008008UL, }; #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 @@ -9155,7 +9155,7 @@ void mlkem_sha3_blocksx3_neon(word64* state) "mov x28, #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_transform_blocksx3_neon_begin_%=: \n\t" + "L_SHA3_transform_blocksx3_neon_begin_%=:\n\t" "stp %[r], x28, [x29, #48]\n\t" /* Col Mix */ "eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t" @@ -9476,7 +9476,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) "mov %x[seed], #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_shake128_blocksx3_seed_neon_begin_%=: \n\t" + "L_SHA3_shake128_blocksx3_seed_neon_begin_%=:\n\t" "stp %[r], %x[seed], [x29, #48]\n\t" /* Col Mix */ "eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t" @@ -9797,7 +9797,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) "mov %x[seed], #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_shake256_blocksx3_seed_neon_begin_%=: \n\t" + "L_SHA3_shake256_blocksx3_seed_neon_begin_%=:\n\t" "stp %[r], %x[seed], [x29, #48]\n\t" /* Col Mix */ "eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t" @@ -10097,7 +10097,7 @@ void mlkem_sha3_blocksx3_neon(word64* state) "mov x28, #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_transform_blocksx3_neon_begin_%=: \n\t" + "L_SHA3_transform_blocksx3_neon_begin_%=:\n\t" "stp %[r], x28, [x29, #48]\n\t" /* Col Mix NEON */ "eor v30.16b, v4.16b, v9.16b\n\t" @@ -10503,7 +10503,7 @@ void mlkem_shake128_blocksx3_seed_neon(word64* state, byte* seed) "mov %x[seed], #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_shake128_blocksx3_seed_neon_begin_%=: \n\t" + "L_SHA3_shake128_blocksx3_seed_neon_begin_%=:\n\t" "stp %[r], %x[seed], [x29, #48]\n\t" /* Col Mix NEON */ "eor v30.16b, v4.16b, v9.16b\n\t" @@ -10909,7 +10909,7 @@ void mlkem_shake256_blocksx3_seed_neon(word64* state, byte* seed) "mov %x[seed], #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_shake256_blocksx3_seed_neon_begin_%=: \n\t" + "L_SHA3_shake256_blocksx3_seed_neon_begin_%=:\n\t" "stp %[r], %x[seed], [x29, #48]\n\t" /* Col Mix NEON */ "eor v30.16b, v4.16b, v9.16b\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-poly1305-asm.S b/wolfcrypt/src/port/arm/armv8-poly1305-asm.S index ef7b908e096..00897f153bb 100644 --- a/wolfcrypt/src/port/arm/armv8-poly1305-asm.S +++ b/wolfcrypt/src/port/arm/armv8-poly1305-asm.S @@ -438,22 +438,20 @@ L_poly1305_arm64_blocks_done_all: #endif /* __APPLE__ */ #ifndef __APPLE__ .text - .type L_poly1305_set_key_arm64_clamp, %object .section .rodata + .type L_poly1305_set_key_arm64_clamp, %object .size L_poly1305_set_key_arm64_clamp, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_poly1305_set_key_arm64_clamp: - .word 0x0fffffff - .word 0x0ffffffc - .word 0x0ffffffc - .word 0x0ffffffc + .long 0x0fffffff,0x0ffffffc,0x0ffffffc,0x0ffffffc #ifndef __APPLE__ .text .globl poly1305_set_key diff --git a/wolfcrypt/src/port/arm/armv8-poly1305-asm_c.c b/wolfcrypt/src/port/arm/armv8-poly1305-asm_c.c index 5ba43381043..26429dcd3ee 100644 --- a/wolfcrypt/src/port/arm/armv8-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-poly1305-asm_c.c @@ -138,7 +138,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "mul v22.4s, v18.4s, v24.4s\n\t" "mul v23.4s, v19.4s, v24.4s\n\t" "\n" - "L_poly1305_arm64_blocks_loop_64_%=: \n\t" + "L_poly1305_arm64_blocks_loop_64_%=:\n\t" /* Load message of 64 bytes - setting hi bit for not finished */ "ld4 {v5.4s, v6.4s, v7.4s, v8.4s}, [%x[m]], #0x40\n\t" "sub %x[bytes], %x[bytes], #0x40\n\t" @@ -314,7 +314,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "usra v4.2d, v3.2d, #26\n\t" "and v3.16b, v3.16b, v26.16b\n\t" "\n" - "L_poly1305_arm64_blocks_done_32_%=: \n\t" + "L_poly1305_arm64_blocks_done_32_%=:\n\t" "cmp %x[bytes], #16\n\t" "b.eq L_poly1305_arm64_blocks_transfer_%=\n\t" "add x14, %x[ctx], #0x60\n\t" @@ -322,7 +322,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "st1 {v4.s}[0], [x14]\n\t" "b L_poly1305_arm64_blocks_done_all_%=\n\t" "\n" - "L_poly1305_arm64_blocks_transfer_%=: \n\t" + "L_poly1305_arm64_blocks_transfer_%=:\n\t" "mov w3, v0.s[0]\n\t" "mov w4, v1.s[0]\n\t" "mov w5, v2.s[0]\n\t" @@ -330,7 +330,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "mov w7, v4.s[0]\n\t" "b L_poly1305_arm64_blocks_start_%=\n\t" "\n" - "L_poly1305_arm64_blocks_done_%=: \n\t" + "L_poly1305_arm64_blocks_done_%=:\n\t" "cmp %x[bytes], #16\n\t" "b.lt L_poly1305_arm64_blocks_done_all_%=\n\t" /* Load h */ @@ -338,7 +338,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "ldp w5, w6, [%x[ctx], #104]\n\t" "ldr w7, [%x[ctx], #112]\n\t" "\n" - "L_poly1305_arm64_blocks_start_%=: \n\t" + "L_poly1305_arm64_blocks_start_%=:\n\t" "mov x17, #1\n\t" /* Load r */ "ldp x8, x9, [%x[ctx]]\n\t" @@ -350,7 +350,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "lsr x5, x7, #24\n\t" "add x4, x4, x7, lsl 40\n\t" "\n" - "L_poly1305_arm64_blocks_loop_%=: \n\t" + "L_poly1305_arm64_blocks_loop_%=:\n\t" /* Load m */ "ldr x14, [%x[m]]\n\t" "ldr x15, [%x[m], #8]\n\t" @@ -413,7 +413,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) "stp w5, w6, [%x[ctx], #104]\n\t" "str w7, [%x[ctx], #112]\n\t" "\n" - "L_poly1305_arm64_blocks_done_all_%=: \n\t" + "L_poly1305_arm64_blocks_done_all_%=:\n\t" : [ctx] "+r" (ctx), [bytes] "+r" (bytes) : [m] "r" (m) : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", @@ -424,7 +424,7 @@ void poly1305_arm64_blocks(Poly1305* ctx, const unsigned char* m, size_t bytes) ); } -static const word32 L_poly1305_set_key_arm64_clamp[] = { +XALIGNED(8) static const word32 L_poly1305_set_key_arm64_clamp[] = { 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc, }; diff --git a/wolfcrypt/src/port/arm/armv8-sha256-asm.S b/wolfcrypt/src/port/arm/armv8-sha256-asm.S index b764a3fef60..669e28021db 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256-asm.S +++ b/wolfcrypt/src/port/arm/armv8-sha256-asm.S @@ -32,82 +32,35 @@ #if !defined(NO_SHA256) || defined(WOLFSSL_SHA224) #ifndef __APPLE__ .text - .type L_SHA256_transform_neon_len_k, %object .section .rodata + .type L_SHA256_transform_neon_len_k, %object .size L_SHA256_transform_neon_len_k, 256 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_SHA256_transform_neon_len_k: - .word 0x428a2f98 - .word 0x71374491 - .word 0xb5c0fbcf - .word 0xe9b5dba5 - .word 0x3956c25b - .word 0x59f111f1 - .word 0x923f82a4 - .word 0xab1c5ed5 - .word 0xd807aa98 - .word 0x12835b01 - .word 0x243185be - .word 0x550c7dc3 - .word 0x72be5d74 - .word 0x80deb1fe - .word 0x9bdc06a7 - .word 0xc19bf174 - .word 0xe49b69c1 - .word 0xefbe4786 - .word 0x0fc19dc6 - .word 0x240ca1cc - .word 0x2de92c6f - .word 0x4a7484aa - .word 0x5cb0a9dc - .word 0x76f988da - .word 0x983e5152 - .word 0xa831c66d - .word 0xb00327c8 - .word 0xbf597fc7 - .word 0xc6e00bf3 - .word 0xd5a79147 - .word 0x06ca6351 - .word 0x14292967 - .word 0x27b70a85 - .word 0x2e1b2138 - .word 0x4d2c6dfc - .word 0x53380d13 - .word 0x650a7354 - .word 0x766a0abb - .word 0x81c2c92e - .word 0x92722c85 - .word 0xa2bfe8a1 - .word 0xa81a664b - .word 0xc24b8b70 - .word 0xc76c51a3 - .word 0xd192e819 - .word 0xd6990624 - .word 0xf40e3585 - .word 0x106aa070 - .word 0x19a4c116 - .word 0x1e376c08 - .word 0x2748774c - .word 0x34b0bcb5 - .word 0x391c0cb3 - .word 0x4ed8aa4a - .word 0x5b9cca4f - .word 0x682e6ff3 - .word 0x748f82ee - .word 0x78a5636f - .word 0x84c87814 - .word 0x8cc70208 - .word 0x90befffa - .word 0xa4506ceb - .word 0xbef9a3f7 - .word 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #ifndef __APPLE__ .text .globl Transform_Sha256_Len_neon @@ -1101,82 +1054,35 @@ L_sha256_len_neon_start: #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO #ifndef __APPLE__ .text - .type L_SHA256_trans_crypto_len_k, %object .section .rodata + .type L_SHA256_trans_crypto_len_k, %object .size L_SHA256_trans_crypto_len_k, 256 #else .section __DATA,__data #endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned #ifndef __APPLE__ .align 3 #else .p2align 3 #endif /* __APPLE__ */ L_SHA256_trans_crypto_len_k: - .word 0x428a2f98 - .word 0x71374491 - .word 0xb5c0fbcf - .word 0xe9b5dba5 - .word 0x3956c25b - .word 0x59f111f1 - .word 0x923f82a4 - .word 0xab1c5ed5 - .word 0xd807aa98 - .word 0x12835b01 - .word 0x243185be - .word 0x550c7dc3 - .word 0x72be5d74 - .word 0x80deb1fe - .word 0x9bdc06a7 - .word 0xc19bf174 - .word 0xe49b69c1 - .word 0xefbe4786 - .word 0x0fc19dc6 - .word 0x240ca1cc - .word 0x2de92c6f - .word 0x4a7484aa - .word 0x5cb0a9dc - .word 0x76f988da - .word 0x983e5152 - .word 0xa831c66d - .word 0xb00327c8 - .word 0xbf597fc7 - .word 0xc6e00bf3 - .word 0xd5a79147 - .word 0x06ca6351 - .word 0x14292967 - .word 0x27b70a85 - .word 0x2e1b2138 - .word 0x4d2c6dfc - .word 0x53380d13 - .word 0x650a7354 - .word 0x766a0abb - .word 0x81c2c92e - .word 0x92722c85 - .word 0xa2bfe8a1 - .word 0xa81a664b - .word 0xc24b8b70 - .word 0xc76c51a3 - .word 0xd192e819 - .word 0xd6990624 - .word 0xf40e3585 - .word 0x106aa070 - .word 0x19a4c116 - .word 0x1e376c08 - .word 0x2748774c - .word 0x34b0bcb5 - .word 0x391c0cb3 - .word 0x4ed8aa4a - .word 0x5b9cca4f - .word 0x682e6ff3 - .word 0x748f82ee - .word 0x78a5636f - .word 0x84c87814 - .word 0x8cc70208 - .word 0x90befffa - .word 0xa4506ceb - .word 0xbef9a3f7 - .word 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #ifndef __APPLE__ .text .globl Transform_Sha256_Len_crypto diff --git a/wolfcrypt/src/port/arm/armv8-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha256-asm_c.c index 671badfc146..5e5c05aeb96 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha256-asm_c.c @@ -33,7 +33,7 @@ #include #if !defined(NO_SHA256) || defined(WOLFSSL_SHA224) -static const word32 L_SHA256_transform_neon_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_neon_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -67,7 +67,7 @@ void Transform_Sha256_Len_neon(wc_Sha256* sha256, const byte* data, word32 len) "ldr w11, [%x[sha256], #28]\n\t" /* Start of loop processing a block */ "\n" - "L_sha256_len_neon_begin_%=: \n\t" + "L_sha256_len_neon_begin_%=:\n\t" /* Load W */ /* Copy digest to add in at end */ "ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [%x[data]], #32\n\t" @@ -91,7 +91,7 @@ void Transform_Sha256_Len_neon(wc_Sha256* sha256, const byte* data, word32 len) "mov x24, #3\n\t" /* Start of 16 rounds */ "\n" - "L_sha256_len_neon_start_%=: \n\t" + "L_sha256_len_neon_start_%=:\n\t" /* Round 0 */ "mov w14, v0.s[0]\n\t" "ror w12, w8, #6\n\t" @@ -1020,7 +1020,7 @@ void Transform_Sha256_Len_neon(wc_Sha256* sha256, const byte* data, word32 len) } #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO -static const word32 L_SHA256_trans_crypto_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_trans_crypto_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -1055,7 +1055,7 @@ void Transform_Sha256_Len_crypto(wc_Sha256* sha256, const byte* data, "ld1 {v0.4s, v1.4s}, [%x[sha256]]\n\t" /* Start of loop processing a block */ "\n" - "L_sha256_len_crypto_begin_%=: \n\t" + "L_sha256_len_crypto_begin_%=:\n\t" /* Load W */ "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%x[data]], #0x40\n\t" "rev32 v4.16b, v4.16b\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-sha3-asm.S index fb2c3dd79b0..631432169ba 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm.S @@ -33,42 +33,31 @@ #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 #ifndef __APPLE__ .text - .type L_SHA3_transform_crypto_r, %object .section .rodata + .type L_SHA3_transform_crypto_r, %object .size L_SHA3_transform_crypto_r, 192 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ - .align 3 + .align 4 #else - .p2align 3 + .p2align 4 #endif /* __APPLE__ */ L_SHA3_transform_crypto_r: - .xword 0x0000000000000001 - .xword 0x0000000000008082 - .xword 0x800000000000808a - .xword 0x8000000080008000 - .xword 0x000000000000808b - .xword 0x0000000080000001 - .xword 0x8000000080008081 - .xword 0x8000000000008009 - .xword 0x000000000000008a - .xword 0x0000000000000088 - .xword 0x0000000080008009 - .xword 0x000000008000000a - .xword 0x000000008000808b - .xword 0x800000000000008b - .xword 0x8000000000008089 - .xword 0x8000000000008003 - .xword 0x8000000000008002 - .xword 0x8000000000000080 - .xword 0x000000000000800a - .xword 0x800000008000000a - .xword 0x8000000080008081 - .xword 0x8000000000008080 - .xword 0x0000000080000001 - .xword 0x8000000080008008 + .quad 0x0000000000000001,0x0000000000008082 + .quad 0x800000000000808a,0x8000000080008000 + .quad 0x000000000000808b,0x0000000080000001 + .quad 0x8000000080008081,0x8000000000008009 + .quad 0x000000000000008a,0x0000000000000088 + .quad 0x0000000080008009,0x000000008000000a + .quad 0x000000008000808b,0x800000000000008b + .quad 0x8000000000008089,0x8000000000008003 + .quad 0x8000000000008002,0x8000000000000080 + .quad 0x000000000000800a,0x800000008000000a + .quad 0x8000000080008081,0x8000000000008080 + .quad 0x0000000080000001,0x8000000080008008 #ifndef __APPLE__ .text .globl BlockSha3_crypto @@ -207,42 +196,31 @@ L_sha3_crypto_begin: #endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ #ifndef __APPLE__ .text - .type L_SHA3_transform_base_r, %object .section .rodata + .type L_SHA3_transform_base_r, %object .size L_SHA3_transform_base_r, 192 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ - .align 3 + .align 4 #else - .p2align 3 + .p2align 4 #endif /* __APPLE__ */ L_SHA3_transform_base_r: - .xword 0x0000000000000001 - .xword 0x0000000000008082 - .xword 0x800000000000808a - .xword 0x8000000080008000 - .xword 0x000000000000808b - .xword 0x0000000080000001 - .xword 0x8000000080008081 - .xword 0x8000000000008009 - .xword 0x000000000000008a - .xword 0x0000000000000088 - .xword 0x0000000080008009 - .xword 0x000000008000000a - .xword 0x000000008000808b - .xword 0x800000000000008b - .xword 0x8000000000008089 - .xword 0x8000000000008003 - .xword 0x8000000000008002 - .xword 0x8000000000000080 - .xword 0x000000000000800a - .xword 0x800000008000000a - .xword 0x8000000080008081 - .xword 0x8000000000008080 - .xword 0x0000000080000001 - .xword 0x8000000080008008 + .quad 0x0000000000000001,0x0000000000008082 + .quad 0x800000000000808a,0x8000000080008000 + .quad 0x000000000000808b,0x0000000080000001 + .quad 0x8000000080008081,0x8000000000008009 + .quad 0x000000000000008a,0x0000000000000088 + .quad 0x0000000080008009,0x000000008000000a + .quad 0x000000008000808b,0x800000000000008b + .quad 0x8000000000008089,0x8000000000008003 + .quad 0x8000000000008002,0x8000000000000080 + .quad 0x000000000000800a,0x800000008000000a + .quad 0x8000000080008081,0x8000000000008080 + .quad 0x0000000080000001,0x8000000080008008 #ifndef __APPLE__ .text .globl BlockSha3_base diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c index 253053f29d5..341cddee957 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c @@ -34,19 +34,19 @@ #ifdef WOLFSSL_SHA3 #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 -static const word64 L_SHA3_transform_crypto_r[] = { - 0x0000000000000001, 0x0000000000008082, - 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, - 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, - 0x8000000000008089, 0x8000000000008003, - 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, - 0x8000000080008081, 0x8000000000008080, - 0x0000000080000001, 0x8000000080008008, +XALIGNED(16) static const word64 L_SHA3_transform_crypto_r[] = { + 0x0000000000000001UL, 0x0000000000008082UL, + 0x800000000000808aUL, 0x8000000080008000UL, + 0x000000000000808bUL, 0x0000000080000001UL, + 0x8000000080008081UL, 0x8000000000008009UL, + 0x000000000000008aUL, 0x0000000000000088UL, + 0x0000000080008009UL, 0x000000008000000aUL, + 0x000000008000808bUL, 0x800000000000008bUL, + 0x8000000000008089UL, 0x8000000000008003UL, + 0x8000000000008002UL, 0x8000000000000080UL, + 0x000000000000800aUL, 0x800000008000000aUL, + 0x8000000080008081UL, 0x8000000000008080UL, + 0x0000000080000001UL, 0x8000000080008008UL, }; void BlockSha3_crypto(word64* state) @@ -67,7 +67,7 @@ void BlockSha3_crypto(word64* state) "mov x2, #24\n\t" /* Start of 24 rounds */ "\n" - "L_sha3_crypto_begin_%=: \n\t" + "L_sha3_crypto_begin_%=:\n\t" /* Col Mix */ "eor3 v31.16b, v0.16b, v5.16b, v10.16b\n\t" "eor3 v27.16b, v1.16b, v6.16b, v11.16b\n\t" @@ -165,19 +165,19 @@ void BlockSha3_crypto(word64* state) } #endif /* WOLFSSL_ARMASM_CRYPTO_SHA3 */ -static const word64 L_SHA3_transform_base_r[] = { - 0x0000000000000001, 0x0000000000008082, - 0x800000000000808a, 0x8000000080008000, - 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, - 0x000000000000008a, 0x0000000000000088, - 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, - 0x8000000000008089, 0x8000000000008003, - 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, - 0x8000000080008081, 0x8000000000008080, - 0x0000000080000001, 0x8000000080008008, +XALIGNED(16) static const word64 L_SHA3_transform_base_r[] = { + 0x0000000000000001UL, 0x0000000000008082UL, + 0x800000000000808aUL, 0x8000000080008000UL, + 0x000000000000808bUL, 0x0000000080000001UL, + 0x8000000080008081UL, 0x8000000000008009UL, + 0x000000000000008aUL, 0x0000000000000088UL, + 0x0000000080008009UL, 0x000000008000000aUL, + 0x000000008000808bUL, 0x800000000000008bUL, + 0x8000000000008089UL, 0x8000000000008003UL, + 0x8000000000008002UL, 0x8000000000000080UL, + 0x000000000000800aUL, 0x800000008000000aUL, + 0x8000000080008081UL, 0x8000000000008080UL, + 0x0000000080000001UL, 0x8000000080008008UL, }; void BlockSha3_base(word64* state) @@ -203,7 +203,7 @@ void BlockSha3_base(word64* state) "mov x28, #24\n\t" /* Start of 24 rounds */ "\n" - "L_SHA3_transform_base_begin_%=: \n\t" + "L_SHA3_transform_base_begin_%=:\n\t" "stp %[r], x28, [x29, #48]\n\t" "eor %x[state], x5, x10\n\t" "eor x30, x1, x6\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-sha512-asm.S index 92cebfda8d0..1a48cf73177 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm.S @@ -32,113 +32,75 @@ #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) #ifndef __APPLE__ .text - .type L_SHA512_transform_neon_len_k, %object .section .rodata + .type L_SHA512_transform_neon_len_k, %object .size L_SHA512_transform_neon_len_k, 640 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ - .align 3 + .align 4 #else - .p2align 3 + .p2align 4 #endif /* __APPLE__ */ L_SHA512_transform_neon_len_k: - .xword 0x428a2f98d728ae22 - .xword 0x7137449123ef65cd - .xword 0xb5c0fbcfec4d3b2f - .xword 0xe9b5dba58189dbbc - .xword 0x3956c25bf348b538 - .xword 0x59f111f1b605d019 - .xword 0x923f82a4af194f9b - .xword 0xab1c5ed5da6d8118 - .xword 0xd807aa98a3030242 - .xword 0x12835b0145706fbe - .xword 0x243185be4ee4b28c - .xword 0x550c7dc3d5ffb4e2 - .xword 0x72be5d74f27b896f - .xword 0x80deb1fe3b1696b1 - .xword 0x9bdc06a725c71235 - .xword 0xc19bf174cf692694 - .xword 0xe49b69c19ef14ad2 - .xword 0xefbe4786384f25e3 - .xword 0x0fc19dc68b8cd5b5 - .xword 0x240ca1cc77ac9c65 - .xword 0x2de92c6f592b0275 - .xword 0x4a7484aa6ea6e483 - .xword 0x5cb0a9dcbd41fbd4 - .xword 0x76f988da831153b5 - .xword 0x983e5152ee66dfab - .xword 0xa831c66d2db43210 - .xword 0xb00327c898fb213f - .xword 0xbf597fc7beef0ee4 - .xword 0xc6e00bf33da88fc2 - .xword 0xd5a79147930aa725 - .xword 0x06ca6351e003826f - .xword 0x142929670a0e6e70 - .xword 0x27b70a8546d22ffc - .xword 0x2e1b21385c26c926 - .xword 0x4d2c6dfc5ac42aed - .xword 0x53380d139d95b3df - .xword 0x650a73548baf63de - .xword 0x766a0abb3c77b2a8 - .xword 0x81c2c92e47edaee6 - .xword 0x92722c851482353b - .xword 0xa2bfe8a14cf10364 - .xword 0xa81a664bbc423001 - .xword 0xc24b8b70d0f89791 - .xword 0xc76c51a30654be30 - .xword 0xd192e819d6ef5218 - .xword 0xd69906245565a910 - .xword 0xf40e35855771202a - .xword 0x106aa07032bbd1b8 - .xword 0x19a4c116b8d2d0c8 - .xword 0x1e376c085141ab53 - .xword 0x2748774cdf8eeb99 - .xword 0x34b0bcb5e19b48a8 - .xword 0x391c0cb3c5c95a63 - .xword 0x4ed8aa4ae3418acb - .xword 0x5b9cca4f7763e373 - .xword 0x682e6ff3d6b2b8a3 - .xword 0x748f82ee5defb2fc - .xword 0x78a5636f43172f60 - .xword 0x84c87814a1f0ab72 - .xword 0x8cc702081a6439ec - .xword 0x90befffa23631e28 - .xword 0xa4506cebde82bde9 - .xword 0xbef9a3f7b2c67915 - .xword 0xc67178f2e372532b - .xword 0xca273eceea26619c - .xword 0xd186b8c721c0c207 - .xword 0xeada7dd6cde0eb1e - .xword 0xf57d4f7fee6ed178 - .xword 0x06f067aa72176fba - .xword 0x0a637dc5a2c898a6 - .xword 0x113f9804bef90dae - .xword 0x1b710b35131c471b - .xword 0x28db77f523047d84 - .xword 0x32caab7b40c72493 - .xword 0x3c9ebe0a15c9bebc - .xword 0x431d67c49c100d4c - .xword 0x4cc5d4becb3e42b6 - .xword 0x597f299cfc657e2a - .xword 0x5fcb6fab3ad6faec - .xword 0x6c44198c4a475817 + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 #ifndef __APPLE__ .text - .type L_SHA512_transform_neon_len_r8, %object .section .rodata + .type L_SHA512_transform_neon_len_r8, %object .size L_SHA512_transform_neon_len_r8, 16 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ .align 4 #else .p2align 4 #endif /* __APPLE__ */ L_SHA512_transform_neon_len_r8: - .xword 0x7060504030201, 0x80f0e0d0c0b0a09 + .quad 0x0007060504030201,0x080f0e0d0c0b0a09 #ifndef __APPLE__ .text .globl Transform_Sha512_Len_neon @@ -1087,98 +1049,59 @@ L_sha512_len_neon_start: #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 #ifndef __APPLE__ .text - .type L_SHA512_trans_crypto_len_k, %object .section .rodata + .type L_SHA512_trans_crypto_len_k, %object .size L_SHA512_trans_crypto_len_k, 640 #else .section __DATA,__data #endif /* __APPLE__ */ + # 16-byte aligned, 128-bit aligned #ifndef __APPLE__ - .align 3 + .align 4 #else - .p2align 3 + .p2align 4 #endif /* __APPLE__ */ L_SHA512_trans_crypto_len_k: - .xword 0x428a2f98d728ae22 - .xword 0x7137449123ef65cd - .xword 0xb5c0fbcfec4d3b2f - .xword 0xe9b5dba58189dbbc - .xword 0x3956c25bf348b538 - .xword 0x59f111f1b605d019 - .xword 0x923f82a4af194f9b - .xword 0xab1c5ed5da6d8118 - .xword 0xd807aa98a3030242 - .xword 0x12835b0145706fbe - .xword 0x243185be4ee4b28c - .xword 0x550c7dc3d5ffb4e2 - .xword 0x72be5d74f27b896f - .xword 0x80deb1fe3b1696b1 - .xword 0x9bdc06a725c71235 - .xword 0xc19bf174cf692694 - .xword 0xe49b69c19ef14ad2 - .xword 0xefbe4786384f25e3 - .xword 0x0fc19dc68b8cd5b5 - .xword 0x240ca1cc77ac9c65 - .xword 0x2de92c6f592b0275 - .xword 0x4a7484aa6ea6e483 - .xword 0x5cb0a9dcbd41fbd4 - .xword 0x76f988da831153b5 - .xword 0x983e5152ee66dfab - .xword 0xa831c66d2db43210 - .xword 0xb00327c898fb213f - .xword 0xbf597fc7beef0ee4 - .xword 0xc6e00bf33da88fc2 - .xword 0xd5a79147930aa725 - .xword 0x06ca6351e003826f - .xword 0x142929670a0e6e70 - .xword 0x27b70a8546d22ffc - .xword 0x2e1b21385c26c926 - .xword 0x4d2c6dfc5ac42aed - .xword 0x53380d139d95b3df - .xword 0x650a73548baf63de - .xword 0x766a0abb3c77b2a8 - .xword 0x81c2c92e47edaee6 - .xword 0x92722c851482353b - .xword 0xa2bfe8a14cf10364 - .xword 0xa81a664bbc423001 - .xword 0xc24b8b70d0f89791 - .xword 0xc76c51a30654be30 - .xword 0xd192e819d6ef5218 - .xword 0xd69906245565a910 - .xword 0xf40e35855771202a - .xword 0x106aa07032bbd1b8 - .xword 0x19a4c116b8d2d0c8 - .xword 0x1e376c085141ab53 - .xword 0x2748774cdf8eeb99 - .xword 0x34b0bcb5e19b48a8 - .xword 0x391c0cb3c5c95a63 - .xword 0x4ed8aa4ae3418acb - .xword 0x5b9cca4f7763e373 - .xword 0x682e6ff3d6b2b8a3 - .xword 0x748f82ee5defb2fc - .xword 0x78a5636f43172f60 - .xword 0x84c87814a1f0ab72 - .xword 0x8cc702081a6439ec - .xword 0x90befffa23631e28 - .xword 0xa4506cebde82bde9 - .xword 0xbef9a3f7b2c67915 - .xword 0xc67178f2e372532b - .xword 0xca273eceea26619c - .xword 0xd186b8c721c0c207 - .xword 0xeada7dd6cde0eb1e - .xword 0xf57d4f7fee6ed178 - .xword 0x06f067aa72176fba - .xword 0x0a637dc5a2c898a6 - .xword 0x113f9804bef90dae - .xword 0x1b710b35131c471b - .xword 0x28db77f523047d84 - .xword 0x32caab7b40c72493 - .xword 0x3c9ebe0a15c9bebc - .xword 0x431d67c49c100d4c - .xword 0x4cc5d4becb3e42b6 - .xword 0x597f299cfc657e2a - .xword 0x5fcb6fab3ad6faec - .xword 0x6c44198c4a475817 + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 #ifndef __APPLE__ .text .globl Transform_Sha512_Len_crypto diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c index d637a737a2b..b7b6ec773d5 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c @@ -33,51 +33,51 @@ #include #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) -static const word64 L_SHA512_transform_neon_len_k[] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, - 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, - 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, - 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, - 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, - 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, - 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, - 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, - 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, - 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, - 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, - 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, - 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, - 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, - 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, 0x78a5636f43172f60, - 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, - 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, - 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, - 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, - 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, - 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, +XALIGNED(16) static const word64 L_SHA512_transform_neon_len_k[] = { + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; -static const word64 L_SHA512_transform_neon_len_r8[] = { - 0x0007060504030201, 0x080f0e0d0c0b0a09, +XALIGNED(16) static const word64 L_SHA512_transform_neon_len_r8[] = { + 0x0007060504030201UL, 0x080f0e0d0c0b0a09UL, }; void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) @@ -93,7 +93,7 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) "ldp x10, x11, [%x[sha512], #48]\n\t" /* Start of loop processing a block */ "\n" - "L_sha512_len_neon_begin_%=: \n\t" + "L_sha512_len_neon_begin_%=:\n\t" /* Load W */ /* Copy digest to add in at end */ "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[data]], #0x40\n\t" @@ -119,7 +119,7 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) "mov %[r8], #4\n\t" /* Start of 16 rounds */ "\n" - "L_sha512_len_neon_start_%=: \n\t" + "L_sha512_len_neon_start_%=:\n\t" /* Round 0 */ "mov x13, v0.d[0]\n\t" "ldr x15, [%[k]], #8\n\t" @@ -995,47 +995,47 @@ void Transform_Sha512_Len_neon(wc_Sha512* sha512, const byte* data, word32 len) } #ifdef WOLFSSL_ARMASM_CRYPTO_SHA512 -static const word64 L_SHA512_trans_crypto_len_k[] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, - 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, - 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, - 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, - 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, - 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, - 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, - 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, - 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, - 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, - 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, - 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, - 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, - 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, - 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, 0x78a5636f43172f60, - 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, - 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, - 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, - 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, - 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, - 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, +XALIGNED(16) static const word64 L_SHA512_trans_crypto_len_k[] = { + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, @@ -1055,7 +1055,7 @@ void Transform_Sha512_Len_crypto(wc_Sha512* sha512, const byte* data, "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [%x[sha512]]\n\t" /* Start of loop processing a block */ "\n" - "L_sha512_len_crypto_begin_%=: \n\t" + "L_sha512_len_crypto_begin_%=:\n\t" "mov x3, %[k]\n\t" /* Load W */ "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[data]], #0x40\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S index ceea6793caf..2275959d2ad 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -34,551 +34,203 @@ .syntax unified #ifndef NO_AES #ifdef HAVE_AES_DECRYPT +#ifndef __APPLE__ .text .type L_AES_Thumb2_td_data, %object .size L_AES_Thumb2_td_data, 1024 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_td_data: - .word 0x5051f4a7 - .word 0x537e4165 - .word 0xc31a17a4 - .word 0x963a275e - .word 0xcb3bab6b - .word 0xf11f9d45 - .word 0xabacfa58 - .word 0x934be303 - .word 0x552030fa - .word 0xf6ad766d - .word 0x9188cc76 - .word 0x25f5024c - .word 0xfc4fe5d7 - .word 0xd7c52acb - .word 0x80263544 - .word 0x8fb562a3 - .word 0x49deb15a - .word 0x6725ba1b - .word 0x9845ea0e - .word 0xe15dfec0 - .word 0x2c32f75 - .word 0x12814cf0 - .word 0xa38d4697 - .word 0xc66bd3f9 - .word 0xe7038f5f - .word 0x9515929c - .word 0xebbf6d7a - .word 0xda955259 - .word 0x2dd4be83 - .word 0xd3587421 - .word 0x2949e069 - .word 0x448ec9c8 - .word 0x6a75c289 - .word 0x78f48e79 - .word 0x6b99583e - .word 0xdd27b971 - .word 0xb6bee14f - .word 0x17f088ad - .word 0x66c920ac - .word 0xb47dce3a - .word 0x1863df4a - .word 0x82e51a31 - .word 0x60975133 - .word 0x4562537f - .word 0xe0b16477 - .word 0x84bb6bae - .word 0x1cfe81a0 - .word 0x94f9082b - .word 0x58704868 - .word 0x198f45fd - .word 0x8794de6c - .word 0xb7527bf8 - .word 0x23ab73d3 - .word 0xe2724b02 - .word 0x57e31f8f - .word 0x2a6655ab - .word 0x7b2eb28 - .word 0x32fb5c2 - .word 0x9a86c57b - .word 0xa5d33708 - .word 0xf2302887 - .word 0xb223bfa5 - .word 0xba02036a - .word 0x5ced1682 - .word 0x2b8acf1c - .word 0x92a779b4 - .word 0xf0f307f2 - .word 0xa14e69e2 - .word 0xcd65daf4 - .word 0xd50605be - .word 0x1fd13462 - .word 0x8ac4a6fe - .word 0x9d342e53 - .word 0xa0a2f355 - .word 0x32058ae1 - .word 0x75a4f6eb - .word 0x390b83ec - .word 0xaa4060ef - .word 0x65e719f - .word 0x51bd6e10 - .word 0xf93e218a - .word 0x3d96dd06 - .word 0xaedd3e05 - .word 0x464de6bd - .word 0xb591548d - .word 0x571c45d - .word 0x6f0406d4 - .word 0xff605015 - .word 0x241998fb - .word 0x97d6bde9 - .word 0xcc894043 - .word 0x7767d99e - .word 0xbdb0e842 - .word 0x8807898b - .word 0x38e7195b - .word 0xdb79c8ee - .word 0x47a17c0a - .word 0xe97c420f - .word 0xc9f8841e - .word 0x0 - .word 0x83098086 - .word 0x48322bed - .word 0xac1e1170 - .word 0x4e6c5a72 - .word 0xfbfd0eff - .word 0x560f8538 - .word 0x1e3daed5 - .word 0x27362d39 - .word 0x640a0fd9 - .word 0x21685ca6 - .word 0xd19b5b54 - .word 0x3a24362e - .word 0xb10c0a67 - .word 0xf9357e7 - .word 0xd2b4ee96 - .word 0x9e1b9b91 - .word 0x4f80c0c5 - .word 0xa261dc20 - .word 0x695a774b - .word 0x161c121a - .word 0xae293ba - .word 0xe5c0a02a - .word 0x433c22e0 - .word 0x1d121b17 - .word 0xb0e090d - .word 0xadf28bc7 - .word 0xb92db6a8 - .word 0xc8141ea9 - .word 0x8557f119 - .word 0x4caf7507 - .word 0xbbee99dd - .word 0xfda37f60 - .word 0x9ff70126 - .word 0xbc5c72f5 - .word 0xc544663b - .word 0x345bfb7e - .word 0x768b4329 - .word 0xdccb23c6 - .word 0x68b6edfc - .word 0x63b8e4f1 - .word 0xcad731dc - .word 0x10426385 - .word 0x40139722 - .word 0x2084c611 - .word 0x7d854a24 - .word 0xf8d2bb3d - .word 0x11aef932 - .word 0x6dc729a1 - .word 0x4b1d9e2f - .word 0xf3dcb230 - .word 0xec0d8652 - .word 0xd077c1e3 - .word 0x6c2bb316 - .word 0x99a970b9 - .word 0xfa119448 - .word 0x2247e964 - .word 0xc4a8fc8c - .word 0x1aa0f03f - .word 0xd8567d2c - .word 0xef223390 - .word 0xc787494e - .word 0xc1d938d1 - .word 0xfe8ccaa2 - .word 0x3698d40b - .word 0xcfa6f581 - .word 0x28a57ade - .word 0x26dab78e - .word 0xa43fadbf - .word 0xe42c3a9d - .word 0xd507892 - .word 0x9b6a5fcc - .word 0x62547e46 - .word 0xc2f68d13 - .word 0xe890d8b8 - .word 0x5e2e39f7 - .word 0xf582c3af - .word 0xbe9f5d80 - .word 0x7c69d093 - .word 0xa96fd52d - .word 0xb3cf2512 - .word 0x3bc8ac99 - .word 0xa710187d - .word 0x6ee89c63 - .word 0x7bdb3bbb - .word 0x9cd2678 - .word 0xf46e5918 - .word 0x1ec9ab7 - .word 0xa8834f9a - .word 0x65e6956e - .word 0x7eaaffe6 - .word 0x821bccf - .word 0xe6ef15e8 - .word 0xd9bae79b - .word 0xce4a6f36 - .word 0xd4ea9f09 - .word 0xd629b07c - .word 0xaf31a4b2 - .word 0x312a3f23 - .word 0x30c6a594 - .word 0xc035a266 - .word 0x37744ebc - .word 0xa6fc82ca - .word 0xb0e090d0 - .word 0x1533a7d8 - .word 0x4af10498 - .word 0xf741ecda - .word 0xe7fcd50 - .word 0x2f1791f6 - .word 0x8d764dd6 - .word 0x4d43efb0 - .word 0x54ccaa4d - .word 0xdfe49604 - .word 0xe39ed1b5 - .word 0x1b4c6a88 - .word 0xb8c12c1f - .word 0x7f466551 - .word 0x49d5eea - .word 0x5d018c35 - .word 0x73fa8774 - .word 0x2efb0b41 - .word 0x5ab3671d - .word 0x5292dbd2 - .word 0x33e91056 - .word 0x136dd647 - .word 0x8c9ad761 - .word 0x7a37a10c - .word 0x8e59f814 - .word 0x89eb133c - .word 0xeecea927 - .word 0x35b761c9 - .word 0xede11ce5 - .word 0x3c7a47b1 - .word 0x599cd2df - .word 0x3f55f273 - .word 0x791814ce - .word 0xbf73c737 - .word 0xea53f7cd - .word 0x5b5ffdaa - .word 0x14df3d6f - .word 0x867844db - .word 0x81caaff3 - .word 0x3eb968c4 - .word 0x2c382434 - .word 0x5fc2a340 - .word 0x72161dc3 - .word 0xcbce225 - .word 0x8b283c49 - .word 0x41ff0d95 - .word 0x7139a801 - .word 0xde080cb3 - .word 0x9cd8b4e4 - .word 0x906456c1 - .word 0x617bcb84 - .word 0x70d532b6 - .word 0x74486c5c - .word 0x42d0b857 + .long 0x5051f4a7,0x537e4165,0xc31a17a4,0x963a275e + .long 0xcb3bab6b,0xf11f9d45,0xabacfa58,0x934be303 + .long 0x552030fa,0xf6ad766d,0x9188cc76,0x25f5024c + .long 0xfc4fe5d7,0xd7c52acb,0x80263544,0x8fb562a3 + .long 0x49deb15a,0x6725ba1b,0x9845ea0e,0xe15dfec0 + .long 0x02c32f75,0x12814cf0,0xa38d4697,0xc66bd3f9 + .long 0xe7038f5f,0x9515929c,0xebbf6d7a,0xda955259 + .long 0x2dd4be83,0xd3587421,0x2949e069,0x448ec9c8 + .long 0x6a75c289,0x78f48e79,0x6b99583e,0xdd27b971 + .long 0xb6bee14f,0x17f088ad,0x66c920ac,0xb47dce3a + .long 0x1863df4a,0x82e51a31,0x60975133,0x4562537f + .long 0xe0b16477,0x84bb6bae,0x1cfe81a0,0x94f9082b + .long 0x58704868,0x198f45fd,0x8794de6c,0xb7527bf8 + .long 0x23ab73d3,0xe2724b02,0x57e31f8f,0x2a6655ab + .long 0x07b2eb28,0x032fb5c2,0x9a86c57b,0xa5d33708 + .long 0xf2302887,0xb223bfa5,0xba02036a,0x5ced1682 + .long 0x2b8acf1c,0x92a779b4,0xf0f307f2,0xa14e69e2 + .long 0xcd65daf4,0xd50605be,0x1fd13462,0x8ac4a6fe + .long 0x9d342e53,0xa0a2f355,0x32058ae1,0x75a4f6eb + .long 0x390b83ec,0xaa4060ef,0x065e719f,0x51bd6e10 + .long 0xf93e218a,0x3d96dd06,0xaedd3e05,0x464de6bd + .long 0xb591548d,0x0571c45d,0x6f0406d4,0xff605015 + .long 0x241998fb,0x97d6bde9,0xcc894043,0x7767d99e + .long 0xbdb0e842,0x8807898b,0x38e7195b,0xdb79c8ee + .long 0x47a17c0a,0xe97c420f,0xc9f8841e,0x00000000 + .long 0x83098086,0x48322bed,0xac1e1170,0x4e6c5a72 + .long 0xfbfd0eff,0x560f8538,0x1e3daed5,0x27362d39 + .long 0x640a0fd9,0x21685ca6,0xd19b5b54,0x3a24362e + .long 0xb10c0a67,0x0f9357e7,0xd2b4ee96,0x9e1b9b91 + .long 0x4f80c0c5,0xa261dc20,0x695a774b,0x161c121a + .long 0x0ae293ba,0xe5c0a02a,0x433c22e0,0x1d121b17 + .long 0x0b0e090d,0xadf28bc7,0xb92db6a8,0xc8141ea9 + .long 0x8557f119,0x4caf7507,0xbbee99dd,0xfda37f60 + .long 0x9ff70126,0xbc5c72f5,0xc544663b,0x345bfb7e + .long 0x768b4329,0xdccb23c6,0x68b6edfc,0x63b8e4f1 + .long 0xcad731dc,0x10426385,0x40139722,0x2084c611 + .long 0x7d854a24,0xf8d2bb3d,0x11aef932,0x6dc729a1 + .long 0x4b1d9e2f,0xf3dcb230,0xec0d8652,0xd077c1e3 + .long 0x6c2bb316,0x99a970b9,0xfa119448,0x2247e964 + .long 0xc4a8fc8c,0x1aa0f03f,0xd8567d2c,0xef223390 + .long 0xc787494e,0xc1d938d1,0xfe8ccaa2,0x3698d40b + .long 0xcfa6f581,0x28a57ade,0x26dab78e,0xa43fadbf + .long 0xe42c3a9d,0x0d507892,0x9b6a5fcc,0x62547e46 + .long 0xc2f68d13,0xe890d8b8,0x5e2e39f7,0xf582c3af + .long 0xbe9f5d80,0x7c69d093,0xa96fd52d,0xb3cf2512 + .long 0x3bc8ac99,0xa710187d,0x6ee89c63,0x7bdb3bbb + .long 0x09cd2678,0xf46e5918,0x01ec9ab7,0xa8834f9a + .long 0x65e6956e,0x7eaaffe6,0x0821bccf,0xe6ef15e8 + .long 0xd9bae79b,0xce4a6f36,0xd4ea9f09,0xd629b07c + .long 0xaf31a4b2,0x312a3f23,0x30c6a594,0xc035a266 + .long 0x37744ebc,0xa6fc82ca,0xb0e090d0,0x1533a7d8 + .long 0x4af10498,0xf741ecda,0x0e7fcd50,0x2f1791f6 + .long 0x8d764dd6,0x4d43efb0,0x54ccaa4d,0xdfe49604 + .long 0xe39ed1b5,0x1b4c6a88,0xb8c12c1f,0x7f466551 + .long 0x049d5eea,0x5d018c35,0x73fa8774,0x2efb0b41 + .long 0x5ab3671d,0x5292dbd2,0x33e91056,0x136dd647 + .long 0x8c9ad761,0x7a37a10c,0x8e59f814,0x89eb133c + .long 0xeecea927,0x35b761c9,0xede11ce5,0x3c7a47b1 + .long 0x599cd2df,0x3f55f273,0x791814ce,0xbf73c737 + .long 0xea53f7cd,0x5b5ffdaa,0x14df3d6f,0x867844db + .long 0x81caaff3,0x3eb968c4,0x2c382434,0x5fc2a340 + .long 0x72161dc3,0x0cbce225,0x8b283c49,0x41ff0d95 + .long 0x7139a801,0xde080cb3,0x9cd8b4e4,0x906456c1 + .long 0x617bcb84,0x70d532b6,0x74486c5c,0x42d0b857 #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#ifndef __APPLE__ .text .type L_AES_Thumb2_te_data, %object .size L_AES_Thumb2_te_data, 1024 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_te_data: - .word 0xa5c66363 - .word 0x84f87c7c - .word 0x99ee7777 - .word 0x8df67b7b - .word 0xdfff2f2 - .word 0xbdd66b6b - .word 0xb1de6f6f - .word 0x5491c5c5 - .word 0x50603030 - .word 0x3020101 - .word 0xa9ce6767 - .word 0x7d562b2b - .word 0x19e7fefe - .word 0x62b5d7d7 - .word 0xe64dabab - .word 0x9aec7676 - .word 0x458fcaca - .word 0x9d1f8282 - .word 0x4089c9c9 - .word 0x87fa7d7d - .word 0x15effafa - .word 0xebb25959 - .word 0xc98e4747 - .word 0xbfbf0f0 - .word 0xec41adad - .word 0x67b3d4d4 - .word 0xfd5fa2a2 - .word 0xea45afaf - .word 0xbf239c9c - .word 0xf753a4a4 - .word 0x96e47272 - .word 0x5b9bc0c0 - .word 0xc275b7b7 - .word 0x1ce1fdfd - .word 0xae3d9393 - .word 0x6a4c2626 - .word 0x5a6c3636 - .word 0x417e3f3f - .word 0x2f5f7f7 - .word 0x4f83cccc - .word 0x5c683434 - .word 0xf451a5a5 - .word 0x34d1e5e5 - .word 0x8f9f1f1 - .word 0x93e27171 - .word 0x73abd8d8 - .word 0x53623131 - .word 0x3f2a1515 - .word 0xc080404 - .word 0x5295c7c7 - .word 0x65462323 - .word 0x5e9dc3c3 - .word 0x28301818 - .word 0xa1379696 - .word 0xf0a0505 - .word 0xb52f9a9a - .word 0x90e0707 - .word 0x36241212 - .word 0x9b1b8080 - .word 0x3ddfe2e2 - .word 0x26cdebeb - .word 0x694e2727 - .word 0xcd7fb2b2 - .word 0x9fea7575 - .word 0x1b120909 - .word 0x9e1d8383 - .word 0x74582c2c - .word 0x2e341a1a - .word 0x2d361b1b - .word 0xb2dc6e6e - .word 0xeeb45a5a - .word 0xfb5ba0a0 - .word 0xf6a45252 - .word 0x4d763b3b - .word 0x61b7d6d6 - .word 0xce7db3b3 - .word 0x7b522929 - .word 0x3edde3e3 - .word 0x715e2f2f - .word 0x97138484 - .word 0xf5a65353 - .word 0x68b9d1d1 - .word 0x0 - .word 0x2cc1eded - .word 0x60402020 - .word 0x1fe3fcfc - .word 0xc879b1b1 - .word 0xedb65b5b - .word 0xbed46a6a - .word 0x468dcbcb - .word 0xd967bebe - .word 0x4b723939 - .word 0xde944a4a - .word 0xd4984c4c - .word 0xe8b05858 - .word 0x4a85cfcf - .word 0x6bbbd0d0 - .word 0x2ac5efef - .word 0xe54faaaa - .word 0x16edfbfb - .word 0xc5864343 - .word 0xd79a4d4d - .word 0x55663333 - .word 0x94118585 - .word 0xcf8a4545 - .word 0x10e9f9f9 - .word 0x6040202 - .word 0x81fe7f7f - .word 0xf0a05050 - .word 0x44783c3c - .word 0xba259f9f - .word 0xe34ba8a8 - .word 0xf3a25151 - .word 0xfe5da3a3 - .word 0xc0804040 - .word 0x8a058f8f - .word 0xad3f9292 - .word 0xbc219d9d - .word 0x48703838 - .word 0x4f1f5f5 - .word 0xdf63bcbc - .word 0xc177b6b6 - .word 0x75afdada - .word 0x63422121 - .word 0x30201010 - .word 0x1ae5ffff - .word 0xefdf3f3 - .word 0x6dbfd2d2 - .word 0x4c81cdcd - .word 0x14180c0c - .word 0x35261313 - .word 0x2fc3ecec - .word 0xe1be5f5f - .word 0xa2359797 - .word 0xcc884444 - .word 0x392e1717 - .word 0x5793c4c4 - .word 0xf255a7a7 - .word 0x82fc7e7e - .word 0x477a3d3d - .word 0xacc86464 - .word 0xe7ba5d5d - .word 0x2b321919 - .word 0x95e67373 - .word 0xa0c06060 - .word 0x98198181 - .word 0xd19e4f4f - .word 0x7fa3dcdc - .word 0x66442222 - .word 0x7e542a2a - .word 0xab3b9090 - .word 0x830b8888 - .word 0xca8c4646 - .word 0x29c7eeee - .word 0xd36bb8b8 - .word 0x3c281414 - .word 0x79a7dede - .word 0xe2bc5e5e - .word 0x1d160b0b - .word 0x76addbdb - .word 0x3bdbe0e0 - .word 0x56643232 - .word 0x4e743a3a - .word 0x1e140a0a - .word 0xdb924949 - .word 0xa0c0606 - .word 0x6c482424 - .word 0xe4b85c5c - .word 0x5d9fc2c2 - .word 0x6ebdd3d3 - .word 0xef43acac - .word 0xa6c46262 - .word 0xa8399191 - .word 0xa4319595 - .word 0x37d3e4e4 - .word 0x8bf27979 - .word 0x32d5e7e7 - .word 0x438bc8c8 - .word 0x596e3737 - .word 0xb7da6d6d - .word 0x8c018d8d - .word 0x64b1d5d5 - .word 0xd29c4e4e - .word 0xe049a9a9 - .word 0xb4d86c6c - .word 0xfaac5656 - .word 0x7f3f4f4 - .word 0x25cfeaea - .word 0xafca6565 - .word 0x8ef47a7a - .word 0xe947aeae - .word 0x18100808 - .word 0xd56fbaba - .word 0x88f07878 - .word 0x6f4a2525 - .word 0x725c2e2e - .word 0x24381c1c - .word 0xf157a6a6 - .word 0xc773b4b4 - .word 0x5197c6c6 - .word 0x23cbe8e8 - .word 0x7ca1dddd - .word 0x9ce87474 - .word 0x213e1f1f - .word 0xdd964b4b - .word 0xdc61bdbd - .word 0x860d8b8b - .word 0x850f8a8a - .word 0x90e07070 - .word 0x427c3e3e - .word 0xc471b5b5 - .word 0xaacc6666 - .word 0xd8904848 - .word 0x5060303 - .word 0x1f7f6f6 - .word 0x121c0e0e - .word 0xa3c26161 - .word 0x5f6a3535 - .word 0xf9ae5757 - .word 0xd069b9b9 - .word 0x91178686 - .word 0x5899c1c1 - .word 0x273a1d1d - .word 0xb9279e9e - .word 0x38d9e1e1 - .word 0x13ebf8f8 - .word 0xb32b9898 - .word 0x33221111 - .word 0xbbd26969 - .word 0x70a9d9d9 - .word 0x89078e8e - .word 0xa7339494 - .word 0xb62d9b9b - .word 0x223c1e1e - .word 0x92158787 - .word 0x20c9e9e9 - .word 0x4987cece - .word 0xffaa5555 - .word 0x78502828 - .word 0x7aa5dfdf - .word 0x8f038c8c - .word 0xf859a1a1 - .word 0x80098989 - .word 0x171a0d0d - .word 0xda65bfbf - .word 0x31d7e6e6 - .word 0xc6844242 - .word 0xb8d06868 - .word 0xc3824141 - .word 0xb0299999 - .word 0x775a2d2d - .word 0x111e0f0f - .word 0xcb7bb0b0 - .word 0xfca85454 - .word 0xd66dbbbb - .word 0x3a2c1616 + .long 0xa5c66363,0x84f87c7c,0x99ee7777,0x8df67b7b + .long 0x0dfff2f2,0xbdd66b6b,0xb1de6f6f,0x5491c5c5 + .long 0x50603030,0x03020101,0xa9ce6767,0x7d562b2b + .long 0x19e7fefe,0x62b5d7d7,0xe64dabab,0x9aec7676 + .long 0x458fcaca,0x9d1f8282,0x4089c9c9,0x87fa7d7d + .long 0x15effafa,0xebb25959,0xc98e4747,0x0bfbf0f0 + .long 0xec41adad,0x67b3d4d4,0xfd5fa2a2,0xea45afaf + .long 0xbf239c9c,0xf753a4a4,0x96e47272,0x5b9bc0c0 + .long 0xc275b7b7,0x1ce1fdfd,0xae3d9393,0x6a4c2626 + .long 0x5a6c3636,0x417e3f3f,0x02f5f7f7,0x4f83cccc + .long 0x5c683434,0xf451a5a5,0x34d1e5e5,0x08f9f1f1 + .long 0x93e27171,0x73abd8d8,0x53623131,0x3f2a1515 + .long 0x0c080404,0x5295c7c7,0x65462323,0x5e9dc3c3 + .long 0x28301818,0xa1379696,0x0f0a0505,0xb52f9a9a + .long 0x090e0707,0x36241212,0x9b1b8080,0x3ddfe2e2 + .long 0x26cdebeb,0x694e2727,0xcd7fb2b2,0x9fea7575 + .long 0x1b120909,0x9e1d8383,0x74582c2c,0x2e341a1a + .long 0x2d361b1b,0xb2dc6e6e,0xeeb45a5a,0xfb5ba0a0 + .long 0xf6a45252,0x4d763b3b,0x61b7d6d6,0xce7db3b3 + .long 0x7b522929,0x3edde3e3,0x715e2f2f,0x97138484 + .long 0xf5a65353,0x68b9d1d1,0x00000000,0x2cc1eded + .long 0x60402020,0x1fe3fcfc,0xc879b1b1,0xedb65b5b + .long 0xbed46a6a,0x468dcbcb,0xd967bebe,0x4b723939 + .long 0xde944a4a,0xd4984c4c,0xe8b05858,0x4a85cfcf + .long 0x6bbbd0d0,0x2ac5efef,0xe54faaaa,0x16edfbfb + .long 0xc5864343,0xd79a4d4d,0x55663333,0x94118585 + .long 0xcf8a4545,0x10e9f9f9,0x06040202,0x81fe7f7f + .long 0xf0a05050,0x44783c3c,0xba259f9f,0xe34ba8a8 + .long 0xf3a25151,0xfe5da3a3,0xc0804040,0x8a058f8f + .long 0xad3f9292,0xbc219d9d,0x48703838,0x04f1f5f5 + .long 0xdf63bcbc,0xc177b6b6,0x75afdada,0x63422121 + .long 0x30201010,0x1ae5ffff,0x0efdf3f3,0x6dbfd2d2 + .long 0x4c81cdcd,0x14180c0c,0x35261313,0x2fc3ecec + .long 0xe1be5f5f,0xa2359797,0xcc884444,0x392e1717 + .long 0x5793c4c4,0xf255a7a7,0x82fc7e7e,0x477a3d3d + .long 0xacc86464,0xe7ba5d5d,0x2b321919,0x95e67373 + .long 0xa0c06060,0x98198181,0xd19e4f4f,0x7fa3dcdc + .long 0x66442222,0x7e542a2a,0xab3b9090,0x830b8888 + .long 0xca8c4646,0x29c7eeee,0xd36bb8b8,0x3c281414 + .long 0x79a7dede,0xe2bc5e5e,0x1d160b0b,0x76addbdb + .long 0x3bdbe0e0,0x56643232,0x4e743a3a,0x1e140a0a + .long 0xdb924949,0x0a0c0606,0x6c482424,0xe4b85c5c + .long 0x5d9fc2c2,0x6ebdd3d3,0xef43acac,0xa6c46262 + .long 0xa8399191,0xa4319595,0x37d3e4e4,0x8bf27979 + .long 0x32d5e7e7,0x438bc8c8,0x596e3737,0xb7da6d6d + .long 0x8c018d8d,0x64b1d5d5,0xd29c4e4e,0xe049a9a9 + .long 0xb4d86c6c,0xfaac5656,0x07f3f4f4,0x25cfeaea + .long 0xafca6565,0x8ef47a7a,0xe947aeae,0x18100808 + .long 0xd56fbaba,0x88f07878,0x6f4a2525,0x725c2e2e + .long 0x24381c1c,0xf157a6a6,0xc773b4b4,0x5197c6c6 + .long 0x23cbe8e8,0x7ca1dddd,0x9ce87474,0x213e1f1f + .long 0xdd964b4b,0xdc61bdbd,0x860d8b8b,0x850f8a8a + .long 0x90e07070,0x427c3e3e,0xc471b5b5,0xaacc6666 + .long 0xd8904848,0x05060303,0x01f7f6f6,0x121c0e0e + .long 0xa3c26161,0x5f6a3535,0xf9ae5757,0xd069b9b9 + .long 0x91178686,0x5899c1c1,0x273a1d1d,0xb9279e9e + .long 0x38d9e1e1,0x13ebf8f8,0xb32b9898,0x33221111 + .long 0xbbd26969,0x70a9d9d9,0x89078e8e,0xa7339494 + .long 0xb62d9b9b,0x223c1e1e,0x92158787,0x20c9e9e9 + .long 0x4987cece,0xffaa5555,0x78502828,0x7aa5dfdf + .long 0x8f038c8c,0xf859a1a1,0x80098989,0x171a0d0d + .long 0xda65bfbf,0x31d7e6e6,0xc6844242,0xb8d06868 + .long 0xc3824141,0xb0299999,0x775a2d2d,0x111e0f0f + .long 0xcb7bb0b0,0xfca85454,0xd66dbbbb,0x3a2c1616 #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT +#ifndef __APPLE__ .text .type L_AES_Thumb2_td, %object .size L_AES_Thumb2_td, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_td: - .word L_AES_Thumb2_td_data + .long L_AES_Thumb2_td_data #endif /* HAVE_AES_DECRYPT */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#ifndef __APPLE__ .text .type L_AES_Thumb2_te, %object .size L_AES_Thumb2_te, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_te: - .word L_AES_Thumb2_te_data + .long L_AES_Thumb2_te_data #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT @@ -683,21 +335,23 @@ L_AES_invert_key_mix_loop: /* Cycle Count = 165 */ .size AES_invert_key,.-AES_invert_key #endif /* HAVE_AES_DECRYPT */ +#ifndef __APPLE__ .text .type L_AES_Thumb2_rcon, %object .size L_AES_Thumb2_rcon, 40 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_rcon: - .word 0x1000000 - .word 0x2000000 - .word 0x4000000 - .word 0x8000000 - .word 0x10000000 - .word 0x20000000 - .word 0x40000000 - .word 0x80000000 - .word 0x1b000000 - .word 0x36000000 + .long 0x01000000,0x02000000,0x04000000,0x08000000 + .long 0x10000000,0x20000000,0x40000000,0x80000000 + .long 0x1b000000,0x36000000 .text .align 4 .globl AES_set_encrypt_key @@ -1142,12 +796,21 @@ L_AES_encrypt_block_nr: #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ #if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +#ifndef __APPLE__ .text .type L_AES_Thumb2_te_ecb, %object .size L_AES_Thumb2_te_ecb, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_te_ecb: - .word L_AES_Thumb2_te_data + .long L_AES_Thumb2_te_data #endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || * WOLFSSL_AES_COUNTER */ #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ @@ -2727,12 +2390,21 @@ L_AES_CBC_encrypt_end: .size AES_CBC_encrypt,.-AES_CBC_encrypt #endif /* HAVE_AES_CBC */ #ifdef WOLFSSL_AES_COUNTER +#ifndef __APPLE__ .text .type L_AES_Thumb2_te_ctr, %object .size L_AES_Thumb2_te_ctr, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_te_ctr: - .word L_AES_Thumb2_te_data + .long L_AES_Thumb2_te_data .text .align 4 .globl AES_CTR_encrypt @@ -3768,273 +3440,67 @@ L_AES_decrypt_block_nr: /* Cycle Count = 285 */ .size AES_decrypt_block,.-AES_decrypt_block #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#ifndef __APPLE__ .text .type L_AES_Thumb2_td_ecb, %object .size L_AES_Thumb2_td_ecb, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_td_ecb: - .word L_AES_Thumb2_td_data + .long L_AES_Thumb2_td_data +#ifndef __APPLE__ .text .type L_AES_Thumb2_td4, %object .size L_AES_Thumb2_td4, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 4-byte aligned, 32-bit aligned */ +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_AES_Thumb2_td4: - .byte 0x52 - .byte 0x9 - .byte 0x6a - .byte 0xd5 - .byte 0x30 - .byte 0x36 - .byte 0xa5 - .byte 0x38 - .byte 0xbf - .byte 0x40 - .byte 0xa3 - .byte 0x9e - .byte 0x81 - .byte 0xf3 - .byte 0xd7 - .byte 0xfb - .byte 0x7c - .byte 0xe3 - .byte 0x39 - .byte 0x82 - .byte 0x9b - .byte 0x2f - .byte 0xff - .byte 0x87 - .byte 0x34 - .byte 0x8e - .byte 0x43 - .byte 0x44 - .byte 0xc4 - .byte 0xde - .byte 0xe9 - .byte 0xcb - .byte 0x54 - .byte 0x7b - .byte 0x94 - .byte 0x32 - .byte 0xa6 - .byte 0xc2 - .byte 0x23 - .byte 0x3d - .byte 0xee - .byte 0x4c - .byte 0x95 - .byte 0xb - .byte 0x42 - .byte 0xfa - .byte 0xc3 - .byte 0x4e - .byte 0x8 - .byte 0x2e - .byte 0xa1 - .byte 0x66 - .byte 0x28 - .byte 0xd9 - .byte 0x24 - .byte 0xb2 - .byte 0x76 - .byte 0x5b - .byte 0xa2 - .byte 0x49 - .byte 0x6d - .byte 0x8b - .byte 0xd1 - .byte 0x25 - .byte 0x72 - .byte 0xf8 - .byte 0xf6 - .byte 0x64 - .byte 0x86 - .byte 0x68 - .byte 0x98 - .byte 0x16 - .byte 0xd4 - .byte 0xa4 - .byte 0x5c - .byte 0xcc - .byte 0x5d - .byte 0x65 - .byte 0xb6 - .byte 0x92 - .byte 0x6c - .byte 0x70 - .byte 0x48 - .byte 0x50 - .byte 0xfd - .byte 0xed - .byte 0xb9 - .byte 0xda - .byte 0x5e - .byte 0x15 - .byte 0x46 - .byte 0x57 - .byte 0xa7 - .byte 0x8d - .byte 0x9d - .byte 0x84 - .byte 0x90 - .byte 0xd8 - .byte 0xab - .byte 0x0 - .byte 0x8c - .byte 0xbc - .byte 0xd3 - .byte 0xa - .byte 0xf7 - .byte 0xe4 - .byte 0x58 - .byte 0x5 - .byte 0xb8 - .byte 0xb3 - .byte 0x45 - .byte 0x6 - .byte 0xd0 - .byte 0x2c - .byte 0x1e - .byte 0x8f - .byte 0xca - .byte 0x3f - .byte 0xf - .byte 0x2 - .byte 0xc1 - .byte 0xaf - .byte 0xbd - .byte 0x3 - .byte 0x1 - .byte 0x13 - .byte 0x8a - .byte 0x6b - .byte 0x3a - .byte 0x91 - .byte 0x11 - .byte 0x41 - .byte 0x4f - .byte 0x67 - .byte 0xdc - .byte 0xea - .byte 0x97 - .byte 0xf2 - .byte 0xcf - .byte 0xce - .byte 0xf0 - .byte 0xb4 - .byte 0xe6 - .byte 0x73 - .byte 0x96 - .byte 0xac - .byte 0x74 - .byte 0x22 - .byte 0xe7 - .byte 0xad - .byte 0x35 - .byte 0x85 - .byte 0xe2 - .byte 0xf9 - .byte 0x37 - .byte 0xe8 - .byte 0x1c - .byte 0x75 - .byte 0xdf - .byte 0x6e - .byte 0x47 - .byte 0xf1 - .byte 0x1a - .byte 0x71 - .byte 0x1d - .byte 0x29 - .byte 0xc5 - .byte 0x89 - .byte 0x6f - .byte 0xb7 - .byte 0x62 - .byte 0xe - .byte 0xaa - .byte 0x18 - .byte 0xbe - .byte 0x1b - .byte 0xfc - .byte 0x56 - .byte 0x3e - .byte 0x4b - .byte 0xc6 - .byte 0xd2 - .byte 0x79 - .byte 0x20 - .byte 0x9a - .byte 0xdb - .byte 0xc0 - .byte 0xfe - .byte 0x78 - .byte 0xcd - .byte 0x5a - .byte 0xf4 - .byte 0x1f - .byte 0xdd - .byte 0xa8 - .byte 0x33 - .byte 0x88 - .byte 0x7 - .byte 0xc7 - .byte 0x31 - .byte 0xb1 - .byte 0x12 - .byte 0x10 - .byte 0x59 - .byte 0x27 - .byte 0x80 - .byte 0xec - .byte 0x5f - .byte 0x60 - .byte 0x51 - .byte 0x7f - .byte 0xa9 - .byte 0x19 - .byte 0xb5 - .byte 0x4a - .byte 0xd - .byte 0x2d - .byte 0xe5 - .byte 0x7a - .byte 0x9f - .byte 0x93 - .byte 0xc9 - .byte 0x9c - .byte 0xef - .byte 0xa0 - .byte 0xe0 - .byte 0x3b - .byte 0x4d - .byte 0xae - .byte 0x2a - .byte 0xf5 - .byte 0xb0 - .byte 0xc8 - .byte 0xeb - .byte 0xbb - .byte 0x3c - .byte 0x83 - .byte 0x53 - .byte 0x99 - .byte 0x61 - .byte 0x17 - .byte 0x2b - .byte 0x4 - .byte 0x7e - .byte 0xba - .byte 0x77 - .byte 0xd6 - .byte 0x26 - .byte 0xe1 - .byte 0x69 - .byte 0x14 - .byte 0x63 - .byte 0x55 - .byte 0x21 - .byte 0xc - .byte 0x7d + .byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 + .byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb + .byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 + .byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb + .byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d + .byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e + .byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 + .byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 + .byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 + .byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 + .byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda + .byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 + .byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a + .byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 + .byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 + .byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b + .byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea + .byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 + .byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 + .byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e + .byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 + .byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b + .byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 + .byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 + .byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 + .byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f + .byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d + .byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef + .byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 + .byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 + .byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 + .byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_ECB) .text .align 4 @@ -6398,27 +5864,24 @@ L_AES_CBC_decrypt_end: * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM +#ifndef __APPLE__ .text .type L_GCM_gmult_len_r, %object .size L_GCM_gmult_len_r, 64 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_GCM_gmult_len_r: - .word 0x0 - .word 0x1c200000 - .word 0x38400000 - .word 0x24600000 - .word 0x70800000 - .word 0x6ca00000 - .word 0x48c00000 - .word 0x54e00000 - .word 0xe1000000 - .word 0xfd200000 - .word 0xd9400000 - .word 0xc5600000 - .word 0x91800000 - .word 0x8da00000 - .word 0xa9c00000 - .word 0xb5e00000 + .long 0x00000000,0x1c200000,0x38400000,0x24600000 + .long 0x70800000,0x6ca00000,0x48c00000,0x54e00000 + .long 0xe1000000,0xfd200000,0xd9400000,0xc5600000 + .long 0x91800000,0x8da00000,0xa9c00000,0xb5e00000 .text .align 4 .globl GCM_gmult_len @@ -6979,12 +6442,21 @@ L_GCM_gmult_len_start_block: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 742 */ .size GCM_gmult_len,.-GCM_gmult_len +#ifndef __APPLE__ .text .type L_AES_Thumb2_te_gcm, %object .size L_AES_Thumb2_te_gcm, 12 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_AES_Thumb2_te_gcm: - .word L_AES_Thumb2_te_data + .long L_AES_Thumb2_te_data .text .align 4 .globl AES_GCM_encrypt @@ -7795,6 +7267,6 @@ L_AES_GCM_encrypt_end: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index fd89d3973da..6d332507a92 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -41,12 +41,17 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifndef NO_AES #include #ifdef HAVE_AES_DECRYPT -XALIGNED(16) static const word32 L_AES_Thumb2_td_data[] = { +XALIGNED(8) static const word32 L_AES_Thumb2_td_data[] = { 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, @@ -117,7 +122,7 @@ XALIGNED(16) static const word32 L_AES_Thumb2_td_data[] = { #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || \ defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) -XALIGNED(16) static const word32 L_AES_Thumb2_te_data[] = { +XALIGNED(8) static const word32 L_AES_Thumb2_te_data[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, @@ -196,7 +201,7 @@ static const word32* L_AES_Thumb2_te = L_AES_Thumb2_te_data; #endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || * WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT -void AES_invert_key(unsigned char* ks, word32 rounds); +void AES_invert_key(unsigned char* ks_p, word32 rounds_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks_p, word32 rounds_p) #else @@ -208,15 +213,11 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) register word32 rounds __asm__ ("r1") = (word32)rounds_p; register word32* L_AES_Thumb2_te_c __asm__ ("r2") = (word32*)L_AES_Thumb2_te; - register word32* L_AES_Thumb2_td_c __asm__ ("r3") = (word32*)L_AES_Thumb2_td; - #else register word32* L_AES_Thumb2_te_c = (word32*)L_AES_Thumb2_te; - register word32* L_AES_Thumb2_td_c = (word32*)L_AES_Thumb2_td; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -325,24 +326,31 @@ WC_OMIT_FRAME_POINTER void AES_invert_key(unsigned char* ks, word32 rounds) #else "BNE.W L_AES_invert_key_mix_loop_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c) : +#else + : + : [ks] "r" (ks), [rounds] "r" (rounds), + [L_AES_Thumb2_te] "r" (L_AES_Thumb2_te_c), + [L_AES_Thumb2_td] "r" (L_AES_Thumb2_td_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } #endif /* HAVE_AES_DECRYPT */ -XALIGNED(16) static const word32 L_AES_Thumb2_rcon[] = { +XALIGNED(8) static const word32 L_AES_Thumb2_rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0x1b000000, 0x36000000 }; -void AES_set_encrypt_key(const unsigned char* key, word32 len, - unsigned char* ks); +void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, + unsigned char* ks_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) @@ -358,15 +366,11 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, register unsigned char* ks __asm__ ("r2") = (unsigned char*)ks_p; register word32* L_AES_Thumb2_te_c __asm__ ("r3") = (word32*)L_AES_Thumb2_te; - register word32* L_AES_Thumb2_rcon_c __asm__ ("r4") = (word32*)&L_AES_Thumb2_rcon; - #else register word32* L_AES_Thumb2_te_c = (word32*)L_AES_Thumb2_te; - register word32* L_AES_Thumb2_rcon_c = (word32*)&L_AES_Thumb2_rcon; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -629,16 +633,24 @@ WC_OMIT_FRAME_POINTER void AES_set_encrypt_key(const unsigned char* key, #else "L_AES_set_encrypt_key_end_%=:\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) : +#else + : + : [key] "r" (key), [len] "r" (len), [ks] "r" (ks), + [L_AES_Thumb2_te] "r" (L_AES_Thumb2_te_c), + [L_AES_Thumb2_rcon] "r" (L_AES_Thumb2_rcon_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10" ); } #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE -void AES_encrypt_block(const word32* te, int nr, int len, const word32* ks); +void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, + const word32* ks_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_encrypt_block(const word32* te_p, int nr_p, int len_p, const word32* ks_p) @@ -869,8 +881,13 @@ WC_OMIT_FRAME_POINTER void AES_encrypt_block(const word32* te, int nr, int len, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks) : +#else + : + : [te] "r" (te), [nr] "r" (nr), [len] "r" (len), [ks] "r" (ks) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -884,8 +901,8 @@ static const word32* L_AES_Thumb2_te_ecb = L_AES_Thumb2_te_data; #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || \ defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_ECB) -void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, - unsigned long len, const unsigned char* ks, int nr); +void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, @@ -905,10 +922,8 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, register int nr __asm__ ("r4") = (int)nr_p; register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r5") = (word32*)L_AES_Thumb2_te_ecb; - #else register word32* L_AES_Thumb2_te_ecb_c = (word32*)L_AES_Thumb2_te_ecb; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1749,9 +1764,15 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, "L_AES_ECB_encrypt_end_%=:\n\t" #endif "POP {%[ks]}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : +#else + : + : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks), + [nr] "r" (nr), [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -1759,8 +1780,9 @@ WC_OMIT_FRAME_POINTER void AES_ECB_encrypt(const unsigned char* in, #endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || * WOLFSSL_AES_COUNTER || HAVE_AES_ECB */ #ifdef HAVE_AES_CBC -void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, - unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* iv_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, @@ -1782,10 +1804,8 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; register word32* L_AES_Thumb2_te_ecb_c __asm__ ("r6") = (word32*)L_AES_Thumb2_te_ecb; - #else register word32* L_AES_Thumb2_te_ecb_c = (word32*)L_AES_Thumb2_te_ecb; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2645,10 +2665,17 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, #endif "POP {%[ks], r9}\n\t" "STM r9, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) : +#else + : + : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks), + [nr] "r" (nr), [iv] "r" (iv), + [L_AES_Thumb2_te_ecb] "r" (L_AES_Thumb2_te_ecb_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } @@ -2656,8 +2683,9 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, #endif /* HAVE_AES_CBC */ #ifdef WOLFSSL_AES_COUNTER static const word32* L_AES_Thumb2_te_ctr = L_AES_Thumb2_te_data; -void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, - unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* ctr_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, @@ -2679,10 +2707,8 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; register word32* L_AES_Thumb2_te_ctr_c __asm__ ("r6") = (word32*)L_AES_Thumb2_te_ctr; - #else register word32* L_AES_Thumb2_te_ctr_c = (word32*)L_AES_Thumb2_te_ctr; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3563,10 +3589,17 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, "REV r6, r6\n\t" "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_ctr] "+r" (L_AES_Thumb2_te_ctr_c) : +#else + : + : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks), + [nr] "r" (nr), [ctr] "r" (ctr), + [L_AES_Thumb2_te_ctr] "r" (L_AES_Thumb2_te_ctr_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } @@ -3576,7 +3609,7 @@ WC_OMIT_FRAME_POINTER void AES_CTR_encrypt(const unsigned char* in, #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ defined(HAVE_AES_CBC) || defined(HAVE_AES_ECB) #ifndef WOLFSSL_ARMASM_AES_BLOCK_INLINE -void AES_decrypt_block(const word32* td, int nr, const byte* td4); +void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_decrypt_block(const word32* td_p, int nr_p, const byte* td4_p) @@ -3806,15 +3839,20 @@ WC_OMIT_FRAME_POINTER void AES_decrypt_block(const word32* td, int nr, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) : +#else + : + : [td] "r" (td), [nr] "r" (nr), [td4] "r" (td4) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ static const word32* L_AES_Thumb2_td_ecb = L_AES_Thumb2_td_data; -static const byte L_AES_Thumb2_td4[] = { +XALIGNED(4) static const word8 L_AES_Thumb2_td4[] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, @@ -3849,9 +3887,10 @@ static const byte L_AES_Thumb2_td4[] = { 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, }; -#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_ECB) -void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, - unsigned long len, const unsigned char* ks, int nr); +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || \ + defined(HAVE_AES_ECB) +void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, @@ -3871,15 +3910,11 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, register int nr __asm__ ("r4") = (int)nr_p; register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r5") = (word32*)L_AES_Thumb2_td_ecb; - - register byte* L_AES_Thumb2_td4_c __asm__ ("r6") = - (byte*)&L_AES_Thumb2_td4; - + register word8* L_AES_Thumb2_td4_c __asm__ ("r6") = + (word8*)&L_AES_Thumb2_td4; #else register word32* L_AES_Thumb2_td_ecb_c = (word32*)L_AES_Thumb2_td_ecb; - - register byte* L_AES_Thumb2_td4_c = (byte*)&L_AES_Thumb2_td4; - + register word8* L_AES_Thumb2_td4_c = (word8*)&L_AES_Thumb2_td4; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4717,18 +4752,26 @@ WC_OMIT_FRAME_POINTER void AES_ECB_decrypt(const unsigned char* in, #else "L_AES_ECB_decrypt_end_%=:\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : +#else + : + : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks), + [nr] "r" (nr), [L_AES_Thumb2_td_ecb] "r" (L_AES_Thumb2_td_ecb_c), + [L_AES_Thumb2_td4] "r" (L_AES_Thumb2_td4_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || defined(HAVE_AES_ECB) */ #ifdef HAVE_AES_CBC -void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, - unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* iv_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, @@ -4750,15 +4793,11 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, register unsigned char* iv __asm__ ("r5") = (unsigned char*)iv_p; register word32* L_AES_Thumb2_td_ecb_c __asm__ ("r6") = (word32*)L_AES_Thumb2_td_ecb; - - register byte* L_AES_Thumb2_td4_c __asm__ ("r7") = - (byte*)&L_AES_Thumb2_td4; - + register word8* L_AES_Thumb2_td4_c __asm__ ("r7") = + (word8*)&L_AES_Thumb2_td4; #else register word32* L_AES_Thumb2_td_ecb_c = (word32*)L_AES_Thumb2_td_ecb; - - register byte* L_AES_Thumb2_td4_c = (byte*)&L_AES_Thumb2_td4; - + register word8* L_AES_Thumb2_td4_c = (word8*)&L_AES_Thumb2_td4; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -6431,11 +6470,19 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, "L_AES_CBC_decrypt_end_%=:\n\t" #endif "POP {%[ks], r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : +#else + : + : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks), + [nr] "r" (nr), [iv] "r" (iv), + [L_AES_Thumb2_td_ecb] "r" (L_AES_Thumb2_td_ecb_c), + [L_AES_Thumb2_td4] "r" (L_AES_Thumb2_td4_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r8", "r9", "r10", "r11" ); } @@ -6445,15 +6492,15 @@ WC_OMIT_FRAME_POINTER void AES_CBC_decrypt(const unsigned char* in, * HAVE_AES_ECB */ #endif /* HAVE_AES_DECRYPT */ #ifdef HAVE_AESGCM -XALIGNED(16) static const word32 L_GCM_gmult_len_r[] = { +XALIGNED(8) static const word32 L_GCM_gmult_len_r[] = { 0x00000000, 0x1c200000, 0x38400000, 0x24600000, 0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000, 0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000, 0x91800000, 0x8da00000, 0xa9c00000, 0xb5e00000, }; -void GCM_gmult_len(unsigned char* x, const unsigned char** m, - const unsigned char* data, unsigned long len); +void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, + const unsigned char* data_p, unsigned long len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) @@ -6471,10 +6518,8 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, register unsigned long len __asm__ ("r3") = (unsigned long)len_p; register word32* L_GCM_gmult_len_r_c __asm__ ("r4") = (word32*)&L_GCM_gmult_len_r; - #else register word32* L_GCM_gmult_len_r_c = (word32*)&L_GCM_gmult_len_r; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -7036,17 +7081,24 @@ WC_OMIT_FRAME_POINTER void GCM_gmult_len(unsigned char* x, #else "BNE.W L_GCM_gmult_len_start_block_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) : +#else + : + : [x] "r" (x), [m] "r" (m), [data] "r" (data), [len] "r" (len), + [L_GCM_gmult_len_r] "r" (L_GCM_gmult_len_r_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } static const word32* L_AES_Thumb2_te_gcm = L_AES_Thumb2_te_data; -void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, - unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, + unsigned long len_p, const unsigned char* ks_p, int nr_p, + unsigned char* ctr_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, @@ -7068,10 +7120,8 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, register unsigned char* ctr __asm__ ("r5") = (unsigned char*)ctr_p; register word32* L_AES_Thumb2_te_gcm_c __asm__ ("r6") = (word32*)L_AES_Thumb2_te_gcm; - #else register word32* L_AES_Thumb2_te_gcm_c = (word32*)L_AES_Thumb2_te_gcm; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -7943,10 +7993,17 @@ WC_OMIT_FRAME_POINTER void AES_GCM_encrypt(const unsigned char* in, "REV r6, r6\n\t" "REV r7, r7\n\t" "STM r8, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_gcm] "+r" (L_AES_Thumb2_te_gcm_c) : +#else + : + : [in] "r" (in), [out] "r" (out), [len] "r" (len), [ks] "r" (ks), + [nr] "r" (nr), [ctr] "r" (ctr), + [L_AES_Thumb2_te_gcm] "r" (L_AES_Thumb2_te_gcm_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); } diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S index be046d02bb2..775c3f51483 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S @@ -53,19 +53,22 @@ wc_chacha_setiv: POP {r4, r5, r6, pc} /* Cycle Count = 26 */ .size wc_chacha_setiv,.-wc_chacha_setiv +#ifndef __APPLE__ .text .type L_chacha_thumb2_constants, %object .size L_chacha_thumb2_constants, 32 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_chacha_thumb2_constants: - .word 0x61707865 - .word 0x3120646e - .word 0x79622d36 - .word 0x6b206574 - .word 0x61707865 - .word 0x3320646e - .word 0x79622d32 - .word 0x6b206574 + .long 0x61707865,0x3120646e,0x79622d36,0x6b206574 + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 .text .align 4 .globl wc_chacha_setkey @@ -568,6 +571,6 @@ L_chacha_thumb2_over_done: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c index 4d2627524bc..cfaf6fa2ddb 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c @@ -41,6 +41,11 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifdef HAVE_CHACHA #include @@ -71,13 +76,18 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setiv(word32* x, const byte* iv, "REV r6, r6\n\t" #endif /* BIG_ENDIAN_ORDER */ "STM r3, {r4, r5, r6}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [iv] "+r" (iv), [counter] "+r" (counter) : +#else + : + : [x] "r" (x), [iv] "r" (iv), [counter] "r" (counter) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6" ); } -XALIGNED(16) static const word32 L_chacha_thumb2_constants[] = { +XALIGNED(8) static const word32 L_chacha_thumb2_constants[] = { 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574, 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, }; @@ -96,11 +106,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, register word32 keySz __asm__ ("r2") = (word32)keySz_p; register word32* L_chacha_thumb2_constants_c __asm__ ("r3") = (word32*)&L_chacha_thumb2_constants; - #else register word32* L_chacha_thumb2_constants_c = (word32*)&L_chacha_thumb2_constants; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143,9 +151,15 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "L_chacha_thumb2_setkey_same_key_bytes_%=:\n\t" #endif "STM %[x], {r3, r4, r5, r6}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), [L_chacha_thumb2_constants] "+r" (L_chacha_thumb2_constants_c) : +#else + : + : [x] "r" (x), [key] "r" (key), [keySz] "r" (keySz), + [L_chacha_thumb2_constants] "r" (L_chacha_thumb2_constants_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7" ); } @@ -583,8 +597,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "L_chacha_thumb2_crypt_done_%=:\n\t" #endif "ADD sp, sp, #0x34\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [c] "+r" (c), [m] "+r" (m), [len] "+r" (len) : +#else + : + : [ctx] "r" (ctx), [c] "r" (c), [m] "r" (m), [len] "r" (len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -727,9 +746,15 @@ WC_OMIT_FRAME_POINTER void wc_chacha_use_over(byte* over, byte* output, #else "L_chacha_thumb2_over_done_%=:\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [over] "+r" (over), [output] "+r" (output), [input] "+r" (input), [len] "+r" (len) : +#else + : + : [over] "r" (over), [output] "r" (output), [input] "r" (input), + [len] "r" (len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S b/wolfcrypt/src/port/arm/thumb2-curve25519.S index bbacc310e47..27acee8b1cb 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -6480,6 +6480,6 @@ sc_muladd: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 23816a095a7..e351b4349e2 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -41,6 +41,11 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ /* Based on work by: Emil Lenngren * https://github.com/pornin/X25519-Cortex-M4 @@ -63,8 +68,13 @@ WC_OMIT_FRAME_POINTER void fe_init() #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : + : +#else : : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc" ); } @@ -166,8 +176,13 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op() "SBC r11, r11, #0x0\n\t" "STM r1, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" /* Done Add-Sub */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : + : +#else : : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -207,8 +222,13 @@ WC_OMIT_FRAME_POINTER void fe_sub_op() "SBC lr, lr, #0x0\n\t" "STM r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" /* Done Sub */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : + : +#else : : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -227,8 +247,13 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b) __asm__ __volatile__ ( "BL fe_sub_op\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -269,8 +294,13 @@ WC_OMIT_FRAME_POINTER void fe_add_op() "ADC lr, lr, #0x0\n\t" "STM r0, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" /* Done Add */ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : : +#else + : + : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -289,8 +319,13 @@ WC_OMIT_FRAME_POINTER void fe_add(fe r, const fe a, const fe b) __asm__ __volatile__ ( "BL fe_add_op\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -327,8 +362,13 @@ WC_OMIT_FRAME_POINTER void fe_frombytes(fe out, const unsigned char* in) "STR r7, [%[out], #20]\n\t" "STR r8, [%[out], #24]\n\t" "STR r9, [%[out], #28]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [out] "+r" (out), [in] "+r" (in) : +#else + : + : [out] "r" (out), [in] "r" (in) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -373,8 +413,13 @@ WC_OMIT_FRAME_POINTER void fe_tobytes(unsigned char* out, const fe n) "STR r7, [%[out], #20]\n\t" "STR r8, [%[out], #24]\n\t" "STR r9, [%[out], #28]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [out] "+r" (out), [n] "+r" (n) : +#else + : + : [out] "r" (out), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -400,8 +445,13 @@ WC_OMIT_FRAME_POINTER void fe_1(fe n) "MOV r8, #0x0\n\t" "MOV r9, #0x0\n\t" "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [n] "+r" (n) : +#else + : + : [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -427,8 +477,13 @@ WC_OMIT_FRAME_POINTER void fe_0(fe n) "MOV r8, #0x0\n\t" "MOV r9, #0x0\n\t" "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [n] "+r" (n) : +#else + : + : [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -454,8 +509,13 @@ WC_OMIT_FRAME_POINTER void fe_copy(fe r, const fe a) "LDRD r4, r5, [%[a], #24]\n\t" "STRD r2, r3, [%[r], #16]\n\t" "STRD r4, r5, [%[r], #24]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5" ); } @@ -487,8 +547,13 @@ WC_OMIT_FRAME_POINTER void fe_neg(fe r, const fe a) "SBCS r4, r7, r4\n\t" "SBC r5, r6, r5\n\t" "STM %[r]!, {r2, r3, r4, r5}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); } @@ -531,8 +596,13 @@ WC_OMIT_FRAME_POINTER int fe_isnonzero(const fe a) "ORR r4, r4, r6\n\t" "ORR r2, r2, r8\n\t" "ORR %[a], r2, r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); @@ -564,14 +634,20 @@ WC_OMIT_FRAME_POINTER int fe_isnegative(const fe a) "AND %[a], r2, #0x1\n\t" "LSR r1, r1, #31\n\t" "EOR %[a], %[a], r1\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } -#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) || defined(WOLFSSL_CURVE25519_USE_ED25519) +#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) || \ + defined(WOLFSSL_CURVE25519_USE_ED25519) #ifndef WC_NO_CACHE_RESISTANT #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r_p, const fe* base_p, @@ -1550,8 +1626,13 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "STRD r4, r5, [%[r], #24]\n\t" "STRD r6, r7, [%[r], #56]\n\t" "STRD r8, r9, [%[r], #88]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : +#else + : + : [r] "r" (r), [base] "r" (base), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr" ); @@ -1664,15 +1745,21 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) "AND r7, r7, lr\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" "SUB %[base], %[base], %[b]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : +#else + : + : [r] "r" (r), [base] "r" (base), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } #endif /* WC_NO_CACHE_RESISTANT */ -#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN || WOLFSSL_CURVE25519_USE_ED25519 */ +#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN || + * WOLFSSL_CURVE25519_USE_ED25519 */ #endif /* HAVE_ED25519 || WOLFSSL_CURVE25519_USE_ED25519 */ #ifdef WOLFSSL_ARM_ARCH_7M void fe_mul_op(void); @@ -2054,8 +2141,13 @@ WC_OMIT_FRAME_POINTER void fe_mul_op() "LDR r0, [sp, #36]\n\t" "STM r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x28\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : : +#else + : + : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -2194,8 +2286,13 @@ WC_OMIT_FRAME_POINTER void fe_mul_op() /* Store */ "STM lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "ADD sp, sp, #0x10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : : +#else + : + : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -2215,8 +2312,13 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b) __asm__ __volatile__ ( "BL fe_mul_op\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -2495,8 +2597,13 @@ WC_OMIT_FRAME_POINTER void fe_sq_op() "LDR r0, [sp, #64]\n\t" "STM r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : + : +#else : : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -2621,8 +2728,13 @@ WC_OMIT_FRAME_POINTER void fe_sq_op() "POP {lr}\n\t" /* Store */ "STM lr, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : + : +#else : : +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -2641,8 +2753,13 @@ WC_OMIT_FRAME_POINTER void fe_sq(fe r, const fe a) __asm__ __volatile__ ( "BL fe_sq_op\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -2702,8 +2819,13 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) "ADCS r8, r8, #0x0\n\t" "ADC r9, r9, #0x0\n\t" "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -2749,8 +2871,13 @@ WC_OMIT_FRAME_POINTER void fe_mul121666(fe r, fe a) "ADCS r8, r8, #0x0\n\t" "ADC r9, r9, #0x0\n\t" "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -3247,8 +3374,13 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r0, #0x0\n\t" "ADD sp, sp, #0xbc\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : +#else + : + : [r] "r" (r), [n] "r" (n), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr" ); @@ -3655,8 +3787,13 @@ WC_OMIT_FRAME_POINTER int curve25519(byte* r, const byte* n, const byte* a) "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV r0, #0x0\n\t" "ADD sp, sp, #0xc0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : +#else + : + : [r] "r" (r), [n] "r" (n), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr" ); @@ -3916,8 +4053,13 @@ WC_OMIT_FRAME_POINTER void fe_invert(fe r, const fe a) "LDR %[a], [sp, #132]\n\t" "LDR %[r], [sp, #128]\n\t" "ADD sp, sp, #0x88\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -4231,8 +4373,13 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "LDR r0, [sp, #64]\n\t" "STM r0, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -4394,8 +4541,13 @@ WC_OMIT_FRAME_POINTER void fe_sq2(fe r, const fe a) "STM r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "MOV r0, r12\n\t" "MOV r1, lr\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr" ); } @@ -4651,8 +4803,13 @@ WC_OMIT_FRAME_POINTER void fe_pow22523(fe r, const fe a) "LDR %[a], [sp, #100]\n\t" "LDR %[r], [sp, #96]\n\t" "ADD sp, sp, #0x68\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -4688,8 +4845,13 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) "ADD r0, r0, #0x40\n\t" "BL fe_mul_op\n\t" "ADD sp, sp, #0x8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p) : +#else + : + : [r] "r" (r), [p] "r" (p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -4730,8 +4892,13 @@ WC_OMIT_FRAME_POINTER void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) "ADD r0, r0, #0x60\n\t" "BL fe_mul_op\n\t" "ADD sp, sp, #0x8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p) : +#else + : + : [r] "r" (r), [p] "r" (p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -4784,8 +4951,13 @@ WC_OMIT_FRAME_POINTER void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) "MOV r1, r0\n\t" "BL fe_sub_op\n\t" "ADD sp, sp, #0x8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p) : +#else + : + : [r] "r" (r), [p] "r" (p) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -4875,8 +5047,13 @@ WC_OMIT_FRAME_POINTER void ge_madd(ge_p1p1 * r, const ge_p3 * p, "ADD r1, r0, #0x20\n\t" "BL fe_add_sub_op\n\t" "ADD sp, sp, #0xc\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : +#else + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -4967,8 +5144,13 @@ WC_OMIT_FRAME_POINTER void ge_msub(ge_p1p1 * r, const ge_p3 * p, "ADD r0, r0, #0x20\n\t" "BL fe_add_sub_op\n\t" "ADD sp, sp, #0xc\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : +#else + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -5059,8 +5241,13 @@ WC_OMIT_FRAME_POINTER void ge_add(ge_p1p1 * r, const ge_p3 * p, "ADD r0, r0, #0x20\n\t" "BL fe_add_sub_op\n\t" "ADD sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : +#else + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -5151,8 +5338,13 @@ WC_OMIT_FRAME_POINTER void ge_sub(ge_p1p1 * r, const ge_p3 * p, "ADD r0, r0, #0x40\n\t" "BL fe_add_sub_op\n\t" "ADD sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : +#else + : + : [r] "r" (r), [p] "r" (p), [q] "r" (q) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -5588,8 +5780,13 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "LDR %[s], [sp, #52]\n\t" "STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADD sp, sp, #0x38\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [s] "+r" (s) : +#else + : + : [s] "r" (s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -5894,8 +6091,13 @@ WC_OMIT_FRAME_POINTER void sc_reduce(byte* s) "LDR %[s], [sp, #52]\n\t" "STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADD sp, sp, #0x38\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [s] "+r" (s) : +#else + : + : [s] "r" (s) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -6693,8 +6895,13 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "STR r8, [%[s], #24]\n\t" "STR r9, [%[s], #28]\n\t" "ADD sp, sp, #0x50\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : +#else + : + : [s] "r" (s), [a] "r" (a), [b] "r" (b), [c] "r" (c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -7134,8 +7341,13 @@ WC_OMIT_FRAME_POINTER void sc_muladd(byte* s, const byte* a, const byte* b, "STR r8, [%[s], #24]\n\t" "STR r9, [%[s], #28]\n\t" "ADD sp, sp, #0x50\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) : +#else + : + : [s] "r" (s), [a] "r" (a), [b] "r" (b), [c] "r" (c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); diff --git a/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S b/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S index 5dd14176559..42cd8622f69 100644 --- a/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-mlkem-asm.S @@ -33,139 +33,36 @@ .thumb .syntax unified #ifdef WOLFSSL_WC_MLKEM +#ifndef __APPLE__ .text .type L_mlkem_thumb2_ntt_zetas, %object .size L_mlkem_thumb2_ntt_zetas, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 4-byte aligned, 32-bit aligned */ +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_mlkem_thumb2_ntt_zetas: - .short 0x8ed - .short 0xa0b - .short 0xb9a - .short 0x714 - .short 0x5d5 - .short 0x58e - .short 0x11f - .short 0xca - .short 0xc56 - .short 0x26e - .short 0x629 - .short 0xb6 - .short 0x3c2 - .short 0x84f - .short 0x73f - .short 0x5bc - .short 0x23d - .short 0x7d4 - .short 0x108 - .short 0x17f - .short 0x9c4 - .short 0x5b2 - .short 0x6bf - .short 0xc7f - .short 0xa58 - .short 0x3f9 - .short 0x2dc - .short 0x260 - .short 0x6fb - .short 0x19b - .short 0xc34 - .short 0x6de - .short 0x4c7 - .short 0x28c - .short 0xad9 - .short 0x3f7 - .short 0x7f4 - .short 0x5d3 - .short 0xbe7 - .short 0x6f9 - .short 0x204 - .short 0xcf9 - .short 0xbc1 - .short 0xa67 - .short 0x6af - .short 0x877 - .short 0x7e - .short 0x5bd - .short 0x9ac - .short 0xca7 - .short 0xbf2 - .short 0x33e - .short 0x6b - .short 0x774 - .short 0xc0a - .short 0x94a - .short 0xb73 - .short 0x3c1 - .short 0x71d - .short 0xa2c - .short 0x1c0 - .short 0x8d8 - .short 0x2a5 - .short 0x806 - .short 0x8b2 - .short 0x1ae - .short 0x22b - .short 0x34b - .short 0x81e - .short 0x367 - .short 0x60e - .short 0x69 - .short 0x1a6 - .short 0x24b - .short 0xb1 - .short 0xc16 - .short 0xbde - .short 0xb35 - .short 0x626 - .short 0x675 - .short 0xc0b - .short 0x30a - .short 0x487 - .short 0xc6e - .short 0x9f8 - .short 0x5cb - .short 0xaa7 - .short 0x45f - .short 0x6cb - .short 0x284 - .short 0x999 - .short 0x15d - .short 0x1a2 - .short 0x149 - .short 0xc65 - .short 0xcb6 - .short 0x331 - .short 0x449 - .short 0x25b - .short 0x262 - .short 0x52a - .short 0x7fc - .short 0x748 - .short 0x180 - .short 0x842 - .short 0xc79 - .short 0x4c2 - .short 0x7ca - .short 0x997 - .short 0xdc - .short 0x85e - .short 0x686 - .short 0x860 - .short 0x707 - .short 0x803 - .short 0x31a - .short 0x71b - .short 0x9ab - .short 0x99b - .short 0x1de - .short 0xc95 - .short 0xbcd - .short 0x3e4 - .short 0x3df - .short 0x3be - .short 0x74d - .short 0x5f2 - .short 0x65c + .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca + .short 0x0c56,0x026e,0x0629,0x00b6,0x03c2,0x084f,0x073f,0x05bc + .short 0x023d,0x07d4,0x0108,0x017f,0x09c4,0x05b2,0x06bf,0x0c7f + .short 0x0a58,0x03f9,0x02dc,0x0260,0x06fb,0x019b,0x0c34,0x06de + .short 0x04c7,0x028c,0x0ad9,0x03f7,0x07f4,0x05d3,0x0be7,0x06f9 + .short 0x0204,0x0cf9,0x0bc1,0x0a67,0x06af,0x0877,0x007e,0x05bd + .short 0x09ac,0x0ca7,0x0bf2,0x033e,0x006b,0x0774,0x0c0a,0x094a + .short 0x0b73,0x03c1,0x071d,0x0a2c,0x01c0,0x08d8,0x02a5,0x0806 + .short 0x08b2,0x01ae,0x022b,0x034b,0x081e,0x0367,0x060e,0x0069 + .short 0x01a6,0x024b,0x00b1,0x0c16,0x0bde,0x0b35,0x0626,0x0675 + .short 0x0c0b,0x030a,0x0487,0x0c6e,0x09f8,0x05cb,0x0aa7,0x045f + .short 0x06cb,0x0284,0x0999,0x015d,0x01a2,0x0149,0x0c65,0x0cb6 + .short 0x0331,0x0449,0x025b,0x0262,0x052a,0x07fc,0x0748,0x0180 + .short 0x0842,0x0c79,0x04c2,0x07ca,0x0997,0x00dc,0x085e,0x0686 + .short 0x0860,0x0707,0x0803,0x031a,0x071b,0x09ab,0x099b,0x01de + .short 0x0c95,0x0bcd,0x03e4,0x03df,0x03be,0x074d,0x05f2,0x065c .text .align 4 .globl mlkem_thumb2_ntt @@ -1425,139 +1322,36 @@ L_mlkem_thumb2_ntt_loop_567: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 1270 */ .size mlkem_thumb2_ntt,.-mlkem_thumb2_ntt +#ifndef __APPLE__ .text .type L_mlkem_invntt_zetas_inv, %object .size L_mlkem_invntt_zetas_inv, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 4-byte aligned, 32-bit aligned */ +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_mlkem_invntt_zetas_inv: - .short 0x6a5 - .short 0x70f - .short 0x5b4 - .short 0x943 - .short 0x922 - .short 0x91d - .short 0x134 - .short 0x6c - .short 0xb23 - .short 0x366 - .short 0x356 - .short 0x5e6 - .short 0x9e7 - .short 0x4fe - .short 0x5fa - .short 0x4a1 - .short 0x67b - .short 0x4a3 - .short 0xc25 - .short 0x36a - .short 0x537 - .short 0x83f - .short 0x88 - .short 0x4bf - .short 0xb81 - .short 0x5b9 - .short 0x505 - .short 0x7d7 - .short 0xa9f - .short 0xaa6 - .short 0x8b8 - .short 0x9d0 - .short 0x4b - .short 0x9c - .short 0xbb8 - .short 0xb5f - .short 0xba4 - .short 0x368 - .short 0xa7d - .short 0x636 - .short 0x8a2 - .short 0x25a - .short 0x736 - .short 0x309 - .short 0x93 - .short 0x87a - .short 0x9f7 - .short 0xf6 - .short 0x68c - .short 0x6db - .short 0x1cc - .short 0x123 - .short 0xeb - .short 0xc50 - .short 0xab6 - .short 0xb5b - .short 0xc98 - .short 0x6f3 - .short 0x99a - .short 0x4e3 - .short 0x9b6 - .short 0xad6 - .short 0xb53 - .short 0x44f - .short 0x4fb - .short 0xa5c - .short 0x429 - .short 0xb41 - .short 0x2d5 - .short 0x5e4 - .short 0x940 - .short 0x18e - .short 0x3b7 - .short 0xf7 - .short 0x58d - .short 0xc96 - .short 0x9c3 - .short 0x10f - .short 0x5a - .short 0x355 - .short 0x744 - .short 0xc83 - .short 0x48a - .short 0x652 - .short 0x29a - .short 0x140 - .short 0x8 - .short 0xafd - .short 0x608 - .short 0x11a - .short 0x72e - .short 0x50d - .short 0x90a - .short 0x228 - .short 0xa75 - .short 0x83a - .short 0x623 - .short 0xcd - .short 0xb66 - .short 0x606 - .short 0xaa1 - .short 0xa25 - .short 0x908 - .short 0x2a9 - .short 0x82 - .short 0x642 - .short 0x74f - .short 0x33d - .short 0xb82 - .short 0xbf9 - .short 0x52d - .short 0xac4 - .short 0x745 - .short 0x5c2 - .short 0x4b2 - .short 0x93f - .short 0xc4b - .short 0x6d8 - .short 0xa93 - .short 0xab - .short 0xc37 - .short 0xbe2 - .short 0x773 - .short 0x72c - .short 0x5ed - .short 0x167 - .short 0x2f6 - .short 0x5a1 + .short 0x06a5,0x070f,0x05b4,0x0943,0x0922,0x091d,0x0134,0x006c + .short 0x0b23,0x0366,0x0356,0x05e6,0x09e7,0x04fe,0x05fa,0x04a1 + .short 0x067b,0x04a3,0x0c25,0x036a,0x0537,0x083f,0x0088,0x04bf + .short 0x0b81,0x05b9,0x0505,0x07d7,0x0a9f,0x0aa6,0x08b8,0x09d0 + .short 0x004b,0x009c,0x0bb8,0x0b5f,0x0ba4,0x0368,0x0a7d,0x0636 + .short 0x08a2,0x025a,0x0736,0x0309,0x0093,0x087a,0x09f7,0x00f6 + .short 0x068c,0x06db,0x01cc,0x0123,0x00eb,0x0c50,0x0ab6,0x0b5b + .short 0x0c98,0x06f3,0x099a,0x04e3,0x09b6,0x0ad6,0x0b53,0x044f + .short 0x04fb,0x0a5c,0x0429,0x0b41,0x02d5,0x05e4,0x0940,0x018e + .short 0x03b7,0x00f7,0x058d,0x0c96,0x09c3,0x010f,0x005a,0x0355 + .short 0x0744,0x0c83,0x048a,0x0652,0x029a,0x0140,0x0008,0x0afd + .short 0x0608,0x011a,0x072e,0x050d,0x090a,0x0228,0x0a75,0x083a + .short 0x0623,0x00cd,0x0b66,0x0606,0x0aa1,0x0a25,0x0908,0x02a9 + .short 0x0082,0x0642,0x074f,0x033d,0x0b82,0x0bf9,0x052d,0x0ac4 + .short 0x0745,0x05c2,0x04b2,0x093f,0x0c4b,0x06d8,0x0a93,0x00ab + .short 0x0c37,0x0be2,0x0773,0x072c,0x05ed,0x0167,0x02f6,0x05a1 .text .align 4 .globl mlkem_thumb2_invntt @@ -3184,139 +2978,36 @@ L_mlkem_invntt_loop_321: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 1629 */ .size mlkem_thumb2_invntt,.-mlkem_thumb2_invntt +#ifndef __APPLE__ .text .type L_mlkem_basemul_mont_zetas, %object .size L_mlkem_basemul_mont_zetas, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 4-byte aligned, 32-bit aligned */ +#ifndef __APPLE__ + .align 2 +#else + .p2align 2 +#endif /* __APPLE__ */ L_mlkem_basemul_mont_zetas: - .short 0x8ed - .short 0xa0b - .short 0xb9a - .short 0x714 - .short 0x5d5 - .short 0x58e - .short 0x11f - .short 0xca - .short 0xc56 - .short 0x26e - .short 0x629 - .short 0xb6 - .short 0x3c2 - .short 0x84f - .short 0x73f - .short 0x5bc - .short 0x23d - .short 0x7d4 - .short 0x108 - .short 0x17f - .short 0x9c4 - .short 0x5b2 - .short 0x6bf - .short 0xc7f - .short 0xa58 - .short 0x3f9 - .short 0x2dc - .short 0x260 - .short 0x6fb - .short 0x19b - .short 0xc34 - .short 0x6de - .short 0x4c7 - .short 0x28c - .short 0xad9 - .short 0x3f7 - .short 0x7f4 - .short 0x5d3 - .short 0xbe7 - .short 0x6f9 - .short 0x204 - .short 0xcf9 - .short 0xbc1 - .short 0xa67 - .short 0x6af - .short 0x877 - .short 0x7e - .short 0x5bd - .short 0x9ac - .short 0xca7 - .short 0xbf2 - .short 0x33e - .short 0x6b - .short 0x774 - .short 0xc0a - .short 0x94a - .short 0xb73 - .short 0x3c1 - .short 0x71d - .short 0xa2c - .short 0x1c0 - .short 0x8d8 - .short 0x2a5 - .short 0x806 - .short 0x8b2 - .short 0x1ae - .short 0x22b - .short 0x34b - .short 0x81e - .short 0x367 - .short 0x60e - .short 0x69 - .short 0x1a6 - .short 0x24b - .short 0xb1 - .short 0xc16 - .short 0xbde - .short 0xb35 - .short 0x626 - .short 0x675 - .short 0xc0b - .short 0x30a - .short 0x487 - .short 0xc6e - .short 0x9f8 - .short 0x5cb - .short 0xaa7 - .short 0x45f - .short 0x6cb - .short 0x284 - .short 0x999 - .short 0x15d - .short 0x1a2 - .short 0x149 - .short 0xc65 - .short 0xcb6 - .short 0x331 - .short 0x449 - .short 0x25b - .short 0x262 - .short 0x52a - .short 0x7fc - .short 0x748 - .short 0x180 - .short 0x842 - .short 0xc79 - .short 0x4c2 - .short 0x7ca - .short 0x997 - .short 0xdc - .short 0x85e - .short 0x686 - .short 0x860 - .short 0x707 - .short 0x803 - .short 0x31a - .short 0x71b - .short 0x9ab - .short 0x99b - .short 0x1de - .short 0xc95 - .short 0xbcd - .short 0x3e4 - .short 0x3df - .short 0x3be - .short 0x74d - .short 0x5f2 - .short 0x65c + .short 0x08ed,0x0a0b,0x0b9a,0x0714,0x05d5,0x058e,0x011f,0x00ca + .short 0x0c56,0x026e,0x0629,0x00b6,0x03c2,0x084f,0x073f,0x05bc + .short 0x023d,0x07d4,0x0108,0x017f,0x09c4,0x05b2,0x06bf,0x0c7f + .short 0x0a58,0x03f9,0x02dc,0x0260,0x06fb,0x019b,0x0c34,0x06de + .short 0x04c7,0x028c,0x0ad9,0x03f7,0x07f4,0x05d3,0x0be7,0x06f9 + .short 0x0204,0x0cf9,0x0bc1,0x0a67,0x06af,0x0877,0x007e,0x05bd + .short 0x09ac,0x0ca7,0x0bf2,0x033e,0x006b,0x0774,0x0c0a,0x094a + .short 0x0b73,0x03c1,0x071d,0x0a2c,0x01c0,0x08d8,0x02a5,0x0806 + .short 0x08b2,0x01ae,0x022b,0x034b,0x081e,0x0367,0x060e,0x0069 + .short 0x01a6,0x024b,0x00b1,0x0c16,0x0bde,0x0b35,0x0626,0x0675 + .short 0x0c0b,0x030a,0x0487,0x0c6e,0x09f8,0x05cb,0x0aa7,0x045f + .short 0x06cb,0x0284,0x0999,0x015d,0x01a2,0x0149,0x0c65,0x0cb6 + .short 0x0331,0x0449,0x025b,0x0262,0x052a,0x07fc,0x0748,0x0180 + .short 0x0842,0x0c79,0x04c2,0x07ca,0x0997,0x00dc,0x085e,0x0686 + .short 0x0860,0x0707,0x0803,0x031a,0x071b,0x09ab,0x099b,0x01de + .short 0x0c95,0x0bcd,0x03e4,0x03df,0x03be,0x074d,0x05f2,0x065c .text .align 4 .globl mlkem_thumb2_basemul_mont @@ -3896,6 +3587,6 @@ L_mlkem_thumb2_rej_uniform_done: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c index 40a55b99238..5f45fc70518 100644 --- a/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-mlkem-asm_c.c @@ -41,11 +41,16 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #include #ifdef WOLFSSL_WC_MLKEM -XALIGNED(16) static const word16 L_mlkem_thumb2_ntt_zetas[] = { +XALIGNED(4) static const word16 L_mlkem_thumb2_ntt_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, @@ -74,11 +79,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) register sword16* r __asm__ ("r0") = (sword16*)r_p; register word16* L_mlkem_thumb2_ntt_zetas_c __asm__ ("r1") = (word16*)&L_mlkem_thumb2_ntt_zetas; - #else register word16* L_mlkem_thumb2_ntt_zetas_c = (word16*)&L_mlkem_thumb2_ntt_zetas; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1360,15 +1363,21 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_ntt(sword16* r) "BNE.N L_mlkem_thumb2_ntt_loop_567_%=\n\t" #endif "ADD sp, sp, #0x8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [L_mlkem_thumb2_ntt_zetas] "+r" (L_mlkem_thumb2_ntt_zetas_c) : +#else + : + : [r] "r" (r), + [L_mlkem_thumb2_ntt_zetas] "r" (L_mlkem_thumb2_ntt_zetas_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -XALIGNED(16) static const word16 L_mlkem_invntt_zetas_inv[] = { +XALIGNED(4) static const word16 L_mlkem_invntt_zetas_inv[] = { 0x06a5, 0x070f, 0x05b4, 0x0943, 0x0922, 0x091d, 0x0134, 0x006c, 0x0b23, 0x0366, 0x0356, 0x05e6, 0x09e7, 0x04fe, 0x05fa, 0x04a1, 0x067b, 0x04a3, 0x0c25, 0x036a, 0x0537, 0x083f, 0x0088, 0x04bf, @@ -1397,11 +1406,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) register sword16* r __asm__ ("r0") = (sword16*)r_p; register word16* L_mlkem_invntt_zetas_inv_c __asm__ ("r1") = (word16*)&L_mlkem_invntt_zetas_inv; - #else register word16* L_mlkem_invntt_zetas_inv_c = (word16*)&L_mlkem_invntt_zetas_inv; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3050,15 +3057,21 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_invntt(sword16* r) "BNE.N L_mlkem_invntt_loop_321_%=\n\t" #endif "ADD sp, sp, #0x8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [L_mlkem_invntt_zetas_inv] "+r" (L_mlkem_invntt_zetas_inv_c) : +#else + : + : [r] "r" (r), + [L_mlkem_invntt_zetas_inv] "r" (L_mlkem_invntt_zetas_inv_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -XALIGNED(16) static const word16 L_mlkem_basemul_mont_zetas[] = { +XALIGNED(4) static const word16 L_mlkem_basemul_mont_zetas[] = { 0x08ed, 0x0a0b, 0x0b9a, 0x0714, 0x05d5, 0x058e, 0x011f, 0x00ca, 0x0c56, 0x026e, 0x0629, 0x00b6, 0x03c2, 0x084f, 0x073f, 0x05bc, 0x023d, 0x07d4, 0x0108, 0x017f, 0x09c4, 0x05b2, 0x06bf, 0x0c7f, @@ -3091,11 +3104,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r, register const sword16* b __asm__ ("r2") = (const sword16*)b_p; register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") = (word16*)&L_mlkem_basemul_mont_zetas; - #else register word16* L_mlkem_basemul_mont_zetas_c = (word16*)&L_mlkem_basemul_mont_zetas; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3220,9 +3231,15 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont(sword16* r, #else "BNE.N L_mlkem_basemul_mont_loop_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), + [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -3242,11 +3259,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r, register const sword16* b __asm__ ("r2") = (const sword16*)b_p; register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r3") = (word16*)&L_mlkem_basemul_mont_zetas; - #else register word16* L_mlkem_basemul_mont_zetas_c = (word16*)&L_mlkem_basemul_mont_zetas; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3383,9 +3398,15 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_basemul_mont_add(sword16* r, #else "BNE.N L_mlkem_thumb2_basemul_mont_add_loop_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), + [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -3401,11 +3422,9 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p) register sword16* p __asm__ ("r0") = (sword16*)p_p; register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r1") = (word16*)&L_mlkem_basemul_mont_zetas; - #else register word16* L_mlkem_basemul_mont_zetas_c = (word16*)&L_mlkem_basemul_mont_zetas; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3496,9 +3515,15 @@ WC_OMIT_FRAME_POINTER void mlkem_thumb2_csubq(sword16* p) #else "BNE.N L_mlkem_thumb2_csubq_loop_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [p] "+r" (p), [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : +#else + : + : [p] "r" (p), + [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -3519,11 +3544,9 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, register unsigned int rLen __asm__ ("r3") = (unsigned int)rLen_p; register word16* L_mlkem_basemul_mont_zetas_c __asm__ ("r4") = (word16*)&L_mlkem_basemul_mont_zetas; - #else register word16* L_mlkem_basemul_mont_zetas_c = (word16*)&L_mlkem_basemul_mont_zetas; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3847,9 +3870,15 @@ WC_OMIT_FRAME_POINTER unsigned int mlkem_thumb2_rej_uniform(sword16* p, "L_mlkem_thumb2_rej_uniform_done_%=:\n\t" #endif "LSR r0, r9, #1\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [p] "+r" (p), [len] "+r" (len), [r] "+r" (r), [rLen] "+r" (rLen), [L_mlkem_basemul_mont_zetas] "+r" (L_mlkem_basemul_mont_zetas_c) : +#else + : + : [p] "r" (p), [len] "r" (len), [r] "r" (r), [rLen] "r" (rLen), + [L_mlkem_basemul_mont_zetas] "r" (L_mlkem_basemul_mont_zetas_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)p; diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S b/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S index 9ac9aca9d9c..2bd38c14b7e 100644 --- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm.S @@ -256,15 +256,21 @@ L_poly1305_thumb2_16_done: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} /* Cycle Count = 250 */ .size poly1305_blocks_thumb2_16,.-poly1305_blocks_thumb2_16 +#ifndef __APPLE__ .text .type L_poly1305_thumb2_clamp, %object .size L_poly1305_thumb2_clamp, 16 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_poly1305_thumb2_clamp: - .word 0xfffffff - .word 0xffffffc - .word 0xffffffc - .word 0xffffffc + .long 0x0fffffff,0x0ffffffc,0x0ffffffc,0x0ffffffc .text .align 4 .globl poly1305_set_key @@ -362,6 +368,6 @@ poly1305_final: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c index 515c955b372..c4b607a5c06 100644 --- a/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-poly1305-asm_c.c @@ -41,6 +41,11 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifdef HAVE_POLY1305 #include @@ -289,15 +294,21 @@ WC_OMIT_FRAME_POINTER void poly1305_blocks_thumb2_16(Poly1305* ctx, "L_poly1305_thumb2_16_done_%=:\n\t" #endif "ADD sp, sp, #0x1c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [m] "+r" (m), [len] "+r" (len), [notLast] "+r" (notLast) : +#else + : + : [ctx] "r" (ctx), [m] "r" (m), [len] "r" (len), + [notLast] "r" (notLast) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -XALIGNED(16) static const word32 L_poly1305_thumb2_clamp[] = { +XALIGNED(8) static const word32 L_poly1305_thumb2_clamp[] = { 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc, }; @@ -312,11 +323,9 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) register const byte* key __asm__ ("r1") = (const byte*)key_p; register word32* L_poly1305_thumb2_clamp_c __asm__ ("r2") = (word32*)&L_poly1305_thumb2_clamp; - #else register word32* L_poly1305_thumb2_clamp_c = (word32*)&L_poly1305_thumb2_clamp; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -351,9 +360,15 @@ WC_OMIT_FRAME_POINTER void poly1305_set_key(Poly1305* ctx, const byte* key) "STM r10, {r5, r6, r7, r8, r9}\n\t" /* Zero leftover */ "STR r5, [%[ctx], #52]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [key] "+r" (key), [L_poly1305_thumb2_clamp] "+r" (L_poly1305_thumb2_clamp_c) : +#else + : + : [ctx] "r" (ctx), [key] "r" (key), + [L_poly1305_thumb2_clamp] "r" (L_poly1305_thumb2_clamp_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -413,8 +428,13 @@ WC_OMIT_FRAME_POINTER void poly1305_final(Poly1305* ctx, byte* mac) /* Zero out padding. */ "ADD r11, %[ctx], #0x24\n\t" "STM r11, {r2, r3, r4, r5}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [ctx] "+r" (ctx), [mac] "+r" (mac) : +#else + : + : [ctx] "r" (ctx), [mac] "r" (mac) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S index cb4252f3257..3daa7a93e1b 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm.S @@ -34,75 +34,36 @@ .syntax unified #ifndef NO_SHA256 #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_SHA256_transform_len_k, %object .size L_SHA256_transform_len_k, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 8-byte aligned, 64-bit aligned */ +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_SHA256_transform_len_k: - .word 0x428a2f98 - .word 0x71374491 - .word 0xb5c0fbcf - .word 0xe9b5dba5 - .word 0x3956c25b - .word 0x59f111f1 - .word 0x923f82a4 - .word 0xab1c5ed5 - .word 0xd807aa98 - .word 0x12835b01 - .word 0x243185be - .word 0x550c7dc3 - .word 0x72be5d74 - .word 0x80deb1fe - .word 0x9bdc06a7 - .word 0xc19bf174 - .word 0xe49b69c1 - .word 0xefbe4786 - .word 0xfc19dc6 - .word 0x240ca1cc - .word 0x2de92c6f - .word 0x4a7484aa - .word 0x5cb0a9dc - .word 0x76f988da - .word 0x983e5152 - .word 0xa831c66d - .word 0xb00327c8 - .word 0xbf597fc7 - .word 0xc6e00bf3 - .word 0xd5a79147 - .word 0x6ca6351 - .word 0x14292967 - .word 0x27b70a85 - .word 0x2e1b2138 - .word 0x4d2c6dfc - .word 0x53380d13 - .word 0x650a7354 - .word 0x766a0abb - .word 0x81c2c92e - .word 0x92722c85 - .word 0xa2bfe8a1 - .word 0xa81a664b - .word 0xc24b8b70 - .word 0xc76c51a3 - .word 0xd192e819 - .word 0xd6990624 - .word 0xf40e3585 - .word 0x106aa070 - .word 0x19a4c116 - .word 0x1e376c08 - .word 0x2748774c - .word 0x34b0bcb5 - .word 0x391c0cb3 - .word 0x4ed8aa4a - .word 0x5b9cca4f - .word 0x682e6ff3 - .word 0x748f82ee - .word 0x78a5636f - .word 0x84c87814 - .word 0x8cc70208 - .word 0x90befffa - .word 0xa4506ceb - .word 0xbef9a3f7 - .word 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .text .align 4 .globl Transform_Sha256_Len_base @@ -2365,6 +2326,6 @@ L_SHA256_transform_len_blk_end_15: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c index 2a5643d9a39..cb657fcc71c 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c @@ -41,12 +41,17 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifndef NO_SHA256 #include #ifdef WOLFSSL_ARMASM_NO_NEON -XALIGNED(16) static const word32 L_SHA256_transform_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -65,7 +70,8 @@ XALIGNED(16) static const word32 L_SHA256_transform_len_k[] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, }; -void Transform_Sha256_Len_base(wc_Sha256* sha256, const byte* data, word32 len); +void Transform_Sha256_Len_base(wc_Sha256* sha256_p, const byte* data_p, + word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) @@ -80,11 +86,9 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, register word32 len __asm__ ("r2") = (word32)len_p; register word32* L_SHA256_transform_len_k_c __asm__ ("r3") = (word32*)&L_SHA256_transform_len_k; - #else register word32* L_SHA256_transform_len_k_c = (word32*)&L_SHA256_transform_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2468,9 +2472,15 @@ WC_OMIT_FRAME_POINTER void Transform_Sha256_Len_base(wc_Sha256* sha256, "BNE.W L_SHA256_transform_len_begin_%=\n\t" #endif "ADD sp, sp, #0xc0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) : +#else + : + : [sha256] "r" (sha256), [data] "r" (data), [len] "r" (len), + [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S index 511883d5f11..ab4254dee9d 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S @@ -33,59 +33,32 @@ .thumb .syntax unified #ifdef WOLFSSL_SHA3 +#ifndef __APPLE__ .text .type L_sha3_thumb2_rt, %object .size L_sha3_thumb2_rt, 192 - .align 8 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 16-byte aligned, 128-bit aligned */ +#ifndef __APPLE__ + .align 4 +#else + .p2align 4 +#endif /* __APPLE__ */ L_sha3_thumb2_rt: - .word 0x1 - .word 0x0 - .word 0x8082 - .word 0x0 - .word 0x808a - .word 0x80000000 - .word 0x80008000 - .word 0x80000000 - .word 0x808b - .word 0x0 - .word 0x80000001 - .word 0x0 - .word 0x80008081 - .word 0x80000000 - .word 0x8009 - .word 0x80000000 - .word 0x8a - .word 0x0 - .word 0x88 - .word 0x0 - .word 0x80008009 - .word 0x0 - .word 0x8000000a - .word 0x0 - .word 0x8000808b - .word 0x0 - .word 0x8b - .word 0x80000000 - .word 0x8089 - .word 0x80000000 - .word 0x8003 - .word 0x80000000 - .word 0x8002 - .word 0x80000000 - .word 0x80 - .word 0x80000000 - .word 0x800a - .word 0x0 - .word 0x8000000a - .word 0x80000000 - .word 0x80008081 - .word 0x80000000 - .word 0x8080 - .word 0x80000000 - .word 0x80000001 - .word 0x0 - .word 0x80008008 - .word 0x80000000 + .quad 0x0000000000000001,0x0000000000008082 + .quad 0x800000000000808a,0x8000000080008000 + .quad 0x000000000000808b,0x0000000080000001 + .quad 0x8000000080008081,0x8000000000008009 + .quad 0x000000000000008a,0x0000000000000088 + .quad 0x0000000080008009,0x000000008000000a + .quad 0x000000008000808b,0x800000000000008b + .quad 0x8000000000008089,0x8000000000008003 + .quad 0x8000000000008002,0x8000000000000080 + .quad 0x000000000000800a,0x800000008000000a + .quad 0x8000000080008081,0x8000000000008080 + .quad 0x0000000080000001,0x8000000080008008 .text .align 4 .globl BlockSha3 @@ -1169,6 +1142,6 @@ L_sha3_thumb2_begin: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c index 04da0699eab..e0c6d065d1a 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c @@ -41,9 +41,14 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #ifdef WOLFSSL_SHA3 -static const word64 L_sha3_thumb2_rt[] = { +XALIGNED(16) static const word64 L_sha3_thumb2_rt[] = { 0x0000000000000001UL, 0x0000000000008082UL, 0x800000000000808aUL, 0x8000000080008000UL, 0x000000000000808bUL, 0x0000000080000001UL, @@ -70,10 +75,8 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) register word64* state __asm__ ("r0") = (word64*)state_p; register word64* L_sha3_thumb2_rt_c __asm__ ("r1") = (word64*)&L_sha3_thumb2_rt; - #else register word64* L_sha3_thumb2_rt_c = (word64*)&L_sha3_thumb2_rt; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1153,8 +1156,13 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "BNE.W L_sha3_thumb2_begin_%=\n\t" #endif "ADD sp, sp, #0xcc\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [state] "+r" (state), [L_sha3_thumb2_rt] "+r" (L_sha3_thumb2_rt_c) : +#else + : + : [state] "r" (state), [L_sha3_thumb2_rt] "r" (L_sha3_thumb2_rt_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S index f36baae5c28..f05da4f8cad 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S @@ -34,171 +34,60 @@ .syntax unified #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) #ifdef WOLFSSL_ARMASM_NO_NEON +#ifndef __APPLE__ .text .type L_SHA512_transform_len_k, %object .size L_SHA512_transform_len_k, 640 - .align 8 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + /* 16-byte aligned, 128-bit aligned */ +#ifndef __APPLE__ + .align 4 +#else + .p2align 4 +#endif /* __APPLE__ */ L_SHA512_transform_len_k: - .word 0xd728ae22 - .word 0x428a2f98 - .word 0x23ef65cd - .word 0x71374491 - .word 0xec4d3b2f - .word 0xb5c0fbcf - .word 0x8189dbbc - .word 0xe9b5dba5 - .word 0xf348b538 - .word 0x3956c25b - .word 0xb605d019 - .word 0x59f111f1 - .word 0xaf194f9b - .word 0x923f82a4 - .word 0xda6d8118 - .word 0xab1c5ed5 - .word 0xa3030242 - .word 0xd807aa98 - .word 0x45706fbe - .word 0x12835b01 - .word 0x4ee4b28c - .word 0x243185be - .word 0xd5ffb4e2 - .word 0x550c7dc3 - .word 0xf27b896f - .word 0x72be5d74 - .word 0x3b1696b1 - .word 0x80deb1fe - .word 0x25c71235 - .word 0x9bdc06a7 - .word 0xcf692694 - .word 0xc19bf174 - .word 0x9ef14ad2 - .word 0xe49b69c1 - .word 0x384f25e3 - .word 0xefbe4786 - .word 0x8b8cd5b5 - .word 0xfc19dc6 - .word 0x77ac9c65 - .word 0x240ca1cc - .word 0x592b0275 - .word 0x2de92c6f - .word 0x6ea6e483 - .word 0x4a7484aa - .word 0xbd41fbd4 - .word 0x5cb0a9dc - .word 0x831153b5 - .word 0x76f988da - .word 0xee66dfab - .word 0x983e5152 - .word 0x2db43210 - .word 0xa831c66d - .word 0x98fb213f - .word 0xb00327c8 - .word 0xbeef0ee4 - .word 0xbf597fc7 - .word 0x3da88fc2 - .word 0xc6e00bf3 - .word 0x930aa725 - .word 0xd5a79147 - .word 0xe003826f - .word 0x6ca6351 - .word 0xa0e6e70 - .word 0x14292967 - .word 0x46d22ffc - .word 0x27b70a85 - .word 0x5c26c926 - .word 0x2e1b2138 - .word 0x5ac42aed - .word 0x4d2c6dfc - .word 0x9d95b3df - .word 0x53380d13 - .word 0x8baf63de - .word 0x650a7354 - .word 0x3c77b2a8 - .word 0x766a0abb - .word 0x47edaee6 - .word 0x81c2c92e - .word 0x1482353b - .word 0x92722c85 - .word 0x4cf10364 - .word 0xa2bfe8a1 - .word 0xbc423001 - .word 0xa81a664b - .word 0xd0f89791 - .word 0xc24b8b70 - .word 0x654be30 - .word 0xc76c51a3 - .word 0xd6ef5218 - .word 0xd192e819 - .word 0x5565a910 - .word 0xd6990624 - .word 0x5771202a - .word 0xf40e3585 - .word 0x32bbd1b8 - .word 0x106aa070 - .word 0xb8d2d0c8 - .word 0x19a4c116 - .word 0x5141ab53 - .word 0x1e376c08 - .word 0xdf8eeb99 - .word 0x2748774c - .word 0xe19b48a8 - .word 0x34b0bcb5 - .word 0xc5c95a63 - .word 0x391c0cb3 - .word 0xe3418acb - .word 0x4ed8aa4a - .word 0x7763e373 - .word 0x5b9cca4f - .word 0xd6b2b8a3 - .word 0x682e6ff3 - .word 0x5defb2fc - .word 0x748f82ee - .word 0x43172f60 - .word 0x78a5636f - .word 0xa1f0ab72 - .word 0x84c87814 - .word 0x1a6439ec - .word 0x8cc70208 - .word 0x23631e28 - .word 0x90befffa - .word 0xde82bde9 - .word 0xa4506ceb - .word 0xb2c67915 - .word 0xbef9a3f7 - .word 0xe372532b - .word 0xc67178f2 - .word 0xea26619c - .word 0xca273ece - .word 0x21c0c207 - .word 0xd186b8c7 - .word 0xcde0eb1e - .word 0xeada7dd6 - .word 0xee6ed178 - .word 0xf57d4f7f - .word 0x72176fba - .word 0x6f067aa - .word 0xa2c898a6 - .word 0xa637dc5 - .word 0xbef90dae - .word 0x113f9804 - .word 0x131c471b - .word 0x1b710b35 - .word 0x23047d84 - .word 0x28db77f5 - .word 0x40c72493 - .word 0x32caab7b - .word 0x15c9bebc - .word 0x3c9ebe0a - .word 0x9c100d4c - .word 0x431d67c4 - .word 0xcb3e42b6 - .word 0x4cc5d4be - .word 0xfc657e2a - .word 0x597f299c - .word 0x3ad6faec - .word 0x5fcb6fab - .word 0x4a475817 - .word 0x6c44198c + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .text .align 4 .globl Transform_Sha512_Len_base @@ -3670,6 +3559,6 @@ L_SHA512_transform_len_start: #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits +.section .note.GNU-stack,"",%progbits #endif #endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c index 531c7a02de5..4f87445f522 100644 --- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c @@ -41,12 +41,17 @@ #define __asm__ __asm #define __volatile__ volatile #endif /* __KEIL__ */ +#ifdef __ghs__ +#define __asm__ __asm +#define __volatile__ +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __ghs__ */ #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) #include #ifdef WOLFSSL_ARMASM_NO_NEON -static const word64 L_SHA512_transform_len_k[] = { +XALIGNED(16) static const word64 L_SHA512_transform_len_k[] = { 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, @@ -89,7 +94,8 @@ static const word64 L_SHA512_transform_len_k[] = { 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; -void Transform_Sha512_Len_base(wc_Sha512* sha512, const byte* data, word32 len); +void Transform_Sha512_Len_base(wc_Sha512* sha512_p, const byte* data_p, + word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) @@ -104,11 +110,9 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, register word32 len __asm__ ("r2") = (word32)len_p; register word64* L_SHA512_transform_len_k_c __asm__ ("r3") = (word64*)&L_SHA512_transform_len_k; - #else register word64* L_SHA512_transform_len_k_c = (word64*)&L_SHA512_transform_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3582,9 +3586,15 @@ WC_OMIT_FRAME_POINTER void Transform_Sha512_Len_base(wc_Sha512* sha512, #endif "EOR r0, r0, r0\n\t" "ADD sp, sp, #0xc0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) : +#else + : + : [sha512] "r" (sha512), [data] "r" (data), [len] "r" (len), + [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k_c) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); diff --git a/wolfcrypt/src/port/ppc32/ppc32-sha256-asm.S b/wolfcrypt/src/port/ppc32/ppc32-sha256-asm.S index 9ead9dd790f..66e9de8713e 100644 --- a/wolfcrypt/src/port/ppc32/ppc32-sha256-asm.S +++ b/wolfcrypt/src/port/ppc32/ppc32-sha256-asm.S @@ -31,76 +31,36 @@ .machine ppc #ifndef NO_SHA256 #ifdef WOLFSSL_PPC32_ASM_SPE - .section ".text" - .section .rodata - .type L_SHA256_transform_spe_len_k, @object +#ifndef __APPLE__ + .text + .type L_SHA256_transform_spe_len_k, %object .size L_SHA256_transform_spe_len_k, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_SHA256_transform_spe_len_k: - .long 0x428a2f98 - .long 0x71374491 - .long 0xb5c0fbcf - .long 0xe9b5dba5 - .long 0x3956c25b - .long 0x59f111f1 - .long 0x923f82a4 - .long 0xab1c5ed5 - .long 0xd807aa98 - .long 0x12835b01 - .long 0x243185be - .long 0x550c7dc3 - .long 0x72be5d74 - .long 0x80deb1fe - .long 0x9bdc06a7 - .long 0xc19bf174 - .long 0xe49b69c1 - .long 0xefbe4786 - .long 0xfc19dc6 - .long 0x240ca1cc - .long 0x2de92c6f - .long 0x4a7484aa - .long 0x5cb0a9dc - .long 0x76f988da - .long 0x983e5152 - .long 0xa831c66d - .long 0xb00327c8 - .long 0xbf597fc7 - .long 0xc6e00bf3 - .long 0xd5a79147 - .long 0x6ca6351 - .long 0x14292967 - .long 0x27b70a85 - .long 0x2e1b2138 - .long 0x4d2c6dfc - .long 0x53380d13 - .long 0x650a7354 - .long 0x766a0abb - .long 0x81c2c92e - .long 0x92722c85 - .long 0xa2bfe8a1 - .long 0xa81a664b - .long 0xc24b8b70 - .long 0xc76c51a3 - .long 0xd192e819 - .long 0xd6990624 - .long 0xf40e3585 - .long 0x106aa070 - .long 0x19a4c116 - .long 0x1e376c08 - .long 0x2748774c - .long 0x34b0bcb5 - .long 0x391c0cb3 - .long 0x4ed8aa4a - .long 0x5b9cca4f - .long 0x682e6ff3 - .long 0x748f82ee - .long 0x78a5636f - .long 0x84c87814 - .long 0x8cc70208 - .long 0x90befffa - .long 0xa4506ceb - .long 0xbef9a3f7 - .long 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .section ".text" .align 4 .globl Transform_Sha256_Len @@ -1217,76 +1177,36 @@ L_SHA256_transform_spe_len_start: .size Transform_Sha256_Len,.-Transform_Sha256_Len #endif /* WOLFSSL_PPC32_ASM_SPE */ #ifndef WOLFSSL_PPC32_ASM_SPE - .section ".text" - .section .rodata - .type L_SHA256_transform_len_k, @object +#ifndef __APPLE__ + .text + .type L_SHA256_transform_len_k, %object .size L_SHA256_transform_len_k, 256 - .align 4 +#else + .section __DATA,__data +#endif /* __APPLE__ */ + # 8-byte aligned, 64-bit aligned +#ifndef __APPLE__ + .align 3 +#else + .p2align 3 +#endif /* __APPLE__ */ L_SHA256_transform_len_k: - .long 0x428a2f98 - .long 0x71374491 - .long 0xb5c0fbcf - .long 0xe9b5dba5 - .long 0x3956c25b - .long 0x59f111f1 - .long 0x923f82a4 - .long 0xab1c5ed5 - .long 0xd807aa98 - .long 0x12835b01 - .long 0x243185be - .long 0x550c7dc3 - .long 0x72be5d74 - .long 0x80deb1fe - .long 0x9bdc06a7 - .long 0xc19bf174 - .long 0xe49b69c1 - .long 0xefbe4786 - .long 0xfc19dc6 - .long 0x240ca1cc - .long 0x2de92c6f - .long 0x4a7484aa - .long 0x5cb0a9dc - .long 0x76f988da - .long 0x983e5152 - .long 0xa831c66d - .long 0xb00327c8 - .long 0xbf597fc7 - .long 0xc6e00bf3 - .long 0xd5a79147 - .long 0x6ca6351 - .long 0x14292967 - .long 0x27b70a85 - .long 0x2e1b2138 - .long 0x4d2c6dfc - .long 0x53380d13 - .long 0x650a7354 - .long 0x766a0abb - .long 0x81c2c92e - .long 0x92722c85 - .long 0xa2bfe8a1 - .long 0xa81a664b - .long 0xc24b8b70 - .long 0xc76c51a3 - .long 0xd192e819 - .long 0xd6990624 - .long 0xf40e3585 - .long 0x106aa070 - .long 0x19a4c116 - .long 0x1e376c08 - .long 0x2748774c - .long 0x34b0bcb5 - .long 0x391c0cb3 - .long 0x4ed8aa4a - .long 0x5b9cca4f - .long 0x682e6ff3 - .long 0x748f82ee - .long 0x78a5636f - .long 0x84c87814 - .long 0x8cc70208 - .long 0x90befffa - .long 0xa4506ceb - .long 0xbef9a3f7 - .long 0xc67178f2 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #ifndef __PIC__ .section ".text" .align 4 diff --git a/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_c.c b/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_c.c index 5a005004e6b..9630846caab 100644 --- a/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_c.c +++ b/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_c.c @@ -29,8 +29,6 @@ #include #ifdef WOLFSSL_PPC32_ASM -#include -#include #ifdef WOLFSSL_PPC32_ASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -47,11 +45,12 @@ #define __volatile__ #define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __ghs__ */ + #ifndef NO_SHA256 #include #ifdef WOLFSSL_PPC32_ASM_SPE -static const word32 L_SHA256_transform_spe_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_spe_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -73,21 +72,22 @@ static const word32 L_SHA256_transform_spe_len_k[] = { void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256_p, + const byte* data_p, word32 len_p) #else -void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256, + const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("3") = (wc_Sha256*)sha256_p; - register const byte* data asm ("4") = (const byte*)data_p; - register word32 len asm ("5") = (word32)len_p; - register word32* L_SHA256_transform_spe_len_k_c asm ("6") = + register wc_Sha256* sha256 __asm__ ("3") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("4") = (const byte*)data_p; + register word32 len __asm__ ("5") = (word32)len_p; + register word32* L_SHA256_transform_spe_len_k_c __asm__ ("6") = (word32*)&L_SHA256_transform_spe_len_k; #else register word32* L_SHA256_transform_spe_len_k_c = (word32*)&L_SHA256_transform_spe_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -104,7 +104,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "lwz 21, 28(%[sha256])\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_spe_len_begin_%=: \n\t" + "L_SHA256_transform_spe_len_begin_%=:\n\t" /* Load W */ "lwz 22, 0(%[data])\n\t" "lwz 0, 4(%[data])\n\t" @@ -134,7 +134,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr 0\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_spe_len_start_%=: \n\t" + "L_SHA256_transform_spe_len_start_%=:\n\t" /* Round 0 */ "mr 9, 22\n\t" "rotlwi 6, 18, 26\n\t" @@ -1178,7 +1178,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) #ifndef WOLFSSL_PPC32_ASM_SPE #include -static const word32 L_SHA256_transform_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -1201,21 +1201,22 @@ static const word32 L_SHA256_transform_len_k[] = { void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256_p, + const byte* data_p, word32 len_p) #else -void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256, + const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("3") = (wc_Sha256*)sha256_p; - register const byte* data asm ("4") = (const byte*)data_p; - register word32 len asm ("5") = (word32)len_p; - register word32* L_SHA256_transform_len_k_c asm ("6") = + register wc_Sha256* sha256 __asm__ ("3") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("4") = (const byte*)data_p; + register word32 len __asm__ ("5") = (word32)len_p; + register word32* L_SHA256_transform_len_k_c __asm__ ("6") = (word32*)&L_SHA256_transform_len_k; #else register word32* L_SHA256_transform_len_k_c = (word32*)&L_SHA256_transform_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1234,7 +1235,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr %[len]\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz 16, 0(%[data])\n\t" "lwz 17, 4(%[data])\n\t" @@ -3625,7 +3626,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "stw %[len], 0(1)\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz 16, 0(%[data])\n\t" "lwz 17, 4(%[data])\n\t" @@ -3647,7 +3648,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr 0\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_%=: \n\t" + "L_SHA256_transform_len_start_%=:\n\t" /* Round 0 */ "rotlwi 0, 11, 26\n\t" "rotlwi %[len], 11, 21\n\t" @@ -3692,7 +3693,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 16, 16, 0\n\t" "add 16, 16, 25\n\t" "\n" - "L_SHA256_transform_len_after_blk_0_%=: \n\t" + "L_SHA256_transform_len_after_blk_0_%=:\n\t" /* Round 1 */ "rotlwi 0, 10, 26\n\t" "rotlwi %[len], 10, 21\n\t" @@ -3737,7 +3738,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 17, 17, 0\n\t" "add 17, 17, 26\n\t" "\n" - "L_SHA256_transform_len_after_blk_1_%=: \n\t" + "L_SHA256_transform_len_after_blk_1_%=:\n\t" /* Round 2 */ "rotlwi 0, 9, 26\n\t" "rotlwi %[len], 9, 21\n\t" @@ -3782,7 +3783,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 18, 18, 0\n\t" "add 18, 18, 27\n\t" "\n" - "L_SHA256_transform_len_after_blk_2_%=: \n\t" + "L_SHA256_transform_len_after_blk_2_%=:\n\t" /* Round 3 */ "rotlwi 0, 8, 26\n\t" "rotlwi %[len], 8, 21\n\t" @@ -3827,7 +3828,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 19, 19, 0\n\t" "add 19, 19, 28\n\t" "\n" - "L_SHA256_transform_len_after_blk_3_%=: \n\t" + "L_SHA256_transform_len_after_blk_3_%=:\n\t" /* Round 4 */ "rotlwi 0, 7, 26\n\t" "rotlwi %[len], 7, 21\n\t" @@ -3872,7 +3873,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 20, 20, 0\n\t" "add 20, 20, 29\n\t" "\n" - "L_SHA256_transform_len_after_blk_4_%=: \n\t" + "L_SHA256_transform_len_after_blk_4_%=:\n\t" /* Round 5 */ "rotlwi 0, 15, 26\n\t" "rotlwi %[len], 15, 21\n\t" @@ -3917,7 +3918,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 21, 21, 0\n\t" "add 21, 21, 30\n\t" "\n" - "L_SHA256_transform_len_after_blk_5_%=: \n\t" + "L_SHA256_transform_len_after_blk_5_%=:\n\t" /* Round 6 */ "rotlwi 0, 14, 26\n\t" "rotlwi %[len], 14, 21\n\t" @@ -3962,7 +3963,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 22, 22, 0\n\t" "add 22, 22, 31\n\t" "\n" - "L_SHA256_transform_len_after_blk_6_%=: \n\t" + "L_SHA256_transform_len_after_blk_6_%=:\n\t" /* Round 7 */ "rotlwi 0, 12, 26\n\t" "rotlwi %[len], 12, 21\n\t" @@ -4007,7 +4008,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 23, 23, 0\n\t" "add 23, 23, 16\n\t" "\n" - "L_SHA256_transform_len_after_blk_7_%=: \n\t" + "L_SHA256_transform_len_after_blk_7_%=:\n\t" /* Round 8 */ "rotlwi 0, 11, 26\n\t" "rotlwi %[len], 11, 21\n\t" @@ -4052,7 +4053,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 24, 24, 0\n\t" "add 24, 24, 17\n\t" "\n" - "L_SHA256_transform_len_after_blk_8_%=: \n\t" + "L_SHA256_transform_len_after_blk_8_%=:\n\t" /* Round 9 */ "rotlwi 0, 10, 26\n\t" "rotlwi %[len], 10, 21\n\t" @@ -4097,7 +4098,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 25, 25, 0\n\t" "add 25, 25, 18\n\t" "\n" - "L_SHA256_transform_len_after_blk_9_%=: \n\t" + "L_SHA256_transform_len_after_blk_9_%=:\n\t" /* Round 10 */ "rotlwi 0, 9, 26\n\t" "rotlwi %[len], 9, 21\n\t" @@ -4142,7 +4143,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 26, 26, 0\n\t" "add 26, 26, 19\n\t" "\n" - "L_SHA256_transform_len_after_blk_10_%=: \n\t" + "L_SHA256_transform_len_after_blk_10_%=:\n\t" /* Round 11 */ "rotlwi 0, 8, 26\n\t" "rotlwi %[len], 8, 21\n\t" @@ -4187,7 +4188,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 27, 27, 0\n\t" "add 27, 27, 20\n\t" "\n" - "L_SHA256_transform_len_after_blk_11_%=: \n\t" + "L_SHA256_transform_len_after_blk_11_%=:\n\t" /* Round 12 */ "rotlwi 0, 7, 26\n\t" "rotlwi %[len], 7, 21\n\t" @@ -4232,7 +4233,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 28, 28, 0\n\t" "add 28, 28, 21\n\t" "\n" - "L_SHA256_transform_len_after_blk_12_%=: \n\t" + "L_SHA256_transform_len_after_blk_12_%=:\n\t" /* Round 13 */ "rotlwi 0, 15, 26\n\t" "rotlwi %[len], 15, 21\n\t" @@ -4277,7 +4278,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 29, 29, 0\n\t" "add 29, 29, 22\n\t" "\n" - "L_SHA256_transform_len_after_blk_13_%=: \n\t" + "L_SHA256_transform_len_after_blk_13_%=:\n\t" /* Round 14 */ "rotlwi 0, 14, 26\n\t" "rotlwi %[len], 14, 21\n\t" @@ -4322,7 +4323,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 30, 30, 0\n\t" "add 30, 30, 23\n\t" "\n" - "L_SHA256_transform_len_after_blk_14_%=: \n\t" + "L_SHA256_transform_len_after_blk_14_%=:\n\t" /* Round 15 */ "rotlwi 0, 12, 26\n\t" "rotlwi %[len], 12, 21\n\t" @@ -4367,7 +4368,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 31, 31, 0\n\t" "add 31, 31, 24\n\t" "\n" - "L_SHA256_transform_len_after_blk_15_%=: \n\t" + "L_SHA256_transform_len_after_blk_15_%=:\n\t" "addi 6, 6, 0x40\n\t" "bdnz L_SHA256_transform_len_start_%=\n\t" "subi 6, 6, 0x100\n\t" @@ -4424,21 +4425,22 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256_p, + const byte* data_p, word32 len_p) #else -void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256, + const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("3") = (wc_Sha256*)sha256_p; - register const byte* data asm ("4") = (const byte*)data_p; - register word32 len asm ("5") = (word32)len_p; - register word32* L_SHA256_transform_len_k_c asm ("6") = + register wc_Sha256* sha256 __asm__ ("3") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("4") = (const byte*)data_p; + register word32 len __asm__ ("5") = (word32)len_p; + register word32* L_SHA256_transform_len_k_c __asm__ ("6") = (word32*)&L_SHA256_transform_len_k; #else register word32* L_SHA256_transform_len_k_c = (word32*)&L_SHA256_transform_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4461,7 +4463,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "lwz %[sha256], 4(1)\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz 14, 0(%[sha256])\n\t" "lwz 15, 4(%[sha256])\n\t" @@ -6868,7 +6870,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "lwz %[sha256], 4(1)\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz 14, 0(%[sha256])\n\t" "lwz 15, 4(%[sha256])\n\t" @@ -6890,7 +6892,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr %[sha256]\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_%=: \n\t" + "L_SHA256_transform_len_start_%=:\n\t" /* Round 0 */ "rotlwi %[sha256], 9, 26\n\t" "rotlwi %[len], 9, 21\n\t" @@ -6935,7 +6937,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 14, 14, %[sha256]\n\t" "add 14, 14, 23\n\t" "\n" - "L_SHA256_transform_len_after_blk_0_%=: \n\t" + "L_SHA256_transform_len_after_blk_0_%=:\n\t" /* Round 1 */ "rotlwi %[sha256], 8, 26\n\t" "rotlwi %[len], 8, 21\n\t" @@ -6980,7 +6982,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 15, 15, %[sha256]\n\t" "add 15, 15, 24\n\t" "\n" - "L_SHA256_transform_len_after_blk_1_%=: \n\t" + "L_SHA256_transform_len_after_blk_1_%=:\n\t" /* Round 2 */ "rotlwi %[sha256], 7, 26\n\t" "rotlwi %[len], 7, 21\n\t" @@ -7025,7 +7027,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 16, 16, %[sha256]\n\t" "add 16, 16, 25\n\t" "\n" - "L_SHA256_transform_len_after_blk_2_%=: \n\t" + "L_SHA256_transform_len_after_blk_2_%=:\n\t" /* Round 3 */ "rotlwi %[sha256], %[data], 26\n\t" "rotlwi %[len], %[data], 21\n\t" @@ -7070,7 +7072,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 17, 17, %[sha256]\n\t" "add 17, 17, 26\n\t" "\n" - "L_SHA256_transform_len_after_blk_3_%=: \n\t" + "L_SHA256_transform_len_after_blk_3_%=:\n\t" /* Round 4 */ "rotlwi %[sha256], 0, 26\n\t" "rotlwi %[len], 0, 21\n\t" @@ -7115,7 +7117,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 18, 18, %[sha256]\n\t" "add 18, 18, 27\n\t" "\n" - "L_SHA256_transform_len_after_blk_4_%=: \n\t" + "L_SHA256_transform_len_after_blk_4_%=:\n\t" /* Round 5 */ "rotlwi %[sha256], 12, 26\n\t" "rotlwi %[len], 12, 21\n\t" @@ -7160,7 +7162,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 19, 19, %[sha256]\n\t" "add 19, 19, 28\n\t" "\n" - "L_SHA256_transform_len_after_blk_5_%=: \n\t" + "L_SHA256_transform_len_after_blk_5_%=:\n\t" /* Round 6 */ "rotlwi %[sha256], 11, 26\n\t" "rotlwi %[len], 11, 21\n\t" @@ -7205,7 +7207,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 20, 20, %[sha256]\n\t" "add 20, 20, 29\n\t" "\n" - "L_SHA256_transform_len_after_blk_6_%=: \n\t" + "L_SHA256_transform_len_after_blk_6_%=:\n\t" /* Round 7 */ "rotlwi %[sha256], 10, 26\n\t" "rotlwi %[len], 10, 21\n\t" @@ -7250,7 +7252,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 21, 21, %[sha256]\n\t" "add 21, 21, 14\n\t" "\n" - "L_SHA256_transform_len_after_blk_7_%=: \n\t" + "L_SHA256_transform_len_after_blk_7_%=:\n\t" /* Round 8 */ "rotlwi %[sha256], 9, 26\n\t" "rotlwi %[len], 9, 21\n\t" @@ -7295,7 +7297,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 22, 22, %[sha256]\n\t" "add 22, 22, 15\n\t" "\n" - "L_SHA256_transform_len_after_blk_8_%=: \n\t" + "L_SHA256_transform_len_after_blk_8_%=:\n\t" /* Round 9 */ "rotlwi %[sha256], 8, 26\n\t" "rotlwi %[len], 8, 21\n\t" @@ -7340,7 +7342,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 23, 23, %[sha256]\n\t" "add 23, 23, 16\n\t" "\n" - "L_SHA256_transform_len_after_blk_9_%=: \n\t" + "L_SHA256_transform_len_after_blk_9_%=:\n\t" /* Round 10 */ "rotlwi %[sha256], 7, 26\n\t" "rotlwi %[len], 7, 21\n\t" @@ -7385,7 +7387,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 24, 24, %[sha256]\n\t" "add 24, 24, 17\n\t" "\n" - "L_SHA256_transform_len_after_blk_10_%=: \n\t" + "L_SHA256_transform_len_after_blk_10_%=:\n\t" /* Round 11 */ "rotlwi %[sha256], %[data], 26\n\t" "rotlwi %[len], %[data], 21\n\t" @@ -7430,7 +7432,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 25, 25, %[sha256]\n\t" "add 25, 25, 18\n\t" "\n" - "L_SHA256_transform_len_after_blk_11_%=: \n\t" + "L_SHA256_transform_len_after_blk_11_%=:\n\t" /* Round 12 */ "rotlwi %[sha256], 0, 26\n\t" "rotlwi %[len], 0, 21\n\t" @@ -7475,7 +7477,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 26, 26, %[sha256]\n\t" "add 26, 26, 19\n\t" "\n" - "L_SHA256_transform_len_after_blk_12_%=: \n\t" + "L_SHA256_transform_len_after_blk_12_%=:\n\t" /* Round 13 */ "rotlwi %[sha256], 12, 26\n\t" "rotlwi %[len], 12, 21\n\t" @@ -7520,7 +7522,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 27, 27, %[sha256]\n\t" "add 27, 27, 20\n\t" "\n" - "L_SHA256_transform_len_after_blk_13_%=: \n\t" + "L_SHA256_transform_len_after_blk_13_%=:\n\t" /* Round 14 */ "rotlwi %[sha256], 11, 26\n\t" "rotlwi %[len], 11, 21\n\t" @@ -7565,7 +7567,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 28, 28, %[sha256]\n\t" "add 28, 28, 21\n\t" "\n" - "L_SHA256_transform_len_after_blk_14_%=: \n\t" + "L_SHA256_transform_len_after_blk_14_%=:\n\t" /* Round 15 */ "rotlwi %[sha256], 10, 26\n\t" "rotlwi %[len], 10, 21\n\t" @@ -7610,7 +7612,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add 29, 29, %[sha256]\n\t" "add 29, 29, 22\n\t" "\n" - "L_SHA256_transform_len_after_blk_15_%=: \n\t" + "L_SHA256_transform_len_after_blk_15_%=:\n\t" "addi 6, 6, 0x40\n\t" "bdnz L_SHA256_transform_len_start_%=\n\t" "subi 6, 6, 0x100\n\t" diff --git a/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_cr.c b/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_cr.c index 2f8c2b8b129..1a1d42cf338 100644 --- a/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_cr.c +++ b/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_cr.c @@ -29,8 +29,6 @@ #include #ifdef WOLFSSL_PPC32_ASM -#include -#include #ifdef WOLFSSL_PPC32_ASM_INLINE #ifdef __IAR_SYSTEMS_ICC__ @@ -47,11 +45,12 @@ #define __volatile__ #define WOLFSSL_NO_VAR_ASSIGN_REG #endif /* __ghs__ */ + #ifndef NO_SHA256 #include #ifdef WOLFSSL_PPC32_ASM_SPE -static const word32 L_SHA256_transform_spe_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_spe_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -73,21 +72,22 @@ static const word32 L_SHA256_transform_spe_len_k[] = { void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256_p, + const byte* data_p, word32 len_p) #else -void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256, + const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("3") = (wc_Sha256*)sha256_p; - register const byte* data asm ("4") = (const byte*)data_p; - register word32 len asm ("5") = (word32)len_p; - register word32* L_SHA256_transform_spe_len_k_c asm ("6") = + register wc_Sha256* sha256 __asm__ ("r3") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r4") = (const byte*)data_p; + register word32 len __asm__ ("r5") = (word32)len_p; + register word32* L_SHA256_transform_spe_len_k_c __asm__ ("r6") = (word32*)&L_SHA256_transform_spe_len_k; #else register word32* L_SHA256_transform_spe_len_k_c = (word32*)&L_SHA256_transform_spe_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -104,7 +104,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "lwz r21, 28(%[sha256])\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_spe_len_begin_%=: \n\t" + "L_SHA256_transform_spe_len_begin_%=:\n\t" /* Load W */ "lwz r22, 0(%[data])\n\t" "lwz r0, 4(%[data])\n\t" @@ -134,7 +134,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr r0\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_spe_len_start_%=: \n\t" + "L_SHA256_transform_spe_len_start_%=:\n\t" /* Round 0 */ "mr r9, r22\n\t" "rotlwi r6, r18, 26\n\t" @@ -1178,7 +1178,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) #ifndef WOLFSSL_PPC32_ASM_SPE #include -static const word32 L_SHA256_transform_len_k[] = { +XALIGNED(8) static const word32 L_SHA256_transform_len_k[] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -1201,21 +1201,22 @@ static const word32 L_SHA256_transform_len_k[] = { void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256_p, + const byte* data_p, word32 len_p) #else -void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256, + const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("3") = (wc_Sha256*)sha256_p; - register const byte* data asm ("4") = (const byte*)data_p; - register word32 len asm ("5") = (word32)len_p; - register word32* L_SHA256_transform_len_k_c asm ("6") = + register wc_Sha256* sha256 __asm__ ("r3") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r4") = (const byte*)data_p; + register word32 len __asm__ ("r5") = (word32)len_p; + register word32* L_SHA256_transform_len_k_c __asm__ ("r6") = (word32*)&L_SHA256_transform_len_k; #else register word32* L_SHA256_transform_len_k_c = (word32*)&L_SHA256_transform_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -1234,7 +1235,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr %[len]\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz r16, 0(%[data])\n\t" "lwz r17, 4(%[data])\n\t" @@ -3625,7 +3626,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "stw %[len], 0(r1)\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz r16, 0(%[data])\n\t" "lwz r17, 4(%[data])\n\t" @@ -3647,7 +3648,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr r0\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_%=: \n\t" + "L_SHA256_transform_len_start_%=:\n\t" /* Round 0 */ "rotlwi r0, r11, 26\n\t" "rotlwi %[len], r11, 21\n\t" @@ -3692,7 +3693,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r16, r16, r0\n\t" "add r16, r16, r25\n\t" "\n" - "L_SHA256_transform_len_after_blk_0_%=: \n\t" + "L_SHA256_transform_len_after_blk_0_%=:\n\t" /* Round 1 */ "rotlwi r0, r10, 26\n\t" "rotlwi %[len], r10, 21\n\t" @@ -3737,7 +3738,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r17, r17, r0\n\t" "add r17, r17, r26\n\t" "\n" - "L_SHA256_transform_len_after_blk_1_%=: \n\t" + "L_SHA256_transform_len_after_blk_1_%=:\n\t" /* Round 2 */ "rotlwi r0, r9, 26\n\t" "rotlwi %[len], r9, 21\n\t" @@ -3782,7 +3783,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r18, r18, r0\n\t" "add r18, r18, r27\n\t" "\n" - "L_SHA256_transform_len_after_blk_2_%=: \n\t" + "L_SHA256_transform_len_after_blk_2_%=:\n\t" /* Round 3 */ "rotlwi r0, r8, 26\n\t" "rotlwi %[len], r8, 21\n\t" @@ -3827,7 +3828,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r19, r19, r0\n\t" "add r19, r19, r28\n\t" "\n" - "L_SHA256_transform_len_after_blk_3_%=: \n\t" + "L_SHA256_transform_len_after_blk_3_%=:\n\t" /* Round 4 */ "rotlwi r0, r7, 26\n\t" "rotlwi %[len], r7, 21\n\t" @@ -3872,7 +3873,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r20, r20, r0\n\t" "add r20, r20, r29\n\t" "\n" - "L_SHA256_transform_len_after_blk_4_%=: \n\t" + "L_SHA256_transform_len_after_blk_4_%=:\n\t" /* Round 5 */ "rotlwi r0, r15, 26\n\t" "rotlwi %[len], r15, 21\n\t" @@ -3917,7 +3918,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r21, r21, r0\n\t" "add r21, r21, r30\n\t" "\n" - "L_SHA256_transform_len_after_blk_5_%=: \n\t" + "L_SHA256_transform_len_after_blk_5_%=:\n\t" /* Round 6 */ "rotlwi r0, r14, 26\n\t" "rotlwi %[len], r14, 21\n\t" @@ -3962,7 +3963,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r22, r22, r0\n\t" "add r22, r22, r31\n\t" "\n" - "L_SHA256_transform_len_after_blk_6_%=: \n\t" + "L_SHA256_transform_len_after_blk_6_%=:\n\t" /* Round 7 */ "rotlwi r0, r12, 26\n\t" "rotlwi %[len], r12, 21\n\t" @@ -4007,7 +4008,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r23, r23, r0\n\t" "add r23, r23, r16\n\t" "\n" - "L_SHA256_transform_len_after_blk_7_%=: \n\t" + "L_SHA256_transform_len_after_blk_7_%=:\n\t" /* Round 8 */ "rotlwi r0, r11, 26\n\t" "rotlwi %[len], r11, 21\n\t" @@ -4052,7 +4053,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r24, r24, r0\n\t" "add r24, r24, r17\n\t" "\n" - "L_SHA256_transform_len_after_blk_8_%=: \n\t" + "L_SHA256_transform_len_after_blk_8_%=:\n\t" /* Round 9 */ "rotlwi r0, r10, 26\n\t" "rotlwi %[len], r10, 21\n\t" @@ -4097,7 +4098,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r25, r25, r0\n\t" "add r25, r25, r18\n\t" "\n" - "L_SHA256_transform_len_after_blk_9_%=: \n\t" + "L_SHA256_transform_len_after_blk_9_%=:\n\t" /* Round 10 */ "rotlwi r0, r9, 26\n\t" "rotlwi %[len], r9, 21\n\t" @@ -4142,7 +4143,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r26, r26, r0\n\t" "add r26, r26, r19\n\t" "\n" - "L_SHA256_transform_len_after_blk_10_%=: \n\t" + "L_SHA256_transform_len_after_blk_10_%=:\n\t" /* Round 11 */ "rotlwi r0, r8, 26\n\t" "rotlwi %[len], r8, 21\n\t" @@ -4187,7 +4188,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r27, r27, r0\n\t" "add r27, r27, r20\n\t" "\n" - "L_SHA256_transform_len_after_blk_11_%=: \n\t" + "L_SHA256_transform_len_after_blk_11_%=:\n\t" /* Round 12 */ "rotlwi r0, r7, 26\n\t" "rotlwi %[len], r7, 21\n\t" @@ -4232,7 +4233,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r28, r28, r0\n\t" "add r28, r28, r21\n\t" "\n" - "L_SHA256_transform_len_after_blk_12_%=: \n\t" + "L_SHA256_transform_len_after_blk_12_%=:\n\t" /* Round 13 */ "rotlwi r0, r15, 26\n\t" "rotlwi %[len], r15, 21\n\t" @@ -4277,7 +4278,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r29, r29, r0\n\t" "add r29, r29, r22\n\t" "\n" - "L_SHA256_transform_len_after_blk_13_%=: \n\t" + "L_SHA256_transform_len_after_blk_13_%=:\n\t" /* Round 14 */ "rotlwi r0, r14, 26\n\t" "rotlwi %[len], r14, 21\n\t" @@ -4322,7 +4323,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r30, r30, r0\n\t" "add r30, r30, r23\n\t" "\n" - "L_SHA256_transform_len_after_blk_14_%=: \n\t" + "L_SHA256_transform_len_after_blk_14_%=:\n\t" /* Round 15 */ "rotlwi r0, r12, 26\n\t" "rotlwi %[len], r12, 21\n\t" @@ -4367,7 +4368,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r31, r31, r0\n\t" "add r31, r31, r24\n\t" "\n" - "L_SHA256_transform_len_after_blk_15_%=: \n\t" + "L_SHA256_transform_len_after_blk_15_%=:\n\t" "addi r6, r6, 0x40\n\t" "bdnz L_SHA256_transform_len_start_%=\n\t" "subi r6, r6, 0x100\n\t" @@ -4424,21 +4425,22 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256_p, + const byte* data_p, word32 len_p) #else -void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +WC_OMIT_FRAME_POINTER void Transform_Sha256_Len(wc_Sha256* sha256, + const byte* data, word32 len) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register wc_Sha256* sha256 asm ("3") = (wc_Sha256*)sha256_p; - register const byte* data asm ("4") = (const byte*)data_p; - register word32 len asm ("5") = (word32)len_p; - register word32* L_SHA256_transform_len_k_c asm ("6") = + register wc_Sha256* sha256 __asm__ ("r3") = (wc_Sha256*)sha256_p; + register const byte* data __asm__ ("r4") = (const byte*)data_p; + register word32 len __asm__ ("r5") = (word32)len_p; + register word32* L_SHA256_transform_len_k_c __asm__ ("r6") = (word32*)&L_SHA256_transform_len_k; #else register word32* L_SHA256_transform_len_k_c = (word32*)&L_SHA256_transform_len_k; - #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4461,7 +4463,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "lwz %[sha256], 4(r1)\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz r14, 0(%[sha256])\n\t" "lwz r15, 4(%[sha256])\n\t" @@ -6868,7 +6870,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "lwz %[sha256], 4(r1)\n\t" /* Start of loop processing a block */ "\n" - "L_SHA256_transform_len_begin_%=: \n\t" + "L_SHA256_transform_len_begin_%=:\n\t" /* Load W - 64 bytes */ "lwz r14, 0(%[sha256])\n\t" "lwz r15, 4(%[sha256])\n\t" @@ -6890,7 +6892,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "mtctr %[sha256]\n\t" /* Start of 16 rounds */ "\n" - "L_SHA256_transform_len_start_%=: \n\t" + "L_SHA256_transform_len_start_%=:\n\t" /* Round 0 */ "rotlwi %[sha256], r9, 26\n\t" "rotlwi %[len], r9, 21\n\t" @@ -6935,7 +6937,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r14, r14, %[sha256]\n\t" "add r14, r14, r23\n\t" "\n" - "L_SHA256_transform_len_after_blk_0_%=: \n\t" + "L_SHA256_transform_len_after_blk_0_%=:\n\t" /* Round 1 */ "rotlwi %[sha256], r8, 26\n\t" "rotlwi %[len], r8, 21\n\t" @@ -6980,7 +6982,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r15, r15, %[sha256]\n\t" "add r15, r15, r24\n\t" "\n" - "L_SHA256_transform_len_after_blk_1_%=: \n\t" + "L_SHA256_transform_len_after_blk_1_%=:\n\t" /* Round 2 */ "rotlwi %[sha256], r7, 26\n\t" "rotlwi %[len], r7, 21\n\t" @@ -7025,7 +7027,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r16, r16, %[sha256]\n\t" "add r16, r16, r25\n\t" "\n" - "L_SHA256_transform_len_after_blk_2_%=: \n\t" + "L_SHA256_transform_len_after_blk_2_%=:\n\t" /* Round 3 */ "rotlwi %[sha256], %[data], 26\n\t" "rotlwi %[len], %[data], 21\n\t" @@ -7070,7 +7072,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r17, r17, %[sha256]\n\t" "add r17, r17, r26\n\t" "\n" - "L_SHA256_transform_len_after_blk_3_%=: \n\t" + "L_SHA256_transform_len_after_blk_3_%=:\n\t" /* Round 4 */ "rotlwi %[sha256], r0, 26\n\t" "rotlwi %[len], r0, 21\n\t" @@ -7115,7 +7117,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r18, r18, %[sha256]\n\t" "add r18, r18, r27\n\t" "\n" - "L_SHA256_transform_len_after_blk_4_%=: \n\t" + "L_SHA256_transform_len_after_blk_4_%=:\n\t" /* Round 5 */ "rotlwi %[sha256], r12, 26\n\t" "rotlwi %[len], r12, 21\n\t" @@ -7160,7 +7162,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r19, r19, %[sha256]\n\t" "add r19, r19, r28\n\t" "\n" - "L_SHA256_transform_len_after_blk_5_%=: \n\t" + "L_SHA256_transform_len_after_blk_5_%=:\n\t" /* Round 6 */ "rotlwi %[sha256], r11, 26\n\t" "rotlwi %[len], r11, 21\n\t" @@ -7205,7 +7207,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r20, r20, %[sha256]\n\t" "add r20, r20, r29\n\t" "\n" - "L_SHA256_transform_len_after_blk_6_%=: \n\t" + "L_SHA256_transform_len_after_blk_6_%=:\n\t" /* Round 7 */ "rotlwi %[sha256], r10, 26\n\t" "rotlwi %[len], r10, 21\n\t" @@ -7250,7 +7252,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r21, r21, %[sha256]\n\t" "add r21, r21, r14\n\t" "\n" - "L_SHA256_transform_len_after_blk_7_%=: \n\t" + "L_SHA256_transform_len_after_blk_7_%=:\n\t" /* Round 8 */ "rotlwi %[sha256], r9, 26\n\t" "rotlwi %[len], r9, 21\n\t" @@ -7295,7 +7297,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r22, r22, %[sha256]\n\t" "add r22, r22, r15\n\t" "\n" - "L_SHA256_transform_len_after_blk_8_%=: \n\t" + "L_SHA256_transform_len_after_blk_8_%=:\n\t" /* Round 9 */ "rotlwi %[sha256], r8, 26\n\t" "rotlwi %[len], r8, 21\n\t" @@ -7340,7 +7342,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r23, r23, %[sha256]\n\t" "add r23, r23, r16\n\t" "\n" - "L_SHA256_transform_len_after_blk_9_%=: \n\t" + "L_SHA256_transform_len_after_blk_9_%=:\n\t" /* Round 10 */ "rotlwi %[sha256], r7, 26\n\t" "rotlwi %[len], r7, 21\n\t" @@ -7385,7 +7387,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r24, r24, %[sha256]\n\t" "add r24, r24, r17\n\t" "\n" - "L_SHA256_transform_len_after_blk_10_%=: \n\t" + "L_SHA256_transform_len_after_blk_10_%=:\n\t" /* Round 11 */ "rotlwi %[sha256], %[data], 26\n\t" "rotlwi %[len], %[data], 21\n\t" @@ -7430,7 +7432,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r25, r25, %[sha256]\n\t" "add r25, r25, r18\n\t" "\n" - "L_SHA256_transform_len_after_blk_11_%=: \n\t" + "L_SHA256_transform_len_after_blk_11_%=:\n\t" /* Round 12 */ "rotlwi %[sha256], r0, 26\n\t" "rotlwi %[len], r0, 21\n\t" @@ -7475,7 +7477,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r26, r26, %[sha256]\n\t" "add r26, r26, r19\n\t" "\n" - "L_SHA256_transform_len_after_blk_12_%=: \n\t" + "L_SHA256_transform_len_after_blk_12_%=:\n\t" /* Round 13 */ "rotlwi %[sha256], r12, 26\n\t" "rotlwi %[len], r12, 21\n\t" @@ -7520,7 +7522,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r27, r27, %[sha256]\n\t" "add r27, r27, r20\n\t" "\n" - "L_SHA256_transform_len_after_blk_13_%=: \n\t" + "L_SHA256_transform_len_after_blk_13_%=:\n\t" /* Round 14 */ "rotlwi %[sha256], r11, 26\n\t" "rotlwi %[len], r11, 21\n\t" @@ -7565,7 +7567,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r28, r28, %[sha256]\n\t" "add r28, r28, r21\n\t" "\n" - "L_SHA256_transform_len_after_blk_14_%=: \n\t" + "L_SHA256_transform_len_after_blk_14_%=:\n\t" /* Round 15 */ "rotlwi %[sha256], r10, 26\n\t" "rotlwi %[len], r10, 21\n\t" @@ -7610,7 +7612,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len) "add r29, r29, %[sha256]\n\t" "add r29, r29, r22\n\t" "\n" - "L_SHA256_transform_len_after_blk_15_%=: \n\t" + "L_SHA256_transform_len_after_blk_15_%=:\n\t" "addi r6, r6, 0x40\n\t" "bdnz L_SHA256_transform_len_start_%=\n\t" "subi r6, r6, 0x100\n\t" diff --git a/wolfcrypt/src/sha256_asm.S b/wolfcrypt/src/sha256_asm.S index 1f103ad2c7a..a407b7de1f5 100644 --- a/wolfcrypt/src/sha256_asm.S +++ b/wolfcrypt/src/sha256_asm.S @@ -53,6 +53,11 @@ #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sse2_sha256_sha_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -81,7 +86,7 @@ L_sse2_sha256_sha_k: .p2align 4 #endif /* __APPLE__ */ L_sse2_sha256_shuf_mask: -.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b #ifndef __APPLE__ .text .globl Transform_Sha256_SSE2_Sha @@ -487,6 +492,11 @@ L_sha256_sha_len_sse2_start: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -515,7 +525,7 @@ L_avx1_sha256_k: .p2align 4 #endif /* __APPLE__ */ L_avx1_sha256_shuf_00BA: -.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0x0b0a090803020100,0xffffffffffffffff #ifndef __APPLE__ .data #else @@ -527,7 +537,7 @@ L_avx1_sha256_shuf_00BA: .p2align 4 #endif /* __APPLE__ */ L_avx1_sha256_shuf_DC00: -.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff,0x0b0a090803020100 #ifndef __APPLE__ .data #else @@ -539,7 +549,7 @@ L_avx1_sha256_shuf_DC00: .p2align 4 #endif /* __APPLE__ */ L_avx1_sha256_flip_mask: -.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b #ifndef __APPLE__ .text .globl Transform_Sha256_AVX1 @@ -5342,6 +5352,11 @@ L_sha256_len_avx1_start: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -5370,7 +5385,7 @@ L_avx1_rorx_sha256_k: .p2align 4 #endif /* __APPLE__ */ L_avx1_rorx_sha256_shuf_00BA: -.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0x0b0a090803020100,0xffffffffffffffff #ifndef __APPLE__ .data #else @@ -5382,7 +5397,7 @@ L_avx1_rorx_sha256_shuf_00BA: .p2align 4 #endif /* __APPLE__ */ L_avx1_rorx_sha256_shuf_DC00: -.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff,0x0b0a090803020100 #ifndef __APPLE__ .data #else @@ -5394,7 +5409,7 @@ L_avx1_rorx_sha256_shuf_DC00: .p2align 4 #endif /* __APPLE__ */ L_avx1_rorx_sha256_flip_mask: -.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b #ifndef __APPLE__ .text .globl Transform_Sha256_AVX1_RORX @@ -10114,6 +10129,11 @@ L_sha256_len_avx1_len_rorx_start: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_sha_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -10142,7 +10162,7 @@ L_avx1_sha256_sha_k: .p2align 4 #endif /* __APPLE__ */ L_avx1_sha256_shuf_mask: -.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b #ifndef __APPLE__ .text .globl Transform_Sha256_AVX1_Sha @@ -10493,6 +10513,11 @@ L_sha256_sha_len_avx1_start: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -10537,8 +10562,8 @@ L_avx2_sha256_k: .p2align 5 #endif /* __APPLE__ */ L_avx2_sha256_shuf_00BA: -.quad 0xb0a090803020100, 0xffffffffffffffff -.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0x0b0a090803020100,0xffffffffffffffff +.quad 0x0b0a090803020100,0xffffffffffffffff #ifndef __APPLE__ .data #else @@ -10550,8 +10575,8 @@ L_avx2_sha256_shuf_00BA: .p2align 5 #endif /* __APPLE__ */ L_avx2_sha256_shuf_DC00: -.quad 0xffffffffffffffff, 0xb0a090803020100 -.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff,0x0b0a090803020100 +.quad 0xffffffffffffffff,0x0b0a090803020100 #ifndef __APPLE__ .data #else @@ -10563,8 +10588,8 @@ L_avx2_sha256_shuf_DC00: .p2align 5 #endif /* __APPLE__ */ L_avx2_sha256_flip_mask: -.quad 0x405060700010203, 0xc0d0e0f08090a0b -.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b #ifndef __APPLE__ .text .globl Transform_Sha256_AVX2 @@ -17092,6 +17117,11 @@ L_sha256_len_avx2_done: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -17136,8 +17166,8 @@ L_avx2_rorx_sha256_k: .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha256_flip_mask: -.quad 0x405060700010203, 0xc0d0e0f08090a0b -.quad 0x405060700010203, 0xc0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b +.quad 0x0405060700010203,0x0c0d0e0f08090a0b #ifndef __APPLE__ .data #else @@ -17149,8 +17179,8 @@ L_avx2_rorx_sha256_flip_mask: .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_00BA: -.quad 0xb0a090803020100, 0xffffffffffffffff -.quad 0xb0a090803020100, 0xffffffffffffffff +.quad 0x0b0a090803020100,0xffffffffffffffff +.quad 0x0b0a090803020100,0xffffffffffffffff #ifndef __APPLE__ .data #else @@ -17162,8 +17192,8 @@ L_avx2_rorx_sha256_shuf_00BA: .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_DC00: -.quad 0xffffffffffffffff, 0xb0a090803020100 -.quad 0xffffffffffffffff, 0xb0a090803020100 +.quad 0xffffffffffffffff,0x0b0a090803020100 +.quad 0xffffffffffffffff,0x0b0a090803020100 #ifndef __APPLE__ .text .globl Transform_Sha256_AVX2_RORX diff --git a/wolfcrypt/src/sha3_asm.S b/wolfcrypt/src/sha3_asm.S index 8151a7d61bc..387a42a4c86 100644 --- a/wolfcrypt/src/sha3_asm.S +++ b/wolfcrypt/src/sha3_asm.S @@ -53,9 +53,9 @@ .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_sha3_avx2_r: .quad 0x0000000000000001,0x0000000000000001 @@ -112,9 +112,9 @@ L_sha3_avx2_r: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_sha3_x4_avx2_r: .quad 0x0000000000000001,0x0000000000000001 @@ -9314,9 +9314,9 @@ L_sha3_block_n_bmi2_rounds: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_sha3_block_avx2_rotl: .quad 0x0000000000000001,0x000000000000003e @@ -9337,9 +9337,9 @@ L_sha3_block_avx2_rotl: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_sha3_block_avx2_rotr: .quad 0x000000000000003f,0x0000000000000002 @@ -9533,9 +9533,9 @@ L_sha3_block_avx2_start: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_sha3_block_n_avx2_rotl: .quad 0x0000000000000001,0x000000000000003e @@ -9556,9 +9556,9 @@ L_sha3_block_n_avx2_rotl: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_sha3_block_n_avx2_rotr: .quad 0x000000000000003f,0x0000000000000002 @@ -15298,8 +15298,8 @@ _sha3_blocksx4_avx2: .p2align 5 #endif /* __APPLE__ */ L_sha3_128_blockx4_seed_avx2_end_mark: -.quad 0x8000000000000000, 0x8000000000000000 -.quad 0x8000000000000000, 0x8000000000000000 +.quad 0x8000000000000000,0x8000000000000000 +.quad 0x8000000000000000,0x8000000000000000 #ifndef __APPLE__ .text .globl sha3_128_blocksx4_seed_avx2 @@ -20677,8 +20677,8 @@ _sha3_128_blocksx4_seed_avx2: .p2align 5 #endif /* __APPLE__ */ L_sha3_256_blockx4_seed_avx2_end_mark: -.quad 0x8000000000000000, 0x8000000000000000 -.quad 0x8000000000000000, 0x8000000000000000 +.quad 0x8000000000000000,0x8000000000000000 +.quad 0x8000000000000000,0x8000000000000000 #ifndef __APPLE__ .text .globl sha3_256_blocksx4_seed_avx2 @@ -26057,8 +26057,8 @@ _sha3_256_blocksx4_seed_avx2: .p2align 5 #endif /* __APPLE__ */ L_sha3_256_blockx4_seed_64_avx2_end_mark: -.quad 0x8000000000000000, 0x8000000000000000 -.quad 0x8000000000000000, 0x8000000000000000 +.quad 0x8000000000000000,0x8000000000000000 +.quad 0x8000000000000000,0x8000000000000000 #ifndef __APPLE__ .text .globl sha3_256_blocksx4_seed_64_avx2 diff --git a/wolfcrypt/src/sha512_asm.S b/wolfcrypt/src/sha512_asm.S index 00042f95c33..d0ca1dd4fd4 100644 --- a/wolfcrypt/src/sha512_asm.S +++ b/wolfcrypt/src/sha512_asm.S @@ -54,9 +54,9 @@ .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_avx1_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -110,7 +110,7 @@ L_avx1_sha512_k: .p2align 4 #endif /* __APPLE__ */ L_avx1_sha512_flip_mask: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .text .globl Transform_Sha512_AVX1 @@ -2682,9 +2682,9 @@ L_sha512_len_avx1_start: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_avx1_rorx_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -2738,7 +2738,7 @@ L_avx1_rorx_sha512_k: .p2align 4 #endif /* __APPLE__ */ L_avx1_rorx_sha512_flip_mask: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .text .globl Transform_Sha512_AVX1_RORX @@ -5186,9 +5186,9 @@ L_sha512_len_avx1_rorx_start: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_avx2_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -5237,9 +5237,9 @@ L_avx2_sha512_k: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_avx2_sha512_k_2: .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -5345,8 +5345,8 @@ L_avx2_sha512_k_2_end: .p2align 5 #endif /* __APPLE__ */ L_avx2_sha512_flip_mask: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .text .globl Transform_Sha512_AVX2 @@ -6430,11 +6430,11 @@ _Transform_Sha512_AVX2_Len: movq %rsi, %rbp testb $0x80, %bpl je L_sha512_len_avx2_block - movq 224(%rdi), %rcx - vmovdqu (%rcx), %ymm0 - vmovdqu 32(%rcx), %ymm1 - vmovdqu 64(%rcx), %ymm2 - vmovdqu 96(%rcx), %ymm3 + movq 224(%rdi), %rbx + vmovdqu (%rbx), %ymm0 + vmovdqu 32(%rbx), %ymm1 + vmovdqu 64(%rbx), %ymm2 + vmovdqu 96(%rbx), %ymm3 vmovups %ymm0, 64(%rdi) vmovups %ymm1, 96(%rdi) vmovups %ymm2, 128(%rdi) @@ -8043,9 +8043,9 @@ L_sha512_len_avx2_done: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -8094,9 +8094,9 @@ L_avx2_rorx_sha512_k: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha512_k_2: .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -8202,8 +8202,8 @@ L_avx2_rorx_sha512_k_2_end: .p2align 5 #endif /* __APPLE__ */ L_avx2_rorx_sha512_flip_mask: -.quad 0x1020304050607, 0x8090a0b0c0d0e0f -.quad 0x1020304050607, 0x8090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f #ifndef __APPLE__ .text .globl Transform_Sha512_AVX2_RORX diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index dd05a2578f2..41c7d9ce1eb 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -308,12 +308,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2317,12 +2317,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2684,12 +2684,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2829,12 +2829,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2877,11 +2877,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_16(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -2938,12 +2938,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_16(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3065,11 +3065,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3154,12 +3154,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3313,11 +3313,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3458,12 +3458,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -3673,11 +3673,11 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -4896,11 +4896,11 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5152,11 +5152,11 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5283,12 +5283,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5367,12 +5367,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_16(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5465,12 +5465,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5593,19 +5593,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x100\n\t" "\n" - "L_sp_2048_add_64_word_%=: \n\t" + "L_sp_2048_add_64_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -5645,18 +5645,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x100\n\t" "\n" - "L_sp_2048_sub_in_place_64_word_%=: \n\t" + "L_sp_2048_sub_in_place_64_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -5696,12 +5696,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5714,13 +5714,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_2048_mul_64_outer_%=: \n\t" + "L_sp_2048_mul_64_outer_%=:\n\t" "subs r3, r5, #0xfc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_2048_mul_64_inner_%=: \n\t" + "L_sp_2048_mul_64_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -5838,7 +5838,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_2048_mul_64_inner_done_%=: \n\t" + "L_sp_2048_mul_64_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -5880,7 +5880,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_2048_mul_64_store_%=: \n\t" + "L_sp_2048_mul_64_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -5907,11 +5907,11 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_64(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -5923,13 +5923,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_2048_sqr_64_outer_%=: \n\t" + "L_sp_2048_sqr_64_outer_%=:\n\t" "subs r3, r5, #0xfc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_2048_sqr_64_inner_%=: \n\t" + "L_sp_2048_sqr_64_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -6014,7 +6014,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_2048_sqr_64_inner_done_%=: \n\t" + "L_sp_2048_sqr_64_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -6050,7 +6050,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_2048_sqr_64_store_%=: \n\t" + "L_sp_2048_sqr_64_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -6099,19 +6099,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x80\n\t" "\n" - "L_sp_2048_add_32_word_%=: \n\t" + "L_sp_2048_add_32_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -6151,18 +6151,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x80\n\t" "\n" - "L_sp_2048_sub_in_place_32_word_%=: \n\t" + "L_sp_2048_sub_in_place_32_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -6202,12 +6202,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -6220,13 +6220,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_2048_mul_32_outer_%=: \n\t" + "L_sp_2048_mul_32_outer_%=:\n\t" "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_2048_mul_32_inner_%=: \n\t" + "L_sp_2048_mul_32_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -6344,7 +6344,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_2048_mul_32_inner_done_%=: \n\t" + "L_sp_2048_mul_32_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -6386,7 +6386,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_2048_mul_32_store_%=: \n\t" + "L_sp_2048_mul_32_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -6413,11 +6413,11 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_32(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -6429,13 +6429,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_2048_sqr_32_outer_%=: \n\t" + "L_sp_2048_sqr_32_outer_%=:\n\t" "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_2048_sqr_32_inner_%=: \n\t" + "L_sp_2048_sqr_32_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -6520,7 +6520,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_2048_sqr_32_inner_done_%=: \n\t" + "L_sp_2048_sqr_32_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -6556,7 +6556,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_2048_sqr_32_store_%=: \n\t" + "L_sp_2048_sqr_32_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -6609,12 +6609,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -6650,7 +6650,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_2048_mul_d_64_word_%=: \n\t" + "L_sp_2048_mul_d_64_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -6720,12 +6720,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -8816,13 +8816,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -8830,7 +8830,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_2048_cond_sub_32_words_%=: \n\t" + "L_sp_2048_cond_sub_32_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -8869,13 +8869,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -9019,12 +9019,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -9037,7 +9037,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=: \n\t" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -10013,12 +10013,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -10029,7 +10029,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=: \n\t" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -10322,12 +10322,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -10340,7 +10340,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_2048_mont_reduce_32_word_%=: \n\t" + "L_sp_2048_mont_reduce_32_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -10572,12 +10572,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -10613,7 +10613,7 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_2048_mul_d_32_word_%=: \n\t" + "L_sp_2048_mul_d_32_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -10683,12 +10683,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -11741,12 +11741,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_32(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -11813,12 +11813,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_32(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -11836,7 +11836,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_2048_word_32_bit_%=: \n\t" + "L_div_2048_word_32_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -11962,11 +11962,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_32(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -11977,7 +11977,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_32(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #0x7c\n\t" "\n" - "L_sp_2048_cmp_32_words_%=: \n\t" + "L_sp_2048_cmp_32_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -12742,13 +12742,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -12756,7 +12756,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_2048_cond_sub_64_words_%=: \n\t" + "L_sp_2048_cond_sub_64_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -12795,13 +12795,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -13057,12 +13057,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -13075,7 +13075,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=: \n\t" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -14979,12 +14979,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -14995,7 +14995,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=: \n\t" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -15544,12 +15544,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -15562,7 +15562,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_2048_mont_reduce_64_word_%=: \n\t" + "L_sp_2048_mont_reduce_64_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -15954,19 +15954,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x100\n\t" "\n" - "L_sp_2048_sub_64_word_%=: \n\t" + "L_sp_2048_sub_64_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -16005,12 +16005,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -16156,12 +16156,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_64(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -16228,12 +16228,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_64(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -16251,7 +16251,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_2048_word_64_bit_%=: \n\t" + "L_div_2048_word_64_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -16481,11 +16481,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_64(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -16496,7 +16496,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_64(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #0xfc\n\t" "\n" - "L_sp_2048_cmp_64_words_%=: \n\t" + "L_sp_2048_cmp_64_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -17722,13 +17722,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -17736,7 +17736,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r, "mov r6, #0\n\t" "mov r12, #0\n\t" "\n" - "L_sp_2048_cond_add_32_words_%=: \n\t" + "L_sp_2048_cond_add_32_words_%=:\n\t" "adds lr, lr, #-1\n\t" "ldr r4, [%[a], r12]\n\t" "ldr r5, [%[b], r12]\n\t" @@ -17775,13 +17775,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -18191,12 +18191,12 @@ WC_OMIT_FRAME_POINTER static void sp_2048_lshift_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -19012,12 +19012,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -24522,12 +24522,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -24577,11 +24577,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_24(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -24652,12 +24652,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_24(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -24797,11 +24797,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -24914,12 +24914,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -25101,11 +25101,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -25302,12 +25302,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -25572,11 +25572,11 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_12(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -28649,12 +28649,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -28740,12 +28740,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_24(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -28852,12 +28852,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -29008,19 +29008,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x180\n\t" "\n" - "L_sp_3072_add_96_word_%=: \n\t" + "L_sp_3072_add_96_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -29060,18 +29060,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x180\n\t" "\n" - "L_sp_3072_sub_in_place_96_word_%=: \n\t" + "L_sp_3072_sub_in_place_96_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -29111,12 +29111,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -29129,13 +29129,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_3072_mul_96_outer_%=: \n\t" + "L_sp_3072_mul_96_outer_%=:\n\t" "subs r3, r5, #0x17c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_3072_mul_96_inner_%=: \n\t" + "L_sp_3072_mul_96_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -29253,7 +29253,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_3072_mul_96_inner_done_%=: \n\t" + "L_sp_3072_mul_96_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -29295,7 +29295,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_3072_mul_96_store_%=: \n\t" + "L_sp_3072_mul_96_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -29322,11 +29322,11 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_96(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -29338,13 +29338,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_3072_sqr_96_outer_%=: \n\t" + "L_sp_3072_sqr_96_outer_%=:\n\t" "subs r3, r5, #0x17c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_3072_sqr_96_inner_%=: \n\t" + "L_sp_3072_sqr_96_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -29429,7 +29429,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_3072_sqr_96_inner_done_%=: \n\t" + "L_sp_3072_sqr_96_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -29465,7 +29465,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_3072_sqr_96_store_%=: \n\t" + "L_sp_3072_sqr_96_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -29514,19 +29514,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0xc0\n\t" "\n" - "L_sp_3072_add_48_word_%=: \n\t" + "L_sp_3072_add_48_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -29566,18 +29566,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0xc0\n\t" "\n" - "L_sp_3072_sub_in_place_48_word_%=: \n\t" + "L_sp_3072_sub_in_place_48_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -29617,12 +29617,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -29635,13 +29635,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_3072_mul_48_outer_%=: \n\t" + "L_sp_3072_mul_48_outer_%=:\n\t" "subs r3, r5, #0xbc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_3072_mul_48_inner_%=: \n\t" + "L_sp_3072_mul_48_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -29759,7 +29759,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_3072_mul_48_inner_done_%=: \n\t" + "L_sp_3072_mul_48_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -29801,7 +29801,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_3072_mul_48_store_%=: \n\t" + "L_sp_3072_mul_48_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -29828,11 +29828,11 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_48(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -29844,13 +29844,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_3072_sqr_48_outer_%=: \n\t" + "L_sp_3072_sqr_48_outer_%=:\n\t" "subs r3, r5, #0xbc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_3072_sqr_48_inner_%=: \n\t" + "L_sp_3072_sqr_48_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -29935,7 +29935,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_3072_sqr_48_inner_done_%=: \n\t" + "L_sp_3072_sqr_48_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -29971,7 +29971,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_3072_sqr_48_store_%=: \n\t" + "L_sp_3072_sqr_48_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -30024,12 +30024,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -30065,7 +30065,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_3072_mul_d_96_word_%=: \n\t" + "L_sp_3072_mul_d_96_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -30135,12 +30135,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -33255,13 +33255,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -33269,7 +33269,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_3072_cond_sub_48_words_%=: \n\t" + "L_sp_3072_cond_sub_48_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -33308,13 +33308,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -33514,12 +33514,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -33532,7 +33532,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=: \n\t" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -34972,12 +34972,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -34988,7 +34988,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=: \n\t" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -35409,12 +35409,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -35427,7 +35427,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_3072_mont_reduce_48_word_%=: \n\t" + "L_sp_3072_mont_reduce_48_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -35739,12 +35739,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -35780,7 +35780,7 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_3072_mul_d_48_word_%=: \n\t" + "L_sp_3072_mul_d_48_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -35850,12 +35850,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -37420,12 +37420,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_48(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -37492,12 +37492,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_48(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -37515,7 +37515,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_3072_word_48_bit_%=: \n\t" + "L_div_3072_word_48_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -37641,11 +37641,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_48(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -37656,7 +37656,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_48(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #0xbc\n\t" "\n" - "L_sp_3072_cmp_48_words_%=: \n\t" + "L_sp_3072_cmp_48_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -38597,13 +38597,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -38611,7 +38611,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_3072_cond_sub_96_words_%=: \n\t" + "L_sp_3072_cond_sub_96_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -38650,13 +38650,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -39024,12 +39024,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -39042,7 +39042,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=: \n\t" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -41874,12 +41874,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -41890,7 +41890,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=: \n\t" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -42695,12 +42695,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -42713,7 +42713,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_3072_mont_reduce_96_word_%=: \n\t" + "L_sp_3072_mont_reduce_96_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -43265,19 +43265,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x180\n\t" "\n" - "L_sp_3072_sub_96_word_%=: \n\t" + "L_sp_3072_sub_96_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -43316,12 +43316,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -43523,12 +43523,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_96(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -43595,12 +43595,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_96(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -43618,7 +43618,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_3072_word_96_bit_%=: \n\t" + "L_div_3072_word_96_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -43848,11 +43848,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_96(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -43868,7 +43868,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_96(const sp_digit* a, "mov r4, #0x17c\n\t" #endif "\n" - "L_sp_3072_cmp_96_words_%=: \n\t" + "L_sp_3072_cmp_96_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -45446,13 +45446,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -45460,7 +45460,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r, "mov r6, #0\n\t" "mov r12, #0\n\t" "\n" - "L_sp_3072_cond_add_48_words_%=: \n\t" + "L_sp_3072_cond_add_48_words_%=:\n\t" "adds lr, lr, #-1\n\t" "ldr r4, [%[a], r12]\n\t" "ldr r5, [%[b], r12]\n\t" @@ -45499,13 +45499,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -45971,12 +45971,12 @@ WC_OMIT_FRAME_POINTER static void sp_3072_lshift_96(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -46983,11 +46983,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -47240,12 +47240,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_add_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -47576,19 +47576,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_add_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x200\n\t" "\n" - "L_sp_4096_add_128_word_%=: \n\t" + "L_sp_4096_add_128_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -47628,18 +47628,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x200\n\t" "\n" - "L_sp_4096_sub_in_place_128_word_%=: \n\t" + "L_sp_4096_sub_in_place_128_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -47679,12 +47679,12 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -47697,13 +47697,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_128(sp_digit* r, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_4096_mul_128_outer_%=: \n\t" + "L_sp_4096_mul_128_outer_%=:\n\t" "subs r3, r5, #0x1fc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_4096_mul_128_inner_%=: \n\t" + "L_sp_4096_mul_128_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -47821,7 +47821,7 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_128(sp_digit* r, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_4096_mul_128_inner_done_%=: \n\t" + "L_sp_4096_mul_128_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -47863,7 +47863,7 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_128(sp_digit* r, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_4096_mul_128_store_%=: \n\t" + "L_sp_4096_mul_128_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -47891,11 +47891,11 @@ WC_OMIT_FRAME_POINTER static void sp_4096_sqr_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -47907,13 +47907,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_sqr_128(sp_digit* r, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_4096_sqr_128_outer_%=: \n\t" + "L_sp_4096_sqr_128_outer_%=:\n\t" "subs r3, r5, #0x1fc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_4096_sqr_128_inner_%=: \n\t" + "L_sp_4096_sqr_128_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -47998,7 +47998,7 @@ WC_OMIT_FRAME_POINTER static void sp_4096_sqr_128(sp_digit* r, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_4096_sqr_128_inner_done_%=: \n\t" + "L_sp_4096_sqr_128_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -48034,7 +48034,7 @@ WC_OMIT_FRAME_POINTER static void sp_4096_sqr_128(sp_digit* r, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_4096_sqr_128_store_%=: \n\t" + "L_sp_4096_sqr_128_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -48085,12 +48085,12 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -48126,7 +48126,7 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_4096_mul_d_128_word_%=: \n\t" + "L_sp_4096_mul_d_128_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -48196,12 +48196,12 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -52341,13 +52341,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -52355,7 +52355,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_4096_cond_sub_128_words_%=: \n\t" + "L_sp_4096_cond_sub_128_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -52394,13 +52394,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -52880,12 +52880,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -52898,7 +52898,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=: \n\t" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -56658,12 +56658,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -56674,7 +56674,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=: \n\t" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -57735,12 +57735,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -57753,7 +57753,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_4096_mont_reduce_128_word_%=: \n\t" + "L_sp_4096_mont_reduce_128_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -58465,19 +58465,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x200\n\t" "\n" - "L_sp_4096_sub_128_word_%=: \n\t" + "L_sp_4096_sub_128_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -58516,12 +58516,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -58779,12 +58779,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_4096_word_128(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -58851,12 +58851,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_4096_word_128(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -58874,7 +58874,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_4096_word_128(sp_digit d1, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_4096_word_128_bit_%=: \n\t" + "L_div_4096_word_128_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -59104,11 +59104,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_4096_cmp_128(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -59124,7 +59124,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_4096_cmp_128(const sp_digit* a, "mov r4, #0x1fc\n\t" #endif "\n" - "L_sp_4096_cmp_128_words_%=: \n\t" + "L_sp_4096_cmp_128_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -61054,13 +61054,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -61068,7 +61068,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r, "mov r6, #0\n\t" "mov r12, #0\n\t" "\n" - "L_sp_4096_cond_add_64_words_%=: \n\t" + "L_sp_4096_cond_add_64_words_%=:\n\t" "adds lr, lr, #-1\n\t" "ldr r4, [%[a], r12]\n\t" "ldr r5, [%[b], r12]\n\t" @@ -61107,13 +61107,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -61635,12 +61635,12 @@ WC_OMIT_FRAME_POINTER static void sp_4096_lshift_128(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -62707,12 +62707,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -62725,13 +62725,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_256_mul_8_outer_%=: \n\t" + "L_sp_256_mul_8_outer_%=:\n\t" "subs r3, r5, #28\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_256_mul_8_inner_%=: \n\t" + "L_sp_256_mul_8_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -62849,7 +62849,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_256_mul_8_inner_done_%=: \n\t" + "L_sp_256_mul_8_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -62891,7 +62891,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_256_mul_8_store_%=: \n\t" + "L_sp_256_mul_8_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -62922,12 +62922,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -64931,12 +64931,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -65298,12 +65298,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -65443,11 +65443,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -65459,13 +65459,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_256_sqr_8_outer_%=: \n\t" + "L_sp_256_sqr_8_outer_%=:\n\t" "subs r3, r5, #28\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_256_sqr_8_inner_%=: \n\t" + "L_sp_256_sqr_8_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -65550,7 +65550,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_256_sqr_8_inner_done_%=: \n\t" + "L_sp_256_sqr_8_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -65586,7 +65586,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_256_sqr_8_store_%=: \n\t" + "L_sp_256_sqr_8_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -65615,11 +65615,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -66838,11 +66838,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -67094,11 +67094,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -67227,19 +67227,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_add_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #32\n\t" "\n" - "L_sp_256_add_8_word_%=: \n\t" + "L_sp_256_add_8_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -67279,12 +67279,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_add_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -67329,11 +67329,11 @@ WC_OMIT_FRAME_POINTER static int sp_256_mod_mul_norm_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -67783,12 +67783,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -69926,12 +69926,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -70426,12 +70426,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -70702,11 +70702,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -71921,11 +71921,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -72309,11 +72309,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -72667,11 +72667,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_256_cmp_8(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -72682,7 +72682,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_256_cmp_8(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #28\n\t" "\n" - "L_sp_256_cmp_8_words_%=: \n\t" + "L_sp_256_cmp_8_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -72822,13 +72822,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -72836,7 +72836,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_256_cond_sub_8_words_%=: \n\t" + "L_sp_256_cond_sub_8_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -72875,13 +72875,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -72944,12 +72944,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -72962,7 +72962,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_256_mont_reduce_8_word_%=: \n\t" + "L_sp_256_mont_reduce_8_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -73242,12 +73242,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -73258,7 +73258,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_256_mont_reduce_8_word_%=: \n\t" + "L_sp_256_mont_reduce_8_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -73359,12 +73359,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -73377,7 +73377,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_256_mont_reduce_8_word_%=: \n\t" + "L_sp_256_mont_reduce_8_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -73459,10 +73459,10 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -73632,12 +73632,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -73650,7 +73650,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + "L_sp_256_mont_reduce_order_8_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -73930,12 +73930,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -73946,7 +73946,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + "L_sp_256_mont_reduce_order_8_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -74047,12 +74047,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -74065,7 +74065,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + "L_sp_256_mont_reduce_order_8_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -74188,12 +74188,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mont_add_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -74261,11 +74261,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_mont_dbl_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -74329,11 +74329,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_mont_tpl_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -74430,12 +74430,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mont_sub_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -74501,12 +74501,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mont_div2_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -77886,10 +77886,10 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, WC_OMIT_FRAME_POINTER static void sp_256_add_one_8(sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_256_add_one_8(sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78271,18 +78271,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #32\n\t" "\n" - "L_sp_256_sub_in_place_8_word_%=: \n\t" + "L_sp_256_sub_in_place_8_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -78320,11 +78320,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78369,12 +78369,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78410,7 +78410,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_256_mul_d_8_word_%=: \n\t" + "L_sp_256_mul_d_8_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -78480,12 +78480,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78770,12 +78770,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_256_word_8(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78842,12 +78842,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_256_word_8(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -78865,7 +78865,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_256_word_8_bit_%=: \n\t" + "L_div_256_word_8_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -79603,19 +79603,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #32\n\t" "\n" - "L_sp_256_sub_8_word_%=: \n\t" + "L_sp_256_sub_8_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -79654,12 +79654,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -79697,11 +79697,11 @@ WC_OMIT_FRAME_POINTER static void sp_256_rshift1_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -79793,12 +79793,12 @@ WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -79822,7 +79822,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r, "adc r3, r12, r12\n\t" "b L_sp_256_div2_mod_8_div2_%=\n\t" "\n" - "L_sp_256_div2_mod_8_even_%=: \n\t" + "L_sp_256_div2_mod_8_even_%=:\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[a], #16]\n\t" @@ -79836,7 +79836,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r, "ldrd r6, r7, [%[a], #20]\n\t" #endif "\n" - "L_sp_256_div2_mod_8_div2_%=: \n\t" + "L_sp_256_div2_mod_8_div2_%=:\n\t" "lsr r8, r4, #1\n\t" "and r4, r4, #1\n\t" "lsr r9, r5, #1\n\t" @@ -79882,7 +79882,7 @@ WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r, } #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) -static const byte L_sp_256_num_bits_8_table[] = { +XALIGNED(4) static const word8 L_sp_256_num_bits_8_table[] = { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, @@ -79921,15 +79921,15 @@ static const byte L_sp_256_num_bits_8_table[] = { WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register byte* L_sp_256_num_bits_8_table_c asm ("r1") = - (byte*)&L_sp_256_num_bits_8_table; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register word8* L_sp_256_num_bits_8_table_c __asm__ ("r1") = + (word8*)&L_sp_256_num_bits_8_table; #else - register byte* L_sp_256_num_bits_8_table_c = - (byte*)&L_sp_256_num_bits_8_table; + register word8* L_sp_256_num_bits_8_table_c = + (word8*)&L_sp_256_num_bits_8_table; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -79945,7 +79945,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_7_3_%=: \n\t" + "L_sp_256_num_bits_8_7_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -79955,7 +79955,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_7_2_%=: \n\t" + "L_sp_256_num_bits_8_7_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -79965,14 +79965,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_7_1_%=: \n\t" + "L_sp_256_num_bits_8_7_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xe0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_7_%=: \n\t" + "L_sp_256_num_bits_8_7_%=:\n\t" "ldr r1, [%[a], #24]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_6_%=\n\t" @@ -79984,7 +79984,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_6_3_%=: \n\t" + "L_sp_256_num_bits_8_6_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -79994,7 +79994,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_6_2_%=: \n\t" + "L_sp_256_num_bits_8_6_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80004,14 +80004,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_6_1_%=: \n\t" + "L_sp_256_num_bits_8_6_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xc0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_6_%=: \n\t" + "L_sp_256_num_bits_8_6_%=:\n\t" "ldr r1, [%[a], #20]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_5_%=\n\t" @@ -80023,7 +80023,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_5_3_%=: \n\t" + "L_sp_256_num_bits_8_5_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80033,7 +80033,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_5_2_%=: \n\t" + "L_sp_256_num_bits_8_5_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80043,14 +80043,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_5_1_%=: \n\t" + "L_sp_256_num_bits_8_5_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xa0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_5_%=: \n\t" + "L_sp_256_num_bits_8_5_%=:\n\t" "ldr r1, [%[a], #16]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_4_%=\n\t" @@ -80062,7 +80062,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_4_3_%=: \n\t" + "L_sp_256_num_bits_8_4_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80072,7 +80072,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_4_2_%=: \n\t" + "L_sp_256_num_bits_8_4_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80082,14 +80082,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_4_1_%=: \n\t" + "L_sp_256_num_bits_8_4_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x80\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_4_%=: \n\t" + "L_sp_256_num_bits_8_4_%=:\n\t" "ldr r1, [%[a], #12]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_3_%=\n\t" @@ -80101,7 +80101,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_3_3_%=: \n\t" + "L_sp_256_num_bits_8_3_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80111,7 +80111,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_3_2_%=: \n\t" + "L_sp_256_num_bits_8_3_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80121,14 +80121,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_3_1_%=: \n\t" + "L_sp_256_num_bits_8_3_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x60\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_3_%=: \n\t" + "L_sp_256_num_bits_8_3_%=:\n\t" "ldr r1, [%[a], #8]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_2_%=\n\t" @@ -80140,7 +80140,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_2_3_%=: \n\t" + "L_sp_256_num_bits_8_2_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80150,7 +80150,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_2_2_%=: \n\t" + "L_sp_256_num_bits_8_2_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80160,14 +80160,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_2_1_%=: \n\t" + "L_sp_256_num_bits_8_2_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x40\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_2_%=: \n\t" + "L_sp_256_num_bits_8_2_%=:\n\t" "ldr r1, [%[a], #4]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_1_%=\n\t" @@ -80179,7 +80179,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_1_3_%=: \n\t" + "L_sp_256_num_bits_8_1_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80189,7 +80189,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_1_2_%=: \n\t" + "L_sp_256_num_bits_8_1_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80199,14 +80199,14 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_1_1_%=: \n\t" + "L_sp_256_num_bits_8_1_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #32\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_1_%=: \n\t" + "L_sp_256_num_bits_8_1_%=:\n\t" "ldr r1, [%[a]]\n\t" "lsr r3, r1, #24\n\t" "cmp r3, #0\n\t" @@ -80216,7 +80216,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_0_3_%=: \n\t" + "L_sp_256_num_bits_8_0_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80226,7 +80226,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_0_2_%=: \n\t" + "L_sp_256_num_bits_8_0_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -80236,11 +80236,11 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_0_1_%=: \n\t" + "L_sp_256_num_bits_8_0_1_%=:\n\t" "and r3, r1, #0xff\n\t" "ldrb r12, [lr, r3]\n\t" "\n" - "L_sp_256_num_bits_8_9_%=: \n\t" + "L_sp_256_num_bits_8_9_%=:\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), @@ -80261,10 +80261,10 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -80276,7 +80276,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_7_%=: \n\t" + "L_sp_256_num_bits_8_7_%=:\n\t" "ldr r1, [%[a], #24]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_6_%=\n\t" @@ -80285,7 +80285,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_6_%=: \n\t" + "L_sp_256_num_bits_8_6_%=:\n\t" "ldr r1, [%[a], #20]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_5_%=\n\t" @@ -80294,7 +80294,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_5_%=: \n\t" + "L_sp_256_num_bits_8_5_%=:\n\t" "ldr r1, [%[a], #16]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_4_%=\n\t" @@ -80303,7 +80303,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_4_%=: \n\t" + "L_sp_256_num_bits_8_4_%=:\n\t" "ldr r1, [%[a], #12]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_3_%=\n\t" @@ -80312,7 +80312,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_3_%=: \n\t" + "L_sp_256_num_bits_8_3_%=:\n\t" "ldr r1, [%[a], #8]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_2_%=\n\t" @@ -80321,7 +80321,7 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_2_%=: \n\t" + "L_sp_256_num_bits_8_2_%=:\n\t" "ldr r1, [%[a], #4]\n\t" "cmp r1, #0\n\t" "beq L_sp_256_num_bits_8_1_%=\n\t" @@ -80330,13 +80330,13 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_256_num_bits_8_9_%=\n\t" "\n" - "L_sp_256_num_bits_8_1_%=: \n\t" + "L_sp_256_num_bits_8_1_%=:\n\t" "ldr r1, [%[a]]\n\t" "mov r2, #32\n\t" "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "\n" - "L_sp_256_num_bits_8_9_%=: \n\t" + "L_sp_256_num_bits_8_9_%=:\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) @@ -81292,12 +81292,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -81310,13 +81310,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_384_mul_12_outer_%=: \n\t" + "L_sp_384_mul_12_outer_%=:\n\t" "subs r3, r5, #44\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_384_mul_12_inner_%=: \n\t" + "L_sp_384_mul_12_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -81434,7 +81434,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_384_mul_12_inner_done_%=: \n\t" + "L_sp_384_mul_12_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -81476,7 +81476,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_384_mul_12_store_%=: \n\t" + "L_sp_384_mul_12_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -81506,12 +81506,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -87016,11 +87016,11 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -87032,13 +87032,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_384_sqr_12_outer_%=: \n\t" + "L_sp_384_sqr_12_outer_%=:\n\t" "subs r3, r5, #44\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_384_sqr_12_inner_%=: \n\t" + "L_sp_384_sqr_12_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -87123,7 +87123,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_384_sqr_12_inner_done_%=: \n\t" + "L_sp_384_sqr_12_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -87159,7 +87159,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_384_sqr_12_store_%=: \n\t" + "L_sp_384_sqr_12_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -87187,11 +87187,11 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -90266,19 +90266,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_add_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #48\n\t" "\n" - "L_sp_384_add_12_word_%=: \n\t" + "L_sp_384_add_12_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -90318,12 +90318,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_add_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -90662,13 +90662,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -90676,7 +90676,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_384_cond_sub_12_words_%=: \n\t" + "L_sp_384_cond_sub_12_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -90715,13 +90715,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -90797,12 +90797,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -90815,7 +90815,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_384_mont_reduce_12_word_%=: \n\t" + "L_sp_384_mont_reduce_12_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -91211,12 +91211,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -91227,7 +91227,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_384_mont_reduce_12_word_%=: \n\t" + "L_sp_384_mont_reduce_12_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -91360,12 +91360,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -91378,7 +91378,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_384_mont_reduce_12_word_%=: \n\t" + "L_sp_384_mont_reduce_12_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -91622,11 +91622,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_384_cmp_12(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -91637,7 +91637,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_384_cmp_12(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #44\n\t" "\n" - "L_sp_384_cmp_12_words_%=: \n\t" + "L_sp_384_cmp_12_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -91859,13 +91859,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_add_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -91959,12 +91959,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_dbl_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -92048,12 +92048,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_tpl_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -92203,12 +92203,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -92262,13 +92262,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -92276,7 +92276,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r, "mov r6, #0\n\t" "mov r12, #0\n\t" "\n" - "L_sp_384_cond_add_12_words_%=: \n\t" + "L_sp_384_cond_add_12_words_%=:\n\t" "adds lr, lr, #-1\n\t" "ldr r4, [%[a], r12]\n\t" "ldr r5, [%[b], r12]\n\t" @@ -92315,13 +92315,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -92395,13 +92395,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mont_sub_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -92490,11 +92490,11 @@ WC_OMIT_FRAME_POINTER static void sp_384_rshift1_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -95935,10 +95935,10 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, WC_OMIT_FRAME_POINTER static void sp_384_add_one_12(sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_384_add_one_12(sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -96326,18 +96326,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #48\n\t" "\n" - "L_sp_384_sub_in_place_12_word_%=: \n\t" + "L_sp_384_sub_in_place_12_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -96375,11 +96375,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -96431,12 +96431,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -96472,7 +96472,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_384_mul_d_12_word_%=: \n\t" + "L_sp_384_mul_d_12_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -96542,12 +96542,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -96960,12 +96960,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_384_word_12(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -97032,12 +97032,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_384_word_12(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -97055,7 +97055,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_384_word_12_bit_%=: \n\t" + "L_div_384_word_12_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -97763,12 +97763,12 @@ WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -97800,7 +97800,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r, "adc r3, r12, r12\n\t" "b L_sp_384_div2_mod_12_div2_%=\n\t" "\n" - "L_sp_384_div2_mod_12_even_%=: \n\t" + "L_sp_384_div2_mod_12_even_%=:\n\t" "ldm %[a]!, {r5, r6, r7}\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" @@ -97808,7 +97808,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r, "ldm %[a]!, {r4, r5, r6, r7}\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "\n" - "L_sp_384_div2_mod_12_div2_%=: \n\t" + "L_sp_384_div2_mod_12_div2_%=:\n\t" "sub %[r], %[r], #48\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "ldm r0, {r8, r9}\n\t" @@ -97874,7 +97874,7 @@ WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r, } #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) -static const byte L_sp_384_num_bits_12_table[] = { +XALIGNED(4) static const word8 L_sp_384_num_bits_12_table[] = { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, @@ -97913,15 +97913,15 @@ static const byte L_sp_384_num_bits_12_table[] = { WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register byte* L_sp_384_num_bits_12_table_c asm ("r1") = - (byte*)&L_sp_384_num_bits_12_table; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register word8* L_sp_384_num_bits_12_table_c __asm__ ("r1") = + (word8*)&L_sp_384_num_bits_12_table; #else - register byte* L_sp_384_num_bits_12_table_c = - (byte*)&L_sp_384_num_bits_12_table; + register word8* L_sp_384_num_bits_12_table_c = + (word8*)&L_sp_384_num_bits_12_table; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -97942,7 +97942,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_11_3_%=: \n\t" + "L_sp_384_num_bits_12_11_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -97957,7 +97957,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_11_2_%=: \n\t" + "L_sp_384_num_bits_12_11_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -97972,7 +97972,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_11_1_%=: \n\t" + "L_sp_384_num_bits_12_11_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x60\n\t" @@ -97984,7 +97984,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_11_%=: \n\t" + "L_sp_384_num_bits_12_11_%=:\n\t" "ldr r1, [%[a], #40]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_10_%=\n\t" @@ -98001,7 +98001,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_10_3_%=: \n\t" + "L_sp_384_num_bits_12_10_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98016,7 +98016,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_10_2_%=: \n\t" + "L_sp_384_num_bits_12_10_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98031,7 +98031,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_10_1_%=: \n\t" + "L_sp_384_num_bits_12_10_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x40\n\t" @@ -98043,7 +98043,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_10_%=: \n\t" + "L_sp_384_num_bits_12_10_%=:\n\t" "ldr r1, [%[a], #36]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_9_%=\n\t" @@ -98060,7 +98060,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_9_3_%=: \n\t" + "L_sp_384_num_bits_12_9_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98075,7 +98075,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_9_2_%=: \n\t" + "L_sp_384_num_bits_12_9_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98090,7 +98090,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_9_1_%=: \n\t" + "L_sp_384_num_bits_12_9_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x20\n\t" @@ -98102,7 +98102,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_9_%=: \n\t" + "L_sp_384_num_bits_12_9_%=:\n\t" "ldr r1, [%[a], #32]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_8_%=\n\t" @@ -98119,7 +98119,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_8_3_%=: \n\t" + "L_sp_384_num_bits_12_8_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98134,7 +98134,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_8_2_%=: \n\t" + "L_sp_384_num_bits_12_8_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98149,14 +98149,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_8_1_%=: \n\t" + "L_sp_384_num_bits_12_8_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x100\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_8_%=: \n\t" + "L_sp_384_num_bits_12_8_%=:\n\t" "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_7_%=\n\t" @@ -98168,7 +98168,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_7_3_%=: \n\t" + "L_sp_384_num_bits_12_7_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98178,7 +98178,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_7_2_%=: \n\t" + "L_sp_384_num_bits_12_7_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98188,14 +98188,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_7_1_%=: \n\t" + "L_sp_384_num_bits_12_7_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xe0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_7_%=: \n\t" + "L_sp_384_num_bits_12_7_%=:\n\t" "ldr r1, [%[a], #24]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_6_%=\n\t" @@ -98207,7 +98207,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_6_3_%=: \n\t" + "L_sp_384_num_bits_12_6_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98217,7 +98217,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_6_2_%=: \n\t" + "L_sp_384_num_bits_12_6_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98227,14 +98227,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_6_1_%=: \n\t" + "L_sp_384_num_bits_12_6_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xc0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_6_%=: \n\t" + "L_sp_384_num_bits_12_6_%=:\n\t" "ldr r1, [%[a], #20]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_5_%=\n\t" @@ -98246,7 +98246,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_5_3_%=: \n\t" + "L_sp_384_num_bits_12_5_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98256,7 +98256,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_5_2_%=: \n\t" + "L_sp_384_num_bits_12_5_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98266,14 +98266,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_5_1_%=: \n\t" + "L_sp_384_num_bits_12_5_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xa0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_5_%=: \n\t" + "L_sp_384_num_bits_12_5_%=:\n\t" "ldr r1, [%[a], #16]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_4_%=\n\t" @@ -98285,7 +98285,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_4_3_%=: \n\t" + "L_sp_384_num_bits_12_4_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98295,7 +98295,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_4_2_%=: \n\t" + "L_sp_384_num_bits_12_4_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98305,14 +98305,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_4_1_%=: \n\t" + "L_sp_384_num_bits_12_4_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x80\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_4_%=: \n\t" + "L_sp_384_num_bits_12_4_%=:\n\t" "ldr r1, [%[a], #12]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_3_%=\n\t" @@ -98324,7 +98324,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_3_3_%=: \n\t" + "L_sp_384_num_bits_12_3_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98334,7 +98334,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_3_2_%=: \n\t" + "L_sp_384_num_bits_12_3_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98344,14 +98344,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_3_1_%=: \n\t" + "L_sp_384_num_bits_12_3_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x60\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_3_%=: \n\t" + "L_sp_384_num_bits_12_3_%=:\n\t" "ldr r1, [%[a], #8]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_2_%=\n\t" @@ -98363,7 +98363,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_2_3_%=: \n\t" + "L_sp_384_num_bits_12_2_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98373,7 +98373,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_2_2_%=: \n\t" + "L_sp_384_num_bits_12_2_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98383,14 +98383,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_2_1_%=: \n\t" + "L_sp_384_num_bits_12_2_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x40\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_2_%=: \n\t" + "L_sp_384_num_bits_12_2_%=:\n\t" "ldr r1, [%[a], #4]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_1_%=\n\t" @@ -98402,7 +98402,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_1_3_%=: \n\t" + "L_sp_384_num_bits_12_1_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98412,7 +98412,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_1_2_%=: \n\t" + "L_sp_384_num_bits_12_1_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98422,14 +98422,14 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_1_1_%=: \n\t" + "L_sp_384_num_bits_12_1_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #32\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_1_%=: \n\t" + "L_sp_384_num_bits_12_1_%=:\n\t" "ldr r1, [%[a]]\n\t" "lsr r3, r1, #24\n\t" "cmp r3, #0\n\t" @@ -98439,7 +98439,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_0_3_%=: \n\t" + "L_sp_384_num_bits_12_0_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98449,7 +98449,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_0_2_%=: \n\t" + "L_sp_384_num_bits_12_0_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -98459,11 +98459,11 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_0_1_%=: \n\t" + "L_sp_384_num_bits_12_0_1_%=:\n\t" "and r3, r1, #0xff\n\t" "ldrb r12, [lr, r3]\n\t" "\n" - "L_sp_384_num_bits_12_13_%=: \n\t" + "L_sp_384_num_bits_12_13_%=:\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), @@ -98484,10 +98484,10 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -98504,7 +98504,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_11_%=: \n\t" + "L_sp_384_num_bits_12_11_%=:\n\t" "ldr r1, [%[a], #40]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_10_%=\n\t" @@ -98518,7 +98518,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_10_%=: \n\t" + "L_sp_384_num_bits_12_10_%=:\n\t" "ldr r1, [%[a], #36]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_9_%=\n\t" @@ -98532,7 +98532,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_9_%=: \n\t" + "L_sp_384_num_bits_12_9_%=:\n\t" "ldr r1, [%[a], #32]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_8_%=\n\t" @@ -98546,7 +98546,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_8_%=: \n\t" + "L_sp_384_num_bits_12_8_%=:\n\t" "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_7_%=\n\t" @@ -98555,7 +98555,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_7_%=: \n\t" + "L_sp_384_num_bits_12_7_%=:\n\t" "ldr r1, [%[a], #24]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_6_%=\n\t" @@ -98564,7 +98564,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_6_%=: \n\t" + "L_sp_384_num_bits_12_6_%=:\n\t" "ldr r1, [%[a], #20]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_5_%=\n\t" @@ -98573,7 +98573,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_5_%=: \n\t" + "L_sp_384_num_bits_12_5_%=:\n\t" "ldr r1, [%[a], #16]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_4_%=\n\t" @@ -98582,7 +98582,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_4_%=: \n\t" + "L_sp_384_num_bits_12_4_%=:\n\t" "ldr r1, [%[a], #12]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_3_%=\n\t" @@ -98591,7 +98591,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_3_%=: \n\t" + "L_sp_384_num_bits_12_3_%=:\n\t" "ldr r1, [%[a], #8]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_2_%=\n\t" @@ -98600,7 +98600,7 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_2_%=: \n\t" + "L_sp_384_num_bits_12_2_%=:\n\t" "ldr r1, [%[a], #4]\n\t" "cmp r1, #0\n\t" "beq L_sp_384_num_bits_12_1_%=\n\t" @@ -98609,13 +98609,13 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_384_num_bits_12_13_%=\n\t" "\n" - "L_sp_384_num_bits_12_1_%=: \n\t" + "L_sp_384_num_bits_12_1_%=:\n\t" "ldr r1, [%[a]]\n\t" "mov r2, #32\n\t" "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "\n" - "L_sp_384_num_bits_12_13_%=: \n\t" + "L_sp_384_num_bits_12_13_%=:\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) @@ -99618,12 +99618,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -99636,13 +99636,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_521_mul_17_outer_%=: \n\t" + "L_sp_521_mul_17_outer_%=:\n\t" "subs r3, r5, #0x40\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_521_mul_17_inner_%=: \n\t" + "L_sp_521_mul_17_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -99760,7 +99760,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_521_mul_17_inner_done_%=: \n\t" + "L_sp_521_mul_17_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -99805,7 +99805,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, "stm %[r]!, {r6, r7}\n\t" "sub r5, r5, #8\n\t" "\n" - "L_sp_521_mul_17_store_%=: \n\t" + "L_sp_521_mul_17_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -99835,12 +99835,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -110864,11 +110864,11 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -110880,13 +110880,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_521_sqr_17_outer_%=: \n\t" + "L_sp_521_sqr_17_outer_%=:\n\t" "subs r3, r5, #0x40\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_521_sqr_17_inner_%=: \n\t" + "L_sp_521_sqr_17_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -110971,7 +110971,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_521_sqr_17_inner_done_%=: \n\t" + "L_sp_521_sqr_17_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -111010,7 +111010,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "stm %[r]!, {r6, r7}\n\t" "sub r5, r5, #8\n\t" "\n" - "L_sp_521_sqr_17_store_%=: \n\t" + "L_sp_521_sqr_17_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -111038,11 +111038,11 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -116926,19 +116926,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_add_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x40\n\t" "\n" - "L_sp_521_add_17_word_%=: \n\t" + "L_sp_521_add_17_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -116984,12 +116984,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_add_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -117268,13 +117268,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -117282,7 +117282,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_521_cond_sub_17_words_%=: \n\t" + "L_sp_521_cond_sub_17_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -117321,13 +117321,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -117419,10 +117419,10 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_17( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -117569,12 +117569,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -117587,7 +117587,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + "L_sp_521_mont_reduce_order_17_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" "cmp r9, #0x40\n\t" @@ -117600,7 +117600,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #endif "and r8, r8, r7\n\t" "\n" - "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + "L_sp_521_mont_reduce_order_17_nomask_%=:\n\t" /* a[i+0] += m[0] * mu */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) "ldr r11, [%[m]]\n\t" @@ -118210,12 +118210,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -118226,7 +118226,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + "L_sp_521_mont_reduce_order_17_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" "cmp r9, #0x40\n\t" @@ -118239,7 +118239,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #endif "and r8, r8, r7\n\t" "\n" - "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + "L_sp_521_mont_reduce_order_17_nomask_%=:\n\t" /* a[i+0] += m[0] * mu */ "mov r5, #0\n\t" "umlal r12, r5, r8, r11\n\t" @@ -118481,12 +118481,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -118499,7 +118499,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + "L_sp_521_mont_reduce_order_17_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" "cmp r12, #0x40\n\t" @@ -118512,7 +118512,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #endif "and r11, r11, r10\n\t" "\n" - "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + "L_sp_521_mont_reduce_order_17_nomask_%=:\n\t" /* a[i+0] += m[0] * mu */ "ldr r10, [%[m]]\n\t" "mov r3, #0\n\t" @@ -118847,11 +118847,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_521_cmp_17(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -118862,7 +118862,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_521_cmp_17(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #0x40\n\t" "\n" - "L_sp_521_cmp_17_words_%=: \n\t" + "L_sp_521_cmp_17_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -119139,12 +119139,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_mont_add_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -119242,11 +119242,11 @@ WC_OMIT_FRAME_POINTER static void sp_521_mont_dbl_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -119335,11 +119335,11 @@ WC_OMIT_FRAME_POINTER static void sp_521_mont_tpl_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -119463,12 +119463,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_mont_sub_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -119561,11 +119561,11 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift1_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -123658,10 +123658,10 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, WC_OMIT_FRAME_POINTER static void sp_521_add_one_17(sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_521_add_one_17(sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -124051,12 +124051,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -124172,12 +124172,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_lshift_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -124301,12 +124301,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_lshift_34(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register byte n asm ("r2") = (byte)n_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register byte n __asm__ ("r2") = (byte)n_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -124538,18 +124538,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x40\n\t" "\n" - "L_sp_521_sub_in_place_17_word_%=: \n\t" + "L_sp_521_sub_in_place_17_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -124592,11 +124592,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -124659,12 +124659,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -124700,7 +124700,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_521_mul_d_17_word_%=: \n\t" + "L_sp_521_mul_d_17_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -124770,12 +124770,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -125348,12 +125348,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_521_word_17(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -125420,12 +125420,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_521_word_17(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -125443,7 +125443,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_521_word_17_bit_%=: \n\t" + "L_div_521_word_17_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -126182,19 +126182,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x40\n\t" "\n" - "L_sp_521_sub_17_word_%=: \n\t" + "L_sp_521_sub_17_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a]!, {r3, r4, r5, r6}\n\t" "ldm %[b]!, {r7, r8, r9, r10}\n\t" @@ -126238,12 +126238,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -126305,12 +126305,12 @@ WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -126353,7 +126353,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r, "adc r3, r12, r12\n\t" "b L_sp_521_div2_mod_17_div2_%=\n\t" "\n" - "L_sp_521_div2_mod_17_even_%=: \n\t" + "L_sp_521_div2_mod_17_even_%=:\n\t" "ldm %[a]!, {r5, r6, r7}\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" @@ -126365,7 +126365,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r, "ldm %[a]!, {r4}\n\t" "stm %[r]!, {r4}\n\t" "\n" - "L_sp_521_div2_mod_17_div2_%=: \n\t" + "L_sp_521_div2_mod_17_div2_%=:\n\t" "sub %[r], %[r], #0x44\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "ldm r0, {r8, r9}\n\t" @@ -126451,7 +126451,7 @@ WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r, } #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) -static const byte L_sp_521_num_bits_17_table[] = { +XALIGNED(4) static const word8 L_sp_521_num_bits_17_table[] = { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, @@ -126490,15 +126490,15 @@ static const byte L_sp_521_num_bits_17_table[] = { WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register byte* L_sp_521_num_bits_17_table_c asm ("r1") = - (byte*)&L_sp_521_num_bits_17_table; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register word8* L_sp_521_num_bits_17_table_c __asm__ ("r1") = + (word8*)&L_sp_521_num_bits_17_table; #else - register byte* L_sp_521_num_bits_17_table_c = - (byte*)&L_sp_521_num_bits_17_table; + register word8* L_sp_521_num_bits_17_table_c = + (word8*)&L_sp_521_num_bits_17_table; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -126519,7 +126519,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_16_3_%=: \n\t" + "L_sp_521_num_bits_17_16_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126534,7 +126534,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_16_2_%=: \n\t" + "L_sp_521_num_bits_17_16_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126549,14 +126549,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_16_1_%=: \n\t" + "L_sp_521_num_bits_17_16_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x200\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_16_%=: \n\t" + "L_sp_521_num_bits_17_16_%=:\n\t" "ldr r1, [%[a], #60]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_15_%=\n\t" @@ -126573,7 +126573,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_15_3_%=: \n\t" + "L_sp_521_num_bits_17_15_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126588,7 +126588,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_15_2_%=: \n\t" + "L_sp_521_num_bits_17_15_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126603,7 +126603,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_15_1_%=: \n\t" + "L_sp_521_num_bits_17_15_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0xe0\n\t" @@ -126615,7 +126615,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_15_%=: \n\t" + "L_sp_521_num_bits_17_15_%=:\n\t" "ldr r1, [%[a], #56]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_14_%=\n\t" @@ -126632,7 +126632,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_14_3_%=: \n\t" + "L_sp_521_num_bits_17_14_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126647,7 +126647,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_14_2_%=: \n\t" + "L_sp_521_num_bits_17_14_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126662,7 +126662,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_14_1_%=: \n\t" + "L_sp_521_num_bits_17_14_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0xc0\n\t" @@ -126674,7 +126674,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_14_%=: \n\t" + "L_sp_521_num_bits_17_14_%=:\n\t" "ldr r1, [%[a], #52]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_13_%=\n\t" @@ -126691,7 +126691,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_13_3_%=: \n\t" + "L_sp_521_num_bits_17_13_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126706,7 +126706,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_13_2_%=: \n\t" + "L_sp_521_num_bits_17_13_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126721,7 +126721,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_13_1_%=: \n\t" + "L_sp_521_num_bits_17_13_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0xa0\n\t" @@ -126733,7 +126733,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_13_%=: \n\t" + "L_sp_521_num_bits_17_13_%=:\n\t" "ldr r1, [%[a], #48]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_12_%=\n\t" @@ -126750,7 +126750,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_12_3_%=: \n\t" + "L_sp_521_num_bits_17_12_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126765,7 +126765,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_12_2_%=: \n\t" + "L_sp_521_num_bits_17_12_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126780,7 +126780,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_12_1_%=: \n\t" + "L_sp_521_num_bits_17_12_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x80\n\t" @@ -126792,7 +126792,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_12_%=: \n\t" + "L_sp_521_num_bits_17_12_%=:\n\t" "ldr r1, [%[a], #44]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_11_%=\n\t" @@ -126809,7 +126809,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_11_3_%=: \n\t" + "L_sp_521_num_bits_17_11_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126824,7 +126824,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_11_2_%=: \n\t" + "L_sp_521_num_bits_17_11_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126839,7 +126839,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_11_1_%=: \n\t" + "L_sp_521_num_bits_17_11_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x60\n\t" @@ -126851,7 +126851,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_11_%=: \n\t" + "L_sp_521_num_bits_17_11_%=:\n\t" "ldr r1, [%[a], #40]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_10_%=\n\t" @@ -126868,7 +126868,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_10_3_%=: \n\t" + "L_sp_521_num_bits_17_10_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126883,7 +126883,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_10_2_%=: \n\t" + "L_sp_521_num_bits_17_10_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126898,7 +126898,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_10_1_%=: \n\t" + "L_sp_521_num_bits_17_10_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x40\n\t" @@ -126910,7 +126910,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_10_%=: \n\t" + "L_sp_521_num_bits_17_10_%=:\n\t" "ldr r1, [%[a], #36]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_9_%=\n\t" @@ -126927,7 +126927,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_9_3_%=: \n\t" + "L_sp_521_num_bits_17_9_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126942,7 +126942,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_9_2_%=: \n\t" + "L_sp_521_num_bits_17_9_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -126957,7 +126957,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_9_1_%=: \n\t" + "L_sp_521_num_bits_17_9_1_%=:\n\t" "and r3, r1, #0xff\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r2, #0x20\n\t" @@ -126969,7 +126969,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_9_%=: \n\t" + "L_sp_521_num_bits_17_9_%=:\n\t" "ldr r1, [%[a], #32]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_8_%=\n\t" @@ -126986,7 +126986,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_8_3_%=: \n\t" + "L_sp_521_num_bits_17_8_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127001,7 +127001,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_8_2_%=: \n\t" + "L_sp_521_num_bits_17_8_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127016,14 +127016,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_8_1_%=: \n\t" + "L_sp_521_num_bits_17_8_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x100\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_8_%=: \n\t" + "L_sp_521_num_bits_17_8_%=:\n\t" "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_7_%=\n\t" @@ -127035,7 +127035,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_7_3_%=: \n\t" + "L_sp_521_num_bits_17_7_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127045,7 +127045,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_7_2_%=: \n\t" + "L_sp_521_num_bits_17_7_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127055,14 +127055,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_7_1_%=: \n\t" + "L_sp_521_num_bits_17_7_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xe0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_7_%=: \n\t" + "L_sp_521_num_bits_17_7_%=:\n\t" "ldr r1, [%[a], #24]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_6_%=\n\t" @@ -127074,7 +127074,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_6_3_%=: \n\t" + "L_sp_521_num_bits_17_6_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127084,7 +127084,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_6_2_%=: \n\t" + "L_sp_521_num_bits_17_6_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127094,14 +127094,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_6_1_%=: \n\t" + "L_sp_521_num_bits_17_6_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xc0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_6_%=: \n\t" + "L_sp_521_num_bits_17_6_%=:\n\t" "ldr r1, [%[a], #20]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_5_%=\n\t" @@ -127113,7 +127113,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_5_3_%=: \n\t" + "L_sp_521_num_bits_17_5_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127123,7 +127123,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_5_2_%=: \n\t" + "L_sp_521_num_bits_17_5_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127133,14 +127133,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_5_1_%=: \n\t" + "L_sp_521_num_bits_17_5_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0xa0\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_5_%=: \n\t" + "L_sp_521_num_bits_17_5_%=:\n\t" "ldr r1, [%[a], #16]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_4_%=\n\t" @@ -127152,7 +127152,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_4_3_%=: \n\t" + "L_sp_521_num_bits_17_4_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127162,7 +127162,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_4_2_%=: \n\t" + "L_sp_521_num_bits_17_4_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127172,14 +127172,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_4_1_%=: \n\t" + "L_sp_521_num_bits_17_4_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x80\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_4_%=: \n\t" + "L_sp_521_num_bits_17_4_%=:\n\t" "ldr r1, [%[a], #12]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_3_%=\n\t" @@ -127191,7 +127191,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_3_3_%=: \n\t" + "L_sp_521_num_bits_17_3_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127201,7 +127201,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_3_2_%=: \n\t" + "L_sp_521_num_bits_17_3_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127211,14 +127211,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_3_1_%=: \n\t" + "L_sp_521_num_bits_17_3_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x60\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_3_%=: \n\t" + "L_sp_521_num_bits_17_3_%=:\n\t" "ldr r1, [%[a], #8]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_2_%=\n\t" @@ -127230,7 +127230,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_2_3_%=: \n\t" + "L_sp_521_num_bits_17_2_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127240,7 +127240,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_2_2_%=: \n\t" + "L_sp_521_num_bits_17_2_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127250,14 +127250,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_2_1_%=: \n\t" + "L_sp_521_num_bits_17_2_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #0x40\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_2_%=: \n\t" + "L_sp_521_num_bits_17_2_%=:\n\t" "ldr r1, [%[a], #4]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_1_%=\n\t" @@ -127269,7 +127269,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_1_3_%=: \n\t" + "L_sp_521_num_bits_17_1_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127279,7 +127279,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_1_2_%=: \n\t" + "L_sp_521_num_bits_17_1_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127289,14 +127289,14 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_1_1_%=: \n\t" + "L_sp_521_num_bits_17_1_1_%=:\n\t" "and r3, r1, #0xff\n\t" "mov r2, #32\n\t" "ldrb r12, [lr, r3]\n\t" "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_1_%=: \n\t" + "L_sp_521_num_bits_17_1_%=:\n\t" "ldr r1, [%[a]]\n\t" "lsr r3, r1, #24\n\t" "cmp r3, #0\n\t" @@ -127306,7 +127306,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_0_3_%=: \n\t" + "L_sp_521_num_bits_17_0_3_%=:\n\t" "lsr r3, r1, #16\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127316,7 +127316,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_0_2_%=: \n\t" + "L_sp_521_num_bits_17_0_2_%=:\n\t" "lsr r3, r1, #8\n\t" "and r3, r3, #0xff\n\t" "cmp r3, #0\n\t" @@ -127326,11 +127326,11 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "add r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_0_1_%=: \n\t" + "L_sp_521_num_bits_17_0_1_%=:\n\t" "and r3, r1, #0xff\n\t" "ldrb r12, [lr, r3]\n\t" "\n" - "L_sp_521_num_bits_17_18_%=: \n\t" + "L_sp_521_num_bits_17_18_%=:\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), @@ -127351,10 +127351,10 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -127371,7 +127371,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_16_%=: \n\t" + "L_sp_521_num_bits_17_16_%=:\n\t" "ldr r1, [%[a], #60]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_15_%=\n\t" @@ -127380,7 +127380,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_15_%=: \n\t" + "L_sp_521_num_bits_17_15_%=:\n\t" "ldr r1, [%[a], #56]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_14_%=\n\t" @@ -127394,7 +127394,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_14_%=: \n\t" + "L_sp_521_num_bits_17_14_%=:\n\t" "ldr r1, [%[a], #52]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_13_%=\n\t" @@ -127408,7 +127408,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_13_%=: \n\t" + "L_sp_521_num_bits_17_13_%=:\n\t" "ldr r1, [%[a], #48]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_12_%=\n\t" @@ -127422,7 +127422,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_12_%=: \n\t" + "L_sp_521_num_bits_17_12_%=:\n\t" "ldr r1, [%[a], #44]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_11_%=\n\t" @@ -127436,7 +127436,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_11_%=: \n\t" + "L_sp_521_num_bits_17_11_%=:\n\t" "ldr r1, [%[a], #40]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_10_%=\n\t" @@ -127450,7 +127450,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_10_%=: \n\t" + "L_sp_521_num_bits_17_10_%=:\n\t" "ldr r1, [%[a], #36]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_9_%=\n\t" @@ -127464,7 +127464,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_9_%=: \n\t" + "L_sp_521_num_bits_17_9_%=:\n\t" "ldr r1, [%[a], #32]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_8_%=\n\t" @@ -127478,7 +127478,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_8_%=: \n\t" + "L_sp_521_num_bits_17_8_%=:\n\t" "ldr r1, [%[a], #28]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_7_%=\n\t" @@ -127487,7 +127487,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_7_%=: \n\t" + "L_sp_521_num_bits_17_7_%=:\n\t" "ldr r1, [%[a], #24]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_6_%=\n\t" @@ -127496,7 +127496,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_6_%=: \n\t" + "L_sp_521_num_bits_17_6_%=:\n\t" "ldr r1, [%[a], #20]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_5_%=\n\t" @@ -127505,7 +127505,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_5_%=: \n\t" + "L_sp_521_num_bits_17_5_%=:\n\t" "ldr r1, [%[a], #16]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_4_%=\n\t" @@ -127514,7 +127514,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_4_%=: \n\t" + "L_sp_521_num_bits_17_4_%=:\n\t" "ldr r1, [%[a], #12]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_3_%=\n\t" @@ -127523,7 +127523,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_3_%=: \n\t" + "L_sp_521_num_bits_17_3_%=:\n\t" "ldr r1, [%[a], #8]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_2_%=\n\t" @@ -127532,7 +127532,7 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_2_%=: \n\t" + "L_sp_521_num_bits_17_2_%=:\n\t" "ldr r1, [%[a], #4]\n\t" "cmp r1, #0\n\t" "beq L_sp_521_num_bits_17_1_%=\n\t" @@ -127541,13 +127541,13 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "sub r12, r2, r12\n\t" "b L_sp_521_num_bits_17_18_%=\n\t" "\n" - "L_sp_521_num_bits_17_1_%=: \n\t" + "L_sp_521_num_bits_17_1_%=:\n\t" "ldr r1, [%[a]]\n\t" "mov r2, #32\n\t" "clz r12, r1\n\t" "sub r12, r2, r12\n\t" "\n" - "L_sp_521_num_bits_17_18_%=: \n\t" + "L_sp_521_num_bits_17_18_%=:\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) @@ -128430,12 +128430,12 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_16(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -138200,11 +138200,11 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_16(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143447,12 +143447,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_16(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143509,11 +143509,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143598,12 +143598,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143758,12 +143758,12 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_16(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143857,12 +143857,12 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -143875,13 +143875,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_1024_mul_32_outer_%=: \n\t" + "L_sp_1024_mul_32_outer_%=:\n\t" "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_1024_mul_32_inner_%=: \n\t" + "L_sp_1024_mul_32_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[b], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -143999,7 +143999,7 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_1024_mul_32_inner_done_%=: \n\t" + "L_sp_1024_mul_32_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -144041,7 +144041,7 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_1024_mul_32_store_%=: \n\t" + "L_sp_1024_mul_32_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -144068,11 +144068,11 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_32(sp_digit* r_p, const sp_digit* a_p) #else WC_OMIT_FRAME_POINTER static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -144084,13 +144084,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) "mov r8, #0\n\t" "mov r5, #4\n\t" "\n" - "L_sp_1024_sqr_32_outer_%=: \n\t" + "L_sp_1024_sqr_32_outer_%=:\n\t" "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" "\n" - "L_sp_1024_sqr_32_inner_%=: \n\t" + "L_sp_1024_sqr_32_inner_%=:\n\t" "ldr lr, [%[a], r3]\n\t" "ldr r11, [%[a], r4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -144175,7 +144175,7 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) "adc r8, r8, #0\n\t" #endif "\n" - "L_sp_1024_sqr_32_inner_done_%=: \n\t" + "L_sp_1024_sqr_32_inner_done_%=:\n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" @@ -144211,7 +144211,7 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) "add r5, r5, #4\n\t" "str r7, [sp, r5]\n\t" "\n" - "L_sp_1024_sqr_32_store_%=: \n\t" + "L_sp_1024_sqr_32_store_%=:\n\t" "ldm sp!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "stm %[r]!, {r3, r4, r6, r7, r8, r9, r10, r11}\n\t" "subs r5, r5, #32\n\t" @@ -144326,18 +144326,18 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r12, #0\n\t" "add lr, %[a], #0x80\n\t" "\n" - "L_sp_1024_sub_in_place_32_word_%=: \n\t" + "L_sp_1024_sub_in_place_32_word_%=:\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -144379,13 +144379,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -144393,7 +144393,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r, "mov r12, #0\n\t" "mov lr, #0\n\t" "\n" - "L_sp_1024_cond_sub_32_words_%=: \n\t" + "L_sp_1024_cond_sub_32_words_%=:\n\t" "subs r12, r6, r12\n\t" "ldr r4, [%[a], lr]\n\t" "ldr r5, [%[b], lr]\n\t" @@ -144432,13 +144432,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -144582,19 +144582,19 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "mov r3, #0\n\t" "add r12, %[a], #0x80\n\t" "\n" - "L_sp_1024_add_32_word_%=: \n\t" + "L_sp_1024_add_32_word_%=:\n\t" "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" @@ -144635,12 +144635,12 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -144676,7 +144676,7 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r, "mov r5, #0\n\t" "mov r9, #4\n\t" "\n" - "L_sp_1024_mul_d_32_word_%=: \n\t" + "L_sp_1024_mul_d_32_word_%=:\n\t" /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 4) @@ -144746,12 +144746,12 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register sp_digit b asm ("r2") = (sp_digit)b_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register sp_digit b __asm__ ("r2") = (sp_digit)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -145804,12 +145804,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_1024_word_32(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -145876,12 +145876,12 @@ WC_OMIT_FRAME_POINTER static sp_digit div_1024_word_32(sp_digit d1_p, #else WC_OMIT_FRAME_POINTER static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit d1 asm ("r0") = (sp_digit)d1_p; - register sp_digit d0 asm ("r1") = (sp_digit)d0_p; - register sp_digit div asm ("r2") = (sp_digit)div_p; + register sp_digit d1 __asm__ ("r0") = (sp_digit)d1_p; + register sp_digit d0 __asm__ ("r1") = (sp_digit)d0_p; + register sp_digit div __asm__ ("r2") = (sp_digit)div_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -145899,7 +145899,7 @@ WC_OMIT_FRAME_POINTER static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, /* Next 30 bits */ "mov r12, #29\n\t" "\n" - "L_div_1024_word_32_bit_%=: \n\t" + "L_div_1024_word_32_bit_%=:\n\t" "lsls r4, r4, #1\n\t" "adc r5, r5, r5\n\t" "subs r6, lr, r5\n\t" @@ -146055,11 +146055,11 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_1024_cmp_32(const sp_digit* a_p, #else WC_OMIT_FRAME_POINTER static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register const sp_digit* a asm ("r0") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r1") = (const sp_digit*)b_p; + register const sp_digit* a __asm__ ("r0") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r1") = (const sp_digit*)b_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -146070,7 +146070,7 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_1024_cmp_32(const sp_digit* a, #ifdef WOLFSSL_SP_SMALL "mov r4, #0x7c\n\t" "\n" - "L_sp_1024_cmp_32_words_%=: \n\t" + "L_sp_1024_cmp_32_words_%=:\n\t" "ldr r12, [%[a], r4]\n\t" "ldr lr, [%[b], r4]\n\t" "and r12, r12, r3\n\t" @@ -146795,12 +146795,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -146813,7 +146813,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_1024_mont_reduce_32_word_%=: \n\t" + "L_sp_1024_mont_reduce_32_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -147794,12 +147794,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -147810,7 +147810,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( "ldr r12, [%[a]]\n\t" "ldr lr, [%[a], #4]\n\t" "\n" - "L_sp_1024_mont_reduce_32_word_%=: \n\t" + "L_sp_1024_mont_reduce_32_word_%=:\n\t" /* mu = a[i] * mp */ "mul r8, %[mp], r12\n\t" /* a[i+0] += m[0] * mu */ @@ -148108,12 +148108,12 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( #else WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* a asm ("r0") = (sp_digit*)a_p; - register const sp_digit* m asm ("r1") = (const sp_digit*)m_p; - register sp_digit mp asm ("r2") = (sp_digit)mp_p; + register sp_digit* a __asm__ ("r0") = (sp_digit*)a_p; + register const sp_digit* m __asm__ ("r1") = (const sp_digit*)m_p; + register sp_digit mp __asm__ ("r2") = (sp_digit)mp_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -148126,7 +148126,7 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( "ldr r7, [%[a], #12]\n\t" "ldr r8, [%[a], #16]\n\t" "\n" - "L_sp_1024_mont_reduce_32_word_%=: \n\t" + "L_sp_1024_mont_reduce_32_word_%=:\n\t" /* mu = a[i] * mp */ "mul r11, %[mp], r4\n\t" /* a[i+0] += m[0] * mu */ @@ -148470,13 +148470,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mont_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -148656,12 +148656,12 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mont_dbl_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -148825,12 +148825,12 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mont_tpl_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* m asm ("r2") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* m __asm__ ("r2") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -149150,13 +149150,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mont_sub_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register const sp_digit* m asm ("r3") = (const sp_digit*)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register const sp_digit* m __asm__ ("r3") = (const sp_digit*)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -149333,13 +149333,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -149347,7 +149347,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r, "mov r6, #0\n\t" "mov r12, #0\n\t" "\n" - "L_sp_1024_cond_add_32_words_%=: \n\t" + "L_sp_1024_cond_add_32_words_%=:\n\t" "adds lr, lr, #-1\n\t" "ldr r4, [%[a], r12]\n\t" "ldr r5, [%[b], r12]\n\t" @@ -149386,13 +149386,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; - register const sp_digit* b asm ("r2") = (const sp_digit*)b_p; - register sp_digit m asm ("r3") = (sp_digit)m_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; + register const sp_digit* b __asm__ ("r2") = (const sp_digit*)b_p; + register sp_digit m __asm__ ("r3") = (sp_digit)m_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( @@ -149529,11 +149529,11 @@ WC_OMIT_FRAME_POINTER static void sp_1024_rshift1_32(sp_digit* r_p, #else WC_OMIT_FRAME_POINTER static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) -#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - register sp_digit* r asm ("r0") = (sp_digit*)r_p; - register const sp_digit* a asm ("r1") = (const sp_digit*)a_p; + register sp_digit* r __asm__ ("r0") = (sp_digit*)r_p; + register const sp_digit* a __asm__ ("r1") = (const sp_digit*)a_p; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 88604d693c8..b479c93b9bd 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -302,9 +302,10 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[8 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -534,8 +535,10 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -547,9 +550,10 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "mov r8, %[r]\n\t" @@ -9490,7 +9494,7 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -9501,9 +9505,10 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -9581,7 +9586,7 @@ SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -9592,9 +9597,10 @@ SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_word_8(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_word_8( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -9669,7 +9675,7 @@ SP_NOINLINE static sp_digit sp_2048_add_word_8(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -9679,9 +9685,10 @@ SP_NOINLINE static sp_digit sp_2048_add_word_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_in_place_16( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -9834,7 +9841,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -9845,9 +9852,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_16(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -9993,7 +10001,7 @@ SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -10067,9 +10075,10 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_word_16(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_word_16( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -10208,7 +10217,7 @@ SP_NOINLINE static sp_digit sp_2048_add_word_16(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -10218,9 +10227,10 @@ SP_NOINLINE static sp_digit sp_2048_add_word_16(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_in_place_32( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -10517,7 +10527,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -10528,9 +10538,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -10812,7 +10823,7 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -10890,9 +10901,10 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_word_32(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_word_32( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -11159,7 +11171,7 @@ SP_NOINLINE static sp_digit sp_2048_add_word_32(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -11169,9 +11181,10 @@ SP_NOINLINE static sp_digit sp_2048_add_word_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_in_place_64( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -11756,7 +11769,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -11767,9 +11780,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_64(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -12323,7 +12337,7 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -12401,8 +12415,10 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_8(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -12815,7 +12831,8 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -12825,8 +12842,10 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_8(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "mov r8, %[r]\n\t" @@ -19330,7 +19349,8 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r5}\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -19341,9 +19361,10 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -19420,7 +19441,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -19466,9 +19487,10 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_16(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -19613,7 +19635,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -19659,9 +19681,10 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -19942,7 +19965,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -19990,9 +20013,10 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_64(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -20058,7 +20082,7 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -20070,9 +20094,10 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_in_place_64( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -20137,7 +20162,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -20150,9 +20175,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_64(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[64 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -20392,8 +20418,10 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -20403,8 +20431,10 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_64(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -20842,7 +20872,8 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -20872,9 +20903,10 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_add_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -20934,7 +20966,7 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -20946,9 +20978,10 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_in_place_32( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -21007,7 +21040,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -21020,9 +21053,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[32 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -21252,8 +21286,10 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -21263,8 +21299,10 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_32(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -21687,7 +21725,8 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -21720,9 +21759,10 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_d_64(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #0xff\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -21901,7 +21941,7 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -21928,9 +21968,10 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_cond_sub_32( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0x80\n\t" @@ -21978,7 +22019,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -21990,9 +22031,10 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -23046,7 +23088,8 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -23086,9 +23129,10 @@ SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_d_32(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #0x80\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -23262,7 +23306,7 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -23275,9 +23319,10 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_32(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -23877,7 +23922,7 @@ SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -23889,8 +23934,10 @@ SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_2048_cmp_32( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -23986,7 +24033,7 @@ SP_NOINLINE static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -24367,9 +24414,10 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_cond_sub_64( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0xff\n\t" @@ -24422,7 +24470,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -24434,9 +24482,10 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -26044,7 +26093,8 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -26085,9 +26135,10 @@ SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_64(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r3, #0\n\t" @@ -26147,7 +26198,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -26159,9 +26210,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_sub_64(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -26714,7 +26766,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -26729,9 +26781,10 @@ SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_64(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -27331,7 +27384,7 @@ SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -27447,8 +27500,10 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_2048_cmp_64( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -27544,7 +27599,7 @@ SP_NOINLINE static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -28033,9 +28088,10 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_2048_cond_add_32( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0x80\n\t" @@ -28089,7 +28145,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -28373,8 +28429,10 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, * a A single precision integer. * n Integer representing number of bits to shift. */ -static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) +WC_OMIT_FRAME_POINTER static void sp_2048_lshift_64(sp_digit* r, + const sp_digit* a, byte n) { + __asm__ __volatile__ ( "movs r7, #31\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -29954,7 +30012,7 @@ static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) "str r5, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -30369,9 +30427,10 @@ static void sp_3072_to_bin_96(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mul_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[12 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -30601,8 +30660,10 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -30614,9 +30675,10 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mul_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" "mov r8, %[r]\n\t" @@ -50783,7 +50845,7 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -50794,9 +50856,10 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -50908,7 +50971,7 @@ SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -50919,9 +50982,10 @@ SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_word_12(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_word_12( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -51028,7 +51092,7 @@ SP_NOINLINE static sp_digit sp_3072_add_word_12(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -51038,9 +51102,10 @@ SP_NOINLINE static sp_digit sp_3072_add_word_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_in_place_24( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -51265,7 +51330,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -51276,9 +51341,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_24(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -51492,7 +51558,7 @@ SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -51570,9 +51636,10 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_word_24(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_word_24( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -51775,7 +51842,7 @@ SP_NOINLINE static sp_digit sp_3072_add_word_24(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -51785,9 +51852,10 @@ SP_NOINLINE static sp_digit sp_3072_add_word_24(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_in_place_48( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -52228,7 +52296,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -52239,9 +52307,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_48(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -52659,7 +52728,7 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -52737,9 +52806,10 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_word_48(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_word_48( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -53134,7 +53204,7 @@ SP_NOINLINE static sp_digit sp_3072_add_word_48(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -53144,9 +53214,10 @@ SP_NOINLINE static sp_digit sp_3072_add_word_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_in_place_96( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -54019,7 +54090,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -54030,9 +54101,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_96(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -54858,7 +54930,7 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -54936,8 +55008,10 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_sqr_12(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -55350,7 +55424,8 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -55360,8 +55435,10 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_sqr_12(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "sub sp, sp, #48\n\t" "mov r8, %[r]\n\t" @@ -70121,7 +70198,8 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r5}\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -70132,9 +70210,10 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -70245,7 +70324,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -70291,9 +70370,10 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_24(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -70506,7 +70586,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -70552,9 +70632,10 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_48(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -70971,7 +71052,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -71019,9 +71100,10 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_96(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -71087,7 +71169,7 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -71099,9 +71181,10 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_in_place_96( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -71166,7 +71249,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -71179,9 +71262,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mul_96(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[96 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -71431,8 +71515,10 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -71442,8 +71528,10 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_sqr_96(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -71896,7 +71984,8 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -71926,9 +72015,10 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_add_48(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -71988,7 +72078,7 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -72000,9 +72090,10 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_in_place_48( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -72061,7 +72152,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -72074,9 +72165,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mul_48(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[48 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -72311,8 +72403,10 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -72322,8 +72416,10 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_sqr_48(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -72756,7 +72852,8 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -72789,9 +72886,10 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mul_d_96(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #0xff\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -72970,7 +73068,7 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -72997,9 +73095,10 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_cond_sub_48( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0xc0\n\t" @@ -73047,7 +73146,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -73059,9 +73158,10 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -74387,7 +74487,8 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -74427,9 +74528,10 @@ SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mul_d_48(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #0xc0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -74603,7 +74705,7 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -74616,9 +74718,10 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_48(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -75218,7 +75321,7 @@ SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -75230,8 +75333,10 @@ SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_3072_cmp_48( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -75327,7 +75432,7 @@ SP_NOINLINE static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -75708,9 +75813,10 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_cond_sub_96( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0xff\n\t" @@ -75763,7 +75869,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -75775,9 +75881,10 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -77939,7 +78046,8 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -77980,9 +78088,10 @@ SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_96(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r3, #0\n\t" @@ -78042,7 +78151,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -78054,9 +78163,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_sub_96(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -78881,7 +78991,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -78896,9 +79006,10 @@ SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_96(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -79498,7 +79609,7 @@ SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -79614,8 +79725,10 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_3072_cmp_96( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -79716,7 +79829,7 @@ SP_NOINLINE static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -80205,9 +80318,10 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_3072_cond_add_48( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0xc0\n\t" @@ -80261,7 +80375,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -80545,8 +80659,10 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, * a A single precision integer. * n Integer representing number of bits to shift. */ -static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) +WC_OMIT_FRAME_POINTER static void sp_3072_lshift_96(sp_digit* r, + const sp_digit* a, byte n) { + __asm__ __volatile__ ( "movs r7, #31\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -82924,7 +83040,7 @@ static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) "str r3, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -83338,9 +83454,10 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_add_word_64(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_add_word_64( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -83863,7 +83980,7 @@ SP_NOINLINE static sp_digit sp_4096_add_word_64(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -83873,9 +83990,10 @@ SP_NOINLINE static sp_digit sp_4096_add_word_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_sub_in_place_128( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -85036,7 +85154,7 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -85047,9 +85165,10 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_add_128(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -86147,7 +86266,7 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -86232,9 +86351,10 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_add_128(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -86300,7 +86420,7 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -86312,9 +86432,10 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_sub_in_place_128( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -86379,7 +86500,7 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -86392,9 +86513,10 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mul_128(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[128 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -86644,8 +86766,10 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -86655,8 +86779,10 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_sqr_128(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -87109,7 +87235,8 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -87140,9 +87267,10 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mul_d_128(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #2\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -87321,7 +87449,7 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -87349,9 +87477,10 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_cond_sub_128( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #2\n\t" @@ -87404,7 +87533,7 @@ SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -87416,9 +87545,10 @@ SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -90124,7 +90254,8 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -90165,9 +90296,10 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_sub_128(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r3, #0\n\t" @@ -90227,7 +90359,7 @@ SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -90239,9 +90371,10 @@ SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_sub_128(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -91338,7 +91471,7 @@ SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -91353,9 +91486,10 @@ SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_4096_word_128(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -91955,7 +92089,7 @@ SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -92071,9 +92205,10 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_4096_cmp_128(const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_4096_cmp_128( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -92174,7 +92309,7 @@ SP_NOINLINE static sp_int32 sp_4096_cmp_128(const sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -92663,9 +92798,10 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_4096_cond_add_64( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0xff\n\t" @@ -92724,7 +92860,7 @@ SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -93008,8 +93144,10 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, * a A single precision integer. * n Integer representing number of bits to shift. */ -static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) +WC_OMIT_FRAME_POINTER static void sp_4096_lshift_128(sp_digit* r, + const sp_digit* a, byte n) { + __asm__ __volatile__ ( "movs r7, #31\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -96175,7 +96313,7 @@ static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) "str r4, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -96455,9 +96593,10 @@ static const sp_digit p256_b[8] = { * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mul_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[8 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -96687,8 +96826,10 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -96698,8 +96839,10 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_sqr_8(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -97112,7 +97255,8 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -97123,9 +97267,10 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_add_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -97185,7 +97330,7 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -97197,9 +97342,10 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_add_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -97277,7 +97423,7 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -97567,9 +97713,10 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, + const sp_digit* m, sp_digit mp) { + (void)mp; (void)m; @@ -97877,8 +98024,10 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "str r7, [%[a], #28]\n\t" : [a] "+l" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); + (void)m; + (void)mp; } /* Reduce the number back to 256 bits using Montgomery reduction. @@ -97887,9 +98036,10 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, - const sp_digit* m, sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -98535,7 +98685,8 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -98672,8 +98823,10 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_256_cmp_8( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -98769,7 +98922,7 @@ SP_NOINLINE static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -98788,9 +98941,10 @@ SP_NOINLINE static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_cond_sub_8(sp_digit* r, + const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #32\n\t" @@ -98838,7 +98992,7 @@ SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -98890,9 +99044,10 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_add_8(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + (void)m; __asm__ __volatile__ ( "movs r3, #0\n\t" @@ -99065,8 +99220,10 @@ SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, "str r5, [%[r], #28]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11" ); + (void)m; } /* Double a Montgomery form number (r = a + a % m). @@ -99075,9 +99232,10 @@ SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, * a Number to double in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_dbl_8(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + (void)m; __asm__ __volatile__ ( "ldr r4, [%[a]]\n\t" @@ -99242,8 +99400,10 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, "str r5, [%[r], #28]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11" ); + (void)m; } /* Triple a Montgomery form number (r = a + a + a % m). @@ -99252,9 +99412,10 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, * a Number to triple in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_tpl_8(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + (void)m; __asm__ __volatile__ ( "ldr r6, [%[a]]\n\t" @@ -99575,8 +99736,10 @@ SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, "str r2, [%[r], #28]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); + (void)m; } /* Subtract two Montgomery form numbers (r = a - b % m). @@ -99586,9 +99749,10 @@ SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sub_8(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + (void)m; __asm__ __volatile__ ( "ldr r4, [%[a]]\n\t" @@ -99754,8 +99918,10 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, "str r5, [%[r], #28]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11" ); + (void)m; } /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -99764,9 +99930,10 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_div2_8(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + (void)m; __asm__ __volatile__ ( "ldr r6, [%[a]]\n\t" @@ -100027,8 +100194,9 @@ SP_NOINLINE static void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, "str r5, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6" ); + (void)m; } /* Double the Montgomery form projective point p. @@ -103334,8 +103502,9 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ -SP_NOINLINE static void sp_256_add_one_8(sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_add_one_8(sp_digit* a) { + __asm__ __volatile__ ( "movs r2, #1\n\t" "ldr r1, [%[a]]\n\t" @@ -103411,7 +103580,7 @@ SP_NOINLINE static void sp_256_add_one_8(sp_digit* a) "str r1, [%[a], #28]\n\t" : [a] "+l" (a) : - : "memory", "r1", "r2", "cc" + : "memory", "cc", "r1", "r2" ); } @@ -103764,9 +103933,10 @@ int sp_ecc_secret_gen_256_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_sub_in_place_8( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -103825,7 +103995,7 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -103836,9 +104006,10 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_sub_in_place_8( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -103919,7 +104090,7 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -103931,9 +104102,10 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mul_d_8(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #32\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -104107,7 +104279,7 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -104120,9 +104292,10 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_256_word_8(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -104722,7 +104895,7 @@ SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -105347,9 +105520,10 @@ int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_sub_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r3, #0\n\t" @@ -105403,7 +105577,7 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -105415,9 +105589,10 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_256_sub_8(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -105494,7 +105669,7 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -105505,8 +105680,10 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static void sp_256_rshift1_8(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" @@ -105650,7 +105827,7 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) "str r3, [%[r], #28]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); } @@ -105660,8 +105837,10 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus. */ -static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r7, [%[a]]\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -105921,12 +106100,13 @@ static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "str r6, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } -static int sp_256_num_bits_8(sp_digit* a) +WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(sp_digit* a) { + static const byte sp_num_bits_table[256] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -106676,7 +106856,7 @@ static int sp_256_num_bits_8(sp_digit* a) "movs %[a], r2\n\t" : [a] "+l" (a), [table] "+l" (table) : - : "memory", "r2", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6" ); return (word32)(size_t)a; } @@ -107615,9 +107795,10 @@ static const sp_digit p384_b[12] = { * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mul_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[12 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -107847,8 +108028,10 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -107858,8 +108041,10 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_sqr_12(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -108272,7 +108457,8 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -108283,9 +108469,10 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_add_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -108345,7 +108532,7 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -108357,9 +108544,10 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_add_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -108471,7 +108659,7 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -108769,9 +108957,10 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_cond_sub_12( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #48\n\t" @@ -108819,7 +109008,7 @@ SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -108832,9 +109021,10 @@ SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, + const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -109548,7 +109738,8 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -109701,8 +109892,10 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_384_cmp_12( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -109798,7 +109991,7 @@ SP_NOINLINE static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -109856,9 +110049,10 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_add_12(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit o; o = sp_384_add_12(r, a, b); @@ -109871,9 +110065,10 @@ SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, * a Number to double in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_dbl_12(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + sp_digit o; o = sp_384_add_12(r, a, a); @@ -109886,9 +110081,10 @@ SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, * a Number to triple in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_tpl_12(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + sp_digit o; o = sp_384_add_12(r, a, a); @@ -109904,9 +110100,10 @@ SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_sub_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r3, #0\n\t" @@ -109960,7 +110157,7 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -109972,9 +110169,10 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_sub_12(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -110085,7 +110283,7 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -110099,9 +110297,10 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_cond_add_12( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #48\n\t" @@ -110155,7 +110354,7 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -110167,9 +110366,10 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_sub_12(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit o; o = sp_384_sub_12(r, a, b); @@ -110181,8 +110381,10 @@ SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static void sp_384_rshift1_12(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" @@ -110402,7 +110604,7 @@ static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) "str r4, [%[r], #44]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); } @@ -110412,9 +110614,10 @@ static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_div2_12(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_div2_12(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + sp_digit o; o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1)); @@ -113783,8 +113986,9 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ -SP_NOINLINE static void sp_384_add_one_12(sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_add_one_12(sp_digit* a) { + __asm__ __volatile__ ( "movs r2, #1\n\t" "ldr r1, [%[a]]\n\t" @@ -113896,7 +114100,7 @@ SP_NOINLINE static void sp_384_add_one_12(sp_digit* a) "str r1, [%[a], #44]\n\t" : [a] "+l" (a) : - : "memory", "r1", "r2", "cc" + : "memory", "cc", "r1", "r2" ); } @@ -114249,9 +114453,10 @@ int sp_ecc_secret_gen_384_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_sub_in_place_12( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -114310,7 +114515,7 @@ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -114321,9 +114526,10 @@ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_384_sub_in_place_12( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -114440,7 +114646,7 @@ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -114452,9 +114658,10 @@ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mul_d_12(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #48\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -114628,7 +114835,7 @@ SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -114641,9 +114848,10 @@ SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_384_word_12(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -115243,7 +115451,7 @@ SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -115838,9 +116046,10 @@ int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a Number to divide. * m Modulus. */ -static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r3, [%[a]]\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -116234,12 +116443,13 @@ static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, "str r7, [%[r], #44]\n\t" : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } -static int sp_384_num_bits_12(sp_digit* a) +WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(sp_digit* a) { + static const byte sp_num_bits_table[256] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -117433,7 +117643,7 @@ static int sp_384_num_bits_12(sp_digit* a) "movs %[a], r2\n\t" : [a] "+l" (a), [table] "+l" (table) : - : "memory", "r2", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6" ); return (word32)(size_t)a; } @@ -118419,9 +118629,10 @@ static const sp_digit p521_b[17] = { * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_521_mul_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mul_17(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[17 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -118651,8 +118862,10 @@ SP_NOINLINE static void sp_521_mul_17(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -118662,8 +118875,10 @@ SP_NOINLINE static void sp_521_mul_17(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_sqr_17(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -119076,7 +119291,8 @@ SP_NOINLINE static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -119087,9 +119303,10 @@ SP_NOINLINE static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_add_17(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -119149,7 +119366,7 @@ SP_NOINLINE static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -119161,9 +119378,10 @@ SP_NOINLINE static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_add_17(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -119319,7 +119537,7 @@ SP_NOINLINE static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -119546,9 +119764,10 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_cond_sub_17( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0x44\n\t" @@ -119596,7 +119815,7 @@ SP_NOINLINE static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -119607,9 +119826,10 @@ SP_NOINLINE static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, + const sp_digit* m, sp_digit mp) { + (void)mp; (void)m; @@ -120244,8 +120464,10 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, "add sp, sp, #0x44\n\t" : [a] "+l" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); + (void)m; + (void)mp; } /* Reduce the number back to 521 bits using Montgomery reduction. @@ -120254,9 +120476,10 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, - const sp_digit* m, sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -121411,7 +121634,8 @@ SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -121561,8 +121785,10 @@ static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_521_cmp_17( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -121658,7 +121884,7 @@ SP_NOINLINE static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -121716,9 +121942,10 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_add_17(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" @@ -122039,7 +122266,7 @@ SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7" ); } @@ -122049,9 +122276,10 @@ SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, * a Number to double in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_dbl_17(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -122355,7 +122583,7 @@ SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -122365,9 +122593,10 @@ SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, * a Number to triple in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_tpl_17(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldm %[a]!, {r3, r4, r5, r6}\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -122833,7 +123062,7 @@ SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -122844,9 +123073,10 @@ SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_sub_17(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" @@ -123173,7 +123403,7 @@ SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7" ); } @@ -123185,9 +123415,10 @@ SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_cond_add_17( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0x44\n\t" @@ -123241,7 +123472,7 @@ SP_NOINLINE static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -123251,8 +123482,10 @@ SP_NOINLINE static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static void sp_521_rshift1_17(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" @@ -123567,7 +123800,7 @@ static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) "str r3, [%[r], #64]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); } @@ -123577,9 +123810,10 @@ static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_521_mont_div2_17(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_div2_17(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + sp_digit o; o = sp_521_cond_add_17(r, a, m, 0 - (a[0] & 1)); @@ -127579,8 +127813,9 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, * * a A single precision integer. */ -SP_NOINLINE static void sp_521_add_one_17(sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_add_one_17(sp_digit* a) { + __asm__ __volatile__ ( "movs r2, #1\n\t" "ldr r1, [%[a]]\n\t" @@ -127737,7 +127972,7 @@ SP_NOINLINE static void sp_521_add_one_17(sp_digit* a) "str r1, [%[a], #64]\n\t" : [a] "+l" (a) : - : "memory", "r1", "r2", "cc" + : "memory", "cc", "r1", "r2" ); } @@ -128089,8 +128324,10 @@ int sp_ecc_secret_gen_521_nb(sp_ecc_ctx_t* sp_ctx, const mp_int* priv, * a A single precision integer. * n Integer representing number of bits to shift. */ -static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) +WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r, + const sp_digit* a, byte n) { + __asm__ __volatile__ ( "movs r7, #32\n\t" #ifdef WOLFSSL_KEIL @@ -128431,7 +128668,7 @@ static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) "str r4, [%[r], #64]\n\t" : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -128445,8 +128682,10 @@ static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) * a A single precision integer. * n Integer representing number of bits to shift. */ -static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) +WC_OMIT_FRAME_POINTER static void sp_521_lshift_17(sp_digit* r, + const sp_digit* a, byte n) { + __asm__ __volatile__ ( "movs r7, #31\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -128858,7 +129097,7 @@ static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) "str r4, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -128868,8 +129107,10 @@ static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) * a A single precision integer. * n Integer representing number of bits to shift. */ -static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) +WC_OMIT_FRAME_POINTER static void sp_521_lshift_34(sp_digit* r, + const sp_digit* a, byte n) { + __asm__ __volatile__ ( "movs r7, #31\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -129719,7 +129960,7 @@ static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) "str r5, [%[r], #4]\n\t" : [r] "+l" (r), [a] "+l" (a), [n] "+l" (n) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -129729,9 +129970,10 @@ static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_521_sub_in_place_17(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_sub_in_place_17( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -129790,7 +130032,7 @@ SP_NOINLINE static sp_digit sp_521_sub_in_place_17(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -129801,9 +130043,10 @@ SP_NOINLINE static sp_digit sp_521_sub_in_place_17(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_521_sub_in_place_17(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_sub_in_place_17( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -129966,7 +130209,7 @@ SP_NOINLINE static sp_digit sp_521_sub_in_place_17(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -129978,9 +130221,10 @@ SP_NOINLINE static sp_digit sp_521_sub_in_place_17(sp_digit* a, * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mul_d_17(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #0x44\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -130154,7 +130398,7 @@ SP_NOINLINE static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -130167,9 +130411,10 @@ SP_NOINLINE static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_521_word_17(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -130769,7 +131014,7 @@ SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -131395,9 +131640,10 @@ int sp_ecc_sign_521_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, W * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_sub_17(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r3, #0\n\t" @@ -131451,7 +131697,7 @@ SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -131463,9 +131709,10 @@ SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_521_sub_17(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -131620,7 +131867,7 @@ SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -131632,9 +131879,10 @@ SP_NOINLINE static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, * a Number to divide. * m Modulus. */ -static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r3, [%[a]]\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -132183,12 +132431,13 @@ static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, "str r5, [%[r], #64]\n\t" : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } -static int sp_521_num_bits_17(sp_digit* a) +WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(sp_digit* a) { + static const byte sp_num_bits_table[256] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -133952,7 +134201,7 @@ static int sp_521_num_bits_17(sp_digit* a) "movs %[a], r2\n\t" : [a] "+l" (a), [table] "+l" (table) : - : "memory", "r2", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6" ); return (word32)(size_t)a; } @@ -134820,9 +135069,10 @@ typedef struct sp_point_1024 { * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mul_16(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[16 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -135052,8 +135302,10 @@ SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -135065,9 +135317,10 @@ SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mul_16(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" "mov r8, %[r]\n\t" @@ -170940,7 +171193,7 @@ SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, "stm %[r]!, {r3, r4, r5, r6}\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } @@ -170951,8 +171204,10 @@ SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_sqr_16(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -171365,7 +171620,8 @@ SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -171375,8 +171631,10 @@ SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_sqr_16(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "sub sp, sp, #0x40\n\t" "mov r8, %[r]\n\t" @@ -197720,7 +197978,8 @@ SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r5}\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -197731,9 +197990,10 @@ SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_add_16(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -197879,7 +198139,7 @@ SP_NOINLINE static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -197890,9 +198150,10 @@ SP_NOINLINE static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_add_word_16(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_add_word_16( + sp_digit* r, const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r5, #0\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -198031,7 +198292,7 @@ SP_NOINLINE static sp_digit sp_1024_add_word_16(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "cc" + : "memory", "cc", "r3", "r4", "r5" ); return (word32)(size_t)r; } @@ -198041,9 +198302,10 @@ SP_NOINLINE static sp_digit sp_1024_add_word_16(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_sub_in_place_32( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r4, r5}\n\t" "ldr r2, [%[a]]\n\t" @@ -198340,7 +198602,7 @@ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, #endif : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; } @@ -198351,9 +198613,10 @@ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_add_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -198635,7 +198898,7 @@ SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -198713,9 +198976,10 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_sub_16(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "ldm %[b]!, {r5, r6}\n\t" "ldm %[a]!, {r3, r4}\n\t" @@ -198860,7 +199124,7 @@ SP_NOINLINE static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, #endif : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6" ); return (word32)(size_t)r; } @@ -198907,9 +199171,10 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mul_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + sp_digit t[32 * 2]; sp_digit* tmp = t; __asm__ __volatile__ ( @@ -199139,8 +199404,10 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, "mov %[b], r10\n\t" : [a] "+l" (a), [b] "+l" (b), [tmp] "+l" (tmp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); + (void)r; XMEMCPY(r, t, sizeof(t)); } @@ -199150,8 +199417,10 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_sqr_32(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "movs r3, #0\n\t" "movs r4, #0\n\t" @@ -199574,7 +199843,8 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" ); } @@ -199670,9 +199940,10 @@ static const sp_point_1024 p1024_base = { * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_sub_in_place_32( + sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r7, %[a]\n\t" "movs r2, #0\n\t" @@ -199731,7 +200002,7 @@ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -199745,9 +200016,10 @@ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, * b A single precision number to subtract. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_cond_sub_32( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0x80\n\t" @@ -199795,7 +200067,7 @@ SP_NOINLINE static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -199807,9 +200079,10 @@ SP_NOINLINE static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_add_32(sp_digit* r, + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r6, %[a]\n\t" "movs r7, #0\n\t" @@ -199869,7 +200142,7 @@ SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, "movs %[r], r3\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)r; } @@ -199881,9 +200154,10 @@ SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision digit. */ -SP_NOINLINE static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, - sp_digit b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mul_d_32(sp_digit* r, + const sp_digit* a, sp_digit b) { + __asm__ __volatile__ ( "movs r6, #0x80\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -200057,7 +200331,7 @@ SP_NOINLINE static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, "str r3, [%[r]]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -200070,9 +200344,10 @@ SP_NOINLINE static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, * * Note that this is an approximate div. It may give an answer 1 larger. */ -SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, - sp_digit div) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_1024_word_32(sp_digit d1, + sp_digit d0, sp_digit div) { + __asm__ __volatile__ ( "movs r3, #0\n\t" #if defined(__clang__) || defined(WOLFSSL_KEIL) @@ -200672,7 +200947,7 @@ SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, "movs %[d1], r3\n\t" : [d1] "+l" (d1), [d0] "+l" (d0), [div] "+l" (div) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)d1; } @@ -200714,8 +200989,10 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -SP_NOINLINE static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_int32 sp_1024_cmp_32( + const sp_digit* a, const sp_digit* b) { + __asm__ __volatile__ ( "movs r2, #0\n\t" "movs r3, #0\n\t" @@ -200811,7 +201088,7 @@ SP_NOINLINE static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) "movs %[a], r2\n\t" : [a] "+l" (a), [b] "+l" (b) : - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7" ); return (word32)(size_t)a; } @@ -201153,9 +201430,10 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, - sp_digit mp) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( + sp_digit* a, const sp_digit* m, sp_digit mp) { + __asm__ __volatile__ ( "movs r7, #0\n\t" "mov r8, %[mp]\n\t" @@ -202229,7 +202507,8 @@ SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, #endif /* WOLFSSL_SP_LARGE_CODE */ : [a] "+l" (a), [m] "+l" (m), [mp] "+l" (mp) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "lr" ); } @@ -202377,9 +202656,10 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_add_32(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" @@ -203276,7 +203556,7 @@ SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], #124]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7" ); } @@ -203286,9 +203566,10 @@ SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, * a Number to double in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_dbl_32(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" @@ -204153,7 +204434,7 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], #124]\n\t" : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -204163,9 +204444,10 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, * a Number to triple in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_tpl_32(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" @@ -205923,7 +206205,7 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, "str r7, [%[r], #124]\n\t" : [r] "+l" (r), [a] "+l" (a), [m] "+l" (m) : - : "memory", "r3", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r3", "r4", "r5", "r6", "r7" ); } @@ -205934,9 +206216,10 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b, const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_sub_32(sp_digit* r, + const sp_digit* a, const sp_digit* b, const sp_digit* m) { + __asm__ __volatile__ ( "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" @@ -207357,7 +207640,7 @@ SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], #124]\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7" ); } @@ -207369,9 +207652,10 @@ SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b, sp_digit m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit sp_1024_cond_add_32( + sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { + __asm__ __volatile__ ( "movs r4, #0\n\t" "movs r5, #0x80\n\t" @@ -207425,7 +207709,7 @@ SP_NOINLINE static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, "movs %[r], r4\n\t" : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b), [m] "+l" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "cc" + : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; } @@ -207435,8 +207719,10 @@ SP_NOINLINE static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) +WC_OMIT_FRAME_POINTER static void sp_1024_rshift1_32(sp_digit* r, + const sp_digit* a) { + __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" @@ -208036,7 +208322,7 @@ static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) "str r3, [%[r], #124]\n\t" : [r] "+l" (r), [a] "+l" (a) : - : "memory", "r2", "r3", "r4", "r5", "cc" + : "memory", "cc", "r2", "r3", "r4", "r5" ); } @@ -208046,9 +208332,10 @@ static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_div2_32(sp_digit* r, const sp_digit* a, - const sp_digit* m) +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_div2_32(sp_digit* r, + const sp_digit* a, const sp_digit* m) { + sp_digit o; o = sp_1024_cond_add_32(r, a, m, 0 - (a[0] & 1)); diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 08b3b31a659..9a334f4d4a4 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -303,10 +303,10 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a) * b A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -650,8 +650,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, "SUB %[r], %[r], #0x20\n\t" "STM %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD sp, sp, #0x24\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -665,10 +670,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, * b A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -790,8 +795,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, "LDM sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr" ); @@ -835,8 +845,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_8(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -890,8 +905,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_16(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -948,8 +968,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_16(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -1096,8 +1121,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -1182,8 +1212,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_32(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -1390,8 +1425,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -1532,8 +1572,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_64(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -1615,10 +1660,10 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, * a A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -1853,8 +1898,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, "SUB %[r], %[r], #0x20\n\t" "STM %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -1867,10 +1917,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, * a A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -1973,8 +2023,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -2017,8 +2072,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_8(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -2110,8 +2170,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_16(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -2231,8 +2296,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_32(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -2324,8 +2394,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_64(sp_digit* r, "BNE.N L_sp_2048_add_64_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -2379,8 +2454,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "BNE.N L_sp_2048_sub_in_place_64_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -2510,8 +2590,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_2048_mul_64_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -2629,8 +2714,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_2048_sqr_64_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -2704,8 +2794,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_add_32(sp_digit* r, "BNE.N L_sp_2048_add_32_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -2759,8 +2854,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "BNE.N L_sp_2048_sub_in_place_32_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -2890,8 +2990,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_2048_mul_32_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -3009,8 +3114,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_2048_sqr_32_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -3094,8 +3204,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r, "BLT.N L_sp_2048_mul_d_64_word_%=\n\t" #endif "STR r3, [%[r], #256]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -3442,8 +3557,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_64(sp_digit* r, "UMLAL r3, r4, %[b], r8\n\t" "STM %[r]!, {r3}\n\t" "STR r4, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -3515,8 +3635,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r, "BLT.N L_sp_2048_cond_sub_32_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -3661,8 +3786,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -3678,10 +3808,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_32(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -3976,8 +4106,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -3992,10 +4127,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -4097,8 +4232,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -4115,10 +4255,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -4323,8 +4463,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -4339,10 +4484,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -4432,8 +4577,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_32( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -4527,8 +4677,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r, "BLT.N L_sp_2048_mul_d_32_word_%=\n\t" #endif "STR r3, [%[r], #128]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -4715,8 +4870,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r, "UMLAL r4, r5, %[b], r8\n\t" "STM %[r]!, {r4}\n\t" "STR r5, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -4733,10 +4893,10 @@ WC_OMIT_FRAME_POINTER static void sp_2048_mul_d_32(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_32( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -4782,8 +4942,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -4800,10 +4965,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_32( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -4866,8 +5031,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -5281,8 +5451,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_32(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -5705,8 +5880,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r, "BLT.N L_sp_2048_cond_sub_64_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -5963,8 +6143,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -5980,10 +6165,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_sub_64(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -6534,8 +6719,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -6550,10 +6740,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -6655,8 +6845,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -6673,10 +6868,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -7041,8 +7236,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -7057,10 +7257,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -7150,8 +7350,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_2048_mont_reduce_64( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -7238,8 +7443,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r, "BNE.N L_sp_2048_sub_64_word_%=\n\t" #endif "MOV %[r], r11\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -7381,8 +7591,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -7400,10 +7615,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_64(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_64( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -7449,8 +7664,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -7467,10 +7687,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_64( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -7533,8 +7753,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -8403,8 +8628,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_2048_cmp_64(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -8936,8 +9166,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r, "BLT.N L_sp_2048_cond_add_32_words_%=\n\t" #endif "MOV %[r], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -9082,8 +9317,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_cond_add_32(sp_digit* r, "ADCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "ADC %[r], r10, r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -9763,8 +10003,13 @@ WC_OMIT_FRAME_POINTER static void sp_2048_lshift_64(sp_digit* r, "ORR r6, r6, r3\n\t" "STR r5, [%[r]]\n\t" "STR r6, [%[r], #4]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : +#else + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r3", "r7" ); } @@ -11196,8 +11441,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "STM %[r]!, {r3, r4, r5, r6}\n\t" "LDM sp!, {r3, r4, r5, r6}\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12" ); @@ -11247,8 +11497,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_12(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -11316,8 +11571,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_24(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -11388,8 +11648,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_24(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -11568,8 +11833,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -11682,8 +11952,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_48(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -11946,8 +12221,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -12144,8 +12424,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_96(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -12915,8 +13200,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "STM %[r]!, {r2, r3, r4, r8}\n\t" "LDM sp!, {r2, r3, r4, r8}\n\t" "STM %[r]!, {r2, r3, r4, r8}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); @@ -12965,8 +13255,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_12(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -13072,8 +13367,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_24(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -13221,8 +13521,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_48(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -13314,8 +13619,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_96(sp_digit* r, "BNE.N L_sp_3072_add_96_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -13369,8 +13679,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "BNE.N L_sp_3072_sub_in_place_96_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -13500,8 +13815,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_3072_mul_96_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -13619,8 +13939,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_3072_sqr_96_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -13694,8 +14019,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_add_48(sp_digit* r, "BNE.N L_sp_3072_add_48_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -13749,8 +14079,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "BNE.N L_sp_3072_sub_in_place_48_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -13880,8 +14215,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_3072_mul_48_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -13999,8 +14339,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_3072_sqr_48_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -14084,8 +14429,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r, "BLT.N L_sp_3072_mul_d_96_word_%=\n\t" #endif "STR r3, [%[r], #384]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -14592,8 +14942,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_96(sp_digit* r, "UMLAL r5, r3, %[b], r8\n\t" "STM %[r]!, {r5}\n\t" "STR r3, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -14665,8 +15020,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r, "BLT.N L_sp_3072_cond_sub_48_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -14867,8 +15227,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -14884,10 +15249,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_48(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -15310,8 +15675,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -15326,10 +15696,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -15431,8 +15801,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -15449,10 +15824,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -15737,8 +16112,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -15753,10 +16133,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -15846,8 +16226,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_48( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -15941,8 +16326,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r, "BLT.N L_sp_3072_mul_d_48_word_%=\n\t" #endif "STR r3, [%[r], #192]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -16209,8 +16599,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r, "UMLAL r5, r3, %[b], r8\n\t" "STM %[r]!, {r5}\n\t" "STR r3, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -16227,10 +16622,10 @@ WC_OMIT_FRAME_POINTER static void sp_3072_mul_d_48(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_48( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -16276,8 +16671,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -16294,10 +16694,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_48( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -16360,8 +16760,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -16951,8 +17356,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_48(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -17375,8 +17785,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r, "BLT.N L_sp_3072_cond_sub_96_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -17745,8 +18160,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -17762,10 +18182,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_sub_96(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -18572,8 +18992,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -18588,10 +19013,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -18693,8 +19118,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -18711,10 +19141,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -19239,8 +19669,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -19255,10 +19690,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -19348,8 +19783,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_3072_mont_reduce_96( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -19436,8 +19876,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r, "BNE.N L_sp_3072_sub_96_word_%=\n\t" #endif "MOV %[r], r11\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -19635,8 +20080,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -19654,10 +20104,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_96(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_96( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -19703,8 +20153,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -19721,10 +20176,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_96( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -19787,8 +20242,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -21009,8 +21469,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_3072_cmp_96(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -21542,8 +22007,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r, "BLT.N L_sp_3072_cond_add_48_words_%=\n\t" #endif "MOV %[r], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -21744,8 +22214,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_cond_add_48(sp_digit* r, "ADCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "ADC %[r], r10, r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -22617,8 +23092,13 @@ WC_OMIT_FRAME_POINTER static void sp_3072_lshift_96(sp_digit* r, "ORR r4, r4, r3\n\t" "STR r6, [%[r]]\n\t" "STR r4, [%[r], #4]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : +#else + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r3", "r7" ); } @@ -23271,8 +23751,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -23525,8 +24010,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_add_128(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -23657,8 +24147,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_add_128(sp_digit* r, "BNE.N L_sp_4096_add_128_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -23712,8 +24207,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "BNE.N L_sp_4096_sub_in_place_128_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -23843,8 +24343,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_128(sp_digit* r, #else "BGT.N L_sp_4096_mul_128_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -23963,8 +24468,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_sqr_128(sp_digit* r, #else "BGT.N L_sp_4096_sqr_128_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -24046,8 +24556,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r, "BLT.N L_sp_4096_mul_d_128_word_%=\n\t" #endif "STR r3, [%[r], #512]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -24714,8 +25229,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_mul_d_128(sp_digit* r, "UMLAL r4, r5, %[b], r8\n\t" "STM %[r]!, {r4}\n\t" "STR r5, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -24788,8 +25308,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r, "BLT.N L_sp_4096_cond_sub_128_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -25270,8 +25795,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -25287,10 +25817,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_sub_128(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -26353,8 +26883,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -26369,10 +26904,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -26474,8 +27009,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -26492,10 +27032,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -27180,8 +27720,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -27196,10 +27741,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -27289,8 +27834,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_4096_mont_reduce_128( #endif /* Loop Done */ "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -27377,8 +27927,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r, "BNE.N L_sp_4096_sub_128_word_%=\n\t" #endif "MOV %[r], r11\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -27632,8 +28187,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -27651,10 +28211,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_128(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_4096_word_128( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -27700,8 +28260,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -27718,10 +28283,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_4096_word_128( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -27784,8 +28349,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -29358,8 +29928,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_4096_cmp_128(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -29891,8 +30466,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r, "BLT.N L_sp_4096_cond_add_64_words_%=\n\t" #endif "MOV %[r], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -30149,8 +30729,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_cond_add_64(sp_digit* r, "ADCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "ADC %[r], r10, r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -31214,8 +31799,13 @@ WC_OMIT_FRAME_POINTER static void sp_4096_lshift_128(sp_digit* r, "ORR r5, r5, r3\n\t" "STR r4, [%[r]]\n\t" "STR r5, [%[r], #4]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : +#else + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r3", "r7" ); } @@ -31612,8 +32202,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_256_mul_8_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -31628,10 +32223,10 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_8(sp_digit* r, const sp_digit* a, * b A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -31975,8 +32570,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r, "SUB %[r], %[r], #0x20\n\t" "STM %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD sp, sp, #0x24\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -31990,10 +32590,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r, * b A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -32115,8 +32715,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mul_8(sp_digit* r, "LDM sp, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "STM lr, {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD sp, sp, #0x2c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr" ); @@ -32237,8 +32842,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_256_sqr_8_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -32252,10 +32862,10 @@ WC_OMIT_FRAME_POINTER static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_sqr_8(sp_digit* r, const sp_digit* a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -32490,8 +33100,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, "SUB %[r], %[r], #0x20\n\t" "STM %[r], {r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -32504,10 +33119,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, * a A single precision integer. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_sqr_8(sp_digit* r_p, const sp_digit* a_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_sqr_8(sp_digit* r, const sp_digit* a) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -32610,8 +33225,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, "LDM sp, {r0, r1, r2, r3, r4, r5, r6}\n\t" "STM lr, {r0, r1, r2, r3, r4, r5, r6}\n\t" "ADD sp, sp, #0x20\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -32668,8 +33288,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_add_8(sp_digit* r, "BNE.N L_sp_256_add_8_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -32714,8 +33339,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_add_8(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -32958,8 +33588,13 @@ WC_OMIT_FRAME_POINTER static int sp_256_mod_mul_norm_8(sp_digit* r, "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r11}\n\t" "MOV %[r], #0x0\n\t" "ADD sp, sp, #0x18\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -33177,11 +33812,11 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * mp Montgomery multiplier. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -33644,8 +34279,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, "LDR %[r], [sp, #64]\n\t" "STM %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -33672,11 +34312,11 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, * mp Montgomery multiplier. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -33917,8 +34557,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, "LDR %[r], [sp, #68]\n\t" "STM %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x4c\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r10", "r11", "r12", "r7", "r8", "r9", "lr" ); @@ -33944,10 +34589,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, * mp Montgomery multiplier. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -34301,8 +34946,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, "LDR %[r], [sp, #64]\n\t" "STM %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -34327,10 +34977,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, * mp Montgomery multiplier. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -34553,8 +35203,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, "LDR %[r], [sp, #64]\n\t" "STM %[r], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -34810,8 +35465,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_256_cmp_8(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -34874,8 +35534,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r, "BLT.N L_sp_256_cond_sub_8_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -34936,8 +35601,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -34955,10 +35625,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_cond_sub_8(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35061,8 +35731,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35077,10 +35752,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35165,8 +35840,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35182,10 +35862,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35325,8 +36005,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, "LDR %[a], [sp, #64]\n\t" "STM %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "ADD sp, sp, #0x44\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35350,10 +36035,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35456,8 +36141,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35472,10 +36162,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35560,8 +36250,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_reduce_order_8( "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35618,10 +36313,10 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_add_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35667,8 +36362,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, "SBCS r11, r11, lr, LSR #31\n\t" "SBC r12, r12, lr\n\t" "STM %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35686,10 +36386,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_dbl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35730,8 +36430,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, "SBCS r10, r10, r2, LSR #31\n\t" "SBC r11, r11, r2\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2" ); @@ -35749,10 +36454,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_tpl_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35825,8 +36530,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, "SBCS r10, r10, r12, LSR #31\n\t" "SBC r11, r11, r12\n\t" "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3", "r12" ); @@ -35845,10 +36555,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sub_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35893,8 +36603,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, "ADCS r11, r11, lr, LSR #31\n\t" "ADC r12, r12, lr\n\t" "STM %[r], {r5, r6, r7, r8, r9, r10, r11, r12}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -35907,10 +36622,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_div2_8(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_div2_8(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_div2_8(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -35958,8 +36673,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_256_mont_div2_8(sp_digit* r, "ORR r10, r10, r7, LSL #31\n\t" "ORR r11, r11, r3, LSL #31\n\t" "STM %[r], {r8, r9, r10, r11}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3" ); @@ -39291,8 +40011,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_add_one_8(sp_digit* a) "ADCS r3, r3, #0x0\n\t" "ADCS r4, r4, #0x0\n\t" "STM %[a]!, {r1, r2, r3, r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4" ); } @@ -39686,8 +40411,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, "BNE.N L_sp_256_sub_in_place_8_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -39729,8 +40459,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -39792,8 +40527,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "BLT.N L_sp_256_mul_d_8_word_%=\n\t" #endif "STR r3, [%[r], #32]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -39860,8 +40600,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "UMLAL r4, r5, %[b], r8\n\t" "STM %[r]!, {r4}\n\t" "STR r5, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -39878,10 +40623,10 @@ WC_OMIT_FRAME_POINTER static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -39927,8 +40672,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -39945,10 +40695,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_256_word_8(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -40011,8 +40761,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -40678,8 +41433,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_8(sp_digit* r, "BNE.N L_sp_256_sub_8_word_%=\n\t" #endif "MOV %[r], r11\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -40723,8 +41483,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_8(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -40771,8 +41536,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_rshift1_8(sp_digit* r, "ORR r9, r9, r10, LSL #31\n\t" "STRD r6, r7, [%[r]]\n\t" "STRD r8, r9, [%[r], #8]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -40866,8 +41636,13 @@ WC_OMIT_FRAME_POINTER static void sp_256_div2_mod_8(sp_digit* r, "ORR r10, r10, r7, LSL #31\n\t" "ORR r11, r11, r3, LSL #31\n\t" "STM %[r], {r8, r9, r10, r11}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -41070,8 +41845,13 @@ WC_OMIT_FRAME_POINTER static int sp_256_num_bits_8(const sp_digit* a) "L_sp_256_num_bits_8_9_%=:\n\t" #endif "MOV %[a], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; @@ -42127,8 +42907,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_384_mul_12_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -43158,8 +43943,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "STM %[r]!, {r3, r4, r5, r6}\n\t" "LDM sp!, {r3, r4, r5, r6}\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12" ); @@ -43279,8 +44069,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_384_sqr_12_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -43982,8 +44777,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "STM %[r]!, {r2, r3, r4, r8}\n\t" "LDM sp!, {r2, r3, r4, r8}\n\t" "STM %[r]!, {r2, r3, r4, r8}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); @@ -44039,8 +44839,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_add_12(sp_digit* r, "BNE.N L_sp_384_add_12_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -44092,8 +44897,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_add_12(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -44435,8 +45245,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r, "BLT.N L_sp_384_cond_sub_12_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -44511,8 +45326,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -44529,10 +45349,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_sub_12(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -44667,8 +45487,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, "STR r4, [%[a]]\n\t" "STR r5, [%[a], #4]\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -44683,10 +45508,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -44791,8 +45616,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, "STR r9, [%[a], #12]\n\t" "STR r10, [%[a], #16]\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -45129,8 +45959,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_384_cmp_12(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -45190,10 +46025,10 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_add_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -45271,8 +46106,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, "SBCS r10, r10, r3\n\t" "SBC r11, r11, r3\n\t" "STM %[r]!, {r8, r9, r10, r11}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -45285,10 +46125,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_dbl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -45356,8 +46196,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, "SBCS r8, r8, r2\n\t" "SBC r9, r9, r2\n\t" "STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r3" ); } @@ -45369,10 +46214,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_tpl_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -45504,8 +46349,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, "SBCS r8, r8, r2\n\t" "SBC r9, r9, r2\n\t" "STM %[r]!, {r4, r5, r6, r7, r8, r9}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -45555,8 +46405,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_12(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -45614,8 +46469,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r, "BLT.N L_sp_384_cond_add_12_words_%=\n\t" #endif "MOV %[r], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -45690,8 +46550,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r, "ADCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "ADC %[r], r10, r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -45706,10 +46571,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_cond_add_12(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_sub_12(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -45785,8 +46650,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, "ADCS r10, r10, %[m]\n\t" "ADC r11, r11, %[m]\n\t" "STM %[r]!, {r8, r9, r10, r11}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -45855,8 +46725,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_rshift1_12(sp_digit* r, "LSR r4, r4, #1\n\t" "STR r3, [%[r], #40]\n\t" "STR r4, [%[r], #44]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4" ); } @@ -49266,8 +50141,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_add_one_12(sp_digit* a) "ADCS r3, r3, #0x0\n\t" "ADCS r4, r4, #0x0\n\t" "STM %[a]!, {r1, r2, r3, r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4" ); } @@ -49661,8 +50541,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, "BNE.N L_sp_384_sub_in_place_12_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -49711,8 +50596,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -49774,8 +50664,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r, "BLT.N L_sp_384_mul_d_12_word_%=\n\t" #endif "STR r3, [%[r], #48]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -49862,8 +50757,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r, "UMLAL r5, r3, %[b], r8\n\t" "STM %[r]!, {r5}\n\t" "STR r3, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -49880,10 +50780,10 @@ WC_OMIT_FRAME_POINTER static void sp_384_mul_d_12(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -49929,8 +50829,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -49947,10 +50852,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_384_word_12(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -50013,8 +50918,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -50729,8 +51639,13 @@ WC_OMIT_FRAME_POINTER static void sp_384_div2_mod_12(sp_digit* r, "ORR r10, r10, r3, LSL #31\n\t" "STR r9, [%[r], #40]\n\t" "STR r10, [%[r], #44]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -51033,8 +51948,13 @@ WC_OMIT_FRAME_POINTER static int sp_384_num_bits_12(const sp_digit* a) "L_sp_384_num_bits_12_13_%=:\n\t" #endif "MOV %[a], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; @@ -52140,8 +53060,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_521_mul_17_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -54185,8 +55110,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_17(sp_digit* r, const sp_digit* a, "STM %[r]!, {r3, r4, r5, r6}\n\t" "LDM sp!, {r3}\n\t" "STM %[r]!, {r3}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12" ); @@ -54309,8 +55239,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_521_sqr_17_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -55596,8 +56531,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "STM %[r]!, {r2, r3, r4, r8}\n\t" "LDM sp!, {r2}\n\t" "STM %[r]!, {r2}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); @@ -55659,8 +56599,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_add_17(sp_digit* r, "STM %[r]!, {r4}\n\t" "MOV r4, #0x0\n\t" "ADC %[r], r4, #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -55723,8 +56668,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_add_17(sp_digit* r, "STM %[r]!, {r3}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -55995,8 +56945,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r, "BLT.N L_sp_521_cond_sub_17_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -56090,8 +57045,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r, "SBCS r6, r6, r8\n\t" "STR r6, [%[r]]\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -56105,10 +57065,10 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_cond_sub_17(sp_digit* r, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_17( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_17( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -56220,8 +57180,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, "ADCS r7, r7, #0x0\n\t" "ADCS r8, r8, #0x0\n\t" "STM %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -56245,10 +57210,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -56510,8 +57475,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( "LSR r3, r6, #9\n\t" "ADD %[a], %[a], #0x4\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -56526,10 +57496,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -56746,8 +57716,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_reduce_order_17( "LSR r5, r12, #9\n\t" "ADD %[a], %[a], #0x4\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -57136,8 +58111,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_521_cmp_17(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -57197,10 +58177,10 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_add_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -57271,8 +58251,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r, "LDM %[r], {r4}\n\t" "ADCS r4, r4, #0x0\n\t" "STM %[r]!, {r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -57290,10 +58275,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_add_17(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_dbl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -57354,8 +58339,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r, "LDM %[r], {r4}\n\t" "ADCS r4, r4, #0x0\n\t" "STM %[r]!, {r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); @@ -57373,10 +58363,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_dbl_17(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_tpl_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -57471,8 +58461,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, "LDM %[r], {r4}\n\t" "ADCS r4, r4, #0x0\n\t" "STM %[r]!, {r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); @@ -57491,10 +58486,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_sub_17(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -57567,8 +58562,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_sub_17(sp_digit* r, "LDM %[r], {r4}\n\t" "SBCS r4, r4, #0x0\n\t" "STM %[r]!, {r4}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -57654,8 +58654,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift1_17(sp_digit* r, "LSR r3, r3, #1\n\t" "STR r2, [%[r], #60]\n\t" "STR r3, [%[r], #64]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4" ); } @@ -61706,8 +62711,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_add_one_17(sp_digit* a) "LDM %[a], {r1}\n\t" "ADCS r1, r1, #0x0\n\t" "STM %[a]!, {r1}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4" ); } @@ -62151,8 +63161,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r, "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" "STRD r4, r5, [%[r], #60]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : +#else + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r3", "r7" ); } @@ -62279,8 +63294,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_lshift_17(sp_digit* r, "ORR r5, r5, r3\n\t" "STR r4, [%[r]]\n\t" "STR r5, [%[r], #4]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : +#else + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r3", "r7" ); } @@ -62505,8 +63525,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_lshift_34(sp_digit* r, "ORR r6, r6, r3\n\t" "STR r5, [%[r]]\n\t" "STR r6, [%[r], #4]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : +#else + : + : [r] "r" (r), [a] "r" (a), [n] "r" (n) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r3", "r7" ); } @@ -62562,8 +63587,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, "SBCS r2, r2, r6\n\t" "STM %[a]!, {r2}\n\t" "SBC %[a], %[a], %[a]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -62623,8 +63653,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, "SBCS r2, r2, r6\n\t" "STM %[a]!, {r2}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -62686,8 +63721,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r, "BLT.N L_sp_521_mul_d_17_word_%=\n\t" #endif "STR r3, [%[r], #68]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -62799,8 +63839,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r, "UMLAL r4, r5, %[b], r8\n\t" "STM %[r]!, {r4}\n\t" "STR r5, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -62817,10 +63862,10 @@ WC_OMIT_FRAME_POINTER static void sp_521_mul_d_17(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -62866,8 +63911,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -62884,10 +63934,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_521_word_17(sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -62950,8 +64000,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_521_word_17(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -63623,8 +64678,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_17(sp_digit* r, "SBCS r3, r3, r7\n\t" "STM %[r]!, {r3}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -63686,8 +64746,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_17(sp_digit* r, "SBCS r3, r3, r7\n\t" "STM %[r]!, {r3}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -63855,8 +64920,13 @@ WC_OMIT_FRAME_POINTER static void sp_521_div2_mod_17(sp_digit* r, "ORR r9, r9, r3, LSL #31\n\t" "STR r8, [%[r], #60]\n\t" "STR r9, [%[r], #64]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -64284,8 +65354,13 @@ WC_OMIT_FRAME_POINTER static int sp_521_num_bits_17(const sp_digit* a) "L_sp_521_num_bits_17_18_%=:\n\t" #endif "MOV %[a], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a) : +#else + : + : [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r1", "r2", "r3", "r4", "r5" ); return (word32)(size_t)a; @@ -66952,8 +68027,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, "STM %[r]!, {r3, r4, r5, r6}\n\t" "LDM sp!, {r3, r4, r5, r6}\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r11", "r12" ); @@ -68106,8 +69186,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "STM %[r]!, {r2, r3, r4, r8}\n\t" "LDM sp!, {r2, r3, r4, r8}\n\t" "STM %[r]!, {r2, r3, r4, r8}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); @@ -68164,8 +69249,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_16(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -68247,8 +69337,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "SBCS r5, r5, r9\n\t" "STM %[a]!, {r2, r3, r4, r5}\n\t" "SBC %[a], r9, r9\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)a; @@ -68333,8 +69428,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_32(sp_digit* r, "STM %[r]!, {r3, r4, r5, r6}\n\t" "MOV %[r], #0x0\n\t" "ADC %[r], %[r], #0x0\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -68459,8 +69559,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_16(sp_digit* r, "SBCS r6, r6, r10\n\t" "STM %[r]!, {r3, r4, r5, r6}\n\t" "SBC %[r], r6, r6\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -68624,8 +69729,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, #else "BGT.N L_sp_1024_mul_32_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -68743,8 +69853,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) #else "BGT.N L_sp_1024_sqr_32_store_%=\n\t" #endif +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); @@ -68882,8 +69997,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "BNE.N L_sp_1024_sub_in_place_32_word_%=\n\t" #endif "MOV %[a], r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); @@ -68942,8 +70062,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r, "BLT.N L_sp_1024_cond_sub_32_words_%=\n\t" #endif "MOV %[r], r4\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -69088,8 +70213,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_sub_32(sp_digit* r, "SBCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "SBC %[r], r5, r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9" ); return (word32)(size_t)r; @@ -69145,8 +70275,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_add_32(sp_digit* r, "BNE.N L_sp_1024_add_32_word_%=\n\t" #endif "MOV %[r], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); @@ -69209,8 +70344,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r, "BLT.N L_sp_1024_mul_d_32_word_%=\n\t" #endif "STR r3, [%[r], #128]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -69397,8 +70537,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r, "UMLAL r4, r5, %[b], r8\n\t" "STM %[r]!, {r4}\n\t" "STR r5, [%[r]]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8" ); } @@ -69415,10 +70560,10 @@ WC_OMIT_FRAME_POINTER static void sp_1024_mul_d_32(sp_digit* r, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_1024_word_32( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -69464,8 +70609,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, "SUB %[d0], %[d0], r3\n\t" "UDIV r3, %[d0], %[div]\n\t" "ADD %[d1], r6, r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -69482,10 +70632,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, * Note that this is an approximate div. It may give an answer 1 larger. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_1024_word_32( sp_digit d1_p, sp_digit d0_p, sp_digit div_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, +WC_OMIT_FRAME_POINTER static SP_NOINLINE sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -69548,8 +70698,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, "SUBS r8, %[div], r9\n\t" "SBC r8, r8, r8\n\t" "SUB %[d1], r3, r8\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) : +#else + : + : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)d1; @@ -69993,8 +71148,13 @@ WC_OMIT_FRAME_POINTER static sp_int32 sp_1024_cmp_32(const sp_digit* a, "EOR r2, r2, r3\n\t" #endif /*WOLFSSL_SP_SMALL */ "MOV %[a], r2\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) : +#else + : + : [a] "r" (a), [b] "r" (b) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)a; @@ -70336,10 +71496,10 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -70639,8 +71799,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( "SBC r12, r12, r12\n\t" "ORR r3, r3, r12\n\t" "MOV %[mp], r3\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -70655,10 +71820,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( * mp The digit representing the negative inverse of m mod 2^n. */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a_p, const sp_digit* m_p, sp_digit mp_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( sp_digit* a, const sp_digit* m, sp_digit mp) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -70868,8 +72033,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_reduce_32( "SBC r3, r3, r3\n\t" "ORR r5, r5, r3\n\t" "MOV %[mp], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) : +#else + : + : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -71022,10 +72192,10 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_add_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -71189,8 +72359,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, "SBCS r6, r6, r10\n\t" "SBC r7, r7, r11\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -71203,10 +72378,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_dbl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -71353,8 +72528,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, "SBCS r6, r6, r10\n\t" "SBC r7, r7, r11\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12" ); @@ -71367,10 +72547,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_tpl_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -71672,8 +72852,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, "SBCS r6, r6, r10\n\t" "SBC r7, r7, r11\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12" ); @@ -71687,10 +72872,10 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, * m Modulus (prime). */ #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r_p, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_sub_32(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p, const sp_digit* m_p) #else -WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, +WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { @@ -71848,8 +73033,13 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, "ADCS r6, r6, r10\n\t" "ADC r7, r7, r11\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); @@ -71906,8 +73096,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r, "BLT.N L_sp_1024_cond_add_32_words_%=\n\t" #endif "MOV %[r], r5\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8" ); return (word32)(size_t)r; @@ -72052,8 +73247,13 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_cond_add_32(sp_digit* r, "ADCS r7, r7, r9\n\t" "STM %[r]!, {r6, r7}\n\t" "ADC %[r], r10, r10\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : +#else + : + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (word32)(size_t)r; @@ -72200,8 +73400,13 @@ WC_OMIT_FRAME_POINTER static void sp_1024_rshift1_32(sp_digit* r, "LSR r3, r3, #1\n\t" "STR r2, [%[r], #120]\n\t" "STR r3, [%[r], #124]\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [r] "+r" (r), [a] "+r" (a) : +#else + : + : [r] "r" (r), [a] "r" (a) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r4" ); } diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 1e907aee460..7aaae4bb60d 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -56771,10 +56771,10 @@ _sp_256_mont_div2_4: adcq %r10, %r8 movq $0x00, %r11 adcq $0x00, %r11 - shrdq $0x01, %rax, %rdx - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r11, %r8 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r11, %r8 movq %rdx, (%rdi) movq %rax, 8(%rdi) movq %rcx, 16(%rdi) @@ -57615,10 +57615,10 @@ _sp_256_mont_div2_avx2_4: adcq %r10, %r8 movq $0x00, %r11 adcq $0x00, %r11 - shrdq $0x01, %rax, %rdx - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r11, %r8 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r11, %r8 movq %rdx, (%rdi) movq %rax, 8(%rdi) movq %rcx, 16(%rdi) @@ -58754,10 +58754,10 @@ _sp_256_mod_inv_4: testb $0x01, %r11b jnz L_256_mod_inv_4_v_even_end L_256_mod_inv_4_v_even_start: - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 movb $0x01, (%rsp,%r15,1) incq %r15 testb $0x01, %r11b @@ -58782,17 +58782,17 @@ L_256_mod_inv_4_uv_u: sbbq %r12, %r8 sbbq %r13, %r9 sbbq %r14, %r10 - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrq $0x01, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrq $1, %r10 testb $0x01, %cl jnz L_256_mod_inv_4_usubv_even_end L_256_mod_inv_4_usubv_even_start: - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrq $0x01, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrq $1, %r10 movb $0x00, (%rsp,%r15,1) incq %r15 testb $0x01, %cl @@ -58814,17 +58814,17 @@ L_256_mod_inv_4_uv_v: sbbq %r8, %r12 sbbq %r9, %r13 sbbq %r10, %r14 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 testb $0x01, %r11b jnz L_256_mod_inv_4_vsubu_even_end L_256_mod_inv_4_vsubu_even_start: - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrq $0x01, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrq $1, %r14 movb $0x01, (%rsp,%r15,1) incq %r15 testb $0x01, %r11b @@ -58877,10 +58877,10 @@ L_256_mod_inv_4_op_div2_b: adcq 24(%rdx), %r10 adcq $0x00, %rsi L_256_mod_inv_4_op_div2_b_mod: - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrdq $0x01, %rsi, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrdq $1, %rsi, %r10 movb (%rsp,%r15,1), %sil incq %r15 cmpb $0x01, %sil @@ -58910,10 +58910,10 @@ L_256_mod_inv_4_op_div2_d: adcq 24(%rdx), %r14 adcq $0x00, %rsi L_256_mod_inv_4_op_div2_d_mod: - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrdq $0x01, %r14, %r13 - shrdq $0x01, %rsi, %r14 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrdq $1, %r14, %r13 + shrdq $1, %rsi, %r14 movb (%rsp,%r15,1), %sil incq %r15 cmpb $0x01, %sil @@ -58951,6 +58951,11 @@ L_256_mod_inv_4_store_end: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_order: .long 0x00632551,0x01e84f3b,0x03bce6fa,0x03ffffff .long 0x03ff0000,0x00000000,0x00000000,0x00000000 @@ -58967,13 +58972,18 @@ L_sp256_mod_inv_avx2_4_order: .p2align 5 #endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_one: -.quad 0x1, 0x0 -.quad 0x0, 0x0 +.quad 0x0000000000000001,0x0000000000000000 +.quad 0x0000000000000000,0x0000000000000000 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_all_one: .long 0x00000001,0x00000001,0x00000001,0x00000001 .long 0x00000001,0x00000001,0x00000001,0x00000001 @@ -58982,6 +58992,11 @@ L_sp256_mod_inv_avx2_4_all_one: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_mask01111: .long 0x00000000,0x00000001,0x00000001,0x00000001 .long 0x00000001,0x00000000,0x00000000,0x00000000 @@ -58990,6 +59005,11 @@ L_sp256_mod_inv_avx2_4_mask01111: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_down_one_dword: .long 0x00000001,0x00000002,0x00000003,0x00000004 .long 0x00000005,0x00000006,0x00000007,0x00000007 @@ -58998,6 +59018,11 @@ L_sp256_mod_inv_avx2_4_down_one_dword: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_neg: .long 0x00000000,0x00000000,0x00000000,0x00000000 .long 0x80000000,0x00000000,0x00000000,0x00000000 @@ -59006,6 +59031,11 @@ L_sp256_mod_inv_avx2_4_neg: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_up_one_dword: .long 0x00000007,0x00000000,0x00000001,0x00000002 .long 0x00000003,0x00000007,0x00000007,0x00000007 @@ -59014,6 +59044,11 @@ L_sp256_mod_inv_avx2_4_up_one_dword: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_sp256_mod_inv_avx2_4_mask26: .long 0x03ffffff,0x03ffffff,0x03ffffff,0x03ffffff .long 0x03ffffff,0x00000000,0x00000000,0x00000000 @@ -59073,10 +59108,10 @@ _sp_256_mod_inv_avx2_4: testb $0x01, %r10b jnz L_256_mod_inv_avx2_4_v_even_end L_256_mod_inv_avx2_4_v_even_start: - shrdq $0x01, %r11, %r10 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrq $0x01, %r13 + shrdq $1, %r11, %r10 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrq $1, %r13 vptest %ymm8, %ymm2 jz L_256_mod_inv_avx2_4_v_even_shr1 vpaddd %ymm6, %ymm2, %ymm2 @@ -59119,10 +59154,10 @@ L_256_mod_inv_avx2_4_uv_u: vpaddd %ymm7, %ymm1, %ymm1 L_256_mod_inv_avx2_4_usubv_done_neg: L_256_mod_inv_avx2_4_usubv_shr1: - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrq $0x01, %r9 + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrq $1, %r9 vptest %ymm8, %ymm0 jz L_256_mod_inv_avx2_4_usubv_sub_shr1 vpaddd %ymm6, %ymm0, %ymm0 @@ -59172,10 +59207,10 @@ L_256_mod_inv_avx2_4_uv_v: vpaddd %ymm7, %ymm3, %ymm3 L_256_mod_inv_avx2_4_vsubu_done_neg: L_256_mod_inv_avx2_4_vsubu_shr1: - shrdq $0x01, %r11, %r10 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrq $0x01, %r13 + shrdq $1, %r11, %r10 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrq $1, %r13 vptest %ymm8, %ymm2 jz L_256_mod_inv_avx2_4_vsubu_sub_shr1 vpaddd %ymm6, %ymm2, %ymm2 @@ -59268,7 +59303,7 @@ L_256_mod_inv_avx2_4_store_done: adcq %r13, %r12 movslq %r14d, %r14 adcq %r15, %r14 - jge L_256_mod_inv_avx2_4_3_no_add_order + jge L_256_mod_inv_avx2_4_no_add_order movq $0x9cac2fc632551, %rcx movq $0xada7179e84f3b, %r9 movq $0xfffffffbce6fa, %r11 @@ -59296,7 +59331,7 @@ L_256_mod_inv_avx2_4_store_done: andq %rsi, %r12 sarq $52, %r13 addq %r13, %r14 -L_256_mod_inv_avx2_4_3_no_add_order: +L_256_mod_inv_avx2_4_no_add_order: movq %r8, %rcx movq %r10, %r9 movq %r12, %r11 @@ -61148,21 +61183,21 @@ _sp_384_mont_div2_6: adcq $0x00, %r10 movq (%rsp), %rax movq 8(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, (%rdi) movq 16(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 8(%rdi) movq 24(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 16(%rdi) movq 32(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 24(%rdi) movq 40(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 32(%rdi) - shrdq $0x01, %r10, %rcx + shrdq $1, %r10, %rcx movq %rcx, 40(%rdi) addq $48, %rsp repz retq @@ -61783,21 +61818,21 @@ _sp_384_mont_div2_avx2_6: adcq $0x00, %r10 movq (%rdi), %r8 movq 8(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, (%rdi) movq 16(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 8(%rdi) movq 24(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 16(%rdi) movq 32(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 24(%rdi) movq 40(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 32(%rdi) - shrdq $0x01, %r10, %r9 + shrdq $1, %r10, %r9 movq %r9, 40(%rdi) repz retq #ifndef __APPLE__ @@ -62565,12 +62600,12 @@ _sp_384_rshift1_6: movq 24(%rsi), %r8 movq 32(%rsi), %r9 movq 40(%rsi), %r10 - shrdq $0x01, %rax, %rdx - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrq $0x01, %r10 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrq $1, %r10 movq %rdx, (%rdi) movq %rax, 8(%rdi) movq %rcx, 16(%rdi) @@ -62629,12 +62664,12 @@ _sp_384_div2_mod_6: movq $0x00, %rdx adcq $0x00, %rdx L_384_mod_inv_6_div2_mod_no_add: - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrdq $0x01, %r11, %r10 - shrdq $0x01, %rdx, %r11 + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrdq $1, %r11, %r10 + shrdq $1, %rdx, %r11 movq %rax, (%rdi) movq %rcx, 8(%rdi) movq %r8, 16(%rdi) @@ -66688,15 +66723,15 @@ _sp_521_mont_div2_9: sbbq $0x00, %r13 shlq $9, %r14 addq %r14, %r13 - shrdq $0x01, %rax, %rdx - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrdq $0x01, %r11, %r10 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrq $0x01, %r13 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrdq $1, %r11, %r10 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrq $1, %r13 movq %rdx, (%rdi) movq %rax, 8(%rdi) movq %rcx, 16(%rdi) @@ -68454,15 +68489,15 @@ _sp_521_mont_div2_avx2_9: sbbq $0x00, %r13 shlq $9, %r14 addq %r14, %r13 - shrdq $0x01, %rax, %rdx - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 - shrdq $0x01, %r11, %r10 - shrdq $0x01, %r12, %r11 - shrdq $0x01, %r13, %r12 - shrq $0x01, %r13 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 + shrdq $1, %r11, %r10 + shrdq $1, %r12, %r11 + shrdq $1, %r13, %r12 + shrq $1, %r13 movq %rdx, (%rdi) movq %rax, 8(%rdi) movq %rcx, 16(%rdi) @@ -69634,10 +69669,10 @@ _sp_521_rshift1_9: movq 16(%rsi), %rcx movq 24(%rsi), %r8 movq 32(%rsi), %r10 - shrdq $0x01, %rax, %rdx - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r10, %r8 + shrdq $1, %rax, %rdx + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r10, %r8 movq %rdx, (%rdi) movq %rax, 8(%rdi) movq %rcx, 16(%rdi) @@ -69646,15 +69681,15 @@ _sp_521_rshift1_9: movq 48(%rsi), %rcx movq 56(%rsi), %r8 movq 64(%rsi), %rdx - shrdq $0x01, %rax, %r10 - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %rdx, %r8 + shrdq $1, %rax, %r10 + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %rdx, %r8 movq %r10, 32(%rdi) movq %rax, 40(%rdi) movq %rcx, 48(%rdi) movq %r8, 56(%rdi) - shrq $0x01, %rdx + shrq $1, %rdx movq %rdx, 64(%rdi) repz retq #ifndef __APPLE__ @@ -69723,10 +69758,10 @@ L_521_mod_inv_9_div2_mod_no_add: movq 16(%rsi), %r8 movq 24(%rsi), %r9 movq 32(%rsi), %r10 - shrdq $0x01, %rcx, %rax - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %r10, %r9 + shrdq $1, %rcx, %rax + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %r10, %r9 movq %rax, (%rdi) movq %rcx, 8(%rdi) movq %r8, 16(%rdi) @@ -69735,15 +69770,15 @@ L_521_mod_inv_9_div2_mod_no_add: movq 48(%rsi), %r8 movq 56(%rsi), %r9 movq 64(%rsi), %rax - shrdq $0x01, %rcx, %r10 - shrdq $0x01, %r8, %rcx - shrdq $0x01, %r9, %r8 - shrdq $0x01, %rax, %r9 + shrdq $1, %rcx, %r10 + shrdq $1, %r8, %rcx + shrdq $1, %r9, %r8 + shrdq $1, %rax, %r9 movq %r10, 32(%rdi) movq %rcx, 40(%rdi) movq %r8, 48(%rdi) movq %r9, 56(%rdi) - shrq $0x01, %rax + shrq $1, %rax movq %rax, 64(%rdi) repz retq #ifndef __APPLE__ @@ -77428,51 +77463,51 @@ _sp_1024_mont_div2_16: adcq $0x00, %r10 movq (%rsp), %rax movq 8(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, (%rdi) movq 16(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 8(%rdi) movq 24(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 16(%rdi) movq 32(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 24(%rdi) movq 40(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 32(%rdi) movq 48(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 40(%rdi) movq 56(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 48(%rdi) movq 64(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 56(%rdi) movq 72(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 64(%rdi) movq 80(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 72(%rdi) movq 88(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 80(%rdi) movq 96(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 88(%rdi) movq 104(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 96(%rdi) movq 112(%rsp), %rax - shrdq $0x01, %rax, %rcx + shrdq $1, %rax, %rcx movq %rcx, 104(%rdi) movq 120(%rsp), %rcx - shrdq $0x01, %rcx, %rax + shrdq $1, %rcx, %rax movq %rax, 112(%rdi) - shrdq $0x01, %r10, %rcx + shrdq $1, %r10, %rcx movq %rcx, 120(%rdi) addq $0x80, %rsp repz retq @@ -78692,51 +78727,51 @@ _sp_1024_mont_div2_avx2_16: adcq $0x00, %r10 movq (%rdi), %r8 movq 8(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, (%rdi) movq 16(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 8(%rdi) movq 24(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 16(%rdi) movq 32(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 24(%rdi) movq 40(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 32(%rdi) movq 48(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 40(%rdi) movq 56(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 48(%rdi) movq 64(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 56(%rdi) movq 72(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 64(%rdi) movq 80(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 72(%rdi) movq 88(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 80(%rdi) movq 96(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 88(%rdi) movq 104(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 96(%rdi) movq 112(%rdi), %r8 - shrdq $0x01, %r8, %r9 + shrdq $1, %r8, %r9 movq %r9, 104(%rdi) movq 120(%rdi), %r9 - shrdq $0x01, %r9, %r8 + shrdq $1, %r9, %r8 movq %r8, 112(%rdi) - shrdq $0x01, %r10, %r9 + shrdq $1, %r10, %r9 movq %r9, 120(%rdi) repz retq #ifndef __APPLE__ diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index c91ccfa48b5..cdf57894078 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -18,6 +18,7 @@ ; * along with this program; if not, write to the Free Software ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA ; */ + IF @Version LT 1200 ; AVX2 instructions not recognized by old versions of MASM IFNDEF NO_AVX2_SUPPORT @@ -50,7 +51,7 @@ IFNDEF WOLFSSL_SP_NO_2048 ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_from_bin_bswap PROC push r12 push r13 @@ -128,7 +129,7 @@ L_2048_from_bin_bswap_zero_end: pop r12 ret sp_2048_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -138,7 +139,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_from_bin_movbe PROC push r12 mov r11, r8 @@ -204,7 +205,7 @@ L_2048_from_bin_movbe_zero_end: pop r12 ret sp_2048_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 256 @@ -213,7 +214,7 @@ ENDIF ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_to_bin_bswap_32 PROC mov rax, QWORD PTR [rcx+248] mov r8, QWORD PTR [rcx+240] @@ -313,7 +314,7 @@ sp_2048_to_bin_bswap_32 PROC mov QWORD PTR [rdx+248], r8 ret sp_2048_to_bin_bswap_32 ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 256 @@ -322,7 +323,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_to_bin_movbe_32 PROC movbe rax, QWORD PTR [rcx+248] movbe r8, QWORD PTR [rcx+240] @@ -390,7 +391,7 @@ sp_2048_to_bin_movbe_32 PROC mov QWORD PTR [rdx+248], r8 ret sp_2048_to_bin_movbe_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Multiply a and b into r. (r = a * b) ; * @@ -398,7 +399,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_16 PROC push r12 mov r9, rdx @@ -2034,7 +2035,7 @@ sp_2048_mul_16 PROC pop r12 ret sp_2048_mul_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -2042,7 +2043,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply. ; * b Second number to multiply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_avx2_16 PROC push rbx push rbp @@ -3705,7 +3706,7 @@ L_end_2048_mul_avx2_16: pop rbx ret sp_2048_mul_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Add b to a into r. (r = a + b) ; * @@ -3713,7 +3714,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_add_16 PROC ; Add mov r9, QWORD PTR [rdx] @@ -3768,13 +3769,13 @@ sp_2048_add_16 PROC adc rax, 0 ret sp_2048_add_16 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sub_in_place_32 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -3875,14 +3876,14 @@ sp_2048_sub_in_place_32 PROC sbb rax, rax ret sp_2048_sub_in_place_32 ENDP -_text ENDS +_TEXT ENDS ; /* Add b to a into r. (r = a + b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_add_32 PROC ; Add mov r9, QWORD PTR [rdx] @@ -3985,14 +3986,14 @@ sp_2048_add_32 PROC adc rax, 0 ret sp_2048_add_32 ENDP -_text ENDS +_TEXT ENDS ; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_32 PROC push r12 push r13 @@ -4690,7 +4691,7 @@ ENDIF pop r12 ret sp_2048_mul_32 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -4698,7 +4699,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_avx2_32 PROC push r12 push r13 @@ -5348,14 +5349,14 @@ ENDIF pop r12 ret sp_2048_mul_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sqr_16 PROC push r12 push r13 @@ -6437,14 +6438,14 @@ sp_2048_sqr_16 PROC pop r12 ret sp_2048_sqr_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sqr_avx2_16 PROC push rbp push r12 @@ -7490,7 +7491,7 @@ L_end_2048_sqr_avx2_16: pop rbp ret sp_2048_sqr_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * @@ -7499,7 +7500,7 @@ ENDIF ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sqr_32 PROC sub rsp, 272 mov QWORD PTR [rsp+256], rcx @@ -8008,7 +8009,7 @@ ENDIF add rsp, 272 ret sp_2048_sqr_32 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * @@ -8017,7 +8018,7 @@ IFDEF HAVE_INTEL_AVX2 ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sqr_avx2_32 PROC sub rsp, 272 mov QWORD PTR [rsp+256], rcx @@ -8526,14 +8527,14 @@ ENDIF add rsp, 272 ret sp_2048_sqr_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sub_in_place_16 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -8586,14 +8587,14 @@ sp_2048_sub_in_place_16 PROC sbb rax, rax ret sp_2048_sub_in_place_16 ENDP -_text ENDS +_TEXT ENDS ; /* Mul a by digit b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_d_32 PROC push r12 mov r9, rdx @@ -8854,7 +8855,7 @@ sp_2048_mul_d_32 PROC pop r12 ret sp_2048_mul_d_32 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -8863,7 +8864,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cond_sub_16 PROC sub rsp, 128 mov r10, QWORD PTR [r8] @@ -8982,14 +8983,14 @@ sp_2048_cond_sub_16 PROC add rsp, 128 ret sp_2048_cond_sub_16 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 2048 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mont_reduce_16 PROC push r12 push r13 @@ -9189,7 +9190,7 @@ ENDIF pop r12 ret sp_2048_mont_reduce_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. @@ -9199,7 +9200,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cond_sub_avx2_16 PROC push r12 mov r12, QWORD PTR [r8] @@ -9286,7 +9287,7 @@ sp_2048_cond_sub_avx2_16 PROC pop r12 ret sp_2048_cond_sub_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Mul a by digit b into r. (r = a * b) ; * @@ -9294,7 +9295,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_d_16 PROC push r12 mov r9, rdx @@ -9427,7 +9428,7 @@ sp_2048_mul_d_16 PROC pop r12 ret sp_2048_mul_d_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -9435,7 +9436,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_d_avx2_16 PROC push r12 push r13 @@ -9541,7 +9542,7 @@ sp_2048_mul_d_avx2_16 PROC pop r12 ret sp_2048_mul_d_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -9551,7 +9552,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_2048_word_asm_16 PROC mov r9, rdx mov rax, r9 @@ -9559,7 +9560,7 @@ div_2048_word_asm_16 PROC div r8 ret div_2048_word_asm_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Compare a with b in constant time. ; * @@ -9568,7 +9569,7 @@ ENDIF ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cmp_16 PROC push r12 xor r9, r9 @@ -9707,9 +9708,9 @@ sp_2048_cmp_16 PROC pop r12 ret sp_2048_cmp_16 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_get_from_table_16 PROC sub rsp, 128 movdqu OWORD PTR [rsp], xmm6 @@ -10880,7 +10881,7 @@ sp_2048_get_from_table_16 PROC add rsp, 128 ret sp_2048_get_from_table_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 2048 bits using Montgomery reduction. @@ -10889,7 +10890,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mont_reduce_avx2_16 PROC push r12 push r13 @@ -11212,10 +11213,10 @@ L_2048_mont_reduce_avx2_16_loop: pop r12 ret sp_2048_mont_reduce_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_get_from_table_avx2_16 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 @@ -11766,7 +11767,7 @@ sp_2048_get_from_table_avx2_16 PROC add rsp, 128 ret sp_2048_get_from_table_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. @@ -11776,7 +11777,7 @@ ENDIF ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cond_sub_32 PROC sub rsp, 256 mov r10, QWORD PTR [r8] @@ -12007,14 +12008,14 @@ sp_2048_cond_sub_32 PROC add rsp, 256 ret sp_2048_cond_sub_32 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 2048 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mont_reduce_32 PROC push r12 push r13 @@ -12374,14 +12375,14 @@ ENDIF pop r12 ret sp_2048_mont_reduce_32 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_sub_32 PROC mov r9, QWORD PTR [rdx] sub r9, QWORD PTR [r8] @@ -12482,7 +12483,7 @@ sp_2048_sub_32 PROC sbb rax, rax ret sp_2048_sub_32 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -12490,7 +12491,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mul_d_avx2_32 PROC push r12 push r13 @@ -12692,7 +12693,7 @@ sp_2048_mul_d_avx2_32 PROC pop r12 ret sp_2048_mul_d_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -12702,7 +12703,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_2048_word_asm_32 PROC mov r9, rdx mov rax, r9 @@ -12710,7 +12711,7 @@ div_2048_word_asm_32 PROC div r8 ret div_2048_word_asm_32 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. @@ -12721,7 +12722,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cond_sub_avx2_32 PROC push r12 mov r12, QWORD PTR [r8] @@ -12888,7 +12889,7 @@ sp_2048_cond_sub_avx2_32 PROC pop r12 ret sp_2048_cond_sub_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Compare a with b in constant time. ; * @@ -12897,7 +12898,7 @@ ENDIF ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cmp_32 PROC push r12 xor r9, r9 @@ -13164,9 +13165,9 @@ sp_2048_cmp_32 PROC pop r12 ret sp_2048_cmp_32 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_get_from_table_32 PROC sub rsp, 128 movdqu OWORD PTR [rsp], xmm6 @@ -17785,7 +17786,7 @@ sp_2048_get_from_table_32 PROC add rsp, 128 ret sp_2048_get_from_table_32 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 2048 bits using Montgomery reduction. @@ -17794,7 +17795,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_mont_reduce_avx2_32 PROC push r12 push r13 @@ -18192,10 +18193,10 @@ L_2048_mont_reduce_avx2_32_loop: pop r12 ret sp_2048_mont_reduce_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_get_from_table_avx2_32 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 @@ -20358,7 +20359,7 @@ sp_2048_get_from_table_avx2_32 PROC add rsp, 128 ret sp_2048_get_from_table_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. @@ -20368,7 +20369,7 @@ ENDIF ; * b A single precision number to add. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cond_add_16 PROC sub rsp, 128 mov rax, 0 @@ -20488,7 +20489,7 @@ sp_2048_cond_add_16 PROC add rsp, 128 ret sp_2048_cond_add_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. @@ -20498,7 +20499,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to add. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_cond_add_avx2_16 PROC push r12 mov rax, 0 @@ -20586,7 +20587,7 @@ sp_2048_cond_add_avx2_16 PROC pop r12 ret sp_2048_cond_add_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Shift number left by n bit. (r = a << n) ; * @@ -20594,7 +20595,7 @@ ENDIF ; * a Number to shift. ; * n Amoutnt o shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_2048_lshift_32 PROC push r12 push r13 @@ -20703,7 +20704,7 @@ sp_2048_lshift_32 PROC pop r12 ret sp_2048_lshift_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFNDEF WOLFSSL_SP_NO_3072 @@ -20716,7 +20717,7 @@ IFNDEF WOLFSSL_SP_NO_3072 ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_from_bin_bswap PROC push r12 push r13 @@ -20794,7 +20795,7 @@ L_3072_from_bin_bswap_zero_end: pop r12 ret sp_3072_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -20804,7 +20805,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_from_bin_movbe PROC push r12 mov r11, r8 @@ -20870,7 +20871,7 @@ L_3072_from_bin_movbe_zero_end: pop r12 ret sp_3072_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 384 @@ -20879,7 +20880,7 @@ ENDIF ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_to_bin_bswap_48 PROC mov rax, QWORD PTR [rcx+376] mov r8, QWORD PTR [rcx+368] @@ -21027,7 +21028,7 @@ sp_3072_to_bin_bswap_48 PROC mov QWORD PTR [rdx+376], r8 ret sp_3072_to_bin_bswap_48 ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 384 @@ -21036,7 +21037,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_to_bin_movbe_48 PROC movbe rax, QWORD PTR [rcx+376] movbe r8, QWORD PTR [rcx+368] @@ -21136,7 +21137,7 @@ sp_3072_to_bin_movbe_48 PROC mov QWORD PTR [rdx+376], r8 ret sp_3072_to_bin_movbe_48 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Multiply a and b into r. (r = a * b) ; * @@ -21144,7 +21145,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_12 PROC push r12 mov r9, rdx @@ -22084,7 +22085,7 @@ sp_3072_mul_12 PROC pop r12 ret sp_3072_mul_12 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -22092,7 +22093,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply. ; * b Second number to multiply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_avx2_12 PROC push rbx push rbp @@ -23055,7 +23056,7 @@ L_end_3072_mul_avx2_12: pop rbx ret sp_3072_mul_avx2_12 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Add b to a into r. (r = a + b) ; * @@ -23063,7 +23064,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_add_12 PROC ; Add mov r9, QWORD PTR [rdx] @@ -23106,13 +23107,13 @@ sp_3072_add_12 PROC adc rax, 0 ret sp_3072_add_12 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sub_in_place_24 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -23189,14 +23190,14 @@ sp_3072_sub_in_place_24 PROC sbb rax, rax ret sp_3072_sub_in_place_24 ENDP -_text ENDS +_TEXT ENDS ; /* Add b to a into r. (r = a + b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_add_24 PROC ; Add mov r9, QWORD PTR [rdx] @@ -23275,14 +23276,14 @@ sp_3072_add_24 PROC adc rax, 0 ret sp_3072_add_24 ENDP -_text ENDS +_TEXT ENDS ; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_24 PROC push r12 push r13 @@ -23824,7 +23825,7 @@ ENDIF pop r12 ret sp_3072_mul_24 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -23832,7 +23833,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_avx2_24 PROC push r12 push r13 @@ -24338,14 +24339,14 @@ ENDIF pop r12 ret sp_3072_mul_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sub_in_place_48 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -24494,14 +24495,14 @@ sp_3072_sub_in_place_48 PROC sbb rax, rax ret sp_3072_sub_in_place_48 ENDP -_text ENDS +_TEXT ENDS ; /* Add b to a into r. (r = a + b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_add_48 PROC ; Add mov r9, QWORD PTR [rdx] @@ -24652,14 +24653,14 @@ sp_3072_add_48 PROC adc rax, 0 ret sp_3072_add_48 ENDP -_text ENDS +_TEXT ENDS ; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_48 PROC push r12 push r13 @@ -25669,7 +25670,7 @@ ENDIF pop r12 ret sp_3072_mul_48 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -25677,7 +25678,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_avx2_48 PROC push r12 push r13 @@ -26615,14 +26616,14 @@ ENDIF pop r12 ret sp_3072_mul_avx2_48 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sqr_12 PROC push r12 push r13 @@ -27284,14 +27285,14 @@ sp_3072_sqr_12 PROC pop r12 ret sp_3072_sqr_12 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sqr_avx2_12 PROC push rbp push r12 @@ -27926,7 +27927,7 @@ L_end_3072_sqr_avx2_12: pop rbp ret sp_3072_sqr_avx2_12 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * @@ -27935,7 +27936,7 @@ ENDIF ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sqr_24 PROC sub rsp, 208 mov QWORD PTR [rsp+192], rcx @@ -28328,7 +28329,7 @@ ENDIF add rsp, 208 ret sp_3072_sqr_24 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * @@ -28337,7 +28338,7 @@ IFDEF HAVE_INTEL_AVX2 ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sqr_avx2_24 PROC sub rsp, 208 mov QWORD PTR [rsp+192], rcx @@ -28730,7 +28731,7 @@ ENDIF add rsp, 208 ret sp_3072_sqr_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * @@ -28739,7 +28740,7 @@ ENDIF ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sqr_48 PROC sub rsp, 400 mov QWORD PTR [rsp+384], rcx @@ -29480,7 +29481,7 @@ ENDIF add rsp, 400 ret sp_3072_sqr_48 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * @@ -29489,7 +29490,7 @@ IFDEF HAVE_INTEL_AVX2 ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sqr_avx2_48 PROC sub rsp, 400 mov QWORD PTR [rsp+384], rcx @@ -30230,7 +30231,7 @@ ENDIF add rsp, 400 ret sp_3072_sqr_avx2_48 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Mul a by digit b into r. (r = a * b) ; * @@ -30238,7 +30239,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_d_48 PROC push r12 mov r9, rdx @@ -30627,7 +30628,7 @@ sp_3072_mul_d_48 PROC pop r12 ret sp_3072_mul_d_48 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -30636,7 +30637,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cond_sub_24 PROC sub rsp, 192 mov r10, QWORD PTR [r8] @@ -30811,14 +30812,14 @@ sp_3072_cond_sub_24 PROC add rsp, 192 ret sp_3072_cond_sub_24 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 3072 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mont_reduce_24 PROC push r12 push r13 @@ -31098,7 +31099,7 @@ ENDIF pop r12 ret sp_3072_mont_reduce_24 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. @@ -31108,7 +31109,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cond_sub_avx2_24 PROC push r12 mov r12, QWORD PTR [r8] @@ -31235,7 +31236,7 @@ sp_3072_cond_sub_avx2_24 PROC pop r12 ret sp_3072_cond_sub_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Mul a by digit b into r. (r = a * b) ; * @@ -31243,7 +31244,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_d_24 PROC push r12 mov r9, rdx @@ -31440,7 +31441,7 @@ sp_3072_mul_d_24 PROC pop r12 ret sp_3072_mul_d_24 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -31448,7 +31449,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_d_avx2_24 PROC push r12 push r13 @@ -31602,7 +31603,7 @@ sp_3072_mul_d_avx2_24 PROC pop r12 ret sp_3072_mul_d_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -31612,7 +31613,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_3072_word_asm_24 PROC mov r9, rdx mov rax, r9 @@ -31620,7 +31621,7 @@ div_3072_word_asm_24 PROC div r8 ret div_3072_word_asm_24 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Compare a with b in constant time. ; * @@ -31629,7 +31630,7 @@ ENDIF ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cmp_24 PROC push r12 xor r9, r9 @@ -31832,9 +31833,9 @@ sp_3072_cmp_24 PROC pop r12 ret sp_3072_cmp_24 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_get_from_table_24 PROC sub rsp, 128 movdqu OWORD PTR [rsp], xmm6 @@ -33593,7 +33594,7 @@ sp_3072_get_from_table_24 PROC add rsp, 128 ret sp_3072_get_from_table_24 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -33602,7 +33603,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mont_reduce_avx2_24 PROC push r12 push r13 @@ -33912,10 +33913,10 @@ L_3072_mont_reduce_avx2_24_loop: pop r12 ret sp_3072_mont_reduce_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_get_from_table_avx2_24 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 @@ -34826,7 +34827,7 @@ sp_3072_get_from_table_avx2_24 PROC add rsp, 128 ret sp_3072_get_from_table_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. @@ -34836,7 +34837,7 @@ ENDIF ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cond_sub_48 PROC sub rsp, 384 mov r10, QWORD PTR [r8] @@ -35179,14 +35180,14 @@ sp_3072_cond_sub_48 PROC add rsp, 384 ret sp_3072_cond_sub_48 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 3072 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mont_reduce_48 PROC push r12 push r13 @@ -35706,14 +35707,14 @@ ENDIF pop r12 ret sp_3072_mont_reduce_48 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_sub_48 PROC mov r9, QWORD PTR [rdx] sub r9, QWORD PTR [r8] @@ -35862,7 +35863,7 @@ sp_3072_sub_48 PROC sbb rax, rax ret sp_3072_sub_48 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -35870,7 +35871,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mul_d_avx2_48 PROC push r12 push r13 @@ -36168,7 +36169,7 @@ sp_3072_mul_d_avx2_48 PROC pop r12 ret sp_3072_mul_d_avx2_48 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -36178,7 +36179,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_3072_word_asm_48 PROC mov r9, rdx mov rax, r9 @@ -36186,7 +36187,7 @@ div_3072_word_asm_48 PROC div r8 ret div_3072_word_asm_48 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. @@ -36197,7 +36198,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cond_sub_avx2_48 PROC push r12 mov r12, QWORD PTR [r8] @@ -36444,7 +36445,7 @@ sp_3072_cond_sub_avx2_48 PROC pop r12 ret sp_3072_cond_sub_avx2_48 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Compare a with b in constant time. ; * @@ -36453,7 +36454,7 @@ ENDIF ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cmp_48 PROC push r12 xor r9, r9 @@ -36848,9 +36849,9 @@ sp_3072_cmp_48 PROC pop r12 ret sp_3072_cmp_48 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_get_from_table_48 PROC sub rsp, 128 movdqu OWORD PTR [rsp], xmm6 @@ -38661,7 +38662,7 @@ sp_3072_get_from_table_48 PROC add rsp, 128 ret sp_3072_get_from_table_48 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -38670,7 +38671,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_mont_reduce_avx2_48 PROC push r12 push r13 @@ -39244,10 +39245,10 @@ L_3072_mont_reduce_avx2_48_loop: pop r12 ret sp_3072_mont_reduce_avx2_48 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_get_from_table_avx2_48 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 @@ -40110,7 +40111,7 @@ sp_3072_get_from_table_avx2_48 PROC add rsp, 128 ret sp_3072_get_from_table_avx2_48 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. @@ -40120,7 +40121,7 @@ ENDIF ; * b A single precision number to add. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cond_add_24 PROC sub rsp, 192 mov rax, 0 @@ -40296,7 +40297,7 @@ sp_3072_cond_add_24 PROC add rsp, 192 ret sp_3072_cond_add_24 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. @@ -40306,7 +40307,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to add. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_cond_add_avx2_24 PROC push r12 mov rax, 0 @@ -40434,7 +40435,7 @@ sp_3072_cond_add_avx2_24 PROC pop r12 ret sp_3072_cond_add_avx2_24 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Shift number left by n bit. (r = a << n) ; * @@ -40442,7 +40443,7 @@ ENDIF ; * a Number to shift. ; * n Amoutnt o shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_3072_lshift_48 PROC push r12 push r13 @@ -40599,7 +40600,7 @@ sp_3072_lshift_48 PROC pop r12 ret sp_3072_lshift_48 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFDEF WOLFSSL_SP_4096 @@ -40612,7 +40613,7 @@ IFDEF WOLFSSL_SP_4096 ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_from_bin_bswap PROC push r12 push r13 @@ -40690,7 +40691,7 @@ L_4096_from_bin_bswap_zero_end: pop r12 ret sp_4096_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -40700,7 +40701,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_from_bin_movbe PROC push r12 mov r11, r8 @@ -40766,7 +40767,7 @@ L_4096_from_bin_movbe_zero_end: pop r12 ret sp_4096_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 512 @@ -40775,7 +40776,7 @@ ENDIF ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_to_bin_bswap_64 PROC mov rax, QWORD PTR [rcx+504] mov r8, QWORD PTR [rcx+496] @@ -40971,7 +40972,7 @@ sp_4096_to_bin_bswap_64 PROC mov QWORD PTR [rdx+504], r8 ret sp_4096_to_bin_bswap_64 ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 512 @@ -40980,7 +40981,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_to_bin_movbe_64 PROC movbe rax, QWORD PTR [rcx+504] movbe r8, QWORD PTR [rcx+496] @@ -41112,14 +41113,14 @@ sp_4096_to_bin_movbe_64 PROC mov QWORD PTR [rdx+504], r8 ret sp_4096_to_bin_movbe_64 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_sub_in_place_64 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -41316,14 +41317,14 @@ sp_4096_sub_in_place_64 PROC sbb rax, rax ret sp_4096_sub_in_place_64 ENDP -_text ENDS +_TEXT ENDS ; /* Add b to a into r. (r = a + b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_add_64 PROC ; Add mov r9, QWORD PTR [rdx] @@ -41522,14 +41523,14 @@ sp_4096_add_64 PROC adc rax, 0 ret sp_4096_add_64 ENDP -_text ENDS +_TEXT ENDS ; /* Multiply a and b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_mul_64 PROC push r12 push r13 @@ -42851,7 +42852,7 @@ ENDIF pop r12 ret sp_4096_mul_64 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -42859,7 +42860,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_mul_avx2_64 PROC push r12 push r13 @@ -44085,7 +44086,7 @@ ENDIF pop r12 ret sp_4096_mul_avx2_64 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * @@ -44094,7 +44095,7 @@ ENDIF ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_sqr_64 PROC sub rsp, 528 mov QWORD PTR [rsp+512], rcx @@ -45067,7 +45068,7 @@ ENDIF add rsp, 528 ret sp_4096_sqr_64 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * @@ -45076,7 +45077,7 @@ IFDEF HAVE_INTEL_AVX2 ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_sqr_avx2_64 PROC sub rsp, 528 mov QWORD PTR [rsp+512], rcx @@ -46049,7 +46050,7 @@ ENDIF add rsp, 528 ret sp_4096_sqr_avx2_64 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Mul a by digit b into r. (r = a * b) ; * @@ -46057,7 +46058,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_mul_d_64 PROC push r12 mov r9, rdx @@ -46574,7 +46575,7 @@ sp_4096_mul_d_64 PROC pop r12 ret sp_4096_mul_d_64 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -46583,7 +46584,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_cond_sub_64 PROC sub rsp, 512 mov r10, QWORD PTR [r8] @@ -47038,14 +47039,14 @@ sp_4096_cond_sub_64 PROC add rsp, 512 ret sp_4096_cond_sub_64 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 4096 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_mont_reduce_64 PROC push r12 push r13 @@ -47725,14 +47726,14 @@ ENDIF pop r12 ret sp_4096_mont_reduce_64 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_sub_64 PROC mov r9, QWORD PTR [rdx] sub r9, QWORD PTR [r8] @@ -47929,7 +47930,7 @@ sp_4096_sub_64 PROC sbb rax, rax ret sp_4096_sub_64 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -47937,7 +47938,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_mul_d_avx2_64 PROC push r12 push r13 @@ -48331,7 +48332,7 @@ sp_4096_mul_d_avx2_64 PROC pop r12 ret sp_4096_mul_d_avx2_64 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -48341,7 +48342,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_4096_word_asm_64 PROC mov r9, rdx mov rax, r9 @@ -48349,7 +48350,7 @@ div_4096_word_asm_64 PROC div r8 ret div_4096_word_asm_64 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. @@ -48360,7 +48361,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_cond_sub_avx2_64 PROC push r12 mov r12, QWORD PTR [r8] @@ -48687,7 +48688,7 @@ sp_4096_cond_sub_avx2_64 PROC pop r12 ret sp_4096_cond_sub_avx2_64 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Compare a with b in constant time. ; * @@ -48696,7 +48697,7 @@ ENDIF ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_cmp_64 PROC push r12 xor r9, r9 @@ -49219,9 +49220,9 @@ sp_4096_cmp_64 PROC pop r12 ret sp_4096_cmp_64 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_get_from_table_64 PROC sub rsp, 128 movdqu OWORD PTR [rsp], xmm6 @@ -51632,7 +51633,7 @@ sp_4096_get_from_table_64 PROC add rsp, 128 ret sp_4096_get_from_table_64 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 4096 bits using Montgomery reduction. @@ -51641,7 +51642,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_mont_reduce_avx2_64 PROC push r12 push r13 @@ -52391,10 +52392,10 @@ L_4096_mont_reduce_avx2_64_loop: pop r12 ret sp_4096_mont_reduce_avx2_64 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_get_from_table_avx2_64 PROC sub rsp, 128 vmovdqu OWORD PTR [rsp], xmm6 @@ -53541,7 +53542,7 @@ sp_4096_get_from_table_avx2_64 PROC add rsp, 128 ret sp_4096_get_from_table_avx2_64 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. @@ -53551,7 +53552,7 @@ ENDIF ; * b A single precision number to add. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_cond_add_32 PROC sub rsp, 256 mov rax, 0 @@ -53783,7 +53784,7 @@ sp_4096_cond_add_32 PROC add rsp, 256 ret sp_4096_cond_add_32 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Conditionally add a and b using the mask m. ; * m is -1 to add and 0 when not. @@ -53793,7 +53794,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to add. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_cond_add_avx2_32 PROC push r12 mov rax, 0 @@ -53961,7 +53962,7 @@ sp_4096_cond_add_avx2_32 PROC pop r12 ret sp_4096_cond_add_avx2_32 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Shift number left by n bit. (r = a << n) ; * @@ -53969,7 +53970,7 @@ ENDIF ; * a Number to shift. ; * n Amoutnt o shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_4096_lshift_64 PROC push r12 push r13 @@ -54174,7 +54175,7 @@ sp_4096_lshift_64 PROC pop r12 ret sp_4096_lshift_64 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFNDEF WOLFSSL_SP_NO_256 @@ -54184,7 +54185,7 @@ IFNDEF WOLFSSL_SP_NO_256 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mul_4 PROC push r12 mov r9, rdx @@ -54308,7 +54309,7 @@ sp_256_mul_4 PROC pop r12 ret sp_256_mul_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -54316,7 +54317,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply. ; * b Second number to multiply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mul_avx2_4 PROC push rbp push r12 @@ -54418,14 +54419,14 @@ sp_256_mul_avx2_4 PROC pop rbp ret sp_256_mul_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_sqr_4 PROC push r12 push r13 @@ -54535,14 +54536,14 @@ sp_256_sqr_4 PROC pop r12 ret sp_256_sqr_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * r Result of squaring. ; * a Number to square in Montgomery form. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_sqr_avx2_4 PROC push r12 push r13 @@ -54626,7 +54627,7 @@ sp_256_sqr_avx2_4 PROC pop r12 ret sp_256_sqr_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Add b to a into r. (r = a + b) ; * @@ -54634,7 +54635,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_add_4 PROC push r12 xor rax, rax @@ -54654,14 +54655,14 @@ sp_256_add_4 PROC pop r12 ret sp_256_add_4 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_sub_4 PROC push r12 xor rax, rax @@ -54681,7 +54682,7 @@ sp_256_sub_4 PROC pop r12 ret sp_256_sub_4 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally copy a into r using the mask m. ; * m is -1 to copy and 0 when not. ; * @@ -54689,7 +54690,7 @@ _text ENDS ; * a A single precision number to copy. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_cond_copy_4 PROC mov rax, QWORD PTR [rcx] mov r9, QWORD PTR [rcx+8] @@ -54709,7 +54710,7 @@ sp_256_cond_copy_4 PROC xor QWORD PTR [rcx+24], r11 ret sp_256_cond_copy_4 ENDP -_text ENDS +_TEXT ENDS ; /* Multiply two Montgomery form numbers mod the modulus (prime). ; * (r = a * b mod m) ; * @@ -54719,7 +54720,7 @@ _text ENDS ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_mul_4 PROC push r12 push r13 @@ -54907,7 +54908,7 @@ sp_256_mont_mul_4 PROC pop r12 ret sp_256_mont_mul_4 ENDP -_text ENDS +_TEXT ENDS ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) ; * ; * r Result of squaring. @@ -54915,7 +54916,7 @@ _text ENDS ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_sqr_4 PROC push r12 push r13 @@ -55082,7 +55083,7 @@ sp_256_mont_sqr_4 PROC pop r12 ret sp_256_mont_sqr_4 ENDP -_text ENDS +_TEXT ENDS ; /* Compare a with b in constant time. ; * ; * a A single precision integer. @@ -55090,7 +55091,7 @@ _text ENDS ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_cmp_4 PROC push r12 xor r9, r9 @@ -55133,7 +55134,7 @@ sp_256_cmp_4 PROC pop r12 ret sp_256_cmp_4 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -55142,7 +55143,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_cond_sub_4 PROC push r12 push r13 @@ -55179,14 +55180,14 @@ sp_256_cond_sub_4 PROC pop r12 ret sp_256_cond_sub_4 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 256 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_reduce_4 PROC push rbx push rsi @@ -55284,14 +55285,14 @@ sp_256_mont_reduce_4 PROC pop rbx ret sp_256_mont_reduce_4 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 256 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_reduce_order_4 PROC push r12 push r13 @@ -55385,7 +55386,7 @@ L_mont_loop_4: pop r12 ret sp_256_mont_reduce_order_4 ENDP -_text ENDS +_TEXT ENDS ; /* Add two Montgomery form numbers (r = a + b % m). ; * ; * r Result of addition. @@ -55393,7 +55394,7 @@ _text ENDS ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_add_4 PROC push r12 push r13 @@ -55428,14 +55429,14 @@ sp_256_mont_add_4 PROC pop r12 ret sp_256_mont_add_4 ENDP -_text ENDS +_TEXT ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of doubling. ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_dbl_4 PROC push r12 push r13 @@ -55471,14 +55472,14 @@ sp_256_mont_dbl_4 PROC pop r12 ret sp_256_mont_dbl_4 ENDP -_text ENDS +_TEXT ENDS ; /* Triple a Montgomery form number (r = a + a + a % m). ; * ; * r Result of Tripling. ; * a Number to triple in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_tpl_4 PROC push r12 push r13 @@ -55532,7 +55533,7 @@ sp_256_mont_tpl_4 PROC pop r12 ret sp_256_mont_tpl_4 ENDP -_text ENDS +_TEXT ENDS ; /* Subtract two Montgomery form numbers (r = a - b % m). ; * ; * r Result of subtration. @@ -55540,7 +55541,7 @@ _text ENDS ; * b Number to subtract with in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_sub_4 PROC push r12 push r13 @@ -55575,14 +55576,14 @@ sp_256_mont_sub_4 PROC pop r12 ret sp_256_mont_sub_4 ENDP -_text ENDS +_TEXT ENDS ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_div2_4 PROC push r12 push r13 @@ -55614,7 +55615,7 @@ sp_256_mont_div2_4 PROC pop r12 ret sp_256_mont_div2_4 ENDP -_text ENDS +_TEXT ENDS ; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m). ; * ; * r Result of subtration. @@ -55622,7 +55623,7 @@ _text ENDS ; * b Number to double and subtract with in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_rsb_sub_dbl_4 PROC push r12 push r13 @@ -55715,7 +55716,7 @@ sp_256_mont_rsb_sub_dbl_4 PROC pop r12 ret sp_256_mont_rsb_sub_dbl_4 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible point that could be being copied. ; * @@ -55723,7 +55724,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of point to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_get_point_33_4 PROC sub rsp, 160 movdqu OWORD PTR [rsp], xmm6 @@ -55795,7 +55796,7 @@ L_256_get_point_33_4_start_1: add rsp, 160 ret sp_256_get_point_33_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible point that could be being copied. ; * @@ -55803,7 +55804,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of point to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_get_point_33_avx2_4 PROC sub rsp, 64 vmovdqu OWORD PTR [rsp], xmm6 @@ -55847,7 +55848,7 @@ L_256_get_point_33_avx2_4_start: add rsp, 64 ret sp_256_get_point_33_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFDEF HAVE_INTEL_AVX2 @@ -55860,7 +55861,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_mul_avx2_4 PROC push rbp push r12 @@ -56025,7 +56026,7 @@ sp_256_mont_mul_avx2_4 PROC pop rbp ret sp_256_mont_mul_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) @@ -56035,7 +56036,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_sqr_avx2_4 PROC push r12 push r13 @@ -56182,7 +56183,7 @@ sp_256_mont_sqr_avx2_4 PROC pop r12 ret sp_256_mont_sqr_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. @@ -56193,7 +56194,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_cond_sub_avx2_4 PROC push r12 push r13 @@ -56230,7 +56231,7 @@ sp_256_cond_sub_avx2_4 PROC pop r12 ret sp_256_cond_sub_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 256 bits using Montgomery reduction. @@ -56239,7 +56240,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_reduce_order_avx2_4 PROC push r12 push r13 @@ -56389,7 +56390,7 @@ sp_256_mont_reduce_order_avx2_4 PROC pop r12 ret sp_256_mont_reduce_order_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -56398,7 +56399,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_div2_avx2_4 PROC push r12 push r13 @@ -56430,7 +56431,7 @@ sp_256_mont_div2_avx2_4 PROC pop r12 ret sp_256_mont_div2_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible entry that could be being copied. @@ -56439,7 +56440,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_get_entry_64_4 PROC sub rsp, 96 movdqu OWORD PTR [rsp], xmm6 @@ -56494,7 +56495,7 @@ L_256_get_entry_64_4_start_0: add rsp, 96 ret sp_256_get_entry_64_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible entry that could be being copied. ; * @@ -56502,7 +56503,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_get_entry_64_avx2_4 PROC sub rsp, 32 vmovdqu OWORD PTR [rsp], xmm6 @@ -56537,7 +56538,7 @@ L_256_get_entry_64_avx2_4_start: add rsp, 32 ret sp_256_get_entry_64_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFNDEF WC_NO_CACHE_RESISTANT @@ -56547,7 +56548,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_get_entry_65_4 PROC sub rsp, 96 movdqu OWORD PTR [rsp], xmm6 @@ -56602,7 +56603,7 @@ L_256_get_entry_65_4_start_0: add rsp, 96 ret sp_256_get_entry_65_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible entry that could be being copied. ; * @@ -56610,7 +56611,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_get_entry_65_avx2_4 PROC sub rsp, 32 vmovdqu OWORD PTR [rsp], xmm6 @@ -56645,14 +56646,14 @@ L_256_get_entry_65_avx2_4_start: add rsp, 32 ret sp_256_get_entry_65_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF ; /* Add 1 to a. (a = a + 1) ; * ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_add_one_4 PROC add QWORD PTR [rcx], 1 adc QWORD PTR [rcx+8], 0 @@ -56660,7 +56661,7 @@ sp_256_add_one_4 PROC adc QWORD PTR [rcx+24], 0 ret sp_256_add_one_4 ENDP -_text ENDS +_TEXT ENDS ; /* Read big endian unsigned byte array into r. ; * Uses the bswap instruction. ; * @@ -56669,7 +56670,7 @@ _text ENDS ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_from_bin_bswap PROC push r12 push r13 @@ -56747,7 +56748,7 @@ L_256_from_bin_bswap_zero_end: pop r12 ret sp_256_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -56757,7 +56758,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_from_bin_movbe PROC push r12 mov r11, r8 @@ -56823,7 +56824,7 @@ L_256_from_bin_movbe_zero_end: pop r12 ret sp_256_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 32 @@ -56832,7 +56833,7 @@ ENDIF ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_to_bin_bswap_4 PROC mov rax, QWORD PTR [rcx+24] mov r8, QWORD PTR [rcx+16] @@ -56848,7 +56849,7 @@ sp_256_to_bin_bswap_4 PROC mov QWORD PTR [rdx+24], r8 ret sp_256_to_bin_bswap_4 ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 32 @@ -56857,7 +56858,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_to_bin_movbe_4 PROC movbe rax, QWORD PTR [rcx+24] movbe r8, QWORD PTR [rcx+16] @@ -56869,14 +56870,14 @@ sp_256_to_bin_movbe_4 PROC mov QWORD PTR [rdx+24], r8 ret sp_256_to_bin_movbe_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_sub_in_place_4 PROC mov r8, QWORD PTR [rdx] mov r9, QWORD PTR [rdx+8] @@ -56889,14 +56890,14 @@ sp_256_sub_in_place_4 PROC sbb rax, rax ret sp_256_sub_in_place_4 ENDP -_text ENDS +_TEXT ENDS ; /* Mul a by digit b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mul_d_4 PROC push r12 mov r9, rdx @@ -56933,7 +56934,7 @@ sp_256_mul_d_4 PROC pop r12 ret sp_256_mul_d_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -56941,7 +56942,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mul_d_avx2_4 PROC push r12 push r13 @@ -56975,7 +56976,7 @@ sp_256_mul_d_avx2_4 PROC pop r12 ret sp_256_mul_d_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -56985,7 +56986,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_256_word_asm_4 PROC mov r9, rdx mov rax, r9 @@ -56993,7 +56994,7 @@ div_256_word_asm_4 PROC div r8 ret div_256_word_asm_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -57003,7 +57004,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply in Montgomery form. ; * b Second number to multiply in Montgomery form. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_mul_order_avx2_4 PROC push rbp push r12 @@ -57214,7 +57215,7 @@ sp_256_mont_mul_order_avx2_4 PROC pop rbp ret sp_256_mont_mul_order_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) @@ -57222,7 +57223,7 @@ IFDEF HAVE_INTEL_AVX2 ; * r Result of squaring. ; * a Number to square in Montgomery form. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mont_sqr_order_avx2_4 PROC push rbp push r12 @@ -57417,7 +57418,7 @@ sp_256_mont_sqr_order_avx2_4 PROC pop rbp ret sp_256_mont_sqr_order_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Non-constant time modular inversion. ; * @@ -57426,7 +57427,7 @@ ENDIF ; * @param [in] m Modulus. ; * @return MP_OKAY on success. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mod_inv_4 PROC push r12 push r13 @@ -57638,47 +57639,64 @@ L_256_mod_inv_4_store_end: pop r12 ret sp_256_mod_inv_4 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_order DWORD 6497617,32001851,62711546,67108863,67043328,0,0,0,41070783,45522014,67108863,1023,4194303,0,0,0 +L_sp256_mod_inv_avx2_4_order DWORD \ + 00632551h, 01e84f3bh, 03bce6fah, 03ffffffh, + 03ff0000h, 00000000h, 00000000h, 00000000h, + 0272b0bfh, 02b69c5eh, 03ffffffh, 000003ffh, + 003fffffh, 00000000h, 00000000h, 00000000h ptr_L_sp256_mod_inv_avx2_4_order QWORD L_sp256_mod_inv_avx2_4_order _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_one QWORD 1, 0, - 0, 0 +L_sp256_mod_inv_avx2_4_one QWORD \ + 0000000000000001h, 0000000000000000h, + 0000000000000000h, 0000000000000000h ptr_L_sp256_mod_inv_avx2_4_one QWORD L_sp256_mod_inv_avx2_4_one _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_all_one DWORD 1,1,1,1,1,1,1,1 +L_sp256_mod_inv_avx2_4_all_one DWORD \ + 00000001h, 00000001h, 00000001h, 00000001h, + 00000001h, 00000001h, 00000001h, 00000001h ptr_L_sp256_mod_inv_avx2_4_all_one QWORD L_sp256_mod_inv_avx2_4_all_one _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_mask01111 DWORD 0,1,1,1,1,0,0,0 +L_sp256_mod_inv_avx2_4_mask01111 DWORD \ + 00000000h, 00000001h, 00000001h, 00000001h, + 00000001h, 00000000h, 00000000h, 00000000h ptr_L_sp256_mod_inv_avx2_4_mask01111 QWORD L_sp256_mod_inv_avx2_4_mask01111 _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_down_one_dword DWORD 1,2,3,4,5,6,7,7 +L_sp256_mod_inv_avx2_4_down_one_dword DWORD \ + 00000001h, 00000002h, 00000003h, 00000004h, + 00000005h, 00000006h, 00000007h, 00000007h ptr_L_sp256_mod_inv_avx2_4_down_one_dword QWORD L_sp256_mod_inv_avx2_4_down_one_dword _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_neg DWORD 0,0,0,0,2147483648,0,0,0 +L_sp256_mod_inv_avx2_4_neg DWORD \ + 00000000h, 00000000h, 00000000h, 00000000h, + 80000000h, 00000000h, 00000000h, 00000000h ptr_L_sp256_mod_inv_avx2_4_neg QWORD L_sp256_mod_inv_avx2_4_neg _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_up_one_dword DWORD 7,0,1,2,3,7,7,7 +L_sp256_mod_inv_avx2_4_up_one_dword DWORD \ + 00000007h, 00000000h, 00000001h, 00000002h, + 00000003h, 00000007h, 00000007h, 00000007h ptr_L_sp256_mod_inv_avx2_4_up_one_dword QWORD L_sp256_mod_inv_avx2_4_up_one_dword _DATA ENDS _DATA SEGMENT ALIGN 16 -L_sp256_mod_inv_avx2_4_mask26 DWORD 67108863,67108863,67108863,67108863,67108863,0,0,0 +L_sp256_mod_inv_avx2_4_mask26 DWORD \ + 03ffffffh, 03ffffffh, 03ffffffh, 03ffffffh, + 03ffffffh, 00000000h, 00000000h, 00000000h ptr_L_sp256_mod_inv_avx2_4_mask26 QWORD L_sp256_mod_inv_avx2_4_mask26 _DATA ENDS ; /* Non-constant time modular inversion. @@ -57688,7 +57706,7 @@ _DATA ENDS ; * @param [in] m Modulus. ; * @return MP_OKAY on success. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_256_mod_inv_avx2_4 PROC push r12 push r13 @@ -57820,8 +57838,8 @@ L_256_mod_inv_avx2_4_usubv_sub_shr1: vpextrd r11d, xmm1, 1 vpextrd r13d, xmm1, 2 vpextrd r15d, xmm1, 3 - vextracti128 xmm0, ymm0, 1 - vextracti128 xmm1, ymm1, 1 + vextracti128 xmm0, ymm0, 1 + vextracti128 xmm1, ymm1, 1 vpextrd edi, xmm0, 0 vpextrd esi, xmm1, 0 jmp L_256_mod_inv_avx2_4_store_done @@ -57873,8 +57891,8 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1: vpextrd r11d, xmm3, 1 vpextrd r13d, xmm3, 2 vpextrd r15d, xmm3, 3 - vextracti128 xmm2, ymm2, 1 - vextracti128 xmm3, ymm3, 1 + vextracti128 xmm2, ymm2, 1 + vextracti128 xmm3, ymm3, 1 vpextrd edi, xmm2, 0 vpextrd esi, xmm3, 0 L_256_mod_inv_avx2_4_store_done: @@ -57934,7 +57952,7 @@ L_256_mod_inv_avx2_4_store_done: adc r14, r15 movsxd rdi, edi adc rdi, rsi - jge L_256_mod_inv_avx2_4_3_no_add_order + jge L_256_mod_inv_avx2_4_no_add_order mov r9, 2756213597218129 mov r11, 3054930678533947 mov r13, 4503599622973178 @@ -57962,7 +57980,7 @@ L_256_mod_inv_avx2_4_store_done: and r14, rdx sar r15, 52 add rdi, r15 -L_256_mod_inv_avx2_4_3_no_add_order: +L_256_mod_inv_avx2_4_no_add_order: mov r9, r10 mov r11, r12 mov r13, r14 @@ -58000,7 +58018,7 @@ L_256_mod_inv_avx2_4_3_no_add_order: pop r12 ret sp_256_mod_inv_avx2_4 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFDEF WOLFSSL_SP_384 @@ -58010,7 +58028,7 @@ IFDEF WOLFSSL_SP_384 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mul_6 PROC push r12 mov r9, rdx @@ -58266,7 +58284,7 @@ sp_384_mul_6 PROC pop r12 ret sp_384_mul_6 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -58274,7 +58292,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply. ; * b Second number to multiply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mul_avx2_6 PROC push r12 push r13 @@ -58482,14 +58500,14 @@ sp_384_mul_avx2_6 PROC pop r12 ret sp_384_mul_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_sqr_6 PROC push r12 push r13 @@ -58701,14 +58719,14 @@ sp_384_sqr_6 PROC pop r12 ret sp_384_sqr_6 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * r Result of squaring. ; * a Number to square in Montgomery form. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_sqr_avx2_6 PROC push r12 push r13 @@ -58858,7 +58876,7 @@ sp_384_sqr_avx2_6 PROC pop r12 ret sp_384_sqr_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Add b to a into r. (r = a + b) ; * @@ -58866,7 +58884,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_add_6 PROC push r12 push r13 @@ -58896,14 +58914,14 @@ sp_384_add_6 PROC pop r12 ret sp_384_add_6 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_sub_6 PROC push r12 push r13 @@ -58933,7 +58951,7 @@ sp_384_sub_6 PROC pop r12 ret sp_384_sub_6 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally copy a into r using the mask m. ; * m is -1 to copy and 0 when not. ; * @@ -58941,7 +58959,7 @@ _text ENDS ; * a A single precision number to copy. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_cond_copy_6 PROC push r12 push r13 @@ -58973,7 +58991,7 @@ sp_384_cond_copy_6 PROC pop r12 ret sp_384_cond_copy_6 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -58982,7 +59000,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_cond_sub_6 PROC sub rsp, 48 mov r10, QWORD PTR [r8] @@ -59031,14 +59049,14 @@ sp_384_cond_sub_6 PROC add rsp, 48 ret sp_384_cond_sub_6 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 384 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_reduce_6 PROC push r12 push r13 @@ -59203,14 +59221,14 @@ sp_384_mont_reduce_6 PROC pop r12 ret sp_384_mont_reduce_6 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 384 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_reduce_order_6 PROC push r12 push r13 @@ -59310,7 +59328,7 @@ ENDIF pop r12 ret sp_384_mont_reduce_order_6 ENDP -_text ENDS +_TEXT ENDS ; /* Compare a with b in constant time. ; * ; * a A single precision integer. @@ -59318,7 +59336,7 @@ _text ENDS ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_cmp_6 PROC push r12 xor r9, r9 @@ -59377,7 +59395,7 @@ sp_384_cmp_6 PROC pop r12 ret sp_384_cmp_6 ENDP -_text ENDS +_TEXT ENDS ; /* Add two Montgomery form numbers (r = a + b % m). ; * ; * r Result of addition. @@ -59385,7 +59403,7 @@ _text ENDS ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_add_6 PROC push r12 push r13 @@ -59439,14 +59457,14 @@ sp_384_mont_add_6 PROC pop r12 ret sp_384_mont_add_6 ENDP -_text ENDS +_TEXT ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of doubling. ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_dbl_6 PROC push r12 push r13 @@ -59501,14 +59519,14 @@ sp_384_mont_dbl_6 PROC pop r12 ret sp_384_mont_dbl_6 ENDP -_text ENDS +_TEXT ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of doubling. ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_tpl_6 PROC push r12 push r13 @@ -59591,7 +59609,7 @@ sp_384_mont_tpl_6 PROC pop r12 ret sp_384_mont_tpl_6 ENDP -_text ENDS +_TEXT ENDS ; /* Subtract two Montgomery form numbers (r = a - b % m). ; * ; * r Result of subtration. @@ -59599,7 +59617,7 @@ _text ENDS ; * b Number to subtract with in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_sub_6 PROC push r12 push r13 @@ -59653,14 +59671,14 @@ sp_384_mont_sub_6 PROC pop r12 ret sp_384_mont_sub_6 ENDP -_text ENDS +_TEXT ENDS ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_div2_6 PROC push r12 push r13 @@ -59723,7 +59741,7 @@ sp_384_mont_div2_6 PROC pop r12 ret sp_384_mont_div2_6 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible point that could be being copied. ; * @@ -59731,7 +59749,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of point to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_get_point_33_6 PROC sub rsp, 160 movdqu OWORD PTR [rsp], xmm6 @@ -59834,7 +59852,7 @@ L_384_get_point_33_6_start_2: add rsp, 160 ret sp_384_get_point_33_6 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible point that could be being copied. ; * @@ -59842,7 +59860,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of point to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_get_point_33_avx2_6 PROC sub rsp, 160 vmovdqu OWORD PTR [rsp], xmm6 @@ -59913,7 +59931,7 @@ L_384_get_point_33_avx2_6_start: add rsp, 160 ret sp_384_get_point_33_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFDEF HAVE_INTEL_AVX2 @@ -59923,7 +59941,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_reduce_order_avx2_6 PROC push r12 push r13 @@ -60237,7 +60255,7 @@ L_mont_loop_order_avx2_6: pop r12 ret sp_384_mont_reduce_order_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. @@ -60248,7 +60266,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_cond_sub_avx2_6 PROC push r12 mov r12, QWORD PTR [r8] @@ -60285,7 +60303,7 @@ sp_384_cond_sub_avx2_6 PROC pop r12 ret sp_384_cond_sub_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -60294,7 +60312,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mont_div2_avx2_6 PROC push r12 push r13 @@ -60356,7 +60374,7 @@ sp_384_mont_div2_avx2_6 PROC pop r12 ret sp_384_mont_div2_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible entry that could be being copied. @@ -60365,7 +60383,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_get_entry_64_6 PROC sub rsp, 160 movdqu OWORD PTR [rsp], xmm6 @@ -60438,7 +60456,7 @@ L_384_get_entry_64_6_start_0: add rsp, 160 ret sp_384_get_entry_64_6 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible entry that could be being copied. ; * @@ -60446,7 +60464,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_get_entry_64_avx2_6 PROC sub rsp, 96 vmovdqu OWORD PTR [rsp], xmm6 @@ -60499,7 +60517,7 @@ L_384_get_entry_64_avx2_6_start: add rsp, 96 ret sp_384_get_entry_64_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFNDEF WC_NO_CACHE_RESISTANT @@ -60509,7 +60527,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_get_entry_65_6 PROC sub rsp, 160 movdqu OWORD PTR [rsp], xmm6 @@ -60582,7 +60600,7 @@ L_384_get_entry_65_6_start_0: add rsp, 160 ret sp_384_get_entry_65_6 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible entry that could be being copied. ; * @@ -60590,7 +60608,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_get_entry_65_avx2_6 PROC sub rsp, 96 vmovdqu OWORD PTR [rsp], xmm6 @@ -60643,14 +60661,14 @@ L_384_get_entry_65_avx2_6_start: add rsp, 96 ret sp_384_get_entry_65_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF ; /* Add 1 to a. (a = a + 1) ; * ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_add_one_6 PROC add QWORD PTR [rcx], 1 adc QWORD PTR [rcx+8], 0 @@ -60660,7 +60678,7 @@ sp_384_add_one_6 PROC adc QWORD PTR [rcx+40], 0 ret sp_384_add_one_6 ENDP -_text ENDS +_TEXT ENDS ; /* Read big endian unsigned byte array into r. ; * Uses the bswap instruction. ; * @@ -60669,7 +60687,7 @@ _text ENDS ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_from_bin_bswap PROC push r12 push r13 @@ -60747,7 +60765,7 @@ L_384_from_bin_bswap_zero_end: pop r12 ret sp_384_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -60757,7 +60775,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_from_bin_movbe PROC push r12 mov r11, r8 @@ -60823,7 +60841,7 @@ L_384_from_bin_movbe_zero_end: pop r12 ret sp_384_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 48 @@ -60832,7 +60850,7 @@ ENDIF ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_to_bin_bswap_6 PROC mov rax, QWORD PTR [rcx+40] mov r8, QWORD PTR [rcx+32] @@ -60854,7 +60872,7 @@ sp_384_to_bin_bswap_6 PROC mov QWORD PTR [rdx+40], r8 ret sp_384_to_bin_bswap_6 ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 48 @@ -60863,7 +60881,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_to_bin_movbe_6 PROC movbe rax, QWORD PTR [rcx+40] movbe r8, QWORD PTR [rcx+32] @@ -60879,14 +60897,14 @@ sp_384_to_bin_movbe_6 PROC mov QWORD PTR [rdx+40], r8 ret sp_384_to_bin_movbe_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_sub_in_place_6 PROC push r12 push r13 @@ -60907,14 +60925,14 @@ sp_384_sub_in_place_6 PROC pop r12 ret sp_384_sub_in_place_6 ENDP -_text ENDS +_TEXT ENDS ; /* Mul a by digit b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mul_d_6 PROC push r12 mov r9, rdx @@ -60967,7 +60985,7 @@ sp_384_mul_d_6 PROC pop r12 ret sp_384_mul_d_6 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -60975,7 +60993,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_mul_d_avx2_6 PROC push r12 push r13 @@ -61021,7 +61039,7 @@ sp_384_mul_d_avx2_6 PROC pop r12 ret sp_384_mul_d_avx2_6 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -61031,7 +61049,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_384_word_asm_6 PROC mov r9, rdx mov rax, r9 @@ -61039,14 +61057,14 @@ div_384_word_asm_6 PROC div r8 ret div_384_word_asm_6 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Shift number right by 1 bit. (r = a >> 1) ; * ; * r Result of right shift by 1. ; * a Number to shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_rshift1_6 PROC push r12 mov rax, QWORD PTR [rdx] @@ -61070,14 +61088,14 @@ sp_384_rshift1_6 PROC pop r12 ret sp_384_rshift1_6 ENDP -_text ENDS +_TEXT ENDS ; /* Divide the number by 2 mod the prime. (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_384_div2_mod_6 PROC push r12 push r13 @@ -61133,8 +61151,8 @@ L_384_mod_inv_6_div2_mod_no_add: pop r12 ret sp_384_div2_mod_6 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA sp_384_num_bits_6 PROC xor rax, rax mov rdx, QWORD PTR [rcx+40] @@ -61188,7 +61206,7 @@ L_384_num_bits_6_end_0: L_384_num_bits_6_done: ret sp_384_num_bits_6 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF WOLFSSL_SP_521 ; /* Multiply a and b into r. (r = a * b) @@ -61197,7 +61215,7 @@ IFDEF WOLFSSL_SP_521 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mul_9 PROC push r12 mov r9, rdx @@ -61741,7 +61759,7 @@ sp_521_mul_9 PROC pop r12 ret sp_521_mul_9 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -61749,7 +61767,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply. ; * b Second number to multiply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mul_avx2_9 PROC push rbx push rbp @@ -62319,14 +62337,14 @@ L_end_521_mul_avx2_9: pop rbx ret sp_521_mul_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_sqr_9 PROC push r12 push r13 @@ -62736,14 +62754,14 @@ sp_521_sqr_9 PROC pop r12 ret sp_521_sqr_9 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_sqr_avx2_9 PROC push rbp push r12 @@ -63134,7 +63152,7 @@ L_end_521_sqr_avx2_9: pop rbp ret sp_521_sqr_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Add b to a into r. (r = a + b) ; * @@ -63142,7 +63160,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_add_9 PROC ; Add mov r9, QWORD PTR [rdx] @@ -63176,14 +63194,14 @@ sp_521_add_9 PROC adc rax, 0 ret sp_521_add_9 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into r. (r = a - b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_sub_9 PROC mov r9, QWORD PTR [rdx] sub r9, QWORD PTR [r8] @@ -63215,7 +63233,7 @@ sp_521_sub_9 PROC sbb rax, rax ret sp_521_sub_9 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally copy a into r using the mask m. ; * m is -1 to copy and 0 when not. ; * @@ -63223,7 +63241,7 @@ _text ENDS ; * a A single precision number to copy. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_cond_copy_9 PROC push r12 mov rax, QWORD PTR [rcx] @@ -63265,7 +63283,7 @@ sp_521_cond_copy_9 PROC pop r12 ret sp_521_cond_copy_9 ENDP -_text ENDS +_TEXT ENDS ; /* Multiply two Montgomery form numbers mod the modulus (prime). ; * (r = a * b mod m) ; * @@ -63275,7 +63293,7 @@ _text ENDS ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_mul_9 PROC push r12 push r13 @@ -63857,7 +63875,7 @@ sp_521_mont_mul_9 PROC pop r12 ret sp_521_mont_mul_9 ENDP -_text ENDS +_TEXT ENDS ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) ; * ; * r Result of squaring. @@ -63865,7 +63883,7 @@ _text ENDS ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_sqr_9 PROC push r12 push r13 @@ -64309,7 +64327,7 @@ sp_521_mont_sqr_9 PROC pop r12 ret sp_521_mont_sqr_9 ENDP -_text ENDS +_TEXT ENDS ; /* Compare a with b in constant time. ; * ; * a A single precision integer. @@ -64317,7 +64335,7 @@ _text ENDS ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_cmp_9 PROC push r12 xor r9, r9 @@ -64400,7 +64418,7 @@ sp_521_cmp_9 PROC pop r12 ret sp_521_cmp_9 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -64409,7 +64427,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_cond_sub_9 PROC sub rsp, 72 mov r10, QWORD PTR [r8] @@ -64479,14 +64497,14 @@ sp_521_cond_sub_9 PROC add rsp, 72 ret sp_521_cond_sub_9 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 521 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_reduce_9 PROC push r12 push r13 @@ -64548,14 +64566,14 @@ sp_521_mont_reduce_9 PROC pop r12 ret sp_521_mont_reduce_9 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 521 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_reduce_order_9 PROC push r12 push r13 @@ -64723,7 +64741,7 @@ ENDIF pop r12 ret sp_521_mont_reduce_order_9 ENDP -_text ENDS +_TEXT ENDS ; /* Add two Montgomery form numbers (r = a + b % m). ; * ; * r Result of addition. @@ -64731,7 +64749,7 @@ _text ENDS ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_add_9 PROC push r12 push r13 @@ -64786,14 +64804,14 @@ sp_521_mont_add_9 PROC pop r12 ret sp_521_mont_add_9 ENDP -_text ENDS +_TEXT ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of addition. ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_dbl_9 PROC push r12 push r13 @@ -64846,14 +64864,14 @@ sp_521_mont_dbl_9 PROC pop r12 ret sp_521_mont_dbl_9 ENDP -_text ENDS +_TEXT ENDS ; /* Triple a Montgomery form number (r = a + a + a % m). ; * ; * r Result of Tripling. ; * a Number to triple in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_tpl_9 PROC push r12 push r13 @@ -64915,7 +64933,7 @@ sp_521_mont_tpl_9 PROC pop r12 ret sp_521_mont_tpl_9 ENDP -_text ENDS +_TEXT ENDS ; /* Subtract two Montgomery form numbers (r = a - b % m). ; * ; * r Result of addition. @@ -64923,7 +64941,7 @@ _text ENDS ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_sub_9 PROC push r12 push r13 @@ -64979,14 +64997,14 @@ sp_521_mont_sub_9 PROC pop r12 ret sp_521_mont_sub_9 ENDP -_text ENDS +_TEXT ENDS ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_div2_9 PROC push r12 push r13 @@ -65040,7 +65058,7 @@ sp_521_mont_div2_9 PROC pop r12 ret sp_521_mont_div2_9 ENDP -_text ENDS +_TEXT ENDS IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible point that could be being copied. ; * @@ -65048,7 +65066,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of point to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_get_point_33_9 PROC push r12 push r13 @@ -65200,7 +65218,7 @@ L_521_get_point_33_9_start_2: pop r12 ret sp_521_get_point_33_9 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible point that could be being copied. ; * @@ -65208,7 +65226,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of point to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_get_point_33_avx2_9 PROC push r12 push r13 @@ -65310,7 +65328,7 @@ L_521_get_point_33_avx2_9_start: pop r12 ret sp_521_get_point_33_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFDEF HAVE_INTEL_AVX2 @@ -65323,7 +65341,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_mul_avx2_9 PROC push rbx push rbp @@ -65923,7 +65941,7 @@ sp_521_mont_mul_avx2_9 PROC pop rbx ret sp_521_mont_mul_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) @@ -65933,7 +65951,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m Modulus (prime). ; * mp Montgomery multiplier. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_sqr_avx2_9 PROC push rbp push r12 @@ -66365,7 +66383,7 @@ sp_521_mont_sqr_avx2_9 PROC pop rbp ret sp_521_mont_sqr_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. @@ -66376,7 +66394,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_cond_sub_avx2_9 PROC push r12 mov r12, QWORD PTR [r8] @@ -66428,7 +66446,7 @@ sp_521_cond_sub_avx2_9 PROC pop r12 ret sp_521_cond_sub_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 521 bits using Montgomery reduction. @@ -66437,7 +66455,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_reduce_order_avx2_9 PROC push r12 push r13 @@ -66741,7 +66759,7 @@ L_521_mont_reduce_order_avx2_9_loop: pop r12 ret sp_521_mont_reduce_order_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -66750,7 +66768,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mont_div2_avx2_9 PROC push r12 push r13 @@ -66804,7 +66822,7 @@ sp_521_mont_div2_avx2_9 PROC pop r12 ret sp_521_mont_div2_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFNDEF WC_NO_CACHE_RESISTANT ; /* Touch each possible entry that could be being copied. @@ -66813,7 +66831,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_get_entry_64_9 PROC push r12 sub rsp, 160 @@ -66937,7 +66955,7 @@ L_521_get_entry_64_9_start_1: pop r12 ret sp_521_get_entry_64_9 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible entry that could be being copied. ; * @@ -66945,7 +66963,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_get_entry_64_avx2_9 PROC push r12 push r13 @@ -67020,7 +67038,7 @@ L_521_get_entry_64_avx2_9_start: pop r12 ret sp_521_get_entry_64_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF IFNDEF WC_NO_CACHE_RESISTANT @@ -67030,7 +67048,7 @@ IFNDEF WC_NO_CACHE_RESISTANT ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_get_entry_65_9 PROC push r12 sub rsp, 160 @@ -67154,7 +67172,7 @@ L_521_get_entry_65_9_start_1: pop r12 ret sp_521_get_entry_65_9 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Touch each possible entry that could be being copied. ; * @@ -67162,7 +67180,7 @@ IFDEF HAVE_INTEL_AVX2 ; * table Table - start of the entries to access ; * idx Index of entry to retrieve. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_get_entry_65_avx2_9 PROC push r12 push r13 @@ -67237,14 +67255,14 @@ L_521_get_entry_65_avx2_9_start: pop r12 ret sp_521_get_entry_65_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF ; /* Add 1 to a. (a = a + 1) ; * ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_add_one_9 PROC add QWORD PTR [rcx], 1 adc QWORD PTR [rcx+8], 0 @@ -67257,7 +67275,7 @@ sp_521_add_one_9 PROC adc QWORD PTR [rcx+64], 0 ret sp_521_add_one_9 ENDP -_text ENDS +_TEXT ENDS ; /* Read big endian unsigned byte array into r. ; * Uses the bswap instruction. ; * @@ -67266,7 +67284,7 @@ _text ENDS ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_from_bin_bswap PROC push r12 push r13 @@ -67344,7 +67362,7 @@ L_521_from_bin_bswap_zero_end: pop r12 ret sp_521_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -67354,7 +67372,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_from_bin_movbe PROC push r12 mov r11, r8 @@ -67420,7 +67438,7 @@ L_521_from_bin_movbe_zero_end: pop r12 ret sp_521_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 65 @@ -67429,7 +67447,7 @@ ENDIF ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_to_bin_bswap_9 PROC mov r8b, BYTE PTR [rcx+64] mov al, BYTE PTR [rcx+65] @@ -67461,7 +67479,7 @@ sp_521_to_bin_bswap_9 PROC mov QWORD PTR [rdx+58], r8 ret sp_521_to_bin_bswap_9 ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Write r as big endian to byte array. ; * Fixed length number of bytes written: 65 @@ -67470,7 +67488,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * r A single precision integer. ; * a Byte array. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_to_bin_movbe_9 PROC mov r8b, BYTE PTR [rcx+64] mov al, BYTE PTR [rcx+65] @@ -67494,14 +67512,14 @@ sp_521_to_bin_movbe_9 PROC mov QWORD PTR [rdx+58], r8 ret sp_521_to_bin_movbe_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Shift number right by 1 bit. (r = a >> 1) ; * ; * r Result of right shift by 1. ; * a Number to shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_rshift_9 PROC push r12 mov rax, rcx @@ -67536,14 +67554,14 @@ sp_521_rshift_9 PROC pop r12 ret sp_521_rshift_9 ENDP -_text ENDS +_TEXT ENDS ; /* Shift number left by n bit. (r = a << n) ; * ; * r Result of left shift by n. ; * a Number to shift. ; * n Amoutnt o shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_lshift_9 PROC push r12 push r13 @@ -67583,14 +67601,14 @@ sp_521_lshift_9 PROC pop r12 ret sp_521_lshift_9 ENDP -_text ENDS +_TEXT ENDS ; /* Shift number left by n bit. (r = a << n) ; * ; * r Result of left shift by n. ; * a Number to shift. ; * n Amoutnt o shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_lshift_18 PROC push r12 push r13 @@ -67657,13 +67675,13 @@ sp_521_lshift_18 PROC pop r12 ret sp_521_lshift_18 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_sub_in_place_9 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -67695,14 +67713,14 @@ sp_521_sub_in_place_9 PROC sbb rax, rax ret sp_521_sub_in_place_9 ENDP -_text ENDS +_TEXT ENDS ; /* Mul a by digit b into r. (r = a * b) ; * ; * r A single precision integer. ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mul_d_9 PROC push r12 mov r9, rdx @@ -67779,7 +67797,7 @@ sp_521_mul_d_9 PROC pop r12 ret sp_521_mul_d_9 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -67787,7 +67805,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_mul_d_avx2_9 PROC push r12 push r13 @@ -67851,7 +67869,7 @@ sp_521_mul_d_avx2_9 PROC pop r12 ret sp_521_mul_d_avx2_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -67861,7 +67879,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_521_word_asm_9 PROC mov r9, rdx mov rax, r9 @@ -67869,14 +67887,14 @@ div_521_word_asm_9 PROC div r8 ret div_521_word_asm_9 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Shift number right by 1 bit. (r = a >> 1) ; * ; * r Result of right shift by 1. ; * a Number to shift. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_rshift1_9 PROC push r12 mov rax, QWORD PTR [rdx] @@ -67909,14 +67927,14 @@ sp_521_rshift1_9 PROC pop r12 ret sp_521_rshift1_9 ENDP -_text ENDS +_TEXT ENDS ; /* Divide the number by 2 mod the prime. (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_521_div2_mod_9 PROC push r12 mov rax, QWORD PTR [rdx] @@ -67989,8 +68007,8 @@ L_521_mod_inv_9_div2_mod_no_add: pop r12 ret sp_521_div2_mod_9 ENDP -_text ENDS -_text SEGMENT READONLY PARA +_TEXT ENDS +_TEXT SEGMENT READONLY PARA sp_521_num_bits_9 PROC xor rax, rax mov rdx, QWORD PTR [rcx+64] @@ -68068,7 +68086,7 @@ L_521_num_bits_9_end_0: L_521_num_bits_9_done: ret sp_521_num_bits_9 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF WOLFSSL_SP_1024 ; /* Multiply a and b into r. (r = a * b) @@ -68077,7 +68095,7 @@ IFDEF WOLFSSL_SP_1024 ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mul_16 PROC push r12 mov r9, rdx @@ -69713,13 +69731,13 @@ sp_1024_mul_16 PROC pop r12 ret sp_1024_mul_16 ENDP -_text ENDS +_TEXT ENDS ; /* Square a and put result in r. (r = a * a) ; * ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_sqr_16 PROC push r12 push r13 @@ -70801,7 +70819,7 @@ sp_1024_sqr_16 PROC pop r12 ret sp_1024_sqr_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Multiply a and b into r. (r = a * b) ; * @@ -70809,7 +70827,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a First number to multiply. ; * b Second number to multiply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mul_avx2_16 PROC push rbx push rbp @@ -72472,7 +72490,7 @@ L_end_1024_mul_avx2_16: pop rbx ret sp_1024_mul_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Square a and put result in r. (r = a * a) @@ -72480,7 +72498,7 @@ IFDEF HAVE_INTEL_AVX2 ; * r A single precision integer. ; * a A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_sqr_avx2_16 PROC push rbp push r12 @@ -73526,7 +73544,7 @@ L_end_1024_sqr_avx2_16: pop rbp ret sp_1024_sqr_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Add b to a into r. (r = a + b) ; * @@ -73534,7 +73552,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_add_16 PROC ; Add mov r9, QWORD PTR [rdx] @@ -73589,13 +73607,13 @@ sp_1024_add_16 PROC adc rax, 0 ret sp_1024_add_16 ENDP -_text ENDS +_TEXT ENDS ; /* Sub b from a into a. (a -= b) ; * ; * a A single precision integer and result. ; * b A single precision integer. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_sub_in_place_16 PROC mov r8, QWORD PTR [rcx] sub r8, QWORD PTR [rdx] @@ -73648,7 +73666,7 @@ sp_1024_sub_in_place_16 PROC sbb rax, rax ret sp_1024_sub_in_place_16 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. ; * @@ -73657,7 +73675,7 @@ _text ENDS ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_cond_sub_16 PROC sub rsp, 128 mov r10, QWORD PTR [r8] @@ -73776,7 +73794,7 @@ sp_1024_cond_sub_16 PROC add rsp, 128 ret sp_1024_cond_sub_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Conditionally subtract b from a using the mask m. ; * m is -1 to subtract and 0 when not copying. @@ -73786,7 +73804,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b A single precision number to subtract. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_cond_sub_avx2_16 PROC push r12 mov r12, QWORD PTR [r8] @@ -73873,7 +73891,7 @@ sp_1024_cond_sub_avx2_16 PROC pop r12 ret sp_1024_cond_sub_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Mul a by digit b into r. (r = a * b) ; * @@ -73881,7 +73899,7 @@ ENDIF ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mul_d_16 PROC push r12 mov r9, rdx @@ -74014,7 +74032,7 @@ sp_1024_mul_d_16 PROC pop r12 ret sp_1024_mul_d_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Mul a by digit b into r. (r = a * b) ; * @@ -74022,7 +74040,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a A single precision integer. ; * b A single precision digit. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mul_d_avx2_16 PROC push r12 push r13 @@ -74128,7 +74146,7 @@ sp_1024_mul_d_avx2_16 PROC pop r12 ret sp_1024_mul_d_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF _WIN64 ; /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div) @@ -74138,7 +74156,7 @@ IFDEF _WIN64 ; * div The dividend. ; * returns the result of the division. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA div_1024_word_asm_16 PROC mov r9, rdx mov rax, r9 @@ -74146,7 +74164,7 @@ div_1024_word_asm_16 PROC div r8 ret div_1024_word_asm_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Compare a with b in constant time. ; * @@ -74155,7 +74173,7 @@ ENDIF ; * return -ve, 0 or +ve if a is less than, equal to or greater than b ; * respectively. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_cmp_16 PROC push r12 xor r9, r9 @@ -74294,7 +74312,7 @@ sp_1024_cmp_16 PROC pop r12 ret sp_1024_cmp_16 ENDP -_text ENDS +_TEXT ENDS ; /* Conditionally copy a into r using the mask m. ; * m is -1 to copy and 0 when not. ; * @@ -74302,7 +74320,7 @@ _text ENDS ; * a A single precision number to copy. ; * m Mask value to apply. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_cond_copy_16 PROC mov rax, QWORD PTR [rcx] mov r9, QWORD PTR [rcx+8] @@ -74370,14 +74388,14 @@ sp_1024_cond_copy_16 PROC xor QWORD PTR [rcx+120], r11 ret sp_1024_cond_copy_16 ENDP -_text ENDS +_TEXT ENDS ; /* Reduce the number back to 1024 bits using Montgomery reduction. ; * ; * a A single precision number to reduce in place. ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_reduce_16 PROC push r12 push r13 @@ -74582,7 +74600,7 @@ ENDIF pop r12 ret sp_1024_mont_reduce_16 ENDP -_text ENDS +_TEXT ENDS ; /* Add two Montgomery form numbers (r = a + b % m). ; * ; * r Result of addition. @@ -74590,7 +74608,7 @@ _text ENDS ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_add_16 PROC push r12 push r13 @@ -74750,14 +74768,14 @@ sp_1024_mont_add_16 PROC pop r12 ret sp_1024_mont_add_16 ENDP -_text ENDS +_TEXT ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of addition. ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_dbl_16 PROC push r12 sub rsp, 128 @@ -74915,14 +74933,14 @@ sp_1024_mont_dbl_16 PROC pop r12 ret sp_1024_mont_dbl_16 ENDP -_text ENDS +_TEXT ENDS ; /* Triple a Montgomery form number (r = a + a + a % m). ; * ; * r Result of addition. ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_tpl_16 PROC push r12 sub rsp, 128 @@ -75230,7 +75248,7 @@ sp_1024_mont_tpl_16 PROC pop r12 ret sp_1024_mont_tpl_16 ENDP -_text ENDS +_TEXT ENDS ; /* Subtract two Montgomery form numbers (r = a - b % m). ; * ; * r Result of addition. @@ -75238,7 +75256,7 @@ _text ENDS ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_sub_16 PROC push r12 push r13 @@ -75394,14 +75412,14 @@ sp_1024_mont_sub_16 PROC pop r12 ret sp_1024_mont_sub_16 ENDP -_text ENDS +_TEXT ENDS ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; * ; * r Result of division by 2. ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_div2_16 PROC push r12 push r13 @@ -75544,7 +75562,7 @@ sp_1024_mont_div2_16 PROC pop r12 ret sp_1024_mont_div2_16 ENDP -_text ENDS +_TEXT ENDS IFDEF HAVE_INTEL_AVX2 ; /* Reduce the number back to 1024 bits using Montgomery reduction. ; * @@ -75552,7 +75570,7 @@ IFDEF HAVE_INTEL_AVX2 ; * m The single precision number representing the modulus. ; * mp The digit representing the negative inverse of m mod 2^n. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_reduce_avx2_16 PROC push r12 push r13 @@ -75879,7 +75897,7 @@ L_1024_mont_reduce_avx2_16_loop: pop r12 ret sp_1024_mont_reduce_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Add two Montgomery form numbers (r = a + b % m). @@ -75889,7 +75907,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_add_avx2_16 PROC push r12 push r13 @@ -76031,7 +76049,7 @@ sp_1024_mont_add_avx2_16 PROC pop r12 ret sp_1024_mont_add_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Double a Montgomery form number (r = a + a % m). @@ -76040,7 +76058,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_dbl_avx2_16 PROC push r12 mov rax, QWORD PTR [rdx] @@ -76180,7 +76198,7 @@ sp_1024_mont_dbl_avx2_16 PROC pop r12 ret sp_1024_mont_dbl_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Triple a Montgomery form number (r = a + a + a % m). @@ -76189,7 +76207,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_tpl_avx2_16 PROC push r12 mov rax, QWORD PTR [rdx] @@ -76463,7 +76481,7 @@ sp_1024_mont_tpl_avx2_16 PROC pop r12 ret sp_1024_mont_tpl_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Subtract two Montgomery form numbers (r = a - b % m). @@ -76473,7 +76491,7 @@ IFDEF HAVE_INTEL_AVX2 ; * b Second number to add in Montgomery form. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_sub_avx2_16 PROC push r12 push r13 @@ -76611,7 +76629,7 @@ sp_1024_mont_sub_avx2_16 PROC pop r12 ret sp_1024_mont_sub_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF IFDEF HAVE_INTEL_AVX2 ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) @@ -76620,7 +76638,7 @@ IFDEF HAVE_INTEL_AVX2 ; * a Number to divide. ; * m Modulus (prime). ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_mont_div2_avx2_16 PROC push r12 push r13 @@ -76762,7 +76780,7 @@ sp_1024_mont_div2_avx2_16 PROC pop r12 ret sp_1024_mont_div2_avx2_16 ENDP -_text ENDS +_TEXT ENDS ENDIF ; /* Read big endian unsigned byte array into r. ; * Uses the bswap instruction. @@ -76772,7 +76790,7 @@ ENDIF ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_from_bin_bswap PROC push r12 push r13 @@ -76850,7 +76868,7 @@ L_1024_from_bin_bswap_zero_end: pop r12 ret sp_1024_from_bin_bswap ENDP -_text ENDS +_TEXT ENDS IFNDEF NO_MOVBE_SUPPORT ; /* Read big endian unsigned byte array into r. ; * Uses the movbe instruction which is an optional instruction. @@ -76860,7 +76878,7 @@ IFNDEF NO_MOVBE_SUPPORT ; * a Byte array. ; * n Number of bytes in array to read. ; */ -_text SEGMENT READONLY PARA +_TEXT SEGMENT READONLY PARA sp_1024_from_bin_movbe PROC push r12 mov r11, r8 @@ -76926,7 +76944,7 @@ L_1024_from_bin_movbe_zero_end: pop r12 ret sp_1024_from_bin_movbe ENDP -_text ENDS +_TEXT ENDS ENDIF ENDIF END diff --git a/wolfcrypt/src/wc_mldsa_asm.S b/wolfcrypt/src/wc_mldsa_asm.S index f21bd8ff7fa..52acb659dcd 100644 --- a/wolfcrypt/src/wc_mldsa_asm.S +++ b/wolfcrypt/src/wc_mldsa_asm.S @@ -54,6 +54,11 @@ #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ mldsa_q: .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 @@ -62,6 +67,11 @@ mldsa_q: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ mldsa_qinv: .long 0x03802001,0x03802001,0x03802001,0x03802001 .long 0x03802001,0x03802001,0x03802001,0x03802001 @@ -70,6 +80,11 @@ mldsa_qinv: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ mldsa_v: .long 0x00400000,0x00400000,0x00400000,0x00400000 .long 0x00400000,0x00400000,0x00400000,0x00400000 @@ -78,6 +93,11 @@ mldsa_v: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_avx2_zetas: .long 0x000064f7,0x000064f7,0x000064f7,0x000064f7 .long 0x000064f7,0x000064f7,0x000064f7,0x000064f7 @@ -400,6 +420,11 @@ L_mldsa_avx2_zetas: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_avx2_zetas_basemul: .long 0xffc406e5,0xffe9d65d,0x003cf91b,0x001729a3 .long 0xffe8ac81,0x003509ee,0x0018537f,0xffcbf612 @@ -470,6 +495,11 @@ L_mldsa_avx2_zetas_basemul: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_avx2_zetas_1: .long 0xffc97e01,0xffc97e01,0xffc97e01,0xffc97e01 .long 0xffc97e01,0xffc97e01,0xffc97e01,0xffc97e01 @@ -478,6 +508,11 @@ L_mldsa_avx2_zetas_1: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_avx2_zetas_inv: .long 0xffe1d632,0x000ce94a,0xffeaa198,0xffc3ea36 .long 0x0014c921,0x0000bcb2,0xffc430d4,0x000875b0 @@ -16149,9 +16184,9 @@ _wc_mldsa_mul_vec_7_avx2: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_mldsa_rej_idx: .quad 0x0000000000000000,0x0000000000000000 @@ -16671,6 +16706,11 @@ L_mldsa_rej_idx: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_rej_q: .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 @@ -16685,8 +16725,8 @@ L_mldsa_rej_q: .p2align 5 #endif /* __APPLE__ */ L_mldsa_rej_mask: -.quad 0x7fffff007fffff, 0x7fffff007fffff -.quad 0x7fffff007fffff, 0x7fffff007fffff +.quad 0x007fffff007fffff,0x007fffff007fffff +.quad 0x007fffff007fffff,0x007fffff007fffff #ifndef __APPLE__ .data #else @@ -16698,8 +16738,8 @@ L_mldsa_rej_mask: .p2align 5 #endif /* __APPLE__ */ L_mldsa_rej_shuffle: -.quad 0x5040300020100, 0xb0a0900080706 -.quad 0x9080700060504, 0xf0e0d000c0b0a +.quad 0x0005040300020100,0x000b0a0900080706 +.quad 0x0009080700060504,0x000f0e0d000c0b0a #ifndef __APPLE__ .data #else @@ -16711,8 +16751,8 @@ L_mldsa_rej_shuffle: .p2align 5 #endif /* __APPLE__ */ L_mldsa_rej_ones: -.quad 0x101010101010101, 0x101010101010101 -.quad 0x101010101010101, 0x101010101010101 +.quad 0x0101010101010101,0x0101010101010101 +.quad 0x0101010101010101,0x0101010101010101 #ifndef __APPLE__ .text .globl wc_mldsa_rej_uniform_n_avx2 @@ -17307,9 +17347,9 @@ L_mldsa_rej_uniform_avx2_done_64: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_mldsa_shufb_rej_idx: .quad 0xffffffffffffffff,0xffffffffffffffff @@ -17579,14 +17619,8 @@ L_mldsa_shufb_rej_idx: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta2_mask_nibbles: -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f #ifndef __APPLE__ .data #else @@ -17598,14 +17632,8 @@ L_mldsa_extract_coeffs_eta2_mask_nibbles: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta2_mul: -.value 0x3340,0x3340 -.value 0x3340,0x3340 -.value 0x3340,0x3340 -.value 0x3340,0x3340 -.value 0x3340,0x3340 -.value 0x3340,0x3340 -.value 0x3340,0x3340 -.value 0x3340,0x3340 +.short 0x3340,0x3340,0x3340,0x3340,0x3340,0x3340,0x3340,0x3340 +.short 0x3340,0x3340,0x3340,0x3340,0x3340,0x3340,0x3340,0x3340 #ifndef __APPLE__ .data #else @@ -17617,14 +17645,8 @@ L_mldsa_extract_coeffs_eta2_mul: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta2_five: -.value 0x0005,0x0005 -.value 0x0005,0x0005 -.value 0x0005,0x0005 -.value 0x0005,0x0005 -.value 0x0005,0x0005 -.value 0x0005,0x0005 -.value 0x0005,0x0005 -.value 0x0005,0x0005 +.short 0x0005,0x0005,0x0005,0x0005,0x0005,0x0005,0x0005,0x0005 +.short 0x0005,0x0005,0x0005,0x0005,0x0005,0x0005,0x0005,0x0005 #ifndef __APPLE__ .data #else @@ -17636,19 +17658,18 @@ L_mldsa_extract_coeffs_eta2_five: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta2_two: -.value 0x0002,0x0002 -.value 0x0002,0x0002 -.value 0x0002,0x0002 -.value 0x0002,0x0002 -.value 0x0002,0x0002 -.value 0x0002,0x0002 -.value 0x0002,0x0002 -.value 0x0002,0x0002 +.short 0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002 +.short 0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta2_nibble_table: .long 0x00000002,0x00000001,0x00000000,0xffffffff .long 0xfffffffe,0x00000002,0x00000001,0x00000000 @@ -18228,7 +18249,7 @@ L_mldsa_extract_coeffs_eta2_start_byte: movl (%r13,%rbx,4), %r12d movl %r12d, (%rdx) addl %r11d, %r8d - shl $2, %r11d + shll $2, %r11d addq %r11, %rdx cmpl $0x100, %r8d je L_mldsa_extract_coeffs_eta2_done @@ -18238,7 +18259,7 @@ L_mldsa_extract_coeffs_eta2_start_byte: movl (%r13,%rax,4), %r12d movl %r12d, (%rdx) addl %r11d, %r8d - shl $2, %r11d + shll $2, %r11d addq %r11, %rdx cmpl $0x100, %r8d je L_mldsa_extract_coeffs_eta2_done @@ -18264,14 +18285,8 @@ L_mldsa_extract_coeffs_eta2_done: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta4_mask_nibbles: -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f #ifndef __APPLE__ .data #else @@ -18283,14 +18298,8 @@ L_mldsa_extract_coeffs_eta4_mask_nibbles: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta4_nine: -.value 0x0009,0x0009 -.value 0x0009,0x0009 -.value 0x0009,0x0009 -.value 0x0009,0x0009 -.value 0x0009,0x0009 -.value 0x0009,0x0009 -.value 0x0009,0x0009 -.value 0x0009,0x0009 +.short 0x0009,0x0009,0x0009,0x0009,0x0009,0x0009,0x0009,0x0009 +.short 0x0009,0x0009,0x0009,0x0009,0x0009,0x0009,0x0009,0x0009 #ifndef __APPLE__ .data #else @@ -18302,14 +18311,8 @@ L_mldsa_extract_coeffs_eta4_nine: .p2align 4 #endif /* __APPLE__ */ L_mldsa_extract_coeffs_eta4_four: -.value 0x0004,0x0004 -.value 0x0004,0x0004 -.value 0x0004,0x0004 -.value 0x0004,0x0004 -.value 0x0004,0x0004 -.value 0x0004,0x0004 -.value 0x0004,0x0004 -.value 0x0004,0x0004 +.short 0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004 +.short 0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004 #ifndef __APPLE__ .text .globl wc_mldsa_extract_coeffs_eta4_avx2 @@ -18832,7 +18835,7 @@ L_mldsa_extract_coeffs_eta4_start_byte: subl %ebx, %r12d movl %r12d, (%rdx) addl %r11d, %r8d - shl $2, %r11d + shll $2, %r11d addq %r11, %rdx cmpl $0x100, %r8d je L_mldsa_extract_coeffs_eta4_done @@ -18843,7 +18846,7 @@ L_mldsa_extract_coeffs_eta4_start_byte: subl %eax, %r12d movl %r12d, (%rdx) addl %r11d, %r8d - shl $2, %r11d + shll $2, %r11d addq %r11, %rdx cmpl $0x100, %r8d je L_mldsa_extract_coeffs_eta4_done @@ -19057,6 +19060,11 @@ _wc_mldsa_redistribute_17_rand_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_eta_2_avx2_two: .long 0x00000002,0x00000002,0x00000002,0x00000002 .long 0x00000002,0x00000002,0x00000002,0x00000002 @@ -19065,6 +19073,11 @@ L_mldsa_encode_eta_2_avx2_two: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_eta_2_avx2_vs_3: .long 0x00000000,0x00000003,0x00000006,0x00000009 .long 0x00000004,0x00000007,0x0000000a,0x0000000d @@ -19079,14 +19092,10 @@ L_mldsa_encode_eta_2_avx2_vs_3: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_eta_2_avx2_shuff_3_even: -.value 0xff00,0x504 -.value 0xff08,0xd0c -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xff00,0x504 -.value 0xff08,0xd0c -.value 0xffff,0xffff -.value 0xffff,0xffff +.byte 0x00,0xff,0x04,0x05,0x08,0xff,0x0c,0x0d +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0x00,0xff,0x04,0x05,0x08,0xff,0x0c,0x0d +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -19098,14 +19107,10 @@ L_mldsa_encode_eta_2_avx2_shuff_3_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_eta_2_avx2_shuff_3_odd: -.value 0xff02,0x7ff -.value 0xb0a,0xfff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xff02,0x7ff -.value 0xb0a,0xfff -.value 0xffff,0xffff -.value 0xffff,0xffff +.byte 0x02,0xff,0xff,0x07,0x0a,0x0b,0xff,0x0f +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0x02,0xff,0xff,0x07,0x0a,0x0b,0xff,0x0f +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -19117,14 +19122,10 @@ L_mldsa_encode_eta_2_avx2_shuff_3_odd: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_eta_2_avx2_shuff_6_even: -.value 0x400,0x805 -.value 0xd0c,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0x400 -.value 0x805,0xd0c -.value 0xffff,0xffff +.byte 0x00,0x04,0x05,0x08,0x0c,0x0d,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04 +.byte 0x05,0x08,0x0c,0x0d,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -19136,14 +19137,10 @@ L_mldsa_encode_eta_2_avx2_shuff_6_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_eta_2_avx2_shuff_6_odd: -.value 0x302,0xa07 -.value 0xf0b,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0x302 -.value 0xa07,0xf0b -.value 0xffff,0xffff +.byte 0x02,0x03,0x07,0x0a,0x0b,0x0f,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x03 +.byte 0x07,0x0a,0x0b,0x0f,0xff,0xff,0xff,0xff #ifndef __APPLE__ .text .globl wc_mldsa_vec_encode_eta_2_avx2 @@ -19424,6 +19421,11 @@ L_mldsa_encode_eta_2_avx2_loop: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_eta_4_avx2_four: .long 0x00000004,0x00000004,0x00000004,0x00000004 .long 0x00000004,0x00000004,0x00000004,0x00000004 @@ -19432,6 +19434,11 @@ L_mldsa_encode_eta_4_avx2_four: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_eta_4_avx2_vs_4: .long 0x00000000,0x00000004,0x00000000,0x00000004 .long 0x00000000,0x00000004,0x00000000,0x00000004 @@ -20450,14 +20457,10 @@ _wc_mldsa_vec_encode_eta_4_avx2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_0: -.value 0xff00,0xffff -.value 0xff00,0xffff -.value 0x100,0xffff -.value 0xff01,0xffff -.value 0xff01,0xffff -.value 0x201,0xffff -.value 0xff02,0xffff -.value 0xff02,0xffff +.byte 0x00,0xff,0xff,0xff,0x00,0xff,0xff,0xff +.byte 0x00,0x01,0xff,0xff,0x01,0xff,0xff,0xff +.byte 0x01,0xff,0xff,0xff,0x01,0x02,0xff,0xff +.byte 0x02,0xff,0xff,0xff,0x02,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20469,14 +20472,10 @@ L_mldsa_decode_eta_2_avx2_shuff_0: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_1: -.value 0xff01,0xffff -.value 0xff01,0xffff -.value 0x201,0xffff -.value 0xff02,0xffff -.value 0xff02,0xffff -.value 0x302,0xffff -.value 0xff03,0xffff -.value 0xff03,0xffff +.byte 0x01,0xff,0xff,0xff,0x01,0xff,0xff,0xff +.byte 0x01,0x02,0xff,0xff,0x02,0xff,0xff,0xff +.byte 0x02,0xff,0xff,0xff,0x02,0x03,0xff,0xff +.byte 0x03,0xff,0xff,0xff,0x03,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20488,14 +20487,10 @@ L_mldsa_decode_eta_2_avx2_shuff_1: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_2: -.value 0xff02,0xffff -.value 0xff02,0xffff -.value 0x302,0xffff -.value 0xff03,0xffff -.value 0xff03,0xffff -.value 0x403,0xffff -.value 0xff04,0xffff -.value 0xff04,0xffff +.byte 0x02,0xff,0xff,0xff,0x02,0xff,0xff,0xff +.byte 0x02,0x03,0xff,0xff,0x03,0xff,0xff,0xff +.byte 0x03,0xff,0xff,0xff,0x03,0x04,0xff,0xff +.byte 0x04,0xff,0xff,0xff,0x04,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20507,14 +20502,10 @@ L_mldsa_decode_eta_2_avx2_shuff_2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_3: -.value 0xff03,0xffff -.value 0xff03,0xffff -.value 0x403,0xffff -.value 0xff04,0xffff -.value 0xff04,0xffff -.value 0x504,0xffff -.value 0xff05,0xffff -.value 0xff05,0xffff +.byte 0x03,0xff,0xff,0xff,0x03,0xff,0xff,0xff +.byte 0x03,0x04,0xff,0xff,0x04,0xff,0xff,0xff +.byte 0x04,0xff,0xff,0xff,0x04,0x05,0xff,0xff +.byte 0x05,0xff,0xff,0xff,0x05,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20526,14 +20517,10 @@ L_mldsa_decode_eta_2_avx2_shuff_3: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_4: -.value 0xff04,0xffff -.value 0xff04,0xffff -.value 0x504,0xffff -.value 0xff05,0xffff -.value 0xff05,0xffff -.value 0x605,0xffff -.value 0xff06,0xffff -.value 0xff06,0xffff +.byte 0x04,0xff,0xff,0xff,0x04,0xff,0xff,0xff +.byte 0x04,0x05,0xff,0xff,0x05,0xff,0xff,0xff +.byte 0x05,0xff,0xff,0xff,0x05,0x06,0xff,0xff +.byte 0x06,0xff,0xff,0xff,0x06,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20545,14 +20532,10 @@ L_mldsa_decode_eta_2_avx2_shuff_4: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_5: -.value 0xff05,0xffff -.value 0xff05,0xffff -.value 0x605,0xffff -.value 0xff06,0xffff -.value 0xff06,0xffff -.value 0x706,0xffff -.value 0xff07,0xffff -.value 0xff07,0xffff +.byte 0x05,0xff,0xff,0xff,0x05,0xff,0xff,0xff +.byte 0x05,0x06,0xff,0xff,0x06,0xff,0xff,0xff +.byte 0x06,0xff,0xff,0xff,0x06,0x07,0xff,0xff +.byte 0x07,0xff,0xff,0xff,0x07,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20564,14 +20547,10 @@ L_mldsa_decode_eta_2_avx2_shuff_5: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_6: -.value 0xff06,0xffff -.value 0xff06,0xffff -.value 0x706,0xffff -.value 0xff07,0xffff -.value 0xff07,0xffff -.value 0x807,0xffff -.value 0xff08,0xffff -.value 0xff08,0xffff +.byte 0x06,0xff,0xff,0xff,0x06,0xff,0xff,0xff +.byte 0x06,0x07,0xff,0xff,0x07,0xff,0xff,0xff +.byte 0x07,0xff,0xff,0xff,0x07,0x08,0xff,0xff +.byte 0x08,0xff,0xff,0xff,0x08,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20583,19 +20562,20 @@ L_mldsa_decode_eta_2_avx2_shuff_6: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_shuff_7: -.value 0xff07,0xffff -.value 0xff07,0xffff -.value 0x807,0xffff -.value 0xff08,0xffff -.value 0xff08,0xffff -.value 0x908,0xffff -.value 0xff09,0xffff -.value 0xff09,0xffff +.byte 0x07,0xff,0xff,0xff,0x07,0xff,0xff,0xff +.byte 0x07,0x08,0xff,0xff,0x08,0xff,0xff,0xff +.byte 0x08,0xff,0xff,0xff,0x08,0x09,0xff,0xff +.byte 0x09,0xff,0xff,0xff,0x09,0xff,0xff,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_two: .long 0x00000002,0x00000002,0x00000002,0x00000002 .long 0x00000002,0x00000002,0x00000002,0x00000002 @@ -20604,6 +20584,11 @@ L_mldsa_decode_eta_2_avx2_two: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_vs: .long 0x00000000,0x00000003,0x00000006,0x00000001 .long 0x00000004,0x00000007,0x00000002,0x00000005 @@ -20612,6 +20597,11 @@ L_mldsa_decode_eta_2_avx2_vs: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_eta_2_avx2_mask: .long 0x00000007,0x00000007,0x00000007,0x00000007 .long 0x00000007,0x00000007,0x00000007,0x00000007 @@ -20863,14 +20853,10 @@ _wc_mldsa_decode_eta_2_avx2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_0: -.value 0xff00,0xffff -.value 0xff00,0xffff -.value 0x100,0xffff -.value 0xff01,0xffff -.value 0xff01,0xffff -.value 0x201,0xffff -.value 0xff02,0xffff -.value 0xff02,0xffff +.byte 0x00,0xff,0xff,0xff,0x00,0xff,0xff,0xff +.byte 0x00,0x01,0xff,0xff,0x01,0xff,0xff,0xff +.byte 0x01,0xff,0xff,0xff,0x01,0x02,0xff,0xff +.byte 0x02,0xff,0xff,0xff,0x02,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20882,14 +20868,10 @@ L_mldsa_decode_eta_4_avx2_shuff_0: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_1: -.value 0xff01,0xffff -.value 0xff01,0xffff -.value 0x201,0xffff -.value 0xff02,0xffff -.value 0xff02,0xffff -.value 0x302,0xffff -.value 0xff03,0xffff -.value 0xff03,0xffff +.byte 0x01,0xff,0xff,0xff,0x01,0xff,0xff,0xff +.byte 0x01,0x02,0xff,0xff,0x02,0xff,0xff,0xff +.byte 0x02,0xff,0xff,0xff,0x02,0x03,0xff,0xff +.byte 0x03,0xff,0xff,0xff,0x03,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20901,14 +20883,10 @@ L_mldsa_decode_eta_4_avx2_shuff_1: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_2: -.value 0xff02,0xffff -.value 0xff02,0xffff -.value 0x302,0xffff -.value 0xff03,0xffff -.value 0xff03,0xffff -.value 0x403,0xffff -.value 0xff04,0xffff -.value 0xff04,0xffff +.byte 0x02,0xff,0xff,0xff,0x02,0xff,0xff,0xff +.byte 0x02,0x03,0xff,0xff,0x03,0xff,0xff,0xff +.byte 0x03,0xff,0xff,0xff,0x03,0x04,0xff,0xff +.byte 0x04,0xff,0xff,0xff,0x04,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20920,14 +20898,10 @@ L_mldsa_decode_eta_4_avx2_shuff_2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_3: -.value 0xff03,0xffff -.value 0xff03,0xffff -.value 0x403,0xffff -.value 0xff04,0xffff -.value 0xff04,0xffff -.value 0x504,0xffff -.value 0xff05,0xffff -.value 0xff05,0xffff +.byte 0x03,0xff,0xff,0xff,0x03,0xff,0xff,0xff +.byte 0x03,0x04,0xff,0xff,0x04,0xff,0xff,0xff +.byte 0x04,0xff,0xff,0xff,0x04,0x05,0xff,0xff +.byte 0x05,0xff,0xff,0xff,0x05,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20939,14 +20913,10 @@ L_mldsa_decode_eta_4_avx2_shuff_3: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_4: -.value 0xff04,0xffff -.value 0xff04,0xffff -.value 0x504,0xffff -.value 0xff05,0xffff -.value 0xff05,0xffff -.value 0x605,0xffff -.value 0xff06,0xffff -.value 0xff06,0xffff +.byte 0x04,0xff,0xff,0xff,0x04,0xff,0xff,0xff +.byte 0x04,0x05,0xff,0xff,0x05,0xff,0xff,0xff +.byte 0x05,0xff,0xff,0xff,0x05,0x06,0xff,0xff +.byte 0x06,0xff,0xff,0xff,0x06,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20958,14 +20928,10 @@ L_mldsa_decode_eta_4_avx2_shuff_4: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_5: -.value 0xff05,0xffff -.value 0xff05,0xffff -.value 0x605,0xffff -.value 0xff06,0xffff -.value 0xff06,0xffff -.value 0x706,0xffff -.value 0xff07,0xffff -.value 0xff07,0xffff +.byte 0x05,0xff,0xff,0xff,0x05,0xff,0xff,0xff +.byte 0x05,0x06,0xff,0xff,0x06,0xff,0xff,0xff +.byte 0x06,0xff,0xff,0xff,0x06,0x07,0xff,0xff +.byte 0x07,0xff,0xff,0xff,0x07,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20977,14 +20943,10 @@ L_mldsa_decode_eta_4_avx2_shuff_5: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_6: -.value 0xff06,0xffff -.value 0xff06,0xffff -.value 0x706,0xffff -.value 0xff07,0xffff -.value 0xff07,0xffff -.value 0x807,0xffff -.value 0xff08,0xffff -.value 0xff08,0xffff +.byte 0x06,0xff,0xff,0xff,0x06,0xff,0xff,0xff +.byte 0x06,0x07,0xff,0xff,0x07,0xff,0xff,0xff +.byte 0x07,0xff,0xff,0xff,0x07,0x08,0xff,0xff +.byte 0x08,0xff,0xff,0xff,0x08,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -20996,19 +20958,20 @@ L_mldsa_decode_eta_4_avx2_shuff_6: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_shuff_7: -.value 0xff07,0xffff -.value 0xff07,0xffff -.value 0x807,0xffff -.value 0xff08,0xffff -.value 0xff08,0xffff -.value 0x908,0xffff -.value 0xff09,0xffff -.value 0xff09,0xffff +.byte 0x07,0xff,0xff,0xff,0x07,0xff,0xff,0xff +.byte 0x07,0x08,0xff,0xff,0x08,0xff,0xff,0xff +.byte 0x08,0xff,0xff,0xff,0x08,0x09,0xff,0xff +.byte 0x09,0xff,0xff,0xff,0x09,0xff,0xff,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_four: .long 0x00000004,0x00000004,0x00000004,0x00000004 .long 0x00000004,0x00000004,0x00000004,0x00000004 @@ -21017,6 +20980,11 @@ L_mldsa_decode_eta_4_avx2_four: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_vs: .long 0x00000000,0x00000004,0x00000008,0x0000000c .long 0x00000010,0x00000014,0x00000018,0x0000001c @@ -21025,6 +20993,11 @@ L_mldsa_decode_eta_4_avx2_vs: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_eta_4_avx2_mask: .long 0x0000000f,0x0000000f,0x0000000f,0x0000000f .long 0x0000000f,0x0000000f,0x0000000f,0x0000000f @@ -21220,14 +21193,10 @@ _wc_mldsa_decode_eta_4_avx2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_w1_88_avx2_shuff_0_even: -.value 0x900,0xff0a -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xff -.value 0xa09,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff +.byte 0x00,0x09,0x0a,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0x00,0x09,0x0a,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -21239,14 +21208,10 @@ L_mldsa_encode_w1_88_avx2_shuff_0_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_w1_88_avx2_shuff_0_odd: -.value 0x504,0xff0e -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0x4ff -.value 0xe05,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff +.byte 0x04,0x05,0x0e,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0x04,0x05,0x0e,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -21258,14 +21223,10 @@ L_mldsa_encode_w1_88_avx2_shuff_0_odd: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_w1_88_avx2_shuff_1_even: -.value 0xffff,0xffff -.value 0xffff,0x900 -.value 0xff0a,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xff,0xa09 -.value 0xffff,0xffff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x09 +.byte 0x0a,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0x00,0x09,0x0a,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -21277,19 +21238,20 @@ L_mldsa_encode_w1_88_avx2_shuff_1_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_w1_88_avx2_shuff_1_odd: -.value 0xffff,0xffff -.value 0xffff,0x504 -.value 0xff0e,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0x4ff,0xe05 -.value 0xffff,0xffff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x04,0x05 +.byte 0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0x04,0x05,0x0e,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_w1_88_avx2_vs: .long 0x00000000,0x00000006,0x0000000c,0x00000012 .long 0x00000000,0x00000006,0x0000000c,0x00000012 @@ -21577,6 +21539,11 @@ _wc_mldsa_encode_w1_88_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_w1_32_avx2_vs_4: .long 0x00000000,0x00000004,0x00000000,0x00000004 .long 0x00000000,0x00000004,0x00000000,0x00000004 @@ -21736,6 +21703,11 @@ _wc_mldsa_encode_w1_32_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_d_max_half_m1: .long 0x00000fff,0x00000fff,0x00000fff,0x00000fff .long 0x00000fff,0x00000fff,0x00000fff,0x00000fff @@ -21744,6 +21716,11 @@ L_mldsa_encode_t0_t1_avx2_d_max_half_m1: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_d_max_half: .long 0x00001000,0x00001000,0x00001000,0x00001000 .long 0x00001000,0x00001000,0x00001000,0x00001000 @@ -21752,6 +21729,11 @@ L_mldsa_encode_t0_t1_avx2_d_max_half: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_vs_13: .long 0x00000000,0x0000000d,0x00000002,0x0000000f .long 0x00000004,0x00000011,0x00000006,0x00000013 @@ -21766,14 +21748,10 @@ L_mldsa_encode_t0_t1_avx2_vs_13: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_shuff_13_even: -.value 0x100,0x8ff -.value 0xff09,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0x100 -.value 0x802,0xa09 -.value 0xffff,0xffff +.byte 0x00,0x01,0xff,0x08,0x09,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x01 +.byte 0x02,0x08,0x09,0x0a,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -21785,19 +21763,20 @@ L_mldsa_encode_t0_t1_avx2_shuff_13_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_shuff_13_odd: -.value 0x5ff,0x706 -.value 0xe0d,0xff0f -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0x706,0xeff -.value 0xff0f,0xffff +.byte 0xff,0x05,0x06,0x07,0x0d,0x0e,0x0f,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0x06,0x07,0xff,0x0e,0x0f,0xff,0xff,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_vs_10: .long 0x00000000,0x0000000a,0x00000004,0x0000000e .long 0x00000000,0x0000000a,0x00000004,0x0000000e @@ -21812,14 +21791,10 @@ L_mldsa_encode_t0_t1_avx2_vs_10: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_shuff_10_even: -.value 0x100,0x908 -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xff,0x801 -.value 0xff09,0xffff -.value 0xffff,0xffff +.byte 0x00,0x01,0x08,0x09,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x01,0x08 +.byte 0x09,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -21831,14 +21806,10 @@ L_mldsa_encode_t0_t1_avx2_shuff_10_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_t0_t1_avx2_shuff_10_odd: -.value 0x5ff,0xd06 -.value 0xff0e,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0xffff,0x605 -.value 0xe0d,0xffff -.value 0xffff,0xffff +.byte 0xff,0x05,0x06,0x0d,0x0e,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x05,0x06 +.byte 0x0d,0x0e,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .text .globl wc_mldsa_vec_encode_t0_t1_avx2 @@ -22583,14 +22554,10 @@ L_mldsa_encode_t0_t1_avx2_loop: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_0: -.value 0x100,0xffff -.value 0x1ff,0x302 -.value 0x403,0xff05 -.value 0x504,0x706 -.value 0x706,0xff08 -.value 0x8ff,0xff09 -.value 0xa09,0xff0b -.value 0xffff,0xc0b +.byte 0x00,0x01,0xff,0xff,0xff,0x01,0x02,0x03 +.byte 0x03,0x04,0x05,0xff,0x04,0x05,0x06,0x07 +.byte 0x06,0x07,0x08,0xff,0xff,0x08,0x09,0xff +.byte 0x09,0x0a,0x0b,0xff,0xff,0xff,0x0b,0x0c #ifndef __APPLE__ .data #else @@ -22602,14 +22569,10 @@ L_mldsa_decode_t0_avx2_shuff_0: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_1: -.value 0x605,0xffff -.value 0x6ff,0x807 -.value 0x908,0xffff -.value 0xa09,0xff0b -.value 0x403,0xff05 -.value 0x5ff,0xff06 -.value 0x706,0xff08 -.value 0xffff,0x908 +.byte 0x05,0x06,0xff,0xff,0xff,0x06,0x07,0x08 +.byte 0x08,0x09,0xff,0xff,0x09,0x0a,0x0b,0xff +.byte 0x03,0x04,0x05,0xff,0xff,0x05,0x06,0xff +.byte 0x06,0x07,0x08,0xff,0xff,0xff,0x08,0x09 #ifndef __APPLE__ .data #else @@ -22621,14 +22584,10 @@ L_mldsa_decode_t0_avx2_shuff_1: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_2: -.value 0x302,0xffff -.value 0x3ff,0x504 -.value 0x605,0xffff -.value 0x706,0xff08 -.value 0x100,0xff02 -.value 0x2ff,0xff03 -.value 0x403,0xff05 -.value 0xffff,0x605 +.byte 0x02,0x03,0xff,0xff,0xff,0x03,0x04,0x05 +.byte 0x05,0x06,0xff,0xff,0x06,0x07,0x08,0xff +.byte 0x00,0x01,0x02,0xff,0xff,0x02,0x03,0xff +.byte 0x03,0x04,0x05,0xff,0xff,0xff,0x05,0x06 #ifndef __APPLE__ .data #else @@ -22640,14 +22599,10 @@ L_mldsa_decode_t0_avx2_shuff_2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_3: -.value 0x807,0xffff -.value 0x8ff,0xa09 -.value 0xb0a,0xffff -.value 0xc0b,0xff0d -.value 0x605,0xff07 -.value 0x7ff,0xff08 -.value 0x908,0xff0a -.value 0xffff,0xb0a +.byte 0x07,0x08,0xff,0xff,0xff,0x08,0x09,0x0a +.byte 0x0a,0x0b,0xff,0xff,0x0b,0x0c,0x0d,0xff +.byte 0x05,0x06,0x07,0xff,0xff,0x07,0x08,0xff +.byte 0x08,0x09,0x0a,0xff,0xff,0xff,0x0a,0x0b #ifndef __APPLE__ .data #else @@ -22659,14 +22614,10 @@ L_mldsa_decode_t0_avx2_shuff_3: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_4: -.value 0x504,0xffff -.value 0x5ff,0x706 -.value 0x807,0xffff -.value 0x908,0xff0a -.value 0x302,0xff04 -.value 0x4ff,0xff05 -.value 0x605,0xff07 -.value 0xffff,0x807 +.byte 0x04,0x05,0xff,0xff,0xff,0x05,0x06,0x07 +.byte 0x07,0x08,0xff,0xff,0x08,0x09,0x0a,0xff +.byte 0x02,0x03,0x04,0xff,0xff,0x04,0x05,0xff +.byte 0x05,0x06,0x07,0xff,0xff,0xff,0x07,0x08 #ifndef __APPLE__ .data #else @@ -22678,14 +22629,10 @@ L_mldsa_decode_t0_avx2_shuff_4: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_5: -.value 0x201,0xffff -.value 0x2ff,0x403 -.value 0x504,0xffff -.value 0x605,0xff07 -.value 0x807,0xff09 -.value 0x9ff,0xff0a -.value 0xb0a,0xff0c -.value 0xffff,0xd0c +.byte 0x01,0x02,0xff,0xff,0xff,0x02,0x03,0x04 +.byte 0x04,0x05,0xff,0xff,0x05,0x06,0x07,0xff +.byte 0x07,0x08,0x09,0xff,0xff,0x09,0x0a,0xff +.byte 0x0a,0x0b,0x0c,0xff,0xff,0xff,0x0c,0x0d #ifndef __APPLE__ .data #else @@ -22697,14 +22644,10 @@ L_mldsa_decode_t0_avx2_shuff_5: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_6: -.value 0x706,0xffff -.value 0x7ff,0x908 -.value 0xa09,0xffff -.value 0xb0a,0xff0c -.value 0x504,0xff06 -.value 0x6ff,0x807 -.value 0x807,0xff09 -.value 0xffff,0xa09 +.byte 0x06,0x07,0xff,0xff,0xff,0x07,0x08,0x09 +.byte 0x09,0x0a,0xff,0xff,0x0a,0x0b,0x0c,0xff +.byte 0x04,0x05,0x06,0xff,0xff,0x06,0x07,0x08 +.byte 0x07,0x08,0x09,0xff,0xff,0xff,0x09,0x0a #ifndef __APPLE__ .data #else @@ -22716,19 +22659,20 @@ L_mldsa_decode_t0_avx2_shuff_6: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_shuff_7: -.value 0x403,0xffff -.value 0x4ff,0x605 -.value 0x706,0xffff -.value 0x807,0xff09 -.value 0x201,0xff03 -.value 0x3ff,0xff04 -.value 0x504,0xff06 -.value 0xffff,0x706 +.byte 0x03,0x04,0xff,0xff,0xff,0x04,0x05,0x06 +.byte 0x06,0x07,0xff,0xff,0x07,0x08,0x09,0xff +.byte 0x01,0x02,0x03,0xff,0xff,0x03,0x04,0xff +.byte 0x04,0x05,0x06,0xff,0xff,0xff,0x06,0x07 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_vs_8: .long 0x00000000,0x0000000d,0x00000002,0x00000007 .long 0x00000004,0x00000009,0x00000006,0x00000013 @@ -22737,6 +22681,11 @@ L_mldsa_decode_t0_avx2_vs_8: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_mask: .long 0x00001fff,0x00001fff,0x00001fff,0x00001fff .long 0x00001fff,0x00001fff,0x00001fff,0x00001fff @@ -22745,6 +22694,11 @@ L_mldsa_decode_t0_avx2_mask: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_t0_avx2_d_max_half: .long 0x00001000,0x00001000,0x00001000,0x00001000 .long 0x00001000,0x00001000,0x00001000,0x00001000 @@ -23038,14 +22992,10 @@ _wc_mldsa_decode_t0_avx2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t1_avx2_shuff_0: -.value 0x100,0xffff -.value 0x1ff,0xff02 -.value 0x302,0xffff -.value 0x3ff,0xff04 -.value 0x605,0xffff -.value 0x6ff,0xff07 -.value 0x807,0xffff -.value 0x8ff,0xff09 +.byte 0x00,0x01,0xff,0xff,0xff,0x01,0x02,0xff +.byte 0x02,0x03,0xff,0xff,0xff,0x03,0x04,0xff +.byte 0x05,0x06,0xff,0xff,0xff,0x06,0x07,0xff +.byte 0x07,0x08,0xff,0xff,0xff,0x08,0x09,0xff #ifndef __APPLE__ .data #else @@ -23057,14 +23007,10 @@ L_mldsa_decode_t1_avx2_shuff_0: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t1_avx2_shuff_1: -.value 0x302,0xffff -.value 0x3ff,0xff04 -.value 0x504,0xffff -.value 0x5ff,0xff06 -.value 0x807,0xffff -.value 0x8ff,0xff09 -.value 0xa09,0xff08 -.value 0xaff,0xff0b +.byte 0x02,0x03,0xff,0xff,0xff,0x03,0x04,0xff +.byte 0x04,0x05,0xff,0xff,0xff,0x05,0x06,0xff +.byte 0x07,0x08,0xff,0xff,0xff,0x08,0x09,0xff +.byte 0x09,0x0a,0x08,0xff,0xff,0x0a,0x0b,0xff #ifndef __APPLE__ .data #else @@ -23076,14 +23022,10 @@ L_mldsa_decode_t1_avx2_shuff_1: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t1_avx2_shuff_2: -.value 0x504,0xffff -.value 0x5ff,0xff06 -.value 0x706,0xffff -.value 0x7ff,0xff08 -.value 0x201,0xffff -.value 0x2ff,0xff03 -.value 0x403,0xffff -.value 0x4ff,0xff05 +.byte 0x04,0x05,0xff,0xff,0xff,0x05,0x06,0xff +.byte 0x06,0x07,0xff,0xff,0xff,0x07,0x08,0xff +.byte 0x01,0x02,0xff,0xff,0xff,0x02,0x03,0xff +.byte 0x03,0x04,0xff,0xff,0xff,0x04,0x05,0xff #ifndef __APPLE__ .data #else @@ -23095,19 +23037,20 @@ L_mldsa_decode_t1_avx2_shuff_2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_t1_avx2_shuff_3: -.value 0x706,0xffff -.value 0x7ff,0xff08 -.value 0x908,0xffff -.value 0x9ff,0xff0a -.value 0x403,0xffff -.value 0x4ff,0xff05 -.value 0x605,0xffff -.value 0x6ff,0xff07 +.byte 0x06,0x07,0xff,0xff,0xff,0x07,0x08,0xff +.byte 0x08,0x09,0xff,0xff,0xff,0x09,0x0a,0xff +.byte 0x03,0x04,0xff,0xff,0xff,0x04,0x05,0xff +.byte 0x05,0x06,0xff,0xff,0xff,0x06,0x07,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_t1_avx2_vs_8: .long 0x00000000,0x0000000a,0x00000004,0x0000000e .long 0x00000000,0x0000000a,0x00000004,0x0000000e @@ -23116,6 +23059,11 @@ L_mldsa_decode_t1_avx2_vs_8: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_t1_avx2_mask: .long 0x000003ff,0x000003ff,0x000003ff,0x000003ff .long 0x000003ff,0x000003ff,0x000003ff,0x000003ff @@ -23396,14 +23344,10 @@ _wc_mldsa_decode_t1_avx2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_shuff_0: -.value 0x100,0xff02 -.value 0x302,0xff04 -.value 0x504,0xff06 -.value 0x706,0xff08 -.value 0x1ff,0x302 -.value 0x3ff,0x504 -.value 0x5ff,0x706 -.value 0x7ff,0x908 +.byte 0x00,0x01,0x02,0xff,0x02,0x03,0x04,0xff +.byte 0x04,0x05,0x06,0xff,0x06,0x07,0x08,0xff +.byte 0xff,0x01,0x02,0x03,0xff,0x03,0x04,0x05 +.byte 0xff,0x05,0x06,0x07,0xff,0x07,0x08,0x09 #ifndef __APPLE__ .data #else @@ -23415,14 +23359,10 @@ L_mldsa_decode_gamma1_17_avx2_shuff_0: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_shuff_1: -.value 0x302,0xff04 -.value 0x504,0xff06 -.value 0x706,0xff08 -.value 0x908,0xff0a -.value 0x3ff,0x504 -.value 0x5ff,0x706 -.value 0x7ff,0x908 -.value 0x9ff,0xb0a +.byte 0x02,0x03,0x04,0xff,0x04,0x05,0x06,0xff +.byte 0x06,0x07,0x08,0xff,0x08,0x09,0x0a,0xff +.byte 0xff,0x03,0x04,0x05,0xff,0x05,0x06,0x07 +.byte 0xff,0x07,0x08,0x09,0xff,0x09,0x0a,0x0b #ifndef __APPLE__ .data #else @@ -23434,14 +23374,10 @@ L_mldsa_decode_gamma1_17_avx2_shuff_1: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_shuff_2: -.value 0x504,0xff06 -.value 0x706,0xff08 -.value 0x908,0xff0a -.value 0xb0a,0xff0c -.value 0x5ff,0x706 -.value 0x7ff,0x908 -.value 0x9ff,0xb0a -.value 0xbff,0xd0c +.byte 0x04,0x05,0x06,0xff,0x06,0x07,0x08,0xff +.byte 0x08,0x09,0x0a,0xff,0x0a,0x0b,0x0c,0xff +.byte 0xff,0x05,0x06,0x07,0xff,0x07,0x08,0x09 +.byte 0xff,0x09,0x0a,0x0b,0xff,0x0b,0x0c,0x0d #ifndef __APPLE__ .data #else @@ -23453,19 +23389,20 @@ L_mldsa_decode_gamma1_17_avx2_shuff_2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_shuff_3: -.value 0x706,0xff08 -.value 0x908,0xff0a -.value 0xb0a,0xff0c -.value 0xd0c,0xff0e -.value 0x7ff,0x908 -.value 0x9ff,0xb0a -.value 0xbff,0xd0c -.value 0xdff,0xf0e +.byte 0x06,0x07,0x08,0xff,0x08,0x09,0x0a,0xff +.byte 0x0a,0x0b,0x0c,0xff,0x0c,0x0d,0x0e,0xff +.byte 0xff,0x07,0x08,0x09,0xff,0x09,0x0a,0x0b +.byte 0xff,0x0b,0x0c,0x0d,0xff,0x0d,0x0e,0x0f #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_vs_8: .long 0x00000000,0x00000002,0x00000004,0x00000006 .long 0x00000008,0x0000000a,0x0000000c,0x0000000e @@ -23474,6 +23411,11 @@ L_mldsa_decode_gamma1_17_avx2_vs_8: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_mask: .long 0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff .long 0x0003ffff,0x0003ffff,0x0003ffff,0x0003ffff @@ -23482,6 +23424,11 @@ L_mldsa_decode_gamma1_17_avx2_mask: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_gamma1_17_avx2_gamma17: .long 0x00020000,0x00020000,0x00020000,0x00020000 .long 0x00020000,0x00020000,0x00020000,0x00020000 @@ -23778,14 +23725,10 @@ _wc_mldsa_decode_gamma1_17_avx2: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_gamma1_20_avx2_shuff_0: -.value 0x100,0xff02 -.value 0x302,0xff04 -.value 0x605,0xff07 -.value 0x807,0xff09 -.value 0x2ff,0x403 -.value 0x4ff,0x605 -.value 0x7ff,0x908 -.value 0x9ff,0xb0a +.byte 0x00,0x01,0x02,0xff,0x02,0x03,0x04,0xff +.byte 0x05,0x06,0x07,0xff,0x07,0x08,0x09,0xff +.byte 0xff,0x02,0x03,0x04,0xff,0x04,0x05,0x06 +.byte 0xff,0x07,0x08,0x09,0xff,0x09,0x0a,0x0b #ifndef __APPLE__ .data #else @@ -23797,19 +23740,20 @@ L_mldsa_decode_gamma1_20_avx2_shuff_0: .p2align 4 #endif /* __APPLE__ */ L_mldsa_decode_gamma1_20_avx2_shuff_1: -.value 0x504,0xff06 -.value 0x706,0xff08 -.value 0xa09,0xff0b -.value 0xc0b,0xff0d -.value 0x6ff,0x807 -.value 0x8ff,0xa09 -.value 0xbff,0xd0c -.value 0xdff,0xf0e +.byte 0x04,0x05,0x06,0xff,0x06,0x07,0x08,0xff +.byte 0x09,0x0a,0x0b,0xff,0x0b,0x0c,0x0d,0xff +.byte 0xff,0x06,0x07,0x08,0xff,0x08,0x09,0x0a +.byte 0xff,0x0b,0x0c,0x0d,0xff,0x0d,0x0e,0x0f #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_gamma1_20_avx2_vs_8: .long 0x00000000,0x00000004,0x00000000,0x00000004 .long 0x00000008,0x0000000c,0x00000008,0x0000000c @@ -23818,6 +23762,11 @@ L_mldsa_decode_gamma1_20_avx2_vs_8: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_gamma1_20_avx2_mask: .long 0x000fffff,0x000fffff,0x000fffff,0x000fffff .long 0x000fffff,0x000fffff,0x000fffff,0x000fffff @@ -23826,6 +23775,11 @@ L_mldsa_decode_gamma1_20_avx2_mask: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decode_gamma1_20_avx2_gamma19: .long 0x00080000,0x00080000,0x00080000,0x00080000 .long 0x00080000,0x00080000,0x00080000,0x00080000 @@ -24116,6 +24070,11 @@ _wc_mldsa_decode_gamma1_19_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_gamma1_17_avx2_gamma17: .long 0x00020000,0x00020000,0x00020000,0x00020000 .long 0x00020000,0x00020000,0x00020000,0x00020000 @@ -24130,14 +24089,10 @@ L_mldsa_encode_gamma1_17_avx2_gamma17: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_gamma1_17_avx2_shuff_even: -.value 0x100,0xff02 -.value 0x908,0xff0a -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0x100,0xff02 -.value 0x908,0xff0a -.value 0xffff,0xffff -.value 0xffff,0xffff +.byte 0x00,0x01,0x02,0xff,0x08,0x09,0x0a,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0x00,0x01,0x02,0xff,0x08,0x09,0x0a,0xff +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -24149,19 +24104,20 @@ L_mldsa_encode_gamma1_17_avx2_shuff_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_gamma1_17_avx2_shuff_odd: -.value 0xffff,0x504 -.value 0xff06,0xd0c -.value 0xff0e,0xffff -.value 0xffff,0xffff -.value 0xffff,0x504 -.value 0xff06,0xd0c -.value 0xff0e,0xffff -.value 0xffff,0xffff +.byte 0xff,0xff,0x04,0x05,0x06,0xff,0x0c,0x0d +.byte 0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0x04,0x05,0x06,0xff,0x0c,0x0d +.byte 0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_gamma1_17_avx2_vs: .long 0x00000000,0x00000002,0x00000004,0x00000006 .long 0x00000000,0x00000002,0x00000004,0x00000006 @@ -24576,6 +24532,11 @@ _wc_mldsa_encode_gamma1_17_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_gamma1_19_avx2_gamma19: .long 0x00080000,0x00080000,0x00080000,0x00080000 .long 0x00080000,0x00080000,0x00080000,0x00080000 @@ -24590,14 +24551,10 @@ L_mldsa_encode_gamma1_19_avx2_gamma19: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_gamma1_19_avx2_shuff_even: -.value 0x100,0xff02 -.value 0x8ff,0xa09 -.value 0xffff,0xffff -.value 0xffff,0xffff -.value 0x100,0xff02 -.value 0x8ff,0xa09 -.value 0xffff,0xffff -.value 0xffff,0xffff +.byte 0x00,0x01,0x02,0xff,0xff,0x08,0x09,0x0a +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0x00,0x01,0x02,0xff,0xff,0x08,0x09,0x0a +.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else @@ -24609,19 +24566,20 @@ L_mldsa_encode_gamma1_19_avx2_shuff_even: .p2align 4 #endif /* __APPLE__ */ L_mldsa_encode_gamma1_19_avx2_shuff_odd: -.value 0xffff,0x504 -.value 0xff06,0xcff -.value 0xe0d,0xffff -.value 0xffff,0xffff -.value 0xffff,0x504 -.value 0xff06,0xcff -.value 0xe0d,0xffff -.value 0xffff,0xffff +.byte 0xff,0xff,0x04,0x05,0x06,0xff,0xff,0x0c +.byte 0x0d,0x0e,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0x04,0x05,0x06,0xff,0xff,0x0c +.byte 0x0d,0x0e,0xff,0xff,0xff,0xff,0xff,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_encode_gamma1_19_avx2_vs: .long 0x00000000,0x00000004,0x00000000,0x00000004 .long 0x00000000,0x00000004,0x00000000,0x00000004 @@ -25036,6 +24994,11 @@ _wc_mldsa_encode_gamma1_19_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q88_avx2_q_low_88: .long 0x00017400,0x00017400,0x00017400,0x00017400 .long 0x00017400,0x00017400,0x00017400,0x00017400 @@ -25044,6 +25007,11 @@ L_mldsa_decompose_q88_avx2_q_low_88: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q88_avx2_q_low_88_2: .long 0x0002e800,0x0002e800,0x0002e800,0x0002e800 .long 0x0002e800,0x0002e800,0x0002e800,0x0002e800 @@ -25052,6 +25020,11 @@ L_mldsa_decompose_q88_avx2_q_low_88_2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q88_avx2_q_2: .long 0x003fefd4,0x003fefd4,0x003fefd4,0x003fefd4 .long 0x003fefd4,0x003fefd4,0x003fefd4,0x003fefd4 @@ -25060,6 +25033,11 @@ L_mldsa_decompose_q88_avx2_q_2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q88_avx2_44: .long 0x0000002c,0x0000002c,0x0000002c,0x0000002c .long 0x0000002c,0x0000002c,0x0000002c,0x0000002c @@ -27269,6 +27247,11 @@ _wc_mldsa_decompose_q88_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q32_avx2_q_low_32: .long 0x0003ff00,0x0003ff00,0x0003ff00,0x0003ff00 .long 0x0003ff00,0x0003ff00,0x0003ff00,0x0003ff00 @@ -27277,6 +27260,11 @@ L_mldsa_decompose_q32_avx2_q_low_32: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q32_avx2_q_low_32_2: .long 0x0007fe00,0x0007fe00,0x0007fe00,0x0007fe00 .long 0x0007fe00,0x0007fe00,0x0007fe00,0x0007fe00 @@ -27285,6 +27273,11 @@ L_mldsa_decompose_q32_avx2_q_low_32_2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q32_avx2_q_low_32_m1: .long 0x0003feff,0x0003feff,0x0003feff,0x0003feff .long 0x0003feff,0x0003feff,0x0003feff,0x0003feff @@ -27293,6 +27286,11 @@ L_mldsa_decompose_q32_avx2_q_low_32_m1: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_decompose_q32_avx2_mask: .long 0x0000000f,0x0000000f,0x0000000f,0x0000000f .long 0x0000000f,0x0000000f,0x0000000f,0x0000000f @@ -27808,6 +27806,11 @@ L_mldsa_decompose_q32_avx2_start_256: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_q: .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 @@ -27816,6 +27819,11 @@ L_mldsa_use_hint_88_avx2_q: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_q_low_88: .long 0x00017400,0x00017400,0x00017400,0x00017400 .long 0x00017400,0x00017400,0x00017400,0x00017400 @@ -27824,6 +27832,11 @@ L_mldsa_use_hint_88_avx2_q_low_88: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_q_low_88_2: .long 0x0002e800,0x0002e800,0x0002e800,0x0002e800 .long 0x0002e800,0x0002e800,0x0002e800,0x0002e800 @@ -27832,6 +27845,11 @@ L_mldsa_use_hint_88_avx2_q_low_88_2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_q_2: .long 0x003fefd4,0x003fefd4,0x003fefd4,0x003fefd4 .long 0x003fefd4,0x003fefd4,0x003fefd4,0x003fefd4 @@ -27840,6 +27858,11 @@ L_mldsa_use_hint_88_avx2_q_2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_44: .long 0x0000002c,0x0000002c,0x0000002c,0x0000002c .long 0x0000002c,0x0000002c,0x0000002c,0x0000002c @@ -27848,6 +27871,11 @@ L_mldsa_use_hint_88_avx2_44: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_vsl: .long 0x0000001f,0x0000001e,0x0000001d,0x0000001c .long 0x0000001b,0x0000001a,0x00000019,0x00000018 @@ -27856,6 +27884,11 @@ L_mldsa_use_hint_88_avx2_vsl: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_88_avx2_one: .long 0x00000001,0x00000001,0x00000001,0x00000001 .long 0x00000001,0x00000001,0x00000001,0x00000001 @@ -33271,6 +33304,11 @@ L_mldsa_use_hint_88_avx2_hints_done_3_15: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_q: .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 .long 0x007fe001,0x007fe001,0x007fe001,0x007fe001 @@ -33279,6 +33317,11 @@ L_mldsa_use_hint_32_avx2_q: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_q_low_32: .long 0x0003ff00,0x0003ff00,0x0003ff00,0x0003ff00 .long 0x0003ff00,0x0003ff00,0x0003ff00,0x0003ff00 @@ -33287,6 +33330,11 @@ L_mldsa_use_hint_32_avx2_q_low_32: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_q_low_32_2: .long 0x0007fe00,0x0007fe00,0x0007fe00,0x0007fe00 .long 0x0007fe00,0x0007fe00,0x0007fe00,0x0007fe00 @@ -33295,6 +33343,11 @@ L_mldsa_use_hint_32_avx2_q_low_32_2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_q_low_32_m1: .long 0x0003feff,0x0003feff,0x0003feff,0x0003feff .long 0x0003feff,0x0003feff,0x0003feff,0x0003feff @@ -33303,6 +33356,11 @@ L_mldsa_use_hint_32_avx2_q_low_32_m1: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_mask: .long 0x0000000f,0x0000000f,0x0000000f,0x0000000f .long 0x0000000f,0x0000000f,0x0000000f,0x0000000f @@ -33311,6 +33369,11 @@ L_mldsa_use_hint_32_avx2_mask: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_vsl: .long 0x0000001f,0x0000001e,0x0000001d,0x0000001c .long 0x0000001b,0x0000001a,0x00000019,0x00000018 @@ -33319,6 +33382,11 @@ L_mldsa_use_hint_32_avx2_vsl: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mldsa_use_hint_32_avx2_one: .long 0x00000001,0x00000001,0x00000001,0x00000001 .long 0x00000001,0x00000001,0x00000001,0x00000001 diff --git a/wolfcrypt/src/wc_mlkem_asm.S b/wolfcrypt/src/wc_mlkem_asm.S index 3bc7073a6fd..5eb503c1f53 100644 --- a/wolfcrypt/src/wc_mlkem_asm.S +++ b/wolfcrypt/src/wc_mlkem_asm.S @@ -60,14 +60,8 @@ .p2align 4 #endif /* __APPLE__ */ mlkem_q: -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .data #else @@ -79,14 +73,8 @@ mlkem_q: .p2align 4 #endif /* __APPLE__ */ mlkem_qinv: -.value 0xf301,0xf301 -.value 0xf301,0xf301 -.value 0xf301,0xf301 -.value 0xf301,0xf301 -.value 0xf301,0xf301 -.value 0xf301,0xf301 -.value 0xf301,0xf301 -.value 0xf301,0xf301 +.short 0xf301,0xf301,0xf301,0xf301,0xf301,0xf301,0xf301,0xf301 +.short 0xf301,0xf301,0xf301,0xf301,0xf301,0xf301,0xf301,0xf301 #ifndef __APPLE__ .data #else @@ -98,14 +86,8 @@ mlkem_qinv: .p2align 4 #endif /* __APPLE__ */ mlkem_f: -.value 0x0549,0x0549 -.value 0x0549,0x0549 -.value 0x0549,0x0549 -.value 0x0549,0x0549 -.value 0x0549,0x0549 -.value 0x0549,0x0549 -.value 0x0549,0x0549 -.value 0x0549,0x0549 +.short 0x0549,0x0549,0x0549,0x0549,0x0549,0x0549,0x0549,0x0549 +.short 0x0549,0x0549,0x0549,0x0549,0x0549,0x0549,0x0549,0x0549 #ifndef __APPLE__ .data #else @@ -117,14 +99,8 @@ mlkem_f: .p2align 4 #endif /* __APPLE__ */ mlkem_f_qinv: -.value 0x5049,0x5049 -.value 0x5049,0x5049 -.value 0x5049,0x5049 -.value 0x5049,0x5049 -.value 0x5049,0x5049 -.value 0x5049,0x5049 -.value 0x5049,0x5049 -.value 0x5049,0x5049 +.short 0x5049,0x5049,0x5049,0x5049,0x5049,0x5049,0x5049,0x5049 +.short 0x5049,0x5049,0x5049,0x5049,0x5049,0x5049,0x5049,0x5049 #ifndef __APPLE__ .data #else @@ -136,14 +112,8 @@ mlkem_f_qinv: .p2align 4 #endif /* __APPLE__ */ mlkem_v: -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf #ifndef __APPLE__ .data #else @@ -155,630 +125,162 @@ mlkem_v: .p2align 4 #endif /* __APPLE__ */ L_mlkem_avx2_zetas: -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x0a0b,0x0a0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x7b0b,0x7b0b -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x0b9a,0x0b9a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x399a,0x399a -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x05d5,0x05d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x34d5,0x34d5 -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0x058e,0x058e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0xcf8e,0xcf8e -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0x0c56,0x0c56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0xae56,0xae56 -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x026e,0x026e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x6c6e,0x6c6e -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0x0629,0x0629 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0xf129,0xf129 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0x00b6,0x00b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0xc2b6,0xc2b6 -.value 0x023d,0x023d -.value 0x023d,0x023d -.value 0x023d,0x023d -.value 0x023d,0x023d -.value 0x07d4,0x07d4 -.value 0x07d4,0x07d4 -.value 0x07d4,0x07d4 -.value 0x07d4,0x07d4 -.value 0xe93d,0xe93d -.value 0xe93d,0xe93d -.value 0xe93d,0xe93d -.value 0xe93d,0xe93d -.value 0x43d4,0x43d4 -.value 0x43d4,0x43d4 -.value 0x43d4,0x43d4 -.value 0x43d4,0x43d4 -.value 0x0108,0x0108 -.value 0x0108,0x0108 -.value 0x0108,0x0108 -.value 0x0108,0x0108 -.value 0x017f,0x017f -.value 0x017f,0x017f -.value 0x017f,0x017f -.value 0x017f,0x017f -.value 0x9908,0x9908 -.value 0x9908,0x9908 -.value 0x9908,0x9908 -.value 0x9908,0x9908 -.value 0x8e7f,0x8e7f -.value 0x8e7f,0x8e7f -.value 0x8e7f,0x8e7f -.value 0x8e7f,0x8e7f -.value 0x04c7,0x04c7 -.value 0x04c7,0x04c7 -.value 0x028c,0x028c -.value 0x028c,0x028c -.value 0x0ad9,0x0ad9 -.value 0x0ad9,0x0ad9 -.value 0x03f7,0x03f7 -.value 0x03f7,0x03f7 -.value 0xe9c7,0xe9c7 -.value 0xe9c7,0xe9c7 -.value 0xe68c,0xe68c -.value 0xe68c,0xe68c -.value 0x05d9,0x05d9 -.value 0x05d9,0x05d9 -.value 0x78f7,0x78f7 -.value 0x78f7,0x78f7 -.value 0x07f4,0x07f4 -.value 0x07f4,0x07f4 -.value 0x05d3,0x05d3 -.value 0x05d3,0x05d3 -.value 0x0be7,0x0be7 -.value 0x0be7,0x0be7 -.value 0x06f9,0x06f9 -.value 0x06f9,0x06f9 -.value 0xa3f4,0xa3f4 -.value 0xa3f4,0xa3f4 -.value 0x4ed3,0x4ed3 -.value 0x4ed3,0x4ed3 -.value 0x50e7,0x50e7 -.value 0x50e7,0x50e7 -.value 0x61f9,0x61f9 -.value 0x61f9,0x61f9 -.value 0x09c4,0x09c4 -.value 0x09c4,0x09c4 -.value 0x09c4,0x09c4 -.value 0x09c4,0x09c4 -.value 0x05b2,0x05b2 -.value 0x05b2,0x05b2 -.value 0x05b2,0x05b2 -.value 0x05b2,0x05b2 -.value 0x15c4,0x15c4 -.value 0x15c4,0x15c4 -.value 0x15c4,0x15c4 -.value 0x15c4,0x15c4 -.value 0xfbb2,0xfbb2 -.value 0xfbb2,0xfbb2 -.value 0xfbb2,0xfbb2 -.value 0xfbb2,0xfbb2 -.value 0x06bf,0x06bf -.value 0x06bf,0x06bf -.value 0x06bf,0x06bf -.value 0x06bf,0x06bf -.value 0x0c7f,0x0c7f -.value 0x0c7f,0x0c7f -.value 0x0c7f,0x0c7f -.value 0x0c7f,0x0c7f -.value 0x53bf,0x53bf -.value 0x53bf,0x53bf -.value 0x53bf,0x53bf -.value 0x53bf,0x53bf -.value 0x997f,0x997f -.value 0x997f,0x997f -.value 0x997f,0x997f -.value 0x997f,0x997f -.value 0x0204,0x0204 -.value 0x0204,0x0204 -.value 0x0cf9,0x0cf9 -.value 0x0cf9,0x0cf9 -.value 0x0bc1,0x0bc1 -.value 0x0bc1,0x0bc1 -.value 0x0a67,0x0a67 -.value 0x0a67,0x0a67 -.value 0xce04,0xce04 -.value 0xce04,0xce04 -.value 0x67f9,0x67f9 -.value 0x67f9,0x67f9 -.value 0x3ec1,0x3ec1 -.value 0x3ec1,0x3ec1 -.value 0xcf67,0xcf67 -.value 0xcf67,0xcf67 -.value 0x06af,0x06af -.value 0x06af,0x06af -.value 0x0877,0x0877 -.value 0x0877,0x0877 -.value 0x007e,0x007e -.value 0x007e,0x007e -.value 0x05bd,0x05bd -.value 0x05bd,0x05bd -.value 0x23af,0x23af -.value 0x23af,0x23af -.value 0xfd77,0xfd77 -.value 0xfd77,0xfd77 -.value 0x9a7e,0x9a7e -.value 0x9a7e,0x9a7e -.value 0x6cbd,0x6cbd -.value 0x6cbd,0x6cbd -.value 0x08b2,0x08b2 -.value 0x01ae,0x01ae -.value 0x022b,0x022b -.value 0x034b,0x034b -.value 0x081e,0x081e -.value 0x0367,0x0367 -.value 0x060e,0x060e -.value 0x0069,0x0069 -.value 0xfeb2,0xfeb2 -.value 0x2bae,0x2bae -.value 0xd32b,0xd32b -.value 0x344b,0x344b -.value 0x821e,0x821e -.value 0xc867,0xc867 -.value 0x500e,0x500e -.value 0xab69,0xab69 -.value 0x01a6,0x01a6 -.value 0x024b,0x024b -.value 0x00b1,0x00b1 -.value 0x0c16,0x0c16 -.value 0x0bde,0x0bde -.value 0x0b35,0x0b35 -.value 0x0626,0x0626 -.value 0x0675,0x0675 -.value 0x93a6,0x93a6 -.value 0x334b,0x334b -.value 0x03b1,0x03b1 -.value 0xee16,0xee16 -.value 0xc5de,0xc5de -.value 0x5a35,0x5a35 -.value 0x1826,0x1826 -.value 0x1575,0x1575 -.value 0x0c0b,0x0c0b -.value 0x030a,0x030a -.value 0x0487,0x0487 -.value 0x0c6e,0x0c6e -.value 0x09f8,0x09f8 -.value 0x05cb,0x05cb -.value 0x0aa7,0x0aa7 -.value 0x045f,0x045f -.value 0x7d0b,0x7d0b -.value 0x810a,0x810a -.value 0x2987,0x2987 -.value 0x766e,0x766e -.value 0x71f8,0x71f8 -.value 0xb6cb,0xb6cb -.value 0x8fa7,0x8fa7 -.value 0x315f,0x315f -.value 0x06cb,0x06cb -.value 0x0284,0x0284 -.value 0x0999,0x0999 -.value 0x015d,0x015d -.value 0x01a2,0x01a2 -.value 0x0149,0x0149 -.value 0x0c65,0x0c65 -.value 0x0cb6,0x0cb6 -.value 0xb7cb,0xb7cb -.value 0x4e84,0x4e84 -.value 0x4499,0x4499 -.value 0x485d,0x485d -.value 0xc7a2,0xc7a2 -.value 0x4c49,0x4c49 -.value 0xeb65,0xeb65 -.value 0xceb6,0xceb6 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0714,0x0714 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x0314,0x0314 -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x011f,0x011f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x6e1f,0x6e1f -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0x00ca,0x00ca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0xbeca,0xbeca -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x03c2,0x03c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x29c2,0x29c2 -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x084f,0x084f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x054f,0x054f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0x073f,0x073f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0xd43f,0xd43f -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x05bc,0x05bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x79bc,0x79bc -.value 0x0a58,0x0a58 -.value 0x0a58,0x0a58 -.value 0x0a58,0x0a58 -.value 0x0a58,0x0a58 -.value 0x03f9,0x03f9 -.value 0x03f9,0x03f9 -.value 0x03f9,0x03f9 -.value 0x03f9,0x03f9 -.value 0x9258,0x9258 -.value 0x9258,0x9258 -.value 0x9258,0x9258 -.value 0x9258,0x9258 -.value 0x5ef9,0x5ef9 -.value 0x5ef9,0x5ef9 -.value 0x5ef9,0x5ef9 -.value 0x5ef9,0x5ef9 -.value 0x02dc,0x02dc -.value 0x02dc,0x02dc -.value 0x02dc,0x02dc -.value 0x02dc,0x02dc -.value 0x0260,0x0260 -.value 0x0260,0x0260 -.value 0x0260,0x0260 -.value 0x0260,0x0260 -.value 0xd6dc,0xd6dc -.value 0xd6dc,0xd6dc -.value 0xd6dc,0xd6dc -.value 0xd6dc,0xd6dc -.value 0x2260,0x2260 -.value 0x2260,0x2260 -.value 0x2260,0x2260 -.value 0x2260,0x2260 -.value 0x09ac,0x09ac -.value 0x09ac,0x09ac -.value 0x0ca7,0x0ca7 -.value 0x0ca7,0x0ca7 -.value 0x0bf2,0x0bf2 -.value 0x0bf2,0x0bf2 -.value 0x033e,0x033e -.value 0x033e,0x033e -.value 0x4dac,0x4dac -.value 0x4dac,0x4dac -.value 0x91a7,0x91a7 -.value 0x91a7,0x91a7 -.value 0xc1f2,0xc1f2 -.value 0xc1f2,0xc1f2 -.value 0xdd3e,0xdd3e -.value 0xdd3e,0xdd3e -.value 0x006b,0x006b -.value 0x006b,0x006b -.value 0x0774,0x0774 -.value 0x0774,0x0774 -.value 0x0c0a,0x0c0a -.value 0x0c0a,0x0c0a -.value 0x094a,0x094a -.value 0x094a,0x094a -.value 0x916b,0x916b -.value 0x916b,0x916b -.value 0x2374,0x2374 -.value 0x2374,0x2374 -.value 0x8a0a,0x8a0a -.value 0x8a0a,0x8a0a -.value 0x474a,0x474a -.value 0x474a,0x474a -.value 0x06fb,0x06fb -.value 0x06fb,0x06fb -.value 0x06fb,0x06fb -.value 0x06fb,0x06fb -.value 0x019b,0x019b -.value 0x019b,0x019b -.value 0x019b,0x019b -.value 0x019b,0x019b -.value 0x47fb,0x47fb -.value 0x47fb,0x47fb -.value 0x47fb,0x47fb -.value 0x47fb,0x47fb -.value 0x229b,0x229b -.value 0x229b,0x229b -.value 0x229b,0x229b -.value 0x229b,0x229b -.value 0x0c34,0x0c34 -.value 0x0c34,0x0c34 -.value 0x0c34,0x0c34 -.value 0x0c34,0x0c34 -.value 0x06de,0x06de -.value 0x06de,0x06de -.value 0x06de,0x06de -.value 0x06de,0x06de -.value 0x6834,0x6834 -.value 0x6834,0x6834 -.value 0x6834,0x6834 -.value 0x6834,0x6834 -.value 0xc0de,0xc0de -.value 0xc0de,0xc0de -.value 0xc0de,0xc0de -.value 0xc0de,0xc0de -.value 0x0b73,0x0b73 -.value 0x0b73,0x0b73 -.value 0x03c1,0x03c1 -.value 0x03c1,0x03c1 -.value 0x071d,0x071d -.value 0x071d,0x071d -.value 0x0a2c,0x0a2c -.value 0x0a2c,0x0a2c -.value 0x3473,0x3473 -.value 0x3473,0x3473 -.value 0x36c1,0x36c1 -.value 0x36c1,0x36c1 -.value 0x8e1d,0x8e1d -.value 0x8e1d,0x8e1d -.value 0xce2c,0xce2c -.value 0xce2c,0xce2c -.value 0x01c0,0x01c0 -.value 0x01c0,0x01c0 -.value 0x08d8,0x08d8 -.value 0x08d8,0x08d8 -.value 0x02a5,0x02a5 -.value 0x02a5,0x02a5 -.value 0x0806,0x0806 -.value 0x0806,0x0806 -.value 0x41c0,0x41c0 -.value 0x41c0,0x41c0 -.value 0x10d8,0x10d8 -.value 0x10d8,0x10d8 -.value 0xa1a5,0xa1a5 -.value 0xa1a5,0xa1a5 -.value 0xba06,0xba06 -.value 0xba06,0xba06 -.value 0x0331,0x0331 -.value 0x0449,0x0449 -.value 0x025b,0x025b -.value 0x0262,0x0262 -.value 0x052a,0x052a -.value 0x07fc,0x07fc -.value 0x0748,0x0748 -.value 0x0180,0x0180 -.value 0x8631,0x8631 -.value 0x4f49,0x4f49 -.value 0x635b,0x635b -.value 0x0862,0x0862 -.value 0xe32a,0xe32a -.value 0x3bfc,0x3bfc -.value 0x5f48,0x5f48 -.value 0x8180,0x8180 -.value 0x0842,0x0842 -.value 0x0c79,0x0c79 -.value 0x04c2,0x04c2 -.value 0x07ca,0x07ca -.value 0x0997,0x0997 -.value 0x00dc,0x00dc -.value 0x085e,0x085e -.value 0x0686,0x0686 -.value 0xae42,0xae42 -.value 0xe779,0xe779 -.value 0x2ac2,0x2ac2 -.value 0xc5ca,0xc5ca -.value 0x5e97,0x5e97 -.value 0xd4dc,0xd4dc -.value 0x425e,0x425e -.value 0x3886,0x3886 -.value 0x0860,0x0860 -.value 0x0707,0x0707 -.value 0x0803,0x0803 -.value 0x031a,0x031a -.value 0x071b,0x071b -.value 0x09ab,0x09ab -.value 0x099b,0x099b -.value 0x01de,0x01de -.value 0x2860,0x2860 -.value 0xac07,0xac07 -.value 0xe103,0xe103 -.value 0xb11a,0xb11a -.value 0xa81b,0xa81b -.value 0x5aab,0x5aab -.value 0x2a9b,0x2a9b -.value 0xbbde,0xbbde -.value 0x0c95,0x0c95 -.value 0x0bcd,0x0bcd -.value 0x03e4,0x03e4 -.value 0x03df,0x03df -.value 0x03be,0x03be -.value 0x074d,0x074d -.value 0x05f2,0x05f2 -.value 0x065c,0x065c -.value 0x7b95,0x7b95 -.value 0xa2cd,0xa2cd -.value 0x6fe4,0x6fe4 -.value 0xb0df,0xb0df -.value 0x5dbe,0x5dbe -.value 0x1e4d,0x1e4d -.value 0xbbf2,0xbbf2 -.value 0x5a5c,0x5a5c +.short 0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b +.short 0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b,0x0a0b +.short 0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b +.short 0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b,0x7b0b +.short 0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a +.short 0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a,0x0b9a +.short 0x399a,0x399a,0x399a,0x399a,0x399a,0x399a,0x399a,0x399a +.short 0x399a,0x399a,0x399a,0x399a,0x399a,0x399a,0x399a,0x399a +.short 0x05d5,0x05d5,0x05d5,0x05d5,0x05d5,0x05d5,0x05d5,0x05d5 +.short 0x05d5,0x05d5,0x05d5,0x05d5,0x05d5,0x05d5,0x05d5,0x05d5 +.short 0x34d5,0x34d5,0x34d5,0x34d5,0x34d5,0x34d5,0x34d5,0x34d5 +.short 0x34d5,0x34d5,0x34d5,0x34d5,0x34d5,0x34d5,0x34d5,0x34d5 +.short 0x058e,0x058e,0x058e,0x058e,0x058e,0x058e,0x058e,0x058e +.short 0x058e,0x058e,0x058e,0x058e,0x058e,0x058e,0x058e,0x058e +.short 0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e +.short 0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e,0xcf8e +.short 0x0c56,0x0c56,0x0c56,0x0c56,0x0c56,0x0c56,0x0c56,0x0c56 +.short 0x0c56,0x0c56,0x0c56,0x0c56,0x0c56,0x0c56,0x0c56,0x0c56 +.short 0xae56,0xae56,0xae56,0xae56,0xae56,0xae56,0xae56,0xae56 +.short 0xae56,0xae56,0xae56,0xae56,0xae56,0xae56,0xae56,0xae56 +.short 0x026e,0x026e,0x026e,0x026e,0x026e,0x026e,0x026e,0x026e +.short 0x026e,0x026e,0x026e,0x026e,0x026e,0x026e,0x026e,0x026e +.short 0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e +.short 0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e,0x6c6e +.short 0x0629,0x0629,0x0629,0x0629,0x0629,0x0629,0x0629,0x0629 +.short 0x0629,0x0629,0x0629,0x0629,0x0629,0x0629,0x0629,0x0629 +.short 0xf129,0xf129,0xf129,0xf129,0xf129,0xf129,0xf129,0xf129 +.short 0xf129,0xf129,0xf129,0xf129,0xf129,0xf129,0xf129,0xf129 +.short 0x00b6,0x00b6,0x00b6,0x00b6,0x00b6,0x00b6,0x00b6,0x00b6 +.short 0x00b6,0x00b6,0x00b6,0x00b6,0x00b6,0x00b6,0x00b6,0x00b6 +.short 0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6 +.short 0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6,0xc2b6 +.short 0x023d,0x023d,0x023d,0x023d,0x023d,0x023d,0x023d,0x023d +.short 0x07d4,0x07d4,0x07d4,0x07d4,0x07d4,0x07d4,0x07d4,0x07d4 +.short 0xe93d,0xe93d,0xe93d,0xe93d,0xe93d,0xe93d,0xe93d,0xe93d +.short 0x43d4,0x43d4,0x43d4,0x43d4,0x43d4,0x43d4,0x43d4,0x43d4 +.short 0x0108,0x0108,0x0108,0x0108,0x0108,0x0108,0x0108,0x0108 +.short 0x017f,0x017f,0x017f,0x017f,0x017f,0x017f,0x017f,0x017f +.short 0x9908,0x9908,0x9908,0x9908,0x9908,0x9908,0x9908,0x9908 +.short 0x8e7f,0x8e7f,0x8e7f,0x8e7f,0x8e7f,0x8e7f,0x8e7f,0x8e7f +.short 0x04c7,0x04c7,0x04c7,0x04c7,0x028c,0x028c,0x028c,0x028c +.short 0x0ad9,0x0ad9,0x0ad9,0x0ad9,0x03f7,0x03f7,0x03f7,0x03f7 +.short 0xe9c7,0xe9c7,0xe9c7,0xe9c7,0xe68c,0xe68c,0xe68c,0xe68c +.short 0x05d9,0x05d9,0x05d9,0x05d9,0x78f7,0x78f7,0x78f7,0x78f7 +.short 0x07f4,0x07f4,0x07f4,0x07f4,0x05d3,0x05d3,0x05d3,0x05d3 +.short 0x0be7,0x0be7,0x0be7,0x0be7,0x06f9,0x06f9,0x06f9,0x06f9 +.short 0xa3f4,0xa3f4,0xa3f4,0xa3f4,0x4ed3,0x4ed3,0x4ed3,0x4ed3 +.short 0x50e7,0x50e7,0x50e7,0x50e7,0x61f9,0x61f9,0x61f9,0x61f9 +.short 0x09c4,0x09c4,0x09c4,0x09c4,0x09c4,0x09c4,0x09c4,0x09c4 +.short 0x05b2,0x05b2,0x05b2,0x05b2,0x05b2,0x05b2,0x05b2,0x05b2 +.short 0x15c4,0x15c4,0x15c4,0x15c4,0x15c4,0x15c4,0x15c4,0x15c4 +.short 0xfbb2,0xfbb2,0xfbb2,0xfbb2,0xfbb2,0xfbb2,0xfbb2,0xfbb2 +.short 0x06bf,0x06bf,0x06bf,0x06bf,0x06bf,0x06bf,0x06bf,0x06bf +.short 0x0c7f,0x0c7f,0x0c7f,0x0c7f,0x0c7f,0x0c7f,0x0c7f,0x0c7f +.short 0x53bf,0x53bf,0x53bf,0x53bf,0x53bf,0x53bf,0x53bf,0x53bf +.short 0x997f,0x997f,0x997f,0x997f,0x997f,0x997f,0x997f,0x997f +.short 0x0204,0x0204,0x0204,0x0204,0x0cf9,0x0cf9,0x0cf9,0x0cf9 +.short 0x0bc1,0x0bc1,0x0bc1,0x0bc1,0x0a67,0x0a67,0x0a67,0x0a67 +.short 0xce04,0xce04,0xce04,0xce04,0x67f9,0x67f9,0x67f9,0x67f9 +.short 0x3ec1,0x3ec1,0x3ec1,0x3ec1,0xcf67,0xcf67,0xcf67,0xcf67 +.short 0x06af,0x06af,0x06af,0x06af,0x0877,0x0877,0x0877,0x0877 +.short 0x007e,0x007e,0x007e,0x007e,0x05bd,0x05bd,0x05bd,0x05bd +.short 0x23af,0x23af,0x23af,0x23af,0xfd77,0xfd77,0xfd77,0xfd77 +.short 0x9a7e,0x9a7e,0x9a7e,0x9a7e,0x6cbd,0x6cbd,0x6cbd,0x6cbd +.short 0x08b2,0x08b2,0x01ae,0x01ae,0x022b,0x022b,0x034b,0x034b +.short 0x081e,0x081e,0x0367,0x0367,0x060e,0x060e,0x0069,0x0069 +.short 0xfeb2,0xfeb2,0x2bae,0x2bae,0xd32b,0xd32b,0x344b,0x344b +.short 0x821e,0x821e,0xc867,0xc867,0x500e,0x500e,0xab69,0xab69 +.short 0x01a6,0x01a6,0x024b,0x024b,0x00b1,0x00b1,0x0c16,0x0c16 +.short 0x0bde,0x0bde,0x0b35,0x0b35,0x0626,0x0626,0x0675,0x0675 +.short 0x93a6,0x93a6,0x334b,0x334b,0x03b1,0x03b1,0xee16,0xee16 +.short 0xc5de,0xc5de,0x5a35,0x5a35,0x1826,0x1826,0x1575,0x1575 +.short 0x0c0b,0x0c0b,0x030a,0x030a,0x0487,0x0487,0x0c6e,0x0c6e +.short 0x09f8,0x09f8,0x05cb,0x05cb,0x0aa7,0x0aa7,0x045f,0x045f +.short 0x7d0b,0x7d0b,0x810a,0x810a,0x2987,0x2987,0x766e,0x766e +.short 0x71f8,0x71f8,0xb6cb,0xb6cb,0x8fa7,0x8fa7,0x315f,0x315f +.short 0x06cb,0x06cb,0x0284,0x0284,0x0999,0x0999,0x015d,0x015d +.short 0x01a2,0x01a2,0x0149,0x0149,0x0c65,0x0c65,0x0cb6,0x0cb6 +.short 0xb7cb,0xb7cb,0x4e84,0x4e84,0x4499,0x4499,0x485d,0x485d +.short 0xc7a2,0xc7a2,0x4c49,0x4c49,0xeb65,0xeb65,0xceb6,0xceb6 +.short 0x0714,0x0714,0x0714,0x0714,0x0714,0x0714,0x0714,0x0714 +.short 0x0714,0x0714,0x0714,0x0714,0x0714,0x0714,0x0714,0x0714 +.short 0x0314,0x0314,0x0314,0x0314,0x0314,0x0314,0x0314,0x0314 +.short 0x0314,0x0314,0x0314,0x0314,0x0314,0x0314,0x0314,0x0314 +.short 0x011f,0x011f,0x011f,0x011f,0x011f,0x011f,0x011f,0x011f +.short 0x011f,0x011f,0x011f,0x011f,0x011f,0x011f,0x011f,0x011f +.short 0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f +.short 0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f,0x6e1f +.short 0x00ca,0x00ca,0x00ca,0x00ca,0x00ca,0x00ca,0x00ca,0x00ca +.short 0x00ca,0x00ca,0x00ca,0x00ca,0x00ca,0x00ca,0x00ca,0x00ca +.short 0xbeca,0xbeca,0xbeca,0xbeca,0xbeca,0xbeca,0xbeca,0xbeca +.short 0xbeca,0xbeca,0xbeca,0xbeca,0xbeca,0xbeca,0xbeca,0xbeca +.short 0x03c2,0x03c2,0x03c2,0x03c2,0x03c2,0x03c2,0x03c2,0x03c2 +.short 0x03c2,0x03c2,0x03c2,0x03c2,0x03c2,0x03c2,0x03c2,0x03c2 +.short 0x29c2,0x29c2,0x29c2,0x29c2,0x29c2,0x29c2,0x29c2,0x29c2 +.short 0x29c2,0x29c2,0x29c2,0x29c2,0x29c2,0x29c2,0x29c2,0x29c2 +.short 0x084f,0x084f,0x084f,0x084f,0x084f,0x084f,0x084f,0x084f +.short 0x084f,0x084f,0x084f,0x084f,0x084f,0x084f,0x084f,0x084f +.short 0x054f,0x054f,0x054f,0x054f,0x054f,0x054f,0x054f,0x054f +.short 0x054f,0x054f,0x054f,0x054f,0x054f,0x054f,0x054f,0x054f +.short 0x073f,0x073f,0x073f,0x073f,0x073f,0x073f,0x073f,0x073f +.short 0x073f,0x073f,0x073f,0x073f,0x073f,0x073f,0x073f,0x073f +.short 0xd43f,0xd43f,0xd43f,0xd43f,0xd43f,0xd43f,0xd43f,0xd43f +.short 0xd43f,0xd43f,0xd43f,0xd43f,0xd43f,0xd43f,0xd43f,0xd43f +.short 0x05bc,0x05bc,0x05bc,0x05bc,0x05bc,0x05bc,0x05bc,0x05bc +.short 0x05bc,0x05bc,0x05bc,0x05bc,0x05bc,0x05bc,0x05bc,0x05bc +.short 0x79bc,0x79bc,0x79bc,0x79bc,0x79bc,0x79bc,0x79bc,0x79bc +.short 0x79bc,0x79bc,0x79bc,0x79bc,0x79bc,0x79bc,0x79bc,0x79bc +.short 0x0a58,0x0a58,0x0a58,0x0a58,0x0a58,0x0a58,0x0a58,0x0a58 +.short 0x03f9,0x03f9,0x03f9,0x03f9,0x03f9,0x03f9,0x03f9,0x03f9 +.short 0x9258,0x9258,0x9258,0x9258,0x9258,0x9258,0x9258,0x9258 +.short 0x5ef9,0x5ef9,0x5ef9,0x5ef9,0x5ef9,0x5ef9,0x5ef9,0x5ef9 +.short 0x02dc,0x02dc,0x02dc,0x02dc,0x02dc,0x02dc,0x02dc,0x02dc +.short 0x0260,0x0260,0x0260,0x0260,0x0260,0x0260,0x0260,0x0260 +.short 0xd6dc,0xd6dc,0xd6dc,0xd6dc,0xd6dc,0xd6dc,0xd6dc,0xd6dc +.short 0x2260,0x2260,0x2260,0x2260,0x2260,0x2260,0x2260,0x2260 +.short 0x09ac,0x09ac,0x09ac,0x09ac,0x0ca7,0x0ca7,0x0ca7,0x0ca7 +.short 0x0bf2,0x0bf2,0x0bf2,0x0bf2,0x033e,0x033e,0x033e,0x033e +.short 0x4dac,0x4dac,0x4dac,0x4dac,0x91a7,0x91a7,0x91a7,0x91a7 +.short 0xc1f2,0xc1f2,0xc1f2,0xc1f2,0xdd3e,0xdd3e,0xdd3e,0xdd3e +.short 0x006b,0x006b,0x006b,0x006b,0x0774,0x0774,0x0774,0x0774 +.short 0x0c0a,0x0c0a,0x0c0a,0x0c0a,0x094a,0x094a,0x094a,0x094a +.short 0x916b,0x916b,0x916b,0x916b,0x2374,0x2374,0x2374,0x2374 +.short 0x8a0a,0x8a0a,0x8a0a,0x8a0a,0x474a,0x474a,0x474a,0x474a +.short 0x06fb,0x06fb,0x06fb,0x06fb,0x06fb,0x06fb,0x06fb,0x06fb +.short 0x019b,0x019b,0x019b,0x019b,0x019b,0x019b,0x019b,0x019b +.short 0x47fb,0x47fb,0x47fb,0x47fb,0x47fb,0x47fb,0x47fb,0x47fb +.short 0x229b,0x229b,0x229b,0x229b,0x229b,0x229b,0x229b,0x229b +.short 0x0c34,0x0c34,0x0c34,0x0c34,0x0c34,0x0c34,0x0c34,0x0c34 +.short 0x06de,0x06de,0x06de,0x06de,0x06de,0x06de,0x06de,0x06de +.short 0x6834,0x6834,0x6834,0x6834,0x6834,0x6834,0x6834,0x6834 +.short 0xc0de,0xc0de,0xc0de,0xc0de,0xc0de,0xc0de,0xc0de,0xc0de +.short 0x0b73,0x0b73,0x0b73,0x0b73,0x03c1,0x03c1,0x03c1,0x03c1 +.short 0x071d,0x071d,0x071d,0x071d,0x0a2c,0x0a2c,0x0a2c,0x0a2c +.short 0x3473,0x3473,0x3473,0x3473,0x36c1,0x36c1,0x36c1,0x36c1 +.short 0x8e1d,0x8e1d,0x8e1d,0x8e1d,0xce2c,0xce2c,0xce2c,0xce2c +.short 0x01c0,0x01c0,0x01c0,0x01c0,0x08d8,0x08d8,0x08d8,0x08d8 +.short 0x02a5,0x02a5,0x02a5,0x02a5,0x0806,0x0806,0x0806,0x0806 +.short 0x41c0,0x41c0,0x41c0,0x41c0,0x10d8,0x10d8,0x10d8,0x10d8 +.short 0xa1a5,0xa1a5,0xa1a5,0xa1a5,0xba06,0xba06,0xba06,0xba06 +.short 0x0331,0x0331,0x0449,0x0449,0x025b,0x025b,0x0262,0x0262 +.short 0x052a,0x052a,0x07fc,0x07fc,0x0748,0x0748,0x0180,0x0180 +.short 0x8631,0x8631,0x4f49,0x4f49,0x635b,0x635b,0x0862,0x0862 +.short 0xe32a,0xe32a,0x3bfc,0x3bfc,0x5f48,0x5f48,0x8180,0x8180 +.short 0x0842,0x0842,0x0c79,0x0c79,0x04c2,0x04c2,0x07ca,0x07ca +.short 0x0997,0x0997,0x00dc,0x00dc,0x085e,0x085e,0x0686,0x0686 +.short 0xae42,0xae42,0xe779,0xe779,0x2ac2,0x2ac2,0xc5ca,0xc5ca +.short 0x5e97,0x5e97,0xd4dc,0xd4dc,0x425e,0x425e,0x3886,0x3886 +.short 0x0860,0x0860,0x0707,0x0707,0x0803,0x0803,0x031a,0x031a +.short 0x071b,0x071b,0x09ab,0x09ab,0x099b,0x099b,0x01de,0x01de +.short 0x2860,0x2860,0xac07,0xac07,0xe103,0xe103,0xb11a,0xb11a +.short 0xa81b,0xa81b,0x5aab,0x5aab,0x2a9b,0x2a9b,0xbbde,0xbbde +.short 0x0c95,0x0c95,0x0bcd,0x0bcd,0x03e4,0x03e4,0x03df,0x03df +.short 0x03be,0x03be,0x074d,0x074d,0x05f2,0x05f2,0x065c,0x065c +.short 0x7b95,0x7b95,0xa2cd,0xa2cd,0x6fe4,0x6fe4,0xb0df,0xb0df +.short 0x5dbe,0x5dbe,0x1e4d,0x1e4d,0xbbf2,0xbbf2,0x5a5c,0x5a5c #ifndef __APPLE__ .data #else @@ -790,134 +292,38 @@ L_mlkem_avx2_zetas: .p2align 4 #endif /* __APPLE__ */ L_mlkem_avx2_zetas_basemul: -.value 0x08b2,0x081e -.value 0xf74e,0xf7e2 -.value 0x01ae,0x0367 -.value 0xfe52,0xfc99 -.value 0x022b,0x060e -.value 0xfdd5,0xf9f2 -.value 0x034b,0x0069 -.value 0xfcb5,0xff97 -.value 0xfeb2,0x821e -.value 0x014e,0x7de2 -.value 0x2bae,0xc867 -.value 0xd452,0x3799 -.value 0xd32b,0x500e -.value 0x2cd5,0xaff2 -.value 0x344b,0xab69 -.value 0xcbb5,0x5497 -.value 0x01a6,0x0bde -.value 0xfe5a,0xf422 -.value 0x024b,0x0b35 -.value 0xfdb5,0xf4cb -.value 0x00b1,0x0626 -.value 0xff4f,0xf9da -.value 0x0c16,0x0675 -.value 0xf3ea,0xf98b -.value 0x93a6,0xc5de -.value 0x6c5a,0x3a22 -.value 0x334b,0x5a35 -.value 0xccb5,0xa5cb -.value 0x03b1,0x1826 -.value 0xfc4f,0xe7da -.value 0xee16,0x1575 -.value 0x11ea,0xea8b -.value 0x0c0b,0x09f8 -.value 0xf3f5,0xf608 -.value 0x030a,0x05cb -.value 0xfcf6,0xfa35 -.value 0x0487,0x0aa7 -.value 0xfb79,0xf559 -.value 0x0c6e,0x045f -.value 0xf392,0xfba1 -.value 0x7d0b,0x71f8 -.value 0x82f5,0x8e08 -.value 0x810a,0xb6cb -.value 0x7ef6,0x4935 -.value 0x2987,0x8fa7 -.value 0xd679,0x7059 -.value 0x766e,0x315f -.value 0x8992,0xcea1 -.value 0x06cb,0x01a2 -.value 0xf935,0xfe5e -.value 0x0284,0x0149 -.value 0xfd7c,0xfeb7 -.value 0x0999,0x0c65 -.value 0xf667,0xf39b -.value 0x015d,0x0cb6 -.value 0xfea3,0xf34a -.value 0xb7cb,0xc7a2 -.value 0x4835,0x385e -.value 0x4e84,0x4c49 -.value 0xb17c,0xb3b7 -.value 0x4499,0xeb65 -.value 0xbb67,0x149b -.value 0x485d,0xceb6 -.value 0xb7a3,0x314a -.value 0x0331,0x052a -.value 0xfccf,0xfad6 -.value 0x0449,0x07fc -.value 0xfbb7,0xf804 -.value 0x025b,0x0748 -.value 0xfda5,0xf8b8 -.value 0x0262,0x0180 -.value 0xfd9e,0xfe80 -.value 0x8631,0xe32a -.value 0x79cf,0x1cd6 -.value 0x4f49,0x3bfc -.value 0xb0b7,0xc404 -.value 0x635b,0x5f48 -.value 0x9ca5,0xa0b8 -.value 0x0862,0x8180 -.value 0xf79e,0x7e80 -.value 0x0842,0x0997 -.value 0xf7be,0xf669 -.value 0x0c79,0x00dc -.value 0xf387,0xff24 -.value 0x04c2,0x085e -.value 0xfb3e,0xf7a2 -.value 0x07ca,0x0686 -.value 0xf836,0xf97a -.value 0xae42,0x5e97 -.value 0x51be,0xa169 -.value 0xe779,0xd4dc -.value 0x1887,0x2b24 -.value 0x2ac2,0x425e -.value 0xd53e,0xbda2 -.value 0xc5ca,0x3886 -.value 0x3a36,0xc77a -.value 0x0860,0x071b -.value 0xf7a0,0xf8e5 -.value 0x0707,0x09ab -.value 0xf8f9,0xf655 -.value 0x0803,0x099b -.value 0xf7fd,0xf665 -.value 0x031a,0x01de -.value 0xfce6,0xfe22 -.value 0x2860,0xa81b -.value 0xd7a0,0x57e5 -.value 0xac07,0x5aab -.value 0x53f9,0xa555 -.value 0xe103,0x2a9b -.value 0x1efd,0xd565 -.value 0xb11a,0xbbde -.value 0x4ee6,0x4422 -.value 0x0c95,0x03be -.value 0xf36b,0xfc42 -.value 0x0bcd,0x074d -.value 0xf433,0xf8b3 -.value 0x03e4,0x05f2 -.value 0xfc1c,0xfa0e -.value 0x03df,0x065c -.value 0xfc21,0xf9a4 -.value 0x7b95,0x5dbe -.value 0x846b,0xa242 -.value 0xa2cd,0x1e4d -.value 0x5d33,0xe1b3 -.value 0x6fe4,0xbbf2 -.value 0x901c,0x440e -.value 0xb0df,0x5a5c -.value 0x4f21,0xa5a4 +.short 0x08b2,0x081e,0xf74e,0xf7e2,0x01ae,0x0367,0xfe52,0xfc99 +.short 0x022b,0x060e,0xfdd5,0xf9f2,0x034b,0x0069,0xfcb5,0xff97 +.short 0xfeb2,0x821e,0x014e,0x7de2,0x2bae,0xc867,0xd452,0x3799 +.short 0xd32b,0x500e,0x2cd5,0xaff2,0x344b,0xab69,0xcbb5,0x5497 +.short 0x01a6,0x0bde,0xfe5a,0xf422,0x024b,0x0b35,0xfdb5,0xf4cb +.short 0x00b1,0x0626,0xff4f,0xf9da,0x0c16,0x0675,0xf3ea,0xf98b +.short 0x93a6,0xc5de,0x6c5a,0x3a22,0x334b,0x5a35,0xccb5,0xa5cb +.short 0x03b1,0x1826,0xfc4f,0xe7da,0xee16,0x1575,0x11ea,0xea8b +.short 0x0c0b,0x09f8,0xf3f5,0xf608,0x030a,0x05cb,0xfcf6,0xfa35 +.short 0x0487,0x0aa7,0xfb79,0xf559,0x0c6e,0x045f,0xf392,0xfba1 +.short 0x7d0b,0x71f8,0x82f5,0x8e08,0x810a,0xb6cb,0x7ef6,0x4935 +.short 0x2987,0x8fa7,0xd679,0x7059,0x766e,0x315f,0x8992,0xcea1 +.short 0x06cb,0x01a2,0xf935,0xfe5e,0x0284,0x0149,0xfd7c,0xfeb7 +.short 0x0999,0x0c65,0xf667,0xf39b,0x015d,0x0cb6,0xfea3,0xf34a +.short 0xb7cb,0xc7a2,0x4835,0x385e,0x4e84,0x4c49,0xb17c,0xb3b7 +.short 0x4499,0xeb65,0xbb67,0x149b,0x485d,0xceb6,0xb7a3,0x314a +.short 0x0331,0x052a,0xfccf,0xfad6,0x0449,0x07fc,0xfbb7,0xf804 +.short 0x025b,0x0748,0xfda5,0xf8b8,0x0262,0x0180,0xfd9e,0xfe80 +.short 0x8631,0xe32a,0x79cf,0x1cd6,0x4f49,0x3bfc,0xb0b7,0xc404 +.short 0x635b,0x5f48,0x9ca5,0xa0b8,0x0862,0x8180,0xf79e,0x7e80 +.short 0x0842,0x0997,0xf7be,0xf669,0x0c79,0x00dc,0xf387,0xff24 +.short 0x04c2,0x085e,0xfb3e,0xf7a2,0x07ca,0x0686,0xf836,0xf97a +.short 0xae42,0x5e97,0x51be,0xa169,0xe779,0xd4dc,0x1887,0x2b24 +.short 0x2ac2,0x425e,0xd53e,0xbda2,0xc5ca,0x3886,0x3a36,0xc77a +.short 0x0860,0x071b,0xf7a0,0xf8e5,0x0707,0x09ab,0xf8f9,0xf655 +.short 0x0803,0x099b,0xf7fd,0xf665,0x031a,0x01de,0xfce6,0xfe22 +.short 0x2860,0xa81b,0xd7a0,0x57e5,0xac07,0x5aab,0x53f9,0xa555 +.short 0xe103,0x2a9b,0x1efd,0xd565,0xb11a,0xbbde,0x4ee6,0x4422 +.short 0x0c95,0x03be,0xf36b,0xfc42,0x0bcd,0x074d,0xf433,0xf8b3 +.short 0x03e4,0x05f2,0xfc1c,0xfa0e,0x03df,0x065c,0xfc21,0xf9a4 +.short 0x7b95,0x5dbe,0x846b,0xa242,0xa2cd,0x1e4d,0x5d33,0xe1b3 +.short 0x6fe4,0xbbf2,0x901c,0x440e,0xb0df,0x5a5c,0x4f21,0xa5a4 #ifndef __APPLE__ .data #else @@ -929,646 +335,166 @@ L_mlkem_avx2_zetas_basemul: .p2align 4 #endif /* __APPLE__ */ L_mlkem_avx2_zetas_inv: -.value 0x06a5,0x06a5 -.value 0x05b4,0x05b4 -.value 0x070f,0x070f -.value 0x0943,0x0943 -.value 0x0922,0x0922 -.value 0x0134,0x0134 -.value 0x091d,0x091d -.value 0x006c,0x006c -.value 0xa5a5,0xa5a5 -.value 0xe1b4,0xe1b4 -.value 0x440f,0x440f -.value 0xa243,0xa243 -.value 0x4f22,0x4f22 -.value 0x5d34,0x5d34 -.value 0x901d,0x901d -.value 0x846c,0x846c -.value 0x0b23,0x0b23 -.value 0x0356,0x0356 -.value 0x0366,0x0366 -.value 0x05e6,0x05e6 -.value 0x09e7,0x09e7 -.value 0x05fa,0x05fa -.value 0x04fe,0x04fe -.value 0x04a1,0x04a1 -.value 0x4423,0x4423 -.value 0xa556,0xa556 -.value 0xd566,0xd566 -.value 0x57e6,0x57e6 -.value 0x4ee7,0x4ee7 -.value 0x53fa,0x53fa -.value 0x1efe,0x1efe -.value 0xd7a1,0xd7a1 -.value 0x04fb,0x04fb -.value 0x04fb,0x04fb -.value 0x0a5c,0x0a5c -.value 0x0a5c,0x0a5c -.value 0x0429,0x0429 -.value 0x0429,0x0429 -.value 0x0b41,0x0b41 -.value 0x0b41,0x0b41 -.value 0x45fb,0x45fb -.value 0x45fb,0x45fb -.value 0x5e5c,0x5e5c -.value 0x5e5c,0x5e5c -.value 0xef29,0xef29 -.value 0xef29,0xef29 -.value 0xbe41,0xbe41 -.value 0xbe41,0xbe41 -.value 0x02d5,0x02d5 -.value 0x02d5,0x02d5 -.value 0x05e4,0x05e4 -.value 0x05e4,0x05e4 -.value 0x0940,0x0940 -.value 0x0940,0x0940 -.value 0x018e,0x018e -.value 0x018e,0x018e -.value 0x31d5,0x31d5 -.value 0x31d5,0x31d5 -.value 0x71e4,0x71e4 -.value 0x71e4,0x71e4 -.value 0xc940,0xc940 -.value 0xc940,0xc940 -.value 0xcb8e,0xcb8e -.value 0xcb8e,0xcb8e -.value 0x0623,0x0623 -.value 0x0623,0x0623 -.value 0x0623,0x0623 -.value 0x0623,0x0623 -.value 0x00cd,0x00cd -.value 0x00cd,0x00cd -.value 0x00cd,0x00cd -.value 0x00cd,0x00cd -.value 0x3f23,0x3f23 -.value 0x3f23,0x3f23 -.value 0x3f23,0x3f23 -.value 0x3f23,0x3f23 -.value 0x97cd,0x97cd -.value 0x97cd,0x97cd -.value 0x97cd,0x97cd -.value 0x97cd,0x97cd -.value 0x0b66,0x0b66 -.value 0x0b66,0x0b66 -.value 0x0b66,0x0b66 -.value 0x0b66,0x0b66 -.value 0x0606,0x0606 -.value 0x0606,0x0606 -.value 0x0606,0x0606 -.value 0x0606,0x0606 -.value 0xdd66,0xdd66 -.value 0xdd66,0xdd66 -.value 0xdd66,0xdd66 -.value 0xdd66,0xdd66 -.value 0xb806,0xb806 -.value 0xb806,0xb806 -.value 0xb806,0xb806 -.value 0xb806,0xb806 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x0745,0x0745 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x8645,0x8645 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x05c2,0x05c2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x2bc2,0x2bc2 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x0c37,0x0c37 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x4137,0x4137 -.value 0x067b,0x067b -.value 0x0c25,0x0c25 -.value 0x04a3,0x04a3 -.value 0x036a,0x036a -.value 0x0537,0x0537 -.value 0x0088,0x0088 -.value 0x083f,0x083f -.value 0x04bf,0x04bf -.value 0xc77b,0xc77b -.value 0x2b25,0x2b25 -.value 0xbda3,0xbda3 -.value 0xa16a,0xa16a -.value 0x3a37,0x3a37 -.value 0x1888,0x1888 -.value 0xd53f,0xd53f -.value 0x51bf,0x51bf -.value 0x0b81,0x0b81 -.value 0x0505,0x0505 -.value 0x05b9,0x05b9 -.value 0x07d7,0x07d7 -.value 0x0a9f,0x0a9f -.value 0x08b8,0x08b8 -.value 0x0aa6,0x0aa6 -.value 0x09d0,0x09d0 -.value 0x7e81,0x7e81 -.value 0xc405,0xc405 -.value 0xa0b9,0xa0b9 -.value 0x1cd7,0x1cd7 -.value 0xf79f,0xf79f -.value 0xb0b8,0xb0b8 -.value 0x9ca6,0x9ca6 -.value 0x79d0,0x79d0 -.value 0x03b7,0x03b7 -.value 0x03b7,0x03b7 -.value 0x00f7,0x00f7 -.value 0x00f7,0x00f7 -.value 0x058d,0x058d -.value 0x058d,0x058d -.value 0x0c96,0x0c96 -.value 0x0c96,0x0c96 -.value 0xb8b7,0xb8b7 -.value 0xb8b7,0xb8b7 -.value 0x75f7,0x75f7 -.value 0x75f7,0x75f7 -.value 0xdc8d,0xdc8d -.value 0xdc8d,0xdc8d -.value 0x6e96,0x6e96 -.value 0x6e96,0x6e96 -.value 0x09c3,0x09c3 -.value 0x09c3,0x09c3 -.value 0x010f,0x010f -.value 0x010f,0x010f -.value 0x005a,0x005a -.value 0x005a,0x005a -.value 0x0355,0x0355 -.value 0x0355,0x0355 -.value 0x22c3,0x22c3 -.value 0x22c3,0x22c3 -.value 0x3e0f,0x3e0f -.value 0x3e0f,0x3e0f -.value 0x6e5a,0x6e5a -.value 0x6e5a,0x6e5a -.value 0xb255,0xb255 -.value 0xb255,0xb255 -.value 0x0aa1,0x0aa1 -.value 0x0aa1,0x0aa1 -.value 0x0aa1,0x0aa1 -.value 0x0aa1,0x0aa1 -.value 0x0a25,0x0a25 -.value 0x0a25,0x0a25 -.value 0x0a25,0x0a25 -.value 0x0a25,0x0a25 -.value 0xdda1,0xdda1 -.value 0xdda1,0xdda1 -.value 0xdda1,0xdda1 -.value 0xdda1,0xdda1 -.value 0x2925,0x2925 -.value 0x2925,0x2925 -.value 0x2925,0x2925 -.value 0x2925,0x2925 -.value 0x0908,0x0908 -.value 0x0908,0x0908 -.value 0x0908,0x0908 -.value 0x0908,0x0908 -.value 0x02a9,0x02a9 -.value 0x02a9,0x02a9 -.value 0x02a9,0x02a9 -.value 0x02a9,0x02a9 -.value 0xa108,0xa108 -.value 0xa108,0xa108 -.value 0xa108,0xa108 -.value 0xa108,0xa108 -.value 0x6da9,0x6da9 -.value 0x6da9,0x6da9 -.value 0x6da9,0x6da9 -.value 0x6da9,0x6da9 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0x04b2,0x04b2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0xfab2,0xfab2 -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0x093f,0x093f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0xd63f,0xd63f -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x0be2,0x0be2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x91e2,0x91e2 -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0x05ed,0x05ed -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0xfced,0xfced -.value 0x004b,0x004b -.value 0x0bb8,0x0bb8 -.value 0x009c,0x009c -.value 0x0b5f,0x0b5f -.value 0x0ba4,0x0ba4 -.value 0x0a7d,0x0a7d -.value 0x0368,0x0368 -.value 0x0636,0x0636 -.value 0x314b,0x314b -.value 0xb3b8,0xb3b8 -.value 0x149c,0x149c -.value 0x385f,0x385f -.value 0xb7a4,0xb7a4 -.value 0xb17d,0xb17d -.value 0xbb68,0xbb68 -.value 0x4836,0x4836 -.value 0x08a2,0x08a2 -.value 0x0736,0x0736 -.value 0x025a,0x025a -.value 0x0309,0x0309 -.value 0x0093,0x0093 -.value 0x09f7,0x09f7 -.value 0x087a,0x087a -.value 0x00f6,0x00f6 -.value 0xcea2,0xcea2 -.value 0x4936,0x4936 -.value 0x705a,0x705a -.value 0x8e09,0x8e09 -.value 0x8993,0x8993 -.value 0x7ef7,0x7ef7 -.value 0xd67a,0xd67a -.value 0x82f6,0x82f6 -.value 0x0744,0x0744 -.value 0x0744,0x0744 -.value 0x0c83,0x0c83 -.value 0x0c83,0x0c83 -.value 0x048a,0x048a -.value 0x048a,0x048a -.value 0x0652,0x0652 -.value 0x0652,0x0652 -.value 0x9344,0x9344 -.value 0x9344,0x9344 -.value 0x6583,0x6583 -.value 0x6583,0x6583 -.value 0x028a,0x028a -.value 0x028a,0x028a -.value 0xdc52,0xdc52 -.value 0xdc52,0xdc52 -.value 0x029a,0x029a -.value 0x029a,0x029a -.value 0x0140,0x0140 -.value 0x0140,0x0140 -.value 0x0008,0x0008 -.value 0x0008,0x0008 -.value 0x0afd,0x0afd -.value 0x0afd,0x0afd -.value 0x309a,0x309a -.value 0x309a,0x309a -.value 0xc140,0xc140 -.value 0xc140,0xc140 -.value 0x9808,0x9808 -.value 0x9808,0x9808 -.value 0x31fd,0x31fd -.value 0x31fd,0x31fd -.value 0x0082,0x0082 -.value 0x0082,0x0082 -.value 0x0082,0x0082 -.value 0x0082,0x0082 -.value 0x0642,0x0642 -.value 0x0642,0x0642 -.value 0x0642,0x0642 -.value 0x0642,0x0642 -.value 0x6682,0x6682 -.value 0x6682,0x6682 -.value 0x6682,0x6682 -.value 0x6682,0x6682 -.value 0xac42,0xac42 -.value 0xac42,0xac42 -.value 0xac42,0xac42 -.value 0xac42,0xac42 -.value 0x074f,0x074f -.value 0x074f,0x074f -.value 0x074f,0x074f -.value 0x074f,0x074f -.value 0x033d,0x033d -.value 0x033d,0x033d -.value 0x033d,0x033d -.value 0x033d,0x033d -.value 0x044f,0x044f -.value 0x044f,0x044f -.value 0x044f,0x044f -.value 0x044f,0x044f -.value 0xea3d,0xea3d -.value 0xea3d,0xea3d -.value 0xea3d,0xea3d -.value 0xea3d,0xea3d -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x0c4b,0x0c4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x3d4b,0x3d4b -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x06d8,0x06d8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0ed8,0x0ed8 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x0773,0x0773 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x3073,0x3073 -.value 0x068c,0x068c -.value 0x01cc,0x01cc -.value 0x06db,0x06db -.value 0x0123,0x0123 -.value 0x00eb,0x00eb -.value 0x0ab6,0x0ab6 -.value 0x0c50,0x0c50 -.value 0x0b5b,0x0b5b -.value 0xea8c,0xea8c -.value 0xa5cc,0xa5cc -.value 0xe7db,0xe7db -.value 0x3a23,0x3a23 -.value 0x11eb,0x11eb -.value 0xccb6,0xccb6 -.value 0xfc50,0xfc50 -.value 0x6c5b,0x6c5b -.value 0x0c98,0x0c98 -.value 0x099a,0x099a -.value 0x06f3,0x06f3 -.value 0x04e3,0x04e3 -.value 0x09b6,0x09b6 -.value 0x0b53,0x0b53 -.value 0x0ad6,0x0ad6 -.value 0x044f,0x044f -.value 0x5498,0x5498 -.value 0x379a,0x379a -.value 0xaff3,0xaff3 -.value 0x7de3,0x7de3 -.value 0xcbb6,0xcbb6 -.value 0xd453,0xd453 -.value 0x2cd6,0x2cd6 -.value 0x014f,0x014f -.value 0x0608,0x0608 -.value 0x0608,0x0608 -.value 0x011a,0x011a -.value 0x011a,0x011a -.value 0x072e,0x072e -.value 0x072e,0x072e -.value 0x050d,0x050d -.value 0x050d,0x050d -.value 0x9e08,0x9e08 -.value 0x9e08,0x9e08 -.value 0xaf1a,0xaf1a -.value 0xaf1a,0xaf1a -.value 0xb12e,0xb12e -.value 0xb12e,0xb12e -.value 0x5c0d,0x5c0d -.value 0x5c0d,0x5c0d -.value 0x090a,0x090a -.value 0x090a,0x090a -.value 0x0228,0x0228 -.value 0x0228,0x0228 -.value 0x0a75,0x0a75 -.value 0x0a75,0x0a75 -.value 0x083a,0x083a -.value 0x083a,0x083a -.value 0x870a,0x870a -.value 0x870a,0x870a -.value 0xfa28,0xfa28 -.value 0xfa28,0xfa28 -.value 0x1975,0x1975 -.value 0x1975,0x1975 -.value 0x163a,0x163a -.value 0x163a,0x163a -.value 0x0b82,0x0b82 -.value 0x0b82,0x0b82 -.value 0x0b82,0x0b82 -.value 0x0b82,0x0b82 -.value 0x0bf9,0x0bf9 -.value 0x0bf9,0x0bf9 -.value 0x0bf9,0x0bf9 -.value 0x0bf9,0x0bf9 -.value 0x7182,0x7182 -.value 0x7182,0x7182 -.value 0x7182,0x7182 -.value 0x7182,0x7182 -.value 0x66f9,0x66f9 -.value 0x66f9,0x66f9 -.value 0x66f9,0x66f9 -.value 0x66f9,0x66f9 -.value 0x052d,0x052d -.value 0x052d,0x052d -.value 0x052d,0x052d -.value 0x052d,0x052d -.value 0x0ac4,0x0ac4 -.value 0x0ac4,0x0ac4 -.value 0x0ac4,0x0ac4 -.value 0x0ac4,0x0ac4 -.value 0xbc2d,0xbc2d -.value 0xbc2d,0xbc2d -.value 0xbc2d,0xbc2d -.value 0xbc2d,0xbc2d -.value 0x16c4,0x16c4 -.value 0x16c4,0x16c4 -.value 0x16c4,0x16c4 -.value 0x16c4,0x16c4 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x0a93,0x0a93 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x9393,0x9393 -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x00ab,0x00ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x51ab,0x51ab -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0x072c,0x072c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0xcb2c,0xcb2c -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0x0167,0x0167 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0xc667,0xc667 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x02f6,0x02f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x84f6,0x84f6 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0x05a1,0x05a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 -.value 0xd8a1,0xd8a1 +.short 0x06a5,0x06a5,0x05b4,0x05b4,0x070f,0x070f,0x0943,0x0943 +.short 0x0922,0x0922,0x0134,0x0134,0x091d,0x091d,0x006c,0x006c +.short 0xa5a5,0xa5a5,0xe1b4,0xe1b4,0x440f,0x440f,0xa243,0xa243 +.short 0x4f22,0x4f22,0x5d34,0x5d34,0x901d,0x901d,0x846c,0x846c +.short 0x0b23,0x0b23,0x0356,0x0356,0x0366,0x0366,0x05e6,0x05e6 +.short 0x09e7,0x09e7,0x05fa,0x05fa,0x04fe,0x04fe,0x04a1,0x04a1 +.short 0x4423,0x4423,0xa556,0xa556,0xd566,0xd566,0x57e6,0x57e6 +.short 0x4ee7,0x4ee7,0x53fa,0x53fa,0x1efe,0x1efe,0xd7a1,0xd7a1 +.short 0x04fb,0x04fb,0x04fb,0x04fb,0x0a5c,0x0a5c,0x0a5c,0x0a5c +.short 0x0429,0x0429,0x0429,0x0429,0x0b41,0x0b41,0x0b41,0x0b41 +.short 0x45fb,0x45fb,0x45fb,0x45fb,0x5e5c,0x5e5c,0x5e5c,0x5e5c +.short 0xef29,0xef29,0xef29,0xef29,0xbe41,0xbe41,0xbe41,0xbe41 +.short 0x02d5,0x02d5,0x02d5,0x02d5,0x05e4,0x05e4,0x05e4,0x05e4 +.short 0x0940,0x0940,0x0940,0x0940,0x018e,0x018e,0x018e,0x018e +.short 0x31d5,0x31d5,0x31d5,0x31d5,0x71e4,0x71e4,0x71e4,0x71e4 +.short 0xc940,0xc940,0xc940,0xc940,0xcb8e,0xcb8e,0xcb8e,0xcb8e +.short 0x0623,0x0623,0x0623,0x0623,0x0623,0x0623,0x0623,0x0623 +.short 0x00cd,0x00cd,0x00cd,0x00cd,0x00cd,0x00cd,0x00cd,0x00cd +.short 0x3f23,0x3f23,0x3f23,0x3f23,0x3f23,0x3f23,0x3f23,0x3f23 +.short 0x97cd,0x97cd,0x97cd,0x97cd,0x97cd,0x97cd,0x97cd,0x97cd +.short 0x0b66,0x0b66,0x0b66,0x0b66,0x0b66,0x0b66,0x0b66,0x0b66 +.short 0x0606,0x0606,0x0606,0x0606,0x0606,0x0606,0x0606,0x0606 +.short 0xdd66,0xdd66,0xdd66,0xdd66,0xdd66,0xdd66,0xdd66,0xdd66 +.short 0xb806,0xb806,0xb806,0xb806,0xb806,0xb806,0xb806,0xb806 +.short 0x0745,0x0745,0x0745,0x0745,0x0745,0x0745,0x0745,0x0745 +.short 0x0745,0x0745,0x0745,0x0745,0x0745,0x0745,0x0745,0x0745 +.short 0x8645,0x8645,0x8645,0x8645,0x8645,0x8645,0x8645,0x8645 +.short 0x8645,0x8645,0x8645,0x8645,0x8645,0x8645,0x8645,0x8645 +.short 0x05c2,0x05c2,0x05c2,0x05c2,0x05c2,0x05c2,0x05c2,0x05c2 +.short 0x05c2,0x05c2,0x05c2,0x05c2,0x05c2,0x05c2,0x05c2,0x05c2 +.short 0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2 +.short 0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2,0x2bc2 +.short 0x0c37,0x0c37,0x0c37,0x0c37,0x0c37,0x0c37,0x0c37,0x0c37 +.short 0x0c37,0x0c37,0x0c37,0x0c37,0x0c37,0x0c37,0x0c37,0x0c37 +.short 0x4137,0x4137,0x4137,0x4137,0x4137,0x4137,0x4137,0x4137 +.short 0x4137,0x4137,0x4137,0x4137,0x4137,0x4137,0x4137,0x4137 +.short 0x067b,0x067b,0x0c25,0x0c25,0x04a3,0x04a3,0x036a,0x036a +.short 0x0537,0x0537,0x0088,0x0088,0x083f,0x083f,0x04bf,0x04bf +.short 0xc77b,0xc77b,0x2b25,0x2b25,0xbda3,0xbda3,0xa16a,0xa16a +.short 0x3a37,0x3a37,0x1888,0x1888,0xd53f,0xd53f,0x51bf,0x51bf +.short 0x0b81,0x0b81,0x0505,0x0505,0x05b9,0x05b9,0x07d7,0x07d7 +.short 0x0a9f,0x0a9f,0x08b8,0x08b8,0x0aa6,0x0aa6,0x09d0,0x09d0 +.short 0x7e81,0x7e81,0xc405,0xc405,0xa0b9,0xa0b9,0x1cd7,0x1cd7 +.short 0xf79f,0xf79f,0xb0b8,0xb0b8,0x9ca6,0x9ca6,0x79d0,0x79d0 +.short 0x03b7,0x03b7,0x03b7,0x03b7,0x00f7,0x00f7,0x00f7,0x00f7 +.short 0x058d,0x058d,0x058d,0x058d,0x0c96,0x0c96,0x0c96,0x0c96 +.short 0xb8b7,0xb8b7,0xb8b7,0xb8b7,0x75f7,0x75f7,0x75f7,0x75f7 +.short 0xdc8d,0xdc8d,0xdc8d,0xdc8d,0x6e96,0x6e96,0x6e96,0x6e96 +.short 0x09c3,0x09c3,0x09c3,0x09c3,0x010f,0x010f,0x010f,0x010f +.short 0x005a,0x005a,0x005a,0x005a,0x0355,0x0355,0x0355,0x0355 +.short 0x22c3,0x22c3,0x22c3,0x22c3,0x3e0f,0x3e0f,0x3e0f,0x3e0f +.short 0x6e5a,0x6e5a,0x6e5a,0x6e5a,0xb255,0xb255,0xb255,0xb255 +.short 0x0aa1,0x0aa1,0x0aa1,0x0aa1,0x0aa1,0x0aa1,0x0aa1,0x0aa1 +.short 0x0a25,0x0a25,0x0a25,0x0a25,0x0a25,0x0a25,0x0a25,0x0a25 +.short 0xdda1,0xdda1,0xdda1,0xdda1,0xdda1,0xdda1,0xdda1,0xdda1 +.short 0x2925,0x2925,0x2925,0x2925,0x2925,0x2925,0x2925,0x2925 +.short 0x0908,0x0908,0x0908,0x0908,0x0908,0x0908,0x0908,0x0908 +.short 0x02a9,0x02a9,0x02a9,0x02a9,0x02a9,0x02a9,0x02a9,0x02a9 +.short 0xa108,0xa108,0xa108,0xa108,0xa108,0xa108,0xa108,0xa108 +.short 0x6da9,0x6da9,0x6da9,0x6da9,0x6da9,0x6da9,0x6da9,0x6da9 +.short 0x04b2,0x04b2,0x04b2,0x04b2,0x04b2,0x04b2,0x04b2,0x04b2 +.short 0x04b2,0x04b2,0x04b2,0x04b2,0x04b2,0x04b2,0x04b2,0x04b2 +.short 0xfab2,0xfab2,0xfab2,0xfab2,0xfab2,0xfab2,0xfab2,0xfab2 +.short 0xfab2,0xfab2,0xfab2,0xfab2,0xfab2,0xfab2,0xfab2,0xfab2 +.short 0x093f,0x093f,0x093f,0x093f,0x093f,0x093f,0x093f,0x093f +.short 0x093f,0x093f,0x093f,0x093f,0x093f,0x093f,0x093f,0x093f +.short 0xd63f,0xd63f,0xd63f,0xd63f,0xd63f,0xd63f,0xd63f,0xd63f +.short 0xd63f,0xd63f,0xd63f,0xd63f,0xd63f,0xd63f,0xd63f,0xd63f +.short 0x0be2,0x0be2,0x0be2,0x0be2,0x0be2,0x0be2,0x0be2,0x0be2 +.short 0x0be2,0x0be2,0x0be2,0x0be2,0x0be2,0x0be2,0x0be2,0x0be2 +.short 0x91e2,0x91e2,0x91e2,0x91e2,0x91e2,0x91e2,0x91e2,0x91e2 +.short 0x91e2,0x91e2,0x91e2,0x91e2,0x91e2,0x91e2,0x91e2,0x91e2 +.short 0x05ed,0x05ed,0x05ed,0x05ed,0x05ed,0x05ed,0x05ed,0x05ed +.short 0x05ed,0x05ed,0x05ed,0x05ed,0x05ed,0x05ed,0x05ed,0x05ed +.short 0xfced,0xfced,0xfced,0xfced,0xfced,0xfced,0xfced,0xfced +.short 0xfced,0xfced,0xfced,0xfced,0xfced,0xfced,0xfced,0xfced +.short 0x004b,0x004b,0x0bb8,0x0bb8,0x009c,0x009c,0x0b5f,0x0b5f +.short 0x0ba4,0x0ba4,0x0a7d,0x0a7d,0x0368,0x0368,0x0636,0x0636 +.short 0x314b,0x314b,0xb3b8,0xb3b8,0x149c,0x149c,0x385f,0x385f +.short 0xb7a4,0xb7a4,0xb17d,0xb17d,0xbb68,0xbb68,0x4836,0x4836 +.short 0x08a2,0x08a2,0x0736,0x0736,0x025a,0x025a,0x0309,0x0309 +.short 0x0093,0x0093,0x09f7,0x09f7,0x087a,0x087a,0x00f6,0x00f6 +.short 0xcea2,0xcea2,0x4936,0x4936,0x705a,0x705a,0x8e09,0x8e09 +.short 0x8993,0x8993,0x7ef7,0x7ef7,0xd67a,0xd67a,0x82f6,0x82f6 +.short 0x0744,0x0744,0x0744,0x0744,0x0c83,0x0c83,0x0c83,0x0c83 +.short 0x048a,0x048a,0x048a,0x048a,0x0652,0x0652,0x0652,0x0652 +.short 0x9344,0x9344,0x9344,0x9344,0x6583,0x6583,0x6583,0x6583 +.short 0x028a,0x028a,0x028a,0x028a,0xdc52,0xdc52,0xdc52,0xdc52 +.short 0x029a,0x029a,0x029a,0x029a,0x0140,0x0140,0x0140,0x0140 +.short 0x0008,0x0008,0x0008,0x0008,0x0afd,0x0afd,0x0afd,0x0afd +.short 0x309a,0x309a,0x309a,0x309a,0xc140,0xc140,0xc140,0xc140 +.short 0x9808,0x9808,0x9808,0x9808,0x31fd,0x31fd,0x31fd,0x31fd +.short 0x0082,0x0082,0x0082,0x0082,0x0082,0x0082,0x0082,0x0082 +.short 0x0642,0x0642,0x0642,0x0642,0x0642,0x0642,0x0642,0x0642 +.short 0x6682,0x6682,0x6682,0x6682,0x6682,0x6682,0x6682,0x6682 +.short 0xac42,0xac42,0xac42,0xac42,0xac42,0xac42,0xac42,0xac42 +.short 0x074f,0x074f,0x074f,0x074f,0x074f,0x074f,0x074f,0x074f +.short 0x033d,0x033d,0x033d,0x033d,0x033d,0x033d,0x033d,0x033d +.short 0x044f,0x044f,0x044f,0x044f,0x044f,0x044f,0x044f,0x044f +.short 0xea3d,0xea3d,0xea3d,0xea3d,0xea3d,0xea3d,0xea3d,0xea3d +.short 0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b +.short 0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b,0x0c4b +.short 0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b +.short 0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b,0x3d4b +.short 0x06d8,0x06d8,0x06d8,0x06d8,0x06d8,0x06d8,0x06d8,0x06d8 +.short 0x06d8,0x06d8,0x06d8,0x06d8,0x06d8,0x06d8,0x06d8,0x06d8 +.short 0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8 +.short 0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8,0x0ed8 +.short 0x0773,0x0773,0x0773,0x0773,0x0773,0x0773,0x0773,0x0773 +.short 0x0773,0x0773,0x0773,0x0773,0x0773,0x0773,0x0773,0x0773 +.short 0x3073,0x3073,0x3073,0x3073,0x3073,0x3073,0x3073,0x3073 +.short 0x3073,0x3073,0x3073,0x3073,0x3073,0x3073,0x3073,0x3073 +.short 0x068c,0x068c,0x01cc,0x01cc,0x06db,0x06db,0x0123,0x0123 +.short 0x00eb,0x00eb,0x0ab6,0x0ab6,0x0c50,0x0c50,0x0b5b,0x0b5b +.short 0xea8c,0xea8c,0xa5cc,0xa5cc,0xe7db,0xe7db,0x3a23,0x3a23 +.short 0x11eb,0x11eb,0xccb6,0xccb6,0xfc50,0xfc50,0x6c5b,0x6c5b +.short 0x0c98,0x0c98,0x099a,0x099a,0x06f3,0x06f3,0x04e3,0x04e3 +.short 0x09b6,0x09b6,0x0b53,0x0b53,0x0ad6,0x0ad6,0x044f,0x044f +.short 0x5498,0x5498,0x379a,0x379a,0xaff3,0xaff3,0x7de3,0x7de3 +.short 0xcbb6,0xcbb6,0xd453,0xd453,0x2cd6,0x2cd6,0x014f,0x014f +.short 0x0608,0x0608,0x0608,0x0608,0x011a,0x011a,0x011a,0x011a +.short 0x072e,0x072e,0x072e,0x072e,0x050d,0x050d,0x050d,0x050d +.short 0x9e08,0x9e08,0x9e08,0x9e08,0xaf1a,0xaf1a,0xaf1a,0xaf1a +.short 0xb12e,0xb12e,0xb12e,0xb12e,0x5c0d,0x5c0d,0x5c0d,0x5c0d +.short 0x090a,0x090a,0x090a,0x090a,0x0228,0x0228,0x0228,0x0228 +.short 0x0a75,0x0a75,0x0a75,0x0a75,0x083a,0x083a,0x083a,0x083a +.short 0x870a,0x870a,0x870a,0x870a,0xfa28,0xfa28,0xfa28,0xfa28 +.short 0x1975,0x1975,0x1975,0x1975,0x163a,0x163a,0x163a,0x163a +.short 0x0b82,0x0b82,0x0b82,0x0b82,0x0b82,0x0b82,0x0b82,0x0b82 +.short 0x0bf9,0x0bf9,0x0bf9,0x0bf9,0x0bf9,0x0bf9,0x0bf9,0x0bf9 +.short 0x7182,0x7182,0x7182,0x7182,0x7182,0x7182,0x7182,0x7182 +.short 0x66f9,0x66f9,0x66f9,0x66f9,0x66f9,0x66f9,0x66f9,0x66f9 +.short 0x052d,0x052d,0x052d,0x052d,0x052d,0x052d,0x052d,0x052d +.short 0x0ac4,0x0ac4,0x0ac4,0x0ac4,0x0ac4,0x0ac4,0x0ac4,0x0ac4 +.short 0xbc2d,0xbc2d,0xbc2d,0xbc2d,0xbc2d,0xbc2d,0xbc2d,0xbc2d +.short 0x16c4,0x16c4,0x16c4,0x16c4,0x16c4,0x16c4,0x16c4,0x16c4 +.short 0x0a93,0x0a93,0x0a93,0x0a93,0x0a93,0x0a93,0x0a93,0x0a93 +.short 0x0a93,0x0a93,0x0a93,0x0a93,0x0a93,0x0a93,0x0a93,0x0a93 +.short 0x9393,0x9393,0x9393,0x9393,0x9393,0x9393,0x9393,0x9393 +.short 0x9393,0x9393,0x9393,0x9393,0x9393,0x9393,0x9393,0x9393 +.short 0x00ab,0x00ab,0x00ab,0x00ab,0x00ab,0x00ab,0x00ab,0x00ab +.short 0x00ab,0x00ab,0x00ab,0x00ab,0x00ab,0x00ab,0x00ab,0x00ab +.short 0x51ab,0x51ab,0x51ab,0x51ab,0x51ab,0x51ab,0x51ab,0x51ab +.short 0x51ab,0x51ab,0x51ab,0x51ab,0x51ab,0x51ab,0x51ab,0x51ab +.short 0x072c,0x072c,0x072c,0x072c,0x072c,0x072c,0x072c,0x072c +.short 0x072c,0x072c,0x072c,0x072c,0x072c,0x072c,0x072c,0x072c +.short 0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c +.short 0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c,0xcb2c +.short 0x0167,0x0167,0x0167,0x0167,0x0167,0x0167,0x0167,0x0167 +.short 0x0167,0x0167,0x0167,0x0167,0x0167,0x0167,0x0167,0x0167 +.short 0xc667,0xc667,0xc667,0xc667,0xc667,0xc667,0xc667,0xc667 +.short 0xc667,0xc667,0xc667,0xc667,0xc667,0xc667,0xc667,0xc667 +.short 0x02f6,0x02f6,0x02f6,0x02f6,0x02f6,0x02f6,0x02f6,0x02f6 +.short 0x02f6,0x02f6,0x02f6,0x02f6,0x02f6,0x02f6,0x02f6,0x02f6 +.short 0x84f6,0x84f6,0x84f6,0x84f6,0x84f6,0x84f6,0x84f6,0x84f6 +.short 0x84f6,0x84f6,0x84f6,0x84f6,0x84f6,0x84f6,0x84f6,0x84f6 +.short 0x05a1,0x05a1,0x05a1,0x05a1,0x05a1,0x05a1,0x05a1,0x05a1 +.short 0x05a1,0x05a1,0x05a1,0x05a1,0x05a1,0x05a1,0x05a1,0x05a1 +.short 0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1 +.short 0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1,0xd8a1 #ifndef __APPLE__ .text .globl mlkem_keygen_avx2 @@ -11907,9 +10833,9 @@ _mlkem_csubq_avx2: .section __DATA,__data #endif /* __APPLE__ */ #ifndef __APPLE__ -.align 16 +.align 32 #else -.p2align 4 +.p2align 5 #endif /* __APPLE__ */ L_mlkem_rej_idx: .quad 0xffffffffffffffff,0xffffffffffffff00 @@ -12051,8 +10977,8 @@ L_mlkem_rej_idx: .p2align 5 #endif /* __APPLE__ */ L_mlkem_rej_q: -.quad 0xd010d010d010d01, 0xd010d010d010d01 -.quad 0xd010d010d010d01, 0xd010d010d010d01 +.quad 0x0d010d010d010d01,0x0d010d010d010d01 +.quad 0x0d010d010d010d01,0x0d010d010d010d01 #ifndef __APPLE__ .data #else @@ -12064,8 +10990,8 @@ L_mlkem_rej_q: .p2align 5 #endif /* __APPLE__ */ L_mlkem_rej_ones: -.quad 0x101010101010101, 0x101010101010101 -.quad 0x101010101010101, 0x101010101010101 +.quad 0x0101010101010101,0x0101010101010101 +.quad 0x0101010101010101,0x0101010101010101 #ifndef __APPLE__ .data #else @@ -12077,8 +11003,8 @@ L_mlkem_rej_ones: .p2align 5 #endif /* __APPLE__ */ L_mlkem_rej_mask: -.quad 0xfff0fff0fff0fff, 0xfff0fff0fff0fff -.quad 0xfff0fff0fff0fff, 0xfff0fff0fff0fff +.quad 0x0fff0fff0fff0fff,0x0fff0fff0fff0fff +.quad 0x0fff0fff0fff0fff,0x0fff0fff0fff0fff #ifndef __APPLE__ .data #else @@ -12090,8 +11016,8 @@ L_mlkem_rej_mask: .p2align 5 #endif /* __APPLE__ */ L_mlkem_rej_shuffle: -.quad 0x504040302010100, 0xb0a0a0908070706 -.quad 0x908080706050504, 0xf0e0e0d0c0b0b0a +.quad 0x0504040302010100,0x0b0a0a0908070706 +.quad 0x0908080706050504,0x0f0e0e0d0c0b0b0a #ifndef __APPLE__ .text .globl mlkem_rej_uniform_n_avx2 @@ -13040,8 +11966,8 @@ L_mlkem_rej_uniform_avx2_done_64: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_249: -.quad 0x24924900249249, 0x24924900249249 -.quad 0x24924900249249, 0x24924900249249 +.quad 0x0024924900249249,0x0024924900249249 +.quad 0x0024924900249249,0x0024924900249249 #ifndef __APPLE__ .data #else @@ -13053,8 +11979,8 @@ L_mlkem_mask_249: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_6db: -.quad 0x6db6db006db6db, 0x6db6db006db6db -.quad 0x6db6db006db6db, 0x6db6db006db6db +.quad 0x006db6db006db6db,0x006db6db006db6db +.quad 0x006db6db006db6db,0x006db6db006db6db #ifndef __APPLE__ .data #else @@ -13066,8 +11992,8 @@ L_mlkem_mask_6db: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_07: -.quad 0x700000007, 0x700000007 -.quad 0x700000007, 0x700000007 +.quad 0x0000000700000007,0x0000000700000007 +.quad 0x0000000700000007,0x0000000700000007 #ifndef __APPLE__ .data #else @@ -13079,8 +12005,8 @@ L_mlkem_mask_07: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_70: -.quad 0x7000000070000, 0x7000000070000 -.quad 0x7000000070000, 0x7000000070000 +.quad 0x0007000000070000,0x0007000000070000 +.quad 0x0007000000070000,0x0007000000070000 #ifndef __APPLE__ .data #else @@ -13092,8 +12018,8 @@ L_mlkem_mask_70: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_3: -.quad 0x3000300030003, 0x3000300030003 -.quad 0x3000300030003, 0x3000300030003 +.quad 0x0003000300030003,0x0003000300030003 +.quad 0x0003000300030003,0x0003000300030003 #ifndef __APPLE__ .data #else @@ -13105,8 +12031,8 @@ L_mlkem_mask_3: .p2align 5 #endif /* __APPLE__ */ L_mlkem_shuff: -.quad 0xff050403ff020100, 0xff0b0a09ff080706 -.quad 0xff090807ff060504, 0xff0f0e0dff0c0b0a +.quad 0xff050403ff020100,0xff0b0a09ff080706 +.quad 0xff090807ff060504,0xff0f0e0dff0c0b0a #ifndef __APPLE__ .text .globl mlkem_cbd_eta3_avx2 @@ -13381,8 +12307,8 @@ _mlkem_cbd_eta3_avx2: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_55: -.quad 0x5555555555555555, 0x5555555555555555 -.quad 0x5555555555555555, 0x5555555555555555 +.quad 0x5555555555555555,0x5555555555555555 +.quad 0x5555555555555555,0x5555555555555555 #ifndef __APPLE__ .data #else @@ -13394,8 +12320,8 @@ L_mlkem_mask_55: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_33: -.quad 0x3333333333333333, 0x3333333333333333 -.quad 0x3333333333333333, 0x3333333333333333 +.quad 0x3333333333333333,0x3333333333333333 +.quad 0x3333333333333333,0x3333333333333333 #ifndef __APPLE__ .data #else @@ -13407,8 +12333,8 @@ L_mlkem_mask_33: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_03: -.quad 0x303030303030303, 0x303030303030303 -.quad 0x303030303030303, 0x303030303030303 +.quad 0x0303030303030303,0x0303030303030303 +.quad 0x0303030303030303,0x0303030303030303 #ifndef __APPLE__ .data #else @@ -13420,8 +12346,8 @@ L_mlkem_mask_03: .p2align 5 #endif /* __APPLE__ */ L_mlkem_mask_0f: -.quad 0xf0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f -.quad 0xf0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f +.quad 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f +.quad 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f #ifndef __APPLE__ .text .globl mlkem_cbd_eta2_avx2 @@ -13562,14 +12488,8 @@ _mlkem_cbd_eta2_avx2: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_mask: -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff -.value 0x03ff,0x03ff +.short 0x03ff,0x03ff,0x03ff,0x03ff,0x03ff,0x03ff,0x03ff,0x03ff +.short 0x03ff,0x03ff,0x03ff,0x03ff,0x03ff,0x03ff,0x03ff,0x03ff #ifndef __APPLE__ .data #else @@ -13581,8 +12501,8 @@ L_mlkem_compress_10_avx2_mask: .p2align 5 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_shift: -.quad 0x400000104000001, 0x400000104000001 -.quad 0x400000104000001, 0x400000104000001 +.quad 0x0400000104000001,0x0400000104000001 +.quad 0x0400000104000001,0x0400000104000001 #ifndef __APPLE__ .data #else @@ -13594,8 +12514,8 @@ L_mlkem_compress_10_avx2_shift: .p2align 5 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_shlv: -.quad 0xc, 0xc -.quad 0xc, 0xc +.quad 0x000000000000000c,0x000000000000000c +.quad 0x000000000000000c,0x000000000000000c #ifndef __APPLE__ .data #else @@ -13607,14 +12527,10 @@ L_mlkem_compress_10_avx2_shlv: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_shuf: -.value 0x100,0x302 -.value 0x804,0xa09 -.value 0xc0b,0xffff -.value 0xffff,0xffff -.value 0xa09,0xc0b -.value 0xffff,0xffff -.value 0xffff,0x100 -.value 0x302,0x804 +.byte 0x00,0x01,0x02,0x03,0x04,0x08,0x09,0x0a +.byte 0x0b,0x0c,0xff,0xff,0xff,0xff,0xff,0xff +.byte 0x09,0x0a,0x0b,0x0c,0xff,0xff,0xff,0xff +.byte 0xff,0xff,0x00,0x01,0x02,0x03,0x04,0x08 #ifndef __APPLE__ .data #else @@ -13626,14 +12542,8 @@ L_mlkem_compress_10_avx2_shuf: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_v: -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf #ifndef __APPLE__ .data #else @@ -13645,14 +12555,8 @@ L_mlkem_compress_10_avx2_v: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_offset: -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f #ifndef __APPLE__ .data #else @@ -13664,14 +12568,8 @@ L_mlkem_compress_10_avx2_offset: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_10_avx2_shift12: -.value 0x1000,0x1000 -.value 0x1000,0x1000 -.value 0x1000,0x1000 -.value 0x1000,0x1000 -.value 0x1000,0x1000 -.value 0x1000,0x1000 -.value 0x1000,0x1000 -.value 0x1000,0x1000 +.short 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000 +.short 0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000,0x1000 #ifndef __APPLE__ .text .globl mlkem_compress_10_avx2 @@ -14012,6 +12910,11 @@ L_mlkem_compress_10_avx2_start: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_decompress_10_avx2_mask: .long 0x7fe01ff8,0x7fe01ff8,0x7fe01ff8,0x7fe01ff8 .long 0x7fe01ff8,0x7fe01ff8,0x7fe01ff8,0x7fe01ff8 @@ -14026,13 +12929,18 @@ L_mlkem_decompress_10_avx2_mask: .p2align 5 #endif /* __APPLE__ */ L_mlkem_decompress_10_avx2_sllv: -.quad 0x4, 0x4 -.quad 0x4, 0x4 +.quad 0x0000000000000004,0x0000000000000004 +.quad 0x0000000000000004,0x0000000000000004 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_decompress_10_avx2_q: .long 0x0d013404,0x0d013404,0x0d013404,0x0d013404 .long 0x0d013404,0x0d013404,0x0d013404,0x0d013404 @@ -14047,14 +12955,10 @@ L_mlkem_decompress_10_avx2_q: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_10_avx2_shuf: -.value 0x100,0x201 -.value 0x302,0x403 -.value 0x605,0x706 -.value 0x807,0x908 -.value 0x302,0x403 -.value 0x504,0x605 -.value 0x807,0x908 -.value 0xa09,0xb0a +.byte 0x00,0x01,0x01,0x02,0x02,0x03,0x03,0x04 +.byte 0x05,0x06,0x06,0x07,0x07,0x08,0x08,0x09 +.byte 0x02,0x03,0x03,0x04,0x04,0x05,0x05,0x06 +.byte 0x07,0x08,0x08,0x09,0x09,0x0a,0x0a,0x0b #ifndef __APPLE__ .text .globl mlkem_decompress_10_avx2 @@ -14204,14 +13108,8 @@ L_mlkem_decompress_10_avx2_start: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_v: -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf #ifndef __APPLE__ .data #else @@ -14223,14 +13121,8 @@ L_mlkem_compress_11_avx2_v: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_off: -.value 0x0024,0x0024 -.value 0x0024,0x0024 -.value 0x0024,0x0024 -.value 0x0024,0x0024 -.value 0x0024,0x0024 -.value 0x0024,0x0024 -.value 0x0024,0x0024 -.value 0x0024,0x0024 +.short 0x0024,0x0024,0x0024,0x0024,0x0024,0x0024,0x0024,0x0024 +.short 0x0024,0x0024,0x0024,0x0024,0x0024,0x0024,0x0024,0x0024 #ifndef __APPLE__ .data #else @@ -14242,14 +13134,8 @@ L_mlkem_compress_11_avx2_off: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_shift13: -.value 0x2000,0x2000 -.value 0x2000,0x2000 -.value 0x2000,0x2000 -.value 0x2000,0x2000 -.value 0x2000,0x2000 -.value 0x2000,0x2000 -.value 0x2000,0x2000 -.value 0x2000,0x2000 +.short 0x2000,0x2000,0x2000,0x2000,0x2000,0x2000,0x2000,0x2000 +.short 0x2000,0x2000,0x2000,0x2000,0x2000,0x2000,0x2000,0x2000 #ifndef __APPLE__ .data #else @@ -14261,14 +13147,8 @@ L_mlkem_compress_11_avx2_shift13: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_mask: -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff -.value 0x07ff,0x07ff +.short 0x07ff,0x07ff,0x07ff,0x07ff,0x07ff,0x07ff,0x07ff,0x07ff +.short 0x07ff,0x07ff,0x07ff,0x07ff,0x07ff,0x07ff,0x07ff,0x07ff #ifndef __APPLE__ .data #else @@ -14280,13 +13160,18 @@ L_mlkem_compress_11_avx2_mask: .p2align 5 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_shift: -.quad 0x800000108000001, 0x800000108000001 -.quad 0x800000108000001, 0x800000108000001 +.quad 0x0800000108000001,0x0800000108000001 +.quad 0x0800000108000001,0x0800000108000001 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_compress_11_avx2_sllvd: .long 0x0000000a,0x00000000,0x0000000a,0x00000000 .long 0x0000000a,0x00000000,0x0000000a,0x00000000 @@ -14301,8 +13186,8 @@ L_mlkem_compress_11_avx2_sllvd: .p2align 5 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_srlvq: -.quad 0xa, 0x1e -.quad 0xa, 0x1e +.quad 0x000000000000000a,0x000000000000001e +.quad 0x000000000000000a,0x000000000000001e #ifndef __APPLE__ .data #else @@ -14314,14 +13199,10 @@ L_mlkem_compress_11_avx2_srlvq: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_11_avx2_shuf: -.value 0x100,0x302 -.value 0x504,0x706 -.value 0x908,0xff0a -.value 0xffff,0xffff -.value 0x605,0x807 -.value 0xa09,0xffff -.value 0xffff,0x0 -.value 0x201,0x403 +.byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07 +.byte 0x08,0x09,0x0a,0xff,0xff,0xff,0xff,0xff +.byte 0x05,0x06,0x07,0x08,0x09,0x0a,0xff,0xff +.byte 0xff,0xff,0x00,0x00,0x01,0x02,0x03,0x04 #ifndef __APPLE__ .text .globl mlkem_compress_11_avx2 @@ -14717,14 +13598,8 @@ L_mlkem_compress_11_avx2_start: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_11_avx2_q: -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .data #else @@ -14736,19 +13611,20 @@ L_mlkem_decompress_11_avx2_q: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_11_avx2_shuf: -.value 0x100,0x201 -.value 0x302,0x504 -.value 0x605,0x706 -.value 0x908,0xa09 -.value 0x403,0x504 -.value 0x605,0x807 -.value 0x908,0xa09 -.value 0xc0b,0xd0c +.byte 0x00,0x01,0x01,0x02,0x02,0x03,0x04,0x05 +.byte 0x05,0x06,0x06,0x07,0x08,0x09,0x09,0x0a +.byte 0x03,0x04,0x04,0x05,0x05,0x06,0x07,0x08 +.byte 0x08,0x09,0x09,0x0a,0x0b,0x0c,0x0c,0x0d #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_decompress_11_avx2_sllv: .long 0x00000000,0x00000001,0x00000000,0x00000000 .long 0x00000000,0x00000001,0x00000000,0x00000000 @@ -14763,8 +13639,8 @@ L_mlkem_decompress_11_avx2_sllv: .p2align 5 #endif /* __APPLE__ */ L_mlkem_decompress_11_avx2_srlv: -.quad 0x0, 0x2 -.quad 0x0, 0x2 +.quad 0x0000000000000000,0x0000000000000002 +.quad 0x0000000000000000,0x0000000000000002 #ifndef __APPLE__ .data #else @@ -14776,14 +13652,8 @@ L_mlkem_decompress_11_avx2_srlv: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_11_avx2_shift: -.value 0x0020,0x0004 -.value 0x0001,0x0020 -.value 0x0008,0x0001 -.value 0x0020,0x0004 -.value 0x0020,0x0004 -.value 0x0001,0x0020 -.value 0x0008,0x0001 -.value 0x0020,0x0004 +.short 0x0020,0x0004,0x0001,0x0020,0x0008,0x0001,0x0020,0x0004 +.short 0x0020,0x0004,0x0001,0x0020,0x0008,0x0001,0x0020,0x0004 #ifndef __APPLE__ .data #else @@ -14795,14 +13665,8 @@ L_mlkem_decompress_11_avx2_shift: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_11_avx2_mask: -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 -.value 0x7ff0,0x7ff0 +.short 0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0 +.short 0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0,0x7ff0 #ifndef __APPLE__ .text .globl mlkem_decompress_11_avx2 @@ -14986,14 +13850,8 @@ L_mlkem_decompress_11_avx2_start: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_4_avx2_mask: -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f -.value 0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f +.short 0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f,0x000f #ifndef __APPLE__ .data #else @@ -15005,19 +13863,18 @@ L_mlkem_compress_4_avx2_mask: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_4_avx2_shift: -.value 0x0200,0x0200 -.value 0x0200,0x0200 -.value 0x0200,0x0200 -.value 0x0200,0x0200 -.value 0x0200,0x0200 -.value 0x0200,0x0200 -.value 0x0200,0x0200 -.value 0x0200,0x0200 +.short 0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200 +.short 0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200,0x0200 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_compress_4_avx2_perm: .long 0x00000000,0x00000004,0x00000001,0x00000005 .long 0x00000002,0x00000006,0x00000003,0x00000007 @@ -15032,14 +13889,8 @@ L_mlkem_compress_4_avx2_perm: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_4_avx2_v: -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf #ifndef __APPLE__ .data #else @@ -15051,14 +13902,8 @@ L_mlkem_compress_4_avx2_v: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_4_avx2_shift12: -.value 0x1001,0x1001 -.value 0x1001,0x1001 -.value 0x1001,0x1001 -.value 0x1001,0x1001 -.value 0x1001,0x1001 -.value 0x1001,0x1001 -.value 0x1001,0x1001 -.value 0x1001,0x1001 +.short 0x1001,0x1001,0x1001,0x1001,0x1001,0x1001,0x1001,0x1001 +.short 0x1001,0x1001,0x1001,0x1001,0x1001,0x1001,0x1001,0x1001 #ifndef __APPLE__ .text .globl mlkem_compress_4_avx2 @@ -15162,6 +14007,11 @@ _mlkem_compress_4_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_decompress_4_avx2_mask: .long 0x00f0000f,0x00f0000f,0x00f0000f,0x00f0000f .long 0x00f0000f,0x00f0000f,0x00f0000f,0x00f0000f @@ -15170,6 +14020,11 @@ L_mlkem_decompress_4_avx2_mask: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_decompress_4_avx2_shift: .long 0x00800800,0x00800800,0x00800800,0x00800800 .long 0x00800800,0x00800800,0x00800800,0x00800800 @@ -15184,14 +14039,8 @@ L_mlkem_decompress_4_avx2_shift: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_4_avx2_q: -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .data #else @@ -15203,14 +14052,10 @@ L_mlkem_decompress_4_avx2_q: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_4_avx2_shuf: -.value 0x0,0x0 -.value 0x101,0x101 -.value 0x202,0x202 -.value 0x303,0x303 -.value 0x404,0x404 -.value 0x505,0x505 -.value 0x606,0x606 -.value 0x707,0x707 +.byte 0x00,0x00,0x00,0x00,0x01,0x01,0x01,0x01 +.byte 0x02,0x02,0x02,0x02,0x03,0x03,0x03,0x03 +.byte 0x04,0x04,0x04,0x04,0x05,0x05,0x05,0x05 +.byte 0x06,0x06,0x06,0x06,0x07,0x07,0x07,0x07 #ifndef __APPLE__ .text .globl mlkem_decompress_4_avx2 @@ -15339,14 +14184,8 @@ _mlkem_decompress_4_avx2: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_5_avx2_v: -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf -.value 0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf +.short 0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf,0x4ebf #ifndef __APPLE__ .data #else @@ -15358,14 +14197,8 @@ L_mlkem_compress_5_avx2_v: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_5_avx2_shift: -.value 0x0400,0x0400 -.value 0x0400,0x0400 -.value 0x0400,0x0400 -.value 0x0400,0x0400 -.value 0x0400,0x0400 -.value 0x0400,0x0400 -.value 0x0400,0x0400 -.value 0x0400,0x0400 +.short 0x0400,0x0400,0x0400,0x0400,0x0400,0x0400,0x0400,0x0400 +.short 0x0400,0x0400,0x0400,0x0400,0x0400,0x0400,0x0400,0x0400 #ifndef __APPLE__ .data #else @@ -15377,14 +14210,8 @@ L_mlkem_compress_5_avx2_shift: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_5_avx2_mask: -.value 0x001f,0x001f -.value 0x001f,0x001f -.value 0x001f,0x001f -.value 0x001f,0x001f -.value 0x001f,0x001f -.value 0x001f,0x001f -.value 0x001f,0x001f -.value 0x001f,0x001f +.short 0x001f,0x001f,0x001f,0x001f,0x001f,0x001f,0x001f,0x001f +.short 0x001f,0x001f,0x001f,0x001f,0x001f,0x001f,0x001f,0x001f #ifndef __APPLE__ .data #else @@ -15396,19 +14223,18 @@ L_mlkem_compress_5_avx2_mask: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_5_avx2_shift1: -.value 0x2001,0x2001 -.value 0x2001,0x2001 -.value 0x2001,0x2001 -.value 0x2001,0x2001 -.value 0x2001,0x2001 -.value 0x2001,0x2001 -.value 0x2001,0x2001 -.value 0x2001,0x2001 +.short 0x2001,0x2001,0x2001,0x2001,0x2001,0x2001,0x2001,0x2001 +.short 0x2001,0x2001,0x2001,0x2001,0x2001,0x2001,0x2001,0x2001 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_compress_5_avx2_shift2: .long 0x04000001,0x04000001,0x04000001,0x04000001 .long 0x04000001,0x04000001,0x04000001,0x04000001 @@ -15423,8 +14249,8 @@ L_mlkem_compress_5_avx2_shift2: .p2align 5 #endif /* __APPLE__ */ L_mlkem_compress_5_avx2_shlv: -.quad 0xc, 0xc -.quad 0xc, 0xc +.quad 0x000000000000000c,0x000000000000000c +.quad 0x000000000000000c,0x000000000000000c #ifndef __APPLE__ .data #else @@ -15436,14 +14262,10 @@ L_mlkem_compress_5_avx2_shlv: .p2align 4 #endif /* __APPLE__ */ L_mlkem_compress_5_avx2_shuffle: -.value 0x100,0x302 -.value 0xff04,0xffff -.value 0xffff,0x908 -.value 0xb0a,0xff0c -.value 0xa09,0xc0b -.value 0xff,0x201 -.value 0x403,0xffff -.value 0xffff,0x8ff +.byte 0x00,0x01,0x02,0x03,0x04,0xff,0xff,0xff +.byte 0xff,0xff,0x08,0x09,0x0a,0x0b,0x0c,0xff +.byte 0x09,0x0a,0x0b,0x0c,0xff,0x00,0x01,0x02 +.byte 0x03,0x04,0xff,0xff,0xff,0xff,0xff,0x08 #ifndef __APPLE__ .text .globl mlkem_compress_5_avx2 @@ -15608,14 +14430,8 @@ _mlkem_compress_5_avx2: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_5_avx2_q: -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 -.value 0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 +.short 0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01,0x0d01 #ifndef __APPLE__ .data #else @@ -15627,14 +14443,10 @@ L_mlkem_decompress_5_avx2_q: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_5_avx2_shuf: -.value 0x0,0x100 -.value 0x101,0x201 -.value 0x302,0x303 -.value 0x403,0x404 -.value 0x505,0x605 -.value 0x606,0x706 -.value 0x807,0x808 -.value 0x908,0x909 +.byte 0x00,0x00,0x00,0x01,0x01,0x01,0x01,0x02 +.byte 0x02,0x03,0x03,0x03,0x03,0x04,0x04,0x04 +.byte 0x05,0x05,0x05,0x06,0x06,0x06,0x06,0x07 +.byte 0x07,0x08,0x08,0x08,0x08,0x09,0x09,0x09 #ifndef __APPLE__ .data #else @@ -15646,14 +14458,8 @@ L_mlkem_decompress_5_avx2_shuf: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_5_avx2_mask: -.value 0x001f,0x03e0 -.value 0x007c,0x0f80 -.value 0x01f0,0x003e -.value 0x07c0,0x00fb -.value 0x001f,0x03e0 -.value 0x007c,0x0f80 -.value 0x01f0,0x003e -.value 0x07c0,0x00fb +.short 0x001f,0x03e0,0x007c,0x0f80,0x01f0,0x003e,0x07c0,0x00fb +.short 0x001f,0x03e0,0x007c,0x0f80,0x01f0,0x003e,0x07c0,0x00fb #ifndef __APPLE__ .data #else @@ -15665,14 +14471,8 @@ L_mlkem_decompress_5_avx2_mask: .p2align 4 #endif /* __APPLE__ */ L_mlkem_decompress_5_avx2_shift: -.value 0x0400,0x0020 -.value 0x0100,0x0008 -.value 0x0040,0x0200 -.value 0x0010,0x0080 -.value 0x0400,0x0020 -.value 0x0100,0x0008 -.value 0x0040,0x0200 -.value 0x0010,0x0080 +.short 0x0400,0x0020,0x0100,0x0008,0x0040,0x0200,0x0010,0x0080 +.short 0x0400,0x0020,0x0100,0x0008,0x0040,0x0200,0x0010,0x0080 #ifndef __APPLE__ .text .globl mlkem_decompress_5_avx2 @@ -15798,6 +14598,11 @@ _mlkem_decompress_5_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_from_msg_avx2_shift: .long 0x00000003,0x00000002,0x00000001,0x00000000 .long 0x00000003,0x00000002,0x00000001,0x00000000 @@ -15812,14 +14617,10 @@ L_mlkem_from_msg_avx2_shift: .p2align 4 #endif /* __APPLE__ */ L_mlkem_from_msg_avx2_shuf: -.value 0x100,0x504 -.value 0x908,0xd0c -.value 0x302,0x706 -.value 0xb0a,0xf0e -.value 0x100,0x504 -.value 0x908,0xd0c -.value 0x302,0x706 -.value 0xb0a,0xf0e +.byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0c,0x0d +.byte 0x02,0x03,0x06,0x07,0x0a,0x0b,0x0e,0x0f +.byte 0x00,0x01,0x04,0x05,0x08,0x09,0x0c,0x0d +.byte 0x02,0x03,0x06,0x07,0x0a,0x0b,0x0e,0x0f #ifndef __APPLE__ .data #else @@ -15831,14 +14632,8 @@ L_mlkem_from_msg_avx2_shuf: .p2align 4 #endif /* __APPLE__ */ L_mlkem_from_msg_avx2_hqs: -.value 0x0681,0x0681 -.value 0x0681,0x0681 -.value 0x0681,0x0681 -.value 0x0681,0x0681 -.value 0x0681,0x0681 -.value 0x0681,0x0681 -.value 0x0681,0x0681 -.value 0x0681,0x0681 +.short 0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681 +.short 0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681,0x0681 #ifndef __APPLE__ .text .globl mlkem_from_msg_avx2 @@ -15975,14 +14770,8 @@ _mlkem_from_msg_avx2: .p2align 4 #endif /* __APPLE__ */ L_mlkem_to_msg_avx2_hqs: -.value 0x0680,0x0680 -.value 0x0680,0x0680 -.value 0x0680,0x0680 -.value 0x0680,0x0680 -.value 0x0680,0x0680 -.value 0x0680,0x0680 -.value 0x0680,0x0680 -.value 0x0680,0x0680 +.short 0x0680,0x0680,0x0680,0x0680,0x0680,0x0680,0x0680,0x0680 +.short 0x0680,0x0680,0x0680,0x0680,0x0680,0x0680,0x0680,0x0680 #ifndef __APPLE__ .data #else @@ -15994,14 +14783,8 @@ L_mlkem_to_msg_avx2_hqs: .p2align 4 #endif /* __APPLE__ */ L_mlkem_to_msg_avx2_hhqs: -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 -.value 0xfcc1,0xfcc1 +.short 0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1 +.short 0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1,0xfcc1 #ifndef __APPLE__ .text .globl mlkem_to_msg_avx2 @@ -16128,19 +14911,20 @@ _mlkem_to_msg_avx2: .p2align 4 #endif /* __APPLE__ */ L_mlkem_from_bytes_avx2_shuf: -.value 0x100,0xff02 -.value 0x403,0xff05 -.value 0x706,0xff08 -.value 0xa09,0xff0b -.value 0x504,0xff06 -.value 0x807,0xff09 -.value 0xb0a,0xff0c -.value 0xe0d,0xff0f +.byte 0x00,0x01,0x02,0xff,0x03,0x04,0x05,0xff +.byte 0x06,0x07,0x08,0xff,0x09,0x0a,0x0b,0xff +.byte 0x04,0x05,0x06,0xff,0x07,0x08,0x09,0xff +.byte 0x0a,0x0b,0x0c,0xff,0x0d,0x0e,0x0f,0xff #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_from_bytes_avx2_mask: .long 0x00000fff,0x00000fff,0x00000fff,0x00000fff .long 0x00000fff,0x00000fff,0x00000fff,0x00000fff @@ -16309,6 +15093,11 @@ _mlkem_from_bytes_avx2: #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_to_bytes_avx2_mask: .long 0x00000fff,0x00000fff,0x00000fff,0x00000fff .long 0x00000fff,0x00000fff,0x00000fff,0x00000fff @@ -16323,19 +15112,20 @@ L_mlkem_to_bytes_avx2_mask: .p2align 4 #endif /* __APPLE__ */ L_mlkem_to_bytes_avx2_shuf: -.value 0x100,0x402 -.value 0x605,0x908 -.value 0xc0a,0xe0d -.value 0xffff,0xffff -.value 0x605,0x908 -.value 0xc0a,0xe0d -.value 0xffff,0xffff -.value 0x100,0x402 +.byte 0x00,0x01,0x02,0x04,0x05,0x06,0x08,0x09 +.byte 0x0a,0x0c,0x0d,0x0e,0xff,0xff,0xff,0xff +.byte 0x05,0x06,0x08,0x09,0x0a,0x0c,0x0d,0x0e +.byte 0xff,0xff,0xff,0xff,0x00,0x01,0x02,0x04 #ifndef __APPLE__ .data #else .section __DATA,__data #endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_mlkem_to_bytes_avx2_perm: .long 0x00000000,0x00000001,0x00000002,0x00000007 .long 0x00000004,0x00000005,0x00000003,0x00000006