diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_CRYPTO_SM3_CIS_X86_64 b/anolis/configs/L1-RECOMMEND/x86/CONFIG_CRYPTO_SM3_CIS_X86_64
new file mode 100644
index 0000000000000000000000000000000000000000..a8a46d5e8a714cf49e2e2ad87d5d542eda78093b
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/x86/CONFIG_CRYPTO_SM3_CIS_X86_64
@@ -0,0 +1 @@
+CONFIG_CRYPTO_SM3_CIS_X86_64=m
diff --git a/anolis/configs/L1-RECOMMEND/x86/CONFIG_CRYPTO_SM4_CIS_X86_64 b/anolis/configs/L1-RECOMMEND/x86/CONFIG_CRYPTO_SM4_CIS_X86_64
new file mode 100644
index 0000000000000000000000000000000000000000..ac03c10624d226673c53ca688b4a5a2b30134511
--- /dev/null
+++ b/anolis/configs/L1-RECOMMEND/x86/CONFIG_CRYPTO_SM4_CIS_X86_64
@@ -0,0 +1 @@
+CONFIG_CRYPTO_SM4_CIS_X86_64=m
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e9b5d02e743e2786597bf1984aa56424a4693598..803e47c0267640a830d847e5d94d61c2b0254eed 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -97,6 +97,12 @@ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
 obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
 sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
 
+obj-$(CONFIG_CRYPTO_SM3_CIS_X86_64) += sm3-cis-x86_64.o
+sm3-cis-x86_64-y := sm3-cis-asm_64.o sm3_cis_glue.o
+
+obj-$(CONFIG_CRYPTO_SM4_CIS_X86_64) += sm4-cis-x86_64.o
+sm4-cis-x86_64-y := sm4-cis-asm_64.o sm4_cis_glue.o
+
 obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
 sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
diff --git a/arch/x86/crypto/sm3-cis-asm_64.S b/arch/x86/crypto/sm3-cis-asm_64.S
new file mode 100644
index 0000000000000000000000000000000000000000..89668c38b17ccd2bcfc07d2f7511d4d8c7ca95ff
--- /dev/null
+++ b/arch/x86/crypto/sm3-cis-asm_64.S
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * HYGON SM3 Secure Hash Algorithm, using cis instructions.
+ *
+ * Copyright (C) 2026 Hygon Information Technology Co., Ltd.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+#define W_A_T0 %xmm0
+#define W_A_T1 %xmm1
+#define W_A_T2 %xmm2
+#define W_A_T3 %xmm3
+#define W_E_T0 %xmm4
+#define W_E_T1 %xmm5
+#define D_B_T %xmm6
+#define D_A_T %xmm7
+#define W_A_T4 %xmm8
+#define D_B %xmm9
+#define D_A %xmm10
+#define D_S_T %xmm11
+
+#define digest_transpose_pre() \
+	vpshufd $0xd8, D_B, D_B_T; \
+	vpshufd $0xd8, D_A, D_A_T; \
+	vpunpcklqdq D_A_T, D_B_T, D_A; \
+	vpunpckhqdq D_A_T, D_B_T, D_B
+
+#define digest_transpose_post() \
+	vpunpcklqdq D_B, D_A, D_B_T; \
+	vpunpckhqdq D_B, D_A, D_A_T; \
+	vpshufd $0xd8, D_A_T, D_A; \
+	vpshufd $0xd8, D_B_T, D_B
+
+#define MSG_EXT_W_L() \
+	vpxor W_A_T1, W_A_T0, W_E_T0; \
+	vpunpckldq W_E_T0, W_A_T0, W_E_T1; \
+	vpunpckhdq W_E_T0, W_A_T0, W_E_T0;
+
+#define MSG_EXT_W() \
+	/* vsm3msga xmm0, xmm0, xmm1, xmm2 */ \
+	.byte 0xc4,0xe3,0xf9,0x89,0xc1,0x20; \
+	/* vsm3msgb xmm0, xmm0, xmm3 */ \
+	.byte 0xc4,0x62,0xf9,0xf9,0xc3; \
+	vmovdqa W_A_T1, W_A_T0; \
+	vmovdqa W_A_T2, W_A_T1; \
+	vmovdqa W_A_T3, W_A_T2; \
+	vmovdqa W_A_T4, W_A_T3;
+
+#define MSG_EXT_W_56() \
+	vmovdqa W_A_T1, W_A_T0; \
+	vmovdqa W_A_T2, W_A_T1; \
+	vmovdqa W_A_T3, W_A_T2;
+
+#define MSG_EXT_W_60() \
+	vmovdqa W_A_T1, W_A_T0; \
+	vmovdqa W_A_T2, W_A_T1;
+
+#define MSG_HASH_ROUND_INST_FOUR(IDX0, IDX1, IDX2, IDX3) \
+	/* vsm3rnda xmm6, xmm6, xmm7, IDX0 */ \
+	.byte 0xc4,0xe3,0xc9,0x88,0xf7,IDX0; \
+	/* vsm3rndb xmm6, xmm6, xmm5 */ \
+	.byte 0xc4,0xe2,0xc9,0xf8,0xf5; \
+	vpshufd $0x0e, W_E_T1, W_E_T1; \
+	/* vsm3rnda xmm7, xmm7, xmm6, IDX1 */ \
+	.byte 0xc4,0xe3,0xc1,0x88,0xfe,IDX1; \
+	/* vsm3rndb xmm7, xmm7, xmm5 */ \
+	.byte 0xc4,0xe2,0xc1,0xf8,0xfd; \
+	/* vsm3rnda xmm6, xmm6, xmm7, IDX2 */ \
+	.byte 0xc4,0xe3,0xc9,0x88,0xf7,IDX2; \
+	/* vsm3rndb xmm6, xmm6, xmm4 */ \
+	.byte 0xc4,0xe2,0xc9,0xf8,0xf4; \
+	vpshufd $0x0e, W_E_T0, W_E_T0; \
+	/* vsm3rnda xmm7, xmm7, xmm6, IDX3 */ \
+	.byte 0xc4,0xe3,0xc1,0x88,0xfe,IDX3; \
+	/* vsm3rndb xmm7, xmm7, xmm4 */ \
+	.byte 0xc4,0xe2,0xc1,0xf8,0xfc;
+
+#define MSG_HASH_ROUND_FOUR_0(IDX0, IDX1, IDX2, IDX3) \
+	MSG_EXT_W_L() \
+	MSG_HASH_ROUND_INST_FOUR(IDX0, IDX1, IDX2, IDX3)
+
+#define MSG_HASH_ROUND_FOUR(IDX0, IDX1, IDX2, IDX3) \
+	MSG_EXT_W() \
+	MSG_EXT_W_L() \
+	MSG_HASH_ROUND_INST_FOUR(IDX0, IDX1, IDX2, IDX3)
+
+#define MSG_HASH_ROUND_FOUR_56(IDX0, IDX1, IDX2, IDX3) \
+	MSG_EXT_W_56() \
+	MSG_EXT_W_L() \
+	MSG_HASH_ROUND_INST_FOUR(IDX0, IDX1, IDX2, IDX3)
+
+#define MSG_HASH_ROUND_FOUR_60(IDX0, IDX1, IDX2, IDX3) \
+	MSG_EXT_W_60() \
+	MSG_EXT_W_L() \
+	MSG_HASH_ROUND_INST_FOUR(IDX0, IDX1, IDX2, IDX3)
+
+#define MSG_HASH_ROUND_LOOP() \
+	MSG_HASH_ROUND_FOUR_0(0, 1, 2, 3) \
+	MSG_HASH_ROUND_FOUR(4, 5, 6, 7) \
+	MSG_HASH_ROUND_FOUR(8, 9, 10, 11) \
+	MSG_HASH_ROUND_FOUR(12, 13, 14, 15) \
+	MSG_HASH_ROUND_FOUR(16, 17, 18, 19) \
+	MSG_HASH_ROUND_FOUR(20, 21, 22, 23) \
+	MSG_HASH_ROUND_FOUR(24, 25, 26, 27) \
+	MSG_HASH_ROUND_FOUR(28, 29, 30, 31) \
+	MSG_HASH_ROUND_FOUR(32, 33, 34, 35) \
+	MSG_HASH_ROUND_FOUR(36, 37, 38, 39) \
+	MSG_HASH_ROUND_FOUR(40, 41, 42, 43) \
+	MSG_HASH_ROUND_FOUR(44, 45, 46, 47) \
+	MSG_HASH_ROUND_FOUR(48, 49, 50, 51) \
+	MSG_HASH_ROUND_FOUR(52, 53, 54, 55) \
+	MSG_HASH_ROUND_FOUR_56(56, 57, 58, 59) \
+	MSG_HASH_ROUND_FOUR_60(60, 61, 62, 63)
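For readers without the CIS ISA reference: vsm3msga/vsm3msgb perform the W-schedule expansion and vsm3rnda/vsm3rndb the round updates of the SM3 compression function (GM/T 0004-2012). As an editorial aside, here is a plain-C sketch of one generic SM3 block, equivalent in effect to what MSG_HASH_ROUND_LOOP() computes over the transposed state; illustrative only, not part of the patch:

    #include <stdint.h>

    static uint32_t rol32(uint32_t x, unsigned int n)
    {
        n &= 31;
        return (x << n) | (x >> ((32 - n) & 31));
    }

    static uint32_t p0(uint32_t x) { return x ^ rol32(x, 9) ^ rol32(x, 17); }
    static uint32_t p1(uint32_t x) { return x ^ rol32(x, 15) ^ rol32(x, 23); }

    /* One 64-byte block: expand W, run 64 rounds, XOR back into the state. */
    static void sm3_block_generic(uint32_t st[8], const uint8_t in[64])
    {
        uint32_t w[68], a, b, c, d, e, f, g, h, ss1, ss2, tt1, tt2, t;
        int j;

        for (j = 0; j < 16; j++)
            w[j] = (uint32_t)in[4 * j] << 24 | in[4 * j + 1] << 16 |
                   in[4 * j + 2] << 8 | in[4 * j + 3];
        for (j = 16; j < 68; j++)        /* the message-expansion step */
            w[j] = p1(w[j - 16] ^ w[j - 9] ^ rol32(w[j - 3], 15)) ^
                   rol32(w[j - 13], 7) ^ w[j - 6];

        a = st[0]; b = st[1]; c = st[2]; d = st[3];
        e = st[4]; f = st[5]; g = st[6]; h = st[7];

        for (j = 0; j < 64; j++) {       /* the per-round update */
            t = j < 16 ? 0x79cc4519 : 0x7a879d8a;
            ss1 = rol32(rol32(a, 12) + e + rol32(t, j & 31), 7);
            ss2 = ss1 ^ rol32(a, 12);
            if (j < 16) {
                tt1 = (a ^ b ^ c) + d + ss2 + (w[j] ^ w[j + 4]);
                tt2 = (e ^ f ^ g) + h + ss1 + w[j];
            } else {
                tt1 = ((a & b) | (a & c) | (b & c)) + d + ss2 + (w[j] ^ w[j + 4]);
                tt2 = ((e & f) | (~e & g)) + h + ss1 + w[j];
            }
            d = c; c = rol32(b, 9); b = a; a = tt1;
            h = g; g = rol32(f, 19); f = e; e = p0(tt2);
        }

        st[0] ^= a; st[1] ^= b; st[2] ^= c; st[3] ^= d;
        st[4] ^= e; st[5] ^= f; st[6] ^= g; st[7] ^= h;
    }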
+
+
+.section .data
+.align 16
+
+.Lbswap32_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.section .text
+.align 16
+
+/**
+ * void cis_sm3_transform(u32 *digest,
+ *			  const u8 *data,
+ *			  int nblocks);
+ */
+SYM_FUNC_START(cis_sm3_transform)
+	/* input:
+	 *	%rdi: digest context, digest
+	 *	%rsi: input data, data
+	 *	%rdx: block count, nblocks
+	 */
+	FRAME_BEGIN
+
+	test %rdx, %rdx
+	jle Over
+	vmovdqu (%rdi), D_B
+	vmovdqu 16(%rdi), D_A
+	digest_transpose_pre()
+
+Loops:
+	vmovdqa D_B, D_B_T
+	vmovdqa D_A, D_A_T
+
+	vmovdqu .Lbswap32_mask rRIP, D_S_T
+	vmovdqu 0*16(%rsi), W_A_T0
+	vmovdqu 1*16(%rsi), W_A_T1
+	vmovdqu 2*16(%rsi), W_A_T2
+	vmovdqu 3*16(%rsi), W_A_T3
+
+	vpshufb D_S_T, W_A_T0, W_A_T0
+	vpshufb D_S_T, W_A_T1, W_A_T1
+	vpshufb D_S_T, W_A_T2, W_A_T2
+	vpshufb D_S_T, W_A_T3, W_A_T3
+
+	MSG_HASH_ROUND_LOOP()
+
+	vpxor D_B, D_B_T, D_B
+	vpxor D_A, D_A_T, D_A
+
+	addq $64, %rsi
+	dec %rdx
+	jnz Loops
+
+	digest_transpose_post()
+	vmovdqu D_B, (%rdi)
+	vmovdqu D_A, 16(%rdi)
+
+Over:
+	FRAME_END
+	RET
+SYM_FUNC_END(cis_sm3_transform)
diff --git a/arch/x86/crypto/sm3_cis_glue.c b/arch/x86/crypto/sm3_cis_glue.c
new file mode 100644
index 0000000000000000000000000000000000000000..69b758e8d2b2b92c75ab0e4e6bab993c3a78fe1f
--- /dev/null
+++ b/arch/x86/crypto/sm3_cis_glue.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HYGON SM3 Secure Hash Algorithm, using cis instructions.
+ *
+ * Copyright (C) 2026 Hygon Information Technology Co., Ltd.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <asm/simd.h>
+
+asmlinkage void cis_sm3_transform(u32 *digest,
+				  const u8 *data,
+				  int nblocks);
+
+static inline void sm3_transform_cis(struct sm3_state *sctx,
+				     const u8 *data, int nblocks)
+{
+	cis_sm3_transform(sctx->state, data, nblocks);
+}
+
+static int sm3_cis_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len)
+{
+	struct sm3_state *sctx = shash_desc_ctx(desc);
+
+	if (!crypto_simd_usable() ||
+	    (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) {
+		sm3_update(sctx, data, len);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	sm3_base_do_update(desc, data, len, sm3_transform_cis);
+	kernel_fpu_end();
+
+	return 0;
+}
+
+static int sm3_cis_finup(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		struct sm3_state *sctx = shash_desc_ctx(desc);
+
+		if (len)
+			sm3_update(sctx, data, len);
+
+		sm3_final(sctx, out);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	if (len)
+		sm3_base_do_update(desc, data, len, sm3_transform_cis);
+	sm3_base_do_finalize(desc, sm3_transform_cis);
+	kernel_fpu_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static int sm3_cis_final(struct shash_desc *desc, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		sm3_final(shash_desc_ctx(desc), out);
+		return 0;
+	}
+
+	kernel_fpu_begin();
+	sm3_base_do_finalize(desc, sm3_transform_cis);
+	kernel_fpu_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static struct shash_alg sm3_cis_alg = {
+	.digestsize	= SM3_DIGEST_SIZE,
+	.init		= sm3_base_init,
+	.update		= sm3_cis_update,
+	.final		= sm3_cis_final,
+	.finup		= sm3_cis_finup,
+	.descsize	= sizeof(struct sm3_state),
+	.base = {
+		.cra_name	 = "sm3",
+		.cra_driver_name = "sm3-cis",
+		.cra_priority	 = 400,
+		.cra_blocksize	 = SM3_BLOCK_SIZE,
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
+static int __init sm3_cis_init(void)
+{
+#ifdef CONFIG_X86_64
+	if (!boot_cpu_has(X86_FEATURE_HYGON_SM3)) {
+		pr_err("CIS SM3 not supported\n");
+		return -ENODEV;
+	}
+#endif /* CONFIG_X86_64 */
+
+	return crypto_register_shash(&sm3_cis_alg);
+}
+
+static void __exit sm3_cis_exit(void)
+{
+	crypto_unregister_shash(&sm3_cis_alg);
+}
+
+module_init(sm3_cis_init);
+module_exit(sm3_cis_exit);
+
+MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, Hygon CIS optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sm3");
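The "sm3-cis" shash registers at priority 400, so it outranks the generic C "sm3" implementation wherever the CIS instructions are present. A quick user-space smoke test through AF_ALG could look like the following; this is a hypothetical test program, not part of the patch, and the expected digest for "abc" begins 66c7f0f4 per GM/T 0004-2012:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_alg.h>

    int main(void)
    {
        /* Bind to whatever "sm3" the kernel selected (highest priority). */
        struct sockaddr_alg sa = {
            .salg_family = AF_ALG,
            .salg_type   = "hash",
            .salg_name   = "sm3",
        };
        unsigned char digest[32];
        int tfd, ofd, i;

        tfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
        if (tfd < 0 || bind(tfd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
            return 1;
        ofd = accept(tfd, NULL, 0);
        if (ofd < 0 || write(ofd, "abc", 3) != 3)
            return 1;
        if (read(ofd, digest, sizeof(digest)) != sizeof(digest))
            return 1;
        for (i = 0; i < 32; i++)
            printf("%02x", digest[i]);
        printf("\n");
        return 0;
    }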
diff --git a/arch/x86/crypto/sm4-cis-asm_64.S b/arch/x86/crypto/sm4-cis-asm_64.S
new file mode 100644
index 0000000000000000000000000000000000000000..2bf574d118df323bba6bad38e8ecde71938781d4
--- /dev/null
+++ b/arch/x86/crypto/sm4-cis-asm_64.S
@@ -0,0 +1,634 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * HYGON SM4 Cipher Algorithm, using cis instructions.
+ *
+ * Copyright (C) 2026 Hygon Information Technology Co., Ltd.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define rRIP (%rip)
+
+#define RX0 %xmm0
+#define RX1 %xmm1
+#define RX2 %xmm2
+#define RX3 %xmm3
+#define RX4 %xmm4
+#define RX5 %xmm5
+#define RX6 %xmm6
+#define RX7 %xmm7
+
+#define XMM0 1111b /* xmm0 */
+#define XMM1 1110b /* xmm1 */
+#define XMM2 1101b /* xmm2 */
+
+/**
+ * SM4RK xmm0, xmm2, 0x0
+ * SM4RK xmm0, xmm2, 0x1
+ * SM4RK xmm0, xmm2, 0x2
+ * SM4RK xmm0, xmm2, 0x3
+ */
+#define SM4_ROUND_INST_SM4RK(XMM0, XMM2) \
+	.byte 0x66,0x0f,0x3a,0x8b,0xc2,0x00; \
+	.byte 0x66,0x0f,0x3a,0x8b,0xc2,0x01; \
+	.byte 0x66,0x0f,0x3a,0x8b,0xc2,0x02; \
+	.byte 0x66,0x0f,0x3a,0x8b,0xc2,0x03
+
+/**
+ * SM4RND xmm0, xmm1, 0x0
+ * SM4RND xmm0, xmm1, 0x1
+ * SM4RND xmm0, xmm1, 0x2
+ * SM4RND xmm0, xmm1, 0x3
+ */
+#define SM4_ROUND_INST_SM4RND(XMM0, XMM1) \
+	.byte 0x66,0x0f,0x3a,0x8a,0xc1,0x00; \
+	.byte 0x66,0x0f,0x3a,0x8a,0xc1,0x01; \
+	.byte 0x66,0x0f,0x3a,0x8a,0xc1,0x02; \
+	.byte 0x66,0x0f,0x3a,0x8a,0xc1,0x03
+
+/**
+ * Multiply in GF(2^128) for XTS IVs (tweak)
+ */
+#define _gf128_primitive_mul(tw, xm0, xm1) \
+	movdqa .Gf128mul_shuf_data rRIP, xm1; \
+	vpshufb xm1, tw, xm0; \
+	movdqa .Gf128mul_comp_data rRIP, xm1; \
+	vpsrlq $1, tw, tw; \
+	pand xm1, xm0; \
+	pcmpeqd xm1, xm0; \
+	movdqa .Gf128mul_mask rRIP, xm1; \
+	pand xm1, xm0; \
+	pxor xm0, tw
+
+.section .data
+.align 16
+
+/**
+ * SM4 key schedule constants (FK and CK)
+ * Standard: GB/T 32907-2016
+ * FK: 4x32-bit system parameters
+ * CK: 32x32-bit round constants
+ */
+.L_sm4_fk:
+	.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+
+.L_sm4_ck:
+	.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
+	.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
+	.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
+	.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
+	.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
+	.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
+	.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
+	.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
+
+/* For byteswap */
+.Lbswap128_mask:
+	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+	.byte 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04
+	.byte 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c
+
+/* Process the carry of tweak */
+.Gf128mul_mask:
+	.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
+	.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe1
+
+/* Retrieve the middle and highest bytes of tweak */
+.Gf128mul_shuf_data:
+	.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08
+	.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+
+/* Check if there is any carry in tweak */
+.Gf128mul_comp_data:
+	.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01
+	.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01
+
+.text
+
+/**
+ * void gf128_mul_by_2(unsigned char *twk)
+ */
+SYM_FUNC_START(gf128_mul_by_2)
+	/* input:
+	 *	%rdi: tweak value, twk
+	 */
+	FRAME_BEGIN
+
+	movdqa .Lbswap128_mask rRIP, RX3;
+	movdqu (%rdi), RX2;
+	pshufb RX3, RX2;
+	_gf128_primitive_mul(RX2, RX5, RX7);
+	pshufb RX3,
RX2; + movdqu RX2, (%rdi); + + FRAME_END + RET +SYM_FUNC_END(gf128_mul_by_2) + +/** + * void cis_sm4_set_key(unsigned char *key, + * unsigned int *rk) + */ +SYM_FUNC_START(cis_sm4_set_key) + /* input: + * %rdi: key data, key + * %rsi: round key array, rk + */ + FRAME_BEGIN + + movdqu .Lbswap32_mask rRIP, RX3; + movdqu (%rdi), RX0; + movdqu .L_sm4_fk rRIP, RX1; + pshufb RX3, RX0; + pxor RX1, RX0; + movq $0x08, %r8; + lea .L_sm4_ck rRIP, %rdx + +key_loop: + movdqu (%rdx), RX2; + SM4_ROUND_INST_SM4RK(XMM0, XMM2); + movdqu RX0, (%rsi); + add $0x10, %rsi; + add $0x10, %rdx; + dec %r8; + jne key_loop; + + FRAME_END + RET +SYM_FUNC_END(cis_sm4_set_key) + +/** + * void cis_sm4_blk(unsigned int *sk, + * unsigned char *in, + * unsigned char *out) + */ +SYM_FUNC_START(cis_sm4_blk) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + */ + FRAME_BEGIN + + leaq (16*8)(%rdi), %rax; + movdqa .Lbswap32_mask rRIP, RX3; + movdqu (%rsi), RX0; + pshufb RX3, RX0; + +asm_loop: + movdqu (%rdi), RX1; + SM4_ROUND_INST_SM4RND(XMM0, XMM1); + add $0x10, %rdi; + cmpq %rax, %rdi; + jne asm_loop; + + pshufd $0x1b, RX0, RX0; + pshufb RX3, RX0; + movdqu RX0, (%rdx); + + FRAME_END + RET +SYM_FUNC_END(cis_sm4_blk) + +/** + * void cis_sm4_ecb_crypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_ecb_crypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: data length, len + */ + FRAME_BEGIN + + test %rcx, %rcx; + jle ecb_over; + movq %rdi, %r9; + movdqa .Lbswap32_mask rRIP, RX3; + +ecb_loop: + movq %r9, %rdi; + leaq (16*8)(%rdi), %rax; + movdqu (%rsi), RX0; + pshufb RX3, RX0; + +ecb_rnd_loop: + movdqu (%rdi), RX1; + SM4_ROUND_INST_SM4RND(XMM0, XMM1); + add $0x10, %rdi; + cmpq %rax, %rdi; + jne ecb_rnd_loop; + + pshufd $0x1b, RX0, RX0; + pshufb RX3, RX0; + movdqu RX0, (%rdx); + + add $0x10, %rsi; + add $0x10, %rdx; + sub $0x10, %rcx; + jnz ecb_loop; + +ecb_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_ecb_crypt_blk8) + +/** + * void cis_sm4_cbc_encrypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *iv, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_cbc_encrypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: iv data, iv + * %r8: data length, len + */ + FRAME_BEGIN + + test %r8, %r8 + jle cbc_enc_over + movdqa .Lbswap32_mask rRIP, RX3 + movq %rdi, %r9 + movdqu (%rcx), RX0 + +cbc_enc_loop: + movq %r9, %rdi + leaq (16*8)(%rdi), %rax + /* pxor must be 16-byte aligned */ + movdqu (%rsi), RX2 + pxor RX2, RX0 + pshufb RX3, RX0 + +cbc_enc_rnd_loop: + movdqu (%rdi), RX1 + SM4_ROUND_INST_SM4RND(XMM0, XMM1) + add $0x10, %rdi + cmpq %rax, %rdi + jne cbc_enc_rnd_loop + + pshufd $0x1b, RX0, RX0 + pshufb RX3, RX0 + movdqu RX0, (%rdx) + + add $0x10, %rsi + add $0x10, %rdx + sub $0x10, %r8 + jnz cbc_enc_loop + movdqu RX0, (%rcx) + +cbc_enc_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_cbc_encrypt_blk8) + +/** + * void cis_sm4_cbc_decrypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *iv, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_cbc_decrypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: iv data, iv + * %r8: data length, len + */ + FRAME_BEGIN + + test %r8, %r8 + jle cbc_dec_over + movq %rdi, %r9 + movdqa .Lbswap32_mask rRIP, RX3 + movdqu (%rcx), RX4 + 
+cbc_dec_loop: + movq %r9, %rdi + leaq (16*8)(%rdi), %rax + movdqu (%rsi), RX0 + movdqa RX0, RX5 + pshufb RX3, RX0 + +cbc_dec_rnd_loop: + movdqu (%rdi), RX1 + SM4_ROUND_INST_SM4RND(XMM0, XMM1) + add $0x10, %rdi + cmpq %rax, %rdi + jne cbc_dec_rnd_loop + + pshufd $0x1b, RX0, RX0 + pshufb RX3, RX0 + pxor RX4, RX0 + movdqu RX0, (%rdx) + movdqa RX5, RX4 + + add $0x10, %rsi + add $0x10, %rdx + sub $0x10, %r8 + jnz cbc_dec_loop + movdqu RX5, (%rcx) + +cbc_dec_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_cbc_decrypt_blk8) + +/** + * void cis_sm4_ctr_crypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *iv, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_ctr_crypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: iv data, iv + * %r8: data length, len + */ + FRAME_BEGIN + + test %r8, %r8 + jle ctr_over + movq %rdi, %r9 + movdqa .Lbswap32_mask rRIP, RX3 + movdqa .Lbswap128_mask rRIP, RX6; + movdqu (%rcx), RX4 + pcmpeqd RX5, RX5 + psrldq $8, RX5 + +#define inc_le128(x, minus_one, tmp) \ + movdqa x, tmp; \ + pcmpeqq minus_one, tmp; \ + psubq minus_one, x; \ + pslldq $8, tmp; \ + psubq tmp, x + +ctr_loop: + movq %r9, %rdi + leaq (16*8)(%rdi), %rax + movdqa RX4, RX0 + pshufb RX3, RX0 + +ctr_rnd_loop: + movdqu (%rdi), RX1 + SM4_ROUND_INST_SM4RND(XMM0, XMM1) + add $0x10, %rdi + cmpq %rax, %rdi + jne ctr_rnd_loop + + pshufb RX6, RX4 + inc_le128(RX4, RX5, RX7) + pshufb RX6, RX4 + + pshufd $0x1b, RX0, RX0 + pshufb RX3, RX0 + movdqu (%rsi), RX2 + pxor RX2, RX0 + movdqu RX0, (%rdx) + + add $0x10, %rsi + add $0x10, %rdx + sub $0x10, %r8 + jnz ctr_loop + movdqu RX4, (%rcx) + +ctr_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_ctr_crypt_blk8) + + +/** + * void cis_sm4_cfb_encrypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *iv, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_cfb_encrypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: iv data, iv + * %r8: data length, len + */ + FRAME_BEGIN + + test %r8, %r8 + jle cfb_enc_over + movdqa .Lbswap32_mask rRIP, RX3 + movq %rdi, %r9 + movdqu (%rcx), RX0 + +cfb_enc_loop: + movq %r9, %rdi + leaq (16*8)(%rdi), %rax + pshufb RX3, RX0 + +cfb_enc_rnd_loop: + movdqu (%rdi), RX1 + SM4_ROUND_INST_SM4RND(XMM0, XMM1) + add $0x10, %rdi + cmpq %rax, %rdi + jne cfb_enc_rnd_loop + + pshufd $0x1b, RX0, RX0 + pshufb RX3, RX0 + movdqu (%rsi), RX2 + pxor RX2, RX0 + movdqu RX0, (%rdx) + + add $0x10, %rsi + add $0x10, %rdx + sub $0x10, %r8 + jnz cfb_enc_loop + movdqu RX0, (%rcx) + +cfb_enc_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_cfb_encrypt_blk8) + +/** + * void cis_sm4_cfb_decrypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *iv, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_cfb_decrypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: iv data, iv + * %r8: data length, len + */ + FRAME_BEGIN + + test %r8, %r8 + jle cfb_dec_over + movq %rdi, %r9 + movdqa .Lbswap32_mask rRIP, RX3 + movdqu (%rcx), RX4 + +cfb_dec_loop: + movq %r9, %rdi + leaq (16*8)(%rdi), %rax + movdqa RX4, RX0 + pshufb RX3, RX0 + +cfb_dec_rnd_loop: + movdqu (%rdi), RX1 + SM4_ROUND_INST_SM4RND(XMM0, XMM1) + add $0x10, %rdi + cmpq %rax, %rdi + jne cfb_dec_rnd_loop + + pshufd $0x1b, RX0, RX0 + pshufb RX3, RX0 + movdqu (%rsi), RX4 + pxor RX4, RX0 + movdqu RX0, (%rdx) + + add $0x10, %rsi + add $0x10, %rdx + 
sub $0x10, %r8 + jnz cfb_dec_loop + movdqu RX4, (%rcx) + +cfb_dec_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_cfb_decrypt_blk8) + +/** + * void cis_sm4_ofb_crypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *iv, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_ofb_crypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: input data, in + * %rdx: output data, out + * %rcx: iv data, iv + * %r8: data length, len + */ + FRAME_BEGIN + + test %r8, %r8 + jle ofb_over + movq %rdi, %r9 + movdqa .Lbswap32_mask rRIP, RX3 + movdqu (%rcx), RX0 + +ofb_loop: + movq %r9, %rdi + leaq (16*8)(%rdi), %rax + pshufb RX3, RX0 + +ofb_rnd_loop: + movdqu (%rdi), RX1 + SM4_ROUND_INST_SM4RND(XMM0, XMM1) + add $0x10, %rdi + cmpq %rax, %rdi + jne ofb_rnd_loop + + pshufd $0x1b, RX0, RX0 + pshufb RX3, RX0 + movdqa RX0, RX4 + movdqu (%rsi), RX2 + pxor RX2, RX4 + movdqu RX4, (%rdx) + + add $0x10, %rsi + add $0x10, %rdx + sub $0x10, %r8 + jnz ofb_loop + movdqu RX0, (%rcx) + +ofb_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_ofb_crypt_blk8) + +/** + * void cis_sm4_xts_crypt_blk8(unsigned int *sk, + * unsigned char *in, + * unsigned char *out, + * unsigned char *twk, + * unsigned int len) + */ +SYM_FUNC_START(cis_sm4_xts_crypt_blk8) + /* input: + * %rdi: round key array, sk + * %rsi: inout data, in + * %rdx: output data, out + * %rcx: tweak value, twk + * %r8: data length, len + */ + FRAME_BEGIN + test %r8, %r8; + jle xts_over; + movdqa .Lbswap32_mask rRIP, RX3; + movdqa .Lbswap128_mask rRIP, RX6; + movq %rdi, %r9; + movdqu (%rcx), RX2; + +xts_loop: + movq %r9, %rdi; + leaq (16*8)(%rdi), %rax; + movdqu (%rsi), RX0; + pxor RX2, RX0; + pshufb RX3, RX0; + +xts_rnd_loop: + movdqu (%rdi), RX1; + SM4_ROUND_INST_SM4RND(XMM0, XMM1); + add $0x10, %rdi; + cmpq %rax, %rdi; + jne xts_rnd_loop; + + pshufd $0x1b, RX0, RX0; + pshufb RX3, RX0; + pxor RX2, RX0; + movdqu RX0, (%rdx); + + pshufb RX6, RX2 + _gf128_primitive_mul(RX2, RX5, RX7); + pshufb RX6, RX2; + movdqu RX2, (%rcx); + + add $0x10, %rsi; + add $0x10, %rdx; + sub $0x10, %r8; + jnz xts_loop; + +xts_over: + FRAME_END + RET +SYM_FUNC_END(cis_sm4_xts_crypt_blk8) diff --git a/arch/x86/crypto/sm4_cis_glue.c b/arch/x86/crypto/sm4_cis_glue.c new file mode 100644 index 0000000000000000000000000000000000000000..cfb677088939aa5e7712debbbe319fa6959acee5 --- /dev/null +++ b/arch/x86/crypto/sm4_cis_glue.c @@ -0,0 +1,943 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HYGON SM4 Cipher Algorithm, using cis instructions. + * + * Copyright (C) 2026 Hygon Information Technology Co., Ltd. 
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gcm.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sm4.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+asmlinkage void gf128_mul_by_2(unsigned char *twk);
+asmlinkage void cis_sm4_set_key(const unsigned char *key,
+				unsigned int *rk);
+asmlinkage void cis_sm4_blk(unsigned int *sk,
+			    unsigned char *in,
+			    unsigned char *out);
+asmlinkage void cis_sm4_ecb_crypt_blk8(unsigned int *sk,
+				       unsigned char *in,
+				       unsigned char *out,
+				       unsigned int len);
+asmlinkage void cis_sm4_cbc_encrypt_blk8(unsigned int *sk,
+					 unsigned char *in,
+					 unsigned char *out,
+					 unsigned char *iv,
+					 unsigned int len);
+asmlinkage void cis_sm4_cbc_decrypt_blk8(unsigned int *sk,
+					 unsigned char *in,
+					 unsigned char *out,
+					 unsigned char *iv,
+					 unsigned int len);
+asmlinkage void cis_sm4_ctr_crypt_blk8(unsigned int *sk,
+				       unsigned char *in,
+				       unsigned char *out,
+				       unsigned char *iv,
+				       unsigned int len);
+asmlinkage void cis_sm4_cfb_encrypt_blk8(unsigned int *sk,
+					 unsigned char *in,
+					 unsigned char *out,
+					 unsigned char *iv,
+					 unsigned int len);
+asmlinkage void cis_sm4_cfb_decrypt_blk8(unsigned int *sk,
+					 unsigned char *in,
+					 unsigned char *out,
+					 unsigned char *iv,
+					 unsigned int len);
+asmlinkage void cis_sm4_ofb_crypt_blk8(unsigned int *sk,
+				       unsigned char *in,
+				       unsigned char *out,
+				       unsigned char *iv,
+				       unsigned int len);
+asmlinkage void cis_sm4_xts_crypt_blk8(unsigned int *sk,
+				       unsigned char *in,
+				       unsigned char *out,
+				       unsigned char *twk,
+				       unsigned int len);
+
+struct cis_sm4_ctx {
+	struct sm4_ctx ctx;
+	struct sm4_ctx tweak_ctx;
+	struct crypto_shash *ghash_tfm;
+	struct shash_desc *ghash_desc;
+};
+
+#define CRYPTO_SM4_CTX_SIZE	sizeof(struct cis_sm4_ctx)
+
+static int sm4_set_key(struct crypto_skcipher *tfm,
+		       const u8 *key, unsigned int key_len)
+{
+	struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm);
+	struct sm4_ctx *ctx = &cis_ctx->ctx;
+	int i = 0;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	kernel_fpu_begin();
+	cis_sm4_set_key(key, ctx->rkey_enc);
+	kernel_fpu_end();
+
+	for (i = 0; i < SM4_RKEY_WORDS; i++)
+		ctx->rkey_dec[i] = ctx->rkey_enc[SM4_RKEY_WORDS - 1 - i];
+
+	return 0;
+}
+
+static int sm4_ecb_do_crypt(struct skcipher_request *req, u32 *rkey)
+{
+	struct skcipher_walk walk;
+	unsigned int nbytes = 0;
+	int err = 0;
+
+	if (req->cryptlen & (SM4_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+
+	while ((nbytes = walk.nbytes) > 0) {
+		nbytes &= ~(SM4_BLOCK_SIZE - 1);
+		kernel_fpu_begin();
+		cis_sm4_ecb_crypt_blk8(rkey, walk.src.virt.addr, walk.dst.virt.addr, nbytes);
+		kernel_fpu_end();
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int sm4_ecb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm);
+	struct sm4_ctx *ctx = &cis_ctx->ctx;
+
+	return sm4_ecb_do_crypt(req, ctx->rkey_enc);
+}
+
+static int sm4_ecb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm);
+	struct sm4_ctx *ctx = &cis_ctx->ctx;
+
+	return sm4_ecb_do_crypt(req, ctx->rkey_dec);
+}
+
+static int sm4_cbc_do_crypt(struct skcipher_request *req,
+			    struct cis_sm4_ctx *cis_ctx, bool encrypt)
+{
+	struct skcipher_walk walk;
+	struct sm4_ctx *ctx = &cis_ctx->ctx;
+	unsigned int nbytes;
+	int err;
+
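Per 16-byte block, cis_sm4_cbc_encrypt_blk8/cis_sm4_cbc_decrypt_blk8 implement textbook CBC chaining, with the running IV carried across calls in walk.iv. A generic C sketch of the encrypt direction, where sm4_encrypt_block is a hypothetical one-block primitive standing in for the CIS round instructions:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical one-block primitive, not a function from this patch. */
    void sm4_encrypt_block(const uint32_t rk[32], const uint8_t in[16],
                           uint8_t out[16]);

    /* XOR each plaintext block with the previous ciphertext block (the IV
     * for the first), then encrypt; len is a multiple of 16 and iv is
     * updated so the chain continues across calls, as walk.iv does here.
     */
    void sm4_cbc_encrypt(const uint32_t rk[32], uint8_t iv[16],
                         const uint8_t *in, uint8_t *out, size_t len)
    {
        size_t n;
        int i;

        for (n = 0; n + 16 <= len; n += 16) {
            uint8_t blk[16];

            for (i = 0; i < 16; i++)
                blk[i] = in[n + i] ^ iv[i];
            sm4_encrypt_block(rk, blk, out + n);
            memcpy(iv, out + n, 16);
        }
    }

+	if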
(req->cryptlen & (SM4_BLOCK_SIZE - 1)) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + kernel_fpu_begin(); + if (encrypt) { + cis_sm4_cbc_encrypt_blk8(ctx->rkey_enc, + walk.src.virt.addr, walk.dst.virt.addr, walk.iv, nbytes); + } else { + cis_sm4_cbc_decrypt_blk8(ctx->rkey_dec, + walk.src.virt.addr, walk.dst.virt.addr, walk.iv, nbytes); + } + kernel_fpu_end(); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + if (err) + break; + } + + return err; +} + +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + + return sm4_cbc_do_crypt(req, cis_ctx, true); +} + +static int sm4_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + + return sm4_cbc_do_crypt(req, cis_ctx, false); +} + +static int sm4_ctr_do_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct skcipher_walk walk; + u8 keystream[SM4_BLOCK_SIZE]; + unsigned int nbytes, remain; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + remain = walk.nbytes - nbytes; + kernel_fpu_begin(); + if (nbytes) { + cis_sm4_ctr_crypt_blk8(ctx->rkey_enc, + walk.src.virt.addr, walk.dst.virt.addr, walk.iv, nbytes); + } + + if (walk.nbytes == walk.total && remain > 0) { + cis_sm4_blk(ctx->rkey_enc, walk.iv, keystream); + crypto_xor_cpy(walk.dst.virt.addr + nbytes, + walk.src.virt.addr + nbytes, + keystream, remain); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + remain = 0; + } + kernel_fpu_end(); + err = skcipher_walk_done(&walk, remain); + if (err) + break; + } + return err; +} + +static int sm4_cfb_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct skcipher_walk walk; + u8 keystream[SM4_BLOCK_SIZE]; + unsigned int nbytes, remain; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + remain = walk.nbytes - nbytes; + kernel_fpu_begin(); + if (nbytes) { + cis_sm4_cfb_encrypt_blk8(ctx->rkey_enc, + walk.src.virt.addr, walk.dst.virt.addr, walk.iv, nbytes); + } + + if (walk.nbytes == walk.total && remain > 0) { + cis_sm4_blk(ctx->rkey_enc, walk.iv, keystream); + crypto_xor_cpy(walk.dst.virt.addr + nbytes, + walk.src.virt.addr + nbytes, + keystream, remain); + remain = 0; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, remain); + if (err) + break; + } + return err; +} + +static int sm4_cfb_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct skcipher_walk walk; + u8 keystream[SM4_BLOCK_SIZE]; + unsigned int nbytes, remain; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + remain = walk.nbytes - nbytes; + 
kernel_fpu_begin(); + if (nbytes) { + cis_sm4_cfb_decrypt_blk8(ctx->rkey_enc, + walk.src.virt.addr, walk.dst.virt.addr, walk.iv, nbytes); + } + + if (walk.nbytes == walk.total && remain > 0) { + cis_sm4_blk(ctx->rkey_enc, walk.iv, keystream); + crypto_xor_cpy(walk.dst.virt.addr + nbytes, + walk.src.virt.addr + nbytes, + keystream, remain); + remain = 0; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, remain); + if (err) + break; + } + return err; +} + +static int sm4_ofb_do_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct skcipher_walk walk; + u8 keystream[SM4_BLOCK_SIZE]; + unsigned int nbytes, remain; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + remain = walk.nbytes - nbytes; + kernel_fpu_begin(); + if (nbytes) { + cis_sm4_ofb_crypt_blk8(ctx->rkey_enc, + walk.src.virt.addr, walk.dst.virt.addr, walk.iv, nbytes); + } + + if (walk.nbytes == walk.total && remain > 0) { + cis_sm4_blk(ctx->rkey_enc, walk.iv, keystream); + crypto_xor_cpy(walk.dst.virt.addr + nbytes, + walk.src.virt.addr + nbytes, + keystream, remain); + remain = 0; + } + kernel_fpu_end(); + err = skcipher_walk_done(&walk, remain); + if (err) + break; + } + return err; +} + +static int sm4_xts_set_key(struct crypto_skcipher *tfm, + const u8 *key, unsigned int key_len) +{ + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct sm4_ctx *tweak_ctx = &cis_ctx->tweak_ctx; + int i = 0; + + if (key_len != 2 * SM4_KEY_SIZE) + return -EINVAL; + + kernel_fpu_begin(); + cis_sm4_set_key(key, ctx->rkey_enc); + cis_sm4_set_key(key + SM4_KEY_SIZE, tweak_ctx->rkey_enc); + kernel_fpu_end(); + + for (i = 0; i < SM4_RKEY_WORDS; i++) + ctx->rkey_dec[i] = ctx->rkey_enc[SM4_RKEY_WORDS - 1 - i]; + + for (i = 0; i < SM4_RKEY_WORDS; i++) + tweak_ctx->rkey_dec[i] = tweak_ctx->rkey_enc[SM4_RKEY_WORDS - 1 - i]; + + return 0; +} + +static int sm4_xts_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct sm4_ctx *tweak_ctx = &cis_ctx->tweak_ctx; + struct skcipher_walk walk; + u8 keystream[SM4_BLOCK_SIZE * 2] = { 0 }; + u8 tweak[SM4_BLOCK_SIZE] = { 0 }; + unsigned int nbytes, remain; + int err = 0; + + if (req->cryptlen < SM4_BLOCK_SIZE) + return -EINVAL; + + kernel_fpu_begin(); + cis_sm4_blk(tweak_ctx->rkey_enc, req->iv, tweak); + kernel_fpu_end(); + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + remain = walk.nbytes - nbytes; + kernel_fpu_begin(); + if (nbytes) { + cis_sm4_xts_crypt_blk8(ctx->rkey_enc, + walk.src.virt.addr, walk.dst.virt.addr, tweak, nbytes); + } + + if (walk.nbytes == walk.total && remain > 0) { + if (walk.nbytes >= SM4_BLOCK_SIZE + remain) { + memcpy(keystream, walk.dst.virt.addr + nbytes - SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + } else { + /** + * skcipher_walk ensures all previous processing is multiple of + * SM4_BLOCK_SIZE, so 'walk.nbytes < SM4_BLOCK_SIZE + remain' + * means 'walk.nbytes == reamin', and last encrypted block is + * from last walk. 
+ */ + scatterwalk_map_and_copy(keystream, + req->dst, req->cryptlen - remain - SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE, 0); + } + + memcpy(keystream + SM4_BLOCK_SIZE, keystream, remain); + memcpy(keystream, walk.src.virt.addr + nbytes, remain); + + crypto_xor_cpy(keystream, tweak, keystream, SM4_BLOCK_SIZE); + cis_sm4_blk(ctx->rkey_enc, keystream, keystream); + crypto_xor_cpy(keystream, tweak, keystream, SM4_BLOCK_SIZE); + + /* copy last SM4_BLOCK_SIZE + remain to dst */ + if (walk.nbytes >= SM4_BLOCK_SIZE + remain) { + memcpy(walk.dst.virt.addr + walk.nbytes - SM4_BLOCK_SIZE - remain, + keystream, SM4_BLOCK_SIZE + remain); + } else { + scatterwalk_map_and_copy(keystream, + req->dst, req->cryptlen - remain - SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE, 1); + memcpy(walk.dst.virt.addr, keystream + SM4_BLOCK_SIZE, + walk.nbytes); + } + + remain = 0; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, remain); + if (err) + break; + } + + return err; +} + +static int sm4_xts_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cis_sm4_ctx *cis_ctx = crypto_skcipher_ctx(tfm); + struct sm4_ctx *ctx = &cis_ctx->ctx; + struct sm4_ctx *tweak_ctx = &cis_ctx->tweak_ctx; + struct skcipher_walk walk; + u8 keystream[SM4_BLOCK_SIZE * 2] = { 0 }; + u8 tweak[SM4_BLOCK_SIZE] = { 0 }; + u8 tweakn[SM4_BLOCK_SIZE] = { 0 }; + unsigned int nbytes, remain, ntail, nfinish = 0; + int err = 0; + + if (req->cryptlen < SM4_BLOCK_SIZE) + return -EINVAL; + + ntail = req->cryptlen & (SM4_BLOCK_SIZE - 1); + + kernel_fpu_begin(); + cis_sm4_blk(tweak_ctx->rkey_enc, req->iv, tweak); + kernel_fpu_end(); + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + while ((nbytes = walk.nbytes) > 0) { + nbytes &= ~(SM4_BLOCK_SIZE - 1); + remain = walk.nbytes - nbytes; + + if (ntail && nbytes && (nfinish + nbytes == req->cryptlen - ntail)) { + memcpy(keystream, walk.src.virt.addr + nbytes - SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE); + nbytes -= SM4_BLOCK_SIZE; + nfinish += SM4_BLOCK_SIZE; + } + + kernel_fpu_begin(); + if (nbytes) { + cis_sm4_xts_crypt_blk8(ctx->rkey_dec, + walk.src.virt.addr, walk.dst.virt.addr, tweak, nbytes); + nfinish += nbytes; + } + + if (walk.nbytes == walk.total && remain > 0) { + memcpy(tweakn, tweak, SM4_BLOCK_SIZE); + gf128_mul_by_2(tweakn); + crypto_xor_cpy(keystream, tweakn, keystream, SM4_BLOCK_SIZE); + cis_sm4_blk(ctx->rkey_dec, keystream, keystream); + crypto_xor_cpy(keystream, tweakn, keystream, SM4_BLOCK_SIZE); + + memcpy(keystream + SM4_BLOCK_SIZE, keystream, remain); + + memcpy(keystream, walk.src.virt.addr + walk.nbytes - remain, remain); + + crypto_xor_cpy(keystream, tweak, keystream, SM4_BLOCK_SIZE); + cis_sm4_blk(ctx->rkey_dec, keystream, keystream); + crypto_xor_cpy(keystream, tweak, keystream, SM4_BLOCK_SIZE); + + /* copy last SM4_BLOCK_SIZE + remain to dst */ + if (walk.nbytes >= SM4_BLOCK_SIZE + remain) { + memcpy(walk.dst.virt.addr + walk.nbytes - SM4_BLOCK_SIZE - remain, + keystream, SM4_BLOCK_SIZE + remain); + } else { + scatterwalk_map_and_copy(keystream, req->dst, + req->cryptlen - remain - SM4_BLOCK_SIZE, + SM4_BLOCK_SIZE, 1); + memcpy(walk.dst.virt.addr, keystream + SM4_BLOCK_SIZE, + walk.nbytes); + } + + remain = 0; + } + + kernel_fpu_end(); + err = skcipher_walk_done(&walk, remain); + if (err) + break; + } + + return err; +} + +static struct skcipher_alg cis_skciphers[] = { + { + .base = { + .cra_name = "__ecb(sm4)", + .cra_driver_name = "__ecb-sm4-cis", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + 
.cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_SM4_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .setkey = sm4_set_key, + .encrypt = sm4_ecb_encrypt, + .decrypt = sm4_ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(sm4)", + .cra_driver_name = "__cbc-sm4-cis", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_SM4_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .setkey = sm4_set_key, + .encrypt = sm4_cbc_encrypt, + .decrypt = sm4_cbc_decrypt, + }, { + .base = { + .cra_name = "__ctr(sm4)", + .cra_driver_name = "__ctr-sm4-cis", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_SM4_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_set_key, + .encrypt = sm4_ctr_do_crypt, + .decrypt = sm4_ctr_do_crypt, + }, { + .base = { + .cra_name = "__cfb(sm4)", + .cra_driver_name = "__cfb-sm4-cis", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_SM4_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_set_key, + .encrypt = sm4_cfb_encrypt, + .decrypt = sm4_cfb_decrypt, + }, { + .base = { + .cra_name = "__ofb(sm4)", + .cra_driver_name = "__ofb-sm4-cis", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_SM4_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_set_key, + .encrypt = sm4_ofb_do_crypt, + .decrypt = sm4_ofb_do_crypt, + }, { + .base = { + .cra_name = "__gbt17964(xts(sm4))", + .cra_driver_name = "__xts-sm4-cis", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_SM4_CTX_SIZE, + .cra_module = THIS_MODULE, + }, + .min_keysize = 2 * SM4_KEY_SIZE, + .max_keysize = 2 * SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .setkey = sm4_xts_set_key, + .encrypt = sm4_xts_encrypt, + .decrypt = sm4_xts_decrypt, + } +}; + +static struct simd_skcipher_alg *cis_simd_skciphers[ARRAY_SIZE(cis_skciphers)]; + +static int sm4_gcm_init(struct crypto_aead *tfm) +{ + struct cis_sm4_ctx *cis_ctx = crypto_aead_ctx(tfm); + struct crypto_shash *ghash = NULL; + struct shash_desc *sdesc = NULL; + + cis_ctx->ghash_tfm = NULL; + cis_ctx->ghash_desc = NULL; + + ghash = crypto_alloc_shash("__ghash-pclmulqdqni", + CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(ghash)) { + pr_warn("load ghash-pclmulqdqni failed\n"); + return PTR_ERR(ghash); + } + sdesc = kzalloc(sizeof(struct shash_desc) + + crypto_shash_descsize(ghash), GFP_KERNEL); + if (unlikely(!sdesc)) { + crypto_free_shash(ghash); + return -ENOMEM; + } + + sdesc->tfm = ghash; + cis_ctx->ghash_tfm = ghash; + cis_ctx->ghash_desc = sdesc; + + return 0; +} + +static void sm4_gcm_exit(struct crypto_aead *tfm) +{ + struct cis_sm4_ctx *cis_ctx = crypto_aead_ctx(tfm); + + if (cis_ctx->ghash_tfm) + crypto_free_shash(cis_ctx->ghash_tfm); + + kfree(cis_ctx->ghash_desc); +} + +static int sm4_gcm_set_key(struct crypto_aead *tfm, + const u8 *key, 
unsigned int key_len)
+{
+	struct cis_sm4_ctx *cis_ctx = crypto_aead_ctx(tfm);
+	struct sm4_ctx *ctx = &cis_ctx->ctx;
+	int i = 0;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	kernel_fpu_begin();
+	cis_sm4_set_key(key, ctx->rkey_enc);
+	kernel_fpu_end();
+	for (i = 0; i < SM4_RKEY_WORDS; i++)
+		ctx->rkey_dec[i] = ctx->rkey_enc[SM4_RKEY_WORDS - 1 - i];
+
+	return 0;
+}
+
+static int sm4_gcm_set_authsize(struct crypto_aead *tfm,
+				unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int sm4_gcm_do_crypt(struct aead_request *req, bool encrypt)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cis_sm4_ctx *cis_ctx = crypto_aead_ctx(tfm);
+	struct sm4_ctx *ctx = &cis_ctx->ctx;
+	struct scatter_walk assoc_sg_walk;
+	struct skcipher_walk walk;
+	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+	be128 tail = {cpu_to_be64(req->assoclen * 8), 0};
+	u8 iv[SM4_BLOCK_SIZE] = {0};
+	u8 ghash[GHASH_BLOCK_SIZE] = {0};
+	u8 H[GHASH_BLOCK_SIZE] = {0};
+	u8 keystream[SM4_BLOCK_SIZE] = {0};
+	u8 auth_tag[SM4_BLOCK_SIZE] = {0};
+	u8 auth_tag_msg[SM4_BLOCK_SIZE];
+	u8 pad[GHASH_BLOCK_SIZE] = {0};
+	unsigned int nbytes, remain, zfilled;
+	u8 *assoc = NULL, *assocmem = NULL;
+	gfp_t flags;
+	int err = 0;
+
+	if (!cis_ctx->ghash_tfm || !cis_ctx->ghash_desc)
+		return -EPERM;
+
+	tail.b = encrypt ? cpu_to_be64(req->cryptlen * 8) :
+			   cpu_to_be64((req->cryptlen - auth_tag_len) * 8);
+
+	memcpy(iv, req->iv, 12);
+	iv[SM4_BLOCK_SIZE - 1] = 1;
+	memcpy(auth_tag, iv, SM4_BLOCK_SIZE);
+
+	kernel_fpu_begin();
+	cis_sm4_blk(ctx->rkey_enc, auth_tag, auth_tag);
+	cis_sm4_blk(ctx->rkey_enc, H, H);
+	kernel_fpu_end();
+
+	crypto_shash_setkey(cis_ctx->ghash_tfm, H, sizeof(H));
+	crypto_shash_init(cis_ctx->ghash_desc);
+
+	if (req->assoclen > 0) {
+		if (req->src->length >= req->assoclen) {
+			scatterwalk_start(&assoc_sg_walk, req->src);
+			assoc = scatterwalk_map(&assoc_sg_walk);
+		} else {
+			flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+				GFP_KERNEL : GFP_ATOMIC;
+
+			/* assoc can be any length, so must be on heap */
+			assocmem = kmalloc(req->assoclen, flags);
+			if (unlikely(!assocmem))
+				return -ENOMEM;
+			assoc = assocmem;
+
+			scatterwalk_map_and_copy(assoc, req->src, 0, req->assoclen, 0);
+		}
+
+		zfilled = ALIGN(req->assoclen, GHASH_BLOCK_SIZE) - req->assoclen;
+		crypto_shash_update(cis_ctx->ghash_desc, assoc, req->assoclen);
+		if (zfilled)
+			crypto_shash_update(cis_ctx->ghash_desc, pad, zfilled);
+
+		/* only unmap/free the AAD if it was actually mapped above */
+		if (!assocmem)
+			scatterwalk_unmap(assoc);
+		else
+			kfree(assocmem);
+	}
+
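The GCM path is composed from the CTR asm primitive plus the existing __ghash-pclmulqdqni shash rather than a dedicated GCM instruction. The tag it produces is the standard GCM construction; a sketch with hypothetical stand-ins sm4_encrypt_block and ghash_update (the real code drives crypto_shash_update with explicit zero-padding, as above and below):

    #include <stdint.h>
    #include <string.h>

    /* Stand-ins for the accelerated primitives; not functions from this
     * patch. ghash_update is assumed to zero-pad partial blocks itself.
     */
    void sm4_encrypt_block(const uint32_t rk[32], const uint8_t in[16],
                           uint8_t out[16]);
    void ghash_update(uint8_t s[16], const uint8_t h[16],
                      const uint8_t *data, size_t len);

    /* H = E_K(0^128); J0 = IV || 0^31 || 1 (96-bit IV case);
     * tag = E_K(J0) XOR GHASH_H(pad(AAD) || pad(C) || len(AAD) || len(C)).
     * The ciphertext itself comes from CTR mode starting at J0 + 1.
     */
    void sm4_gcm_tag(const uint32_t rk[32], const uint8_t iv12[12],
                     const uint8_t *aad, size_t alen,
                     const uint8_t *ct, size_t clen, uint8_t tag[16])
    {
        uint8_t h[16] = { 0 }, j0[16] = { 0 }, s[16] = { 0 }, lens[16];
        uint64_t abits = (uint64_t)alen * 8, cbits = (uint64_t)clen * 8;
        int i;

        sm4_encrypt_block(rk, h, h);            /* H = E_K(0^128) */
        memcpy(j0, iv12, 12);
        j0[15] = 1;                             /* 32-bit counter = 1 */

        ghash_update(s, h, aad, alen);
        ghash_update(s, h, ct, clen);
        for (i = 0; i < 8; i++) {               /* big-endian bit lengths */
            lens[i] = abits >> (56 - 8 * i);
            lens[8 + i] = cbits >> (56 - 8 * i);
        }
        ghash_update(s, h, lens, 16);

        sm4_encrypt_block(rk, j0, tag);         /* tag = E_K(J0) ^ S */
        for (i = 0; i < 16; i++)
            tag[i] ^= s[i];
    }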
+	err = encrypt ? skcipher_walk_aead_encrypt(&walk, req, false)
+		      : skcipher_walk_aead_decrypt(&walk, req, false);
+	if (err)
+		return err;
+
+	crypto_inc(iv, SM4_BLOCK_SIZE);
+	while ((nbytes = walk.nbytes) > 0) {
+		nbytes &= ~(SM4_BLOCK_SIZE - 1);
+		remain = walk.nbytes - nbytes;
+		if (nbytes) {
+			if (encrypt) {
+				kernel_fpu_begin();
+				cis_sm4_ctr_crypt_blk8(ctx->rkey_enc,
+					walk.src.virt.addr, walk.dst.virt.addr, iv, nbytes);
+				kernel_fpu_end();
+
+				crypto_shash_update(cis_ctx->ghash_desc,
+					walk.dst.virt.addr, nbytes);
+			} else {
+				crypto_shash_update(cis_ctx->ghash_desc,
+					walk.src.virt.addr, nbytes);
+
+				kernel_fpu_begin();
+				cis_sm4_ctr_crypt_blk8(ctx->rkey_enc,
+					walk.src.virt.addr, walk.dst.virt.addr, iv, nbytes);
+				kernel_fpu_end();
+			}
+		}
+
+		if (walk.nbytes == walk.total && remain > 0) {
+			kernel_fpu_begin();
+			cis_sm4_blk(ctx->rkey_enc, iv, keystream);
+			kernel_fpu_end();
+			if (encrypt) {
+				crypto_xor_cpy(walk.dst.virt.addr + nbytes,
+					walk.src.virt.addr + nbytes,
+					keystream, remain);
+
+				memcpy(pad, walk.dst.virt.addr + nbytes, remain);
+				crypto_shash_update(cis_ctx->ghash_desc,
+					pad, GHASH_BLOCK_SIZE);
+			} else {
+				memcpy(pad, walk.src.virt.addr + nbytes, remain);
+				crypto_shash_update(cis_ctx->ghash_desc,
+					pad, GHASH_BLOCK_SIZE);
+
+				crypto_xor_cpy(walk.dst.virt.addr + nbytes,
+					walk.src.virt.addr + nbytes,
+					keystream, remain);
+			}
+
+			crypto_inc(iv, SM4_BLOCK_SIZE);
+			remain = 0;
+		}
+
+		err = skcipher_walk_done(&walk, remain);
+		if (err)
+			break;
+	}
+	crypto_shash_update(cis_ctx->ghash_desc, (const u8 *)&tail, sizeof(tail));
+	crypto_shash_final(cis_ctx->ghash_desc, ghash);
+
+	crypto_xor(auth_tag, ghash, SM4_BLOCK_SIZE);
+	if (encrypt) {
+		scatterwalk_map_and_copy(auth_tag, req->dst,
+					 req->assoclen + req->cryptlen,
+					 auth_tag_len, 1);
+	} else {
+		scatterwalk_map_and_copy(auth_tag_msg, req->src,
+					 req->assoclen + req->cryptlen - auth_tag_len,
+					 auth_tag_len, 0);
+		if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) {
+			memzero_explicit(auth_tag, sizeof(auth_tag));
+			err = -EBADMSG;
+		}
+	}
+
+	return err;
+}
+
+static int sm4_gcm_encrypt(struct aead_request *req)
+{
+	return sm4_gcm_do_crypt(req, 1);
+}
+
+static int sm4_gcm_decrypt(struct aead_request *req)
+{
+	return sm4_gcm_do_crypt(req, 0);
+}
+
+static struct aead_alg cis_aeads[] = {
+	{
+		.init		= sm4_gcm_init,
+		.exit		= sm4_gcm_exit,
+		.setkey		= sm4_gcm_set_key,
+		.setauthsize	= sm4_gcm_set_authsize,
+		.encrypt	= sm4_gcm_encrypt,
+		.decrypt	= sm4_gcm_decrypt,
+		.ivsize		= GCM_SM4_IV_SIZE,
+		.chunksize	= SM4_BLOCK_SIZE,
+		.maxauthsize	= 16,
+		.base = {
+			.cra_name		= "__gcm(sm4)",
+			.cra_driver_name	= "__gcm-sm4-cis",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= CRYPTO_SM4_CTX_SIZE,
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+		},
+	}
+};
+
+static struct simd_aead_alg *cis_simd_aeads[ARRAY_SIZE(cis_aeads)];
+
+static int __init cis_sm4_init(void)
+{
+	int err = 0;
+#ifdef CONFIG_X86_64
+	if (!boot_cpu_has(X86_FEATURE_HYGON_SM4)) {
+		pr_err("CIS SM4 not supported\n");
+		return -ENODEV;
+	}
+#endif /* CONFIG_X86_64 */
+
+	err = simd_register_skciphers_compat(cis_skciphers,
+					     ARRAY_SIZE(cis_skciphers),
+					     cis_simd_skciphers);
+	if (err)
+		return err;
+
+	err = simd_register_aeads_compat(cis_aeads, ARRAY_SIZE(cis_aeads),
+					 cis_simd_aeads);
+	if (err)
+		goto unregister_skciphers;
+
+	return 0;
+
+unregister_skciphers:
+	simd_unregister_skciphers(cis_skciphers, ARRAY_SIZE(cis_skciphers),
+				  cis_simd_skciphers);
+
+	return err;
+}
+
+static void __exit cis_sm4_exit(void)
+{
+	simd_unregister_aeads(cis_aeads, ARRAY_SIZE(cis_aeads),
+			      cis_simd_aeads);
+	simd_unregister_skciphers(cis_skciphers, ARRAY_SIZE(cis_skciphers),
+				  cis_simd_skciphers);
+}
+
+late_initcall(cis_sm4_init);
+module_exit(cis_sm4_exit);
+
+MODULE_DESCRIPTION("SM4 Cipher Algorithm, Hygon CIS optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sm4");
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index b2cc6c04cbfe7b75fda77d61f626975dfceb9894..66bec69784f2d45653d8e6ea27abc54a19bb05cd 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -812,8 +812,8 @@ f3: Grp17 (1A)
 f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66)
 f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy
 f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
-f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
-f9: MOVDIRI My,Gy
+f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) | vsm3rndb Vx,Hx,Wx (66),(v1)
+f9: MOVDIRI My,Gy | vsm3msgb Vx,Hx,Wx (66),(v1)
 EndTable
 
 Table: 3-byte opcode 2 (0x0f 0x3a)
@@ -889,6 +889,10 @@ AVXcode: 3
 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev)
 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev)
 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev)
+88: vsm3rnda Vx,Hx,Wx,Ib (66),(v1)
+89: vsm3msga Vx,Hx,Wx,Ib (66),(v1)
+8a: vsm4rnd Vx,Hx,Wx,Ib (66),(v1) | sm4rnd Vk,Wx,Ib (66)
+8b: vsm4rk Vx,Hx,Wx,Ib (66),(v1) | sm4rk Vk,Wx,Ib (66)
 cc: sha1rnds4 Vdq,Wdq,Ib
 ce: vgf2p8affineqb Vx,Wx,Ib (66)
 cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index bcc2a150985e3693eb22f9ba1a3bd96e1541f83f..5a85620d5fecc504695bdab5ebb5d16ab47fa05a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -2059,6 +2059,31 @@ config CRYPTO_USER_API_AKCIPHER
 	  is supported by enabling the user space interface of
 	  asymmetric key cryptography algorithm.
+config CRYPTO_SM3_CIS_X86_64 + tristate "Hash functions: SM3 (Hygon CIS)" + depends on X86 && 64BIT + select CRYPTO_HASH + select CRYPTO_SM3 + help + SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3 + + Architecture: x86_64 using: + - CIS (Crypto Instruction Set) + +config CRYPTO_SM4_CIS_X86_64 + tristate "Ciphers: SM4 (Hygon CIS)" + depends on X86 && 64BIT + select CRYPTO_SKCIPHER + select CRYPTO_AEAD + select CRYPTO_SIMD + select CRYPTO_GHASH_CLMUL_NI_INTEL + help + Length-preserving ciphers: SM4 cipher algorithms + (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, OFB, CTR, XTS and GCM modes + + Architecture: x86_64 using: + - CIS (Crypto Instruction Set) + config CRYPTO_SM2_ZHAOXIN_GMI tristate "SM2 Cipher algorithm (Zhaoxin GMI Instruction)" depends on X86 && (CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 9e0e5c041fb0b46b32884b5c4f3833b8d0dcccca..0f1240856d538d695100f341681bcc7713fc09ea 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4992,6 +4992,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .cipher = __VECS(essiv_aes_cbc_tv_template) } + }, { + .alg = "gbt17964(xts(sm4))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_xts_gbt17964_tv_template) + } }, { .alg = "gcm(aes)", .generic_driver = "gcm_base(ctr(aes-generic),ghash-generic)", diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 8b3bb5b85e63607a581243f1761c12f59506a7a3..f0bbbd9add0c5535a92a1f6fa2758adbec4ce85b 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -14090,6 +14090,262 @@ static const struct cipher_testvec sm4_xts_tv_template[] = { }, }; +/** + * GBT-17964 Specification Requirement: + * The tweak vector of SM4-XTS algorithm shall be represented in + * big-endian format, which is distinct from the little-endian format + * defined by international standards. 
+ */ +static const struct cipher_testvec sm4_xts_gbt17964_tv_template[] = { + /* Generated from AES-XTS test vectors */ + { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ctext = "\xd9\xb4\x21\xf7\x31\xc8\x94\xfd" + "\xc3\x5b\x77\x29\x1f\xe4\xe3\xb0" + "\xe5\x8e\x55\xe6\x13\xa8\x62\xb4" + "\xd2\xb0\xf1\x07\x3b\x4b\x4f\xd0", + .len = 32, + }, { + .key = "\x11\x11\x11\x11\x11\x11\x11\x11" + "\x11\x11\x11\x11\x11\x11\x11\x11" + "\x22\x22\x22\x22\x22\x22\x22\x22" + "\x22\x22\x22\x22\x22\x22\x22\x22", + .klen = 32, + .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44", + .ctext = "\xa7\x4d\x72\x6c\x11\x19\x6a\x32" + "\xbe\x04\xe0\x01\xff\x29\xd0\xc7" + "\x72\x4f\xee\xf8\x1d\x66\x6a\xe5" + "\xaf\xdf\xe4\x64\x95\x44\xfc\xf5", + .len = 32, + }, { + .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" + "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" + "\x22\x22\x22\x22\x22\x22\x22\x22" + "\x22\x22\x22\x22\x22\x22\x22\x22", + .klen = 32, + .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44", + .ctext = "\x7f\x76\x08\x8e\xff\xad\xf7\x0c" + "\x02\xea\x9f\x95\xda\x06\x28\xd3" + "\xef\x2d\x6a\x77\x00\x4b\xea\xa9" + "\x01\x60\x01\xd6\x78\x9d\xd5\xa0", + .len = 32, + }, { + .key = "\x27\x18\x28\x18\x28\x45\x90\x45" + "\x23\x53\x60\x28\x74\x71\x35\x26" + "\x31\x41\x59\x26\x53\x58\x97\x93" + "\x23\x84\x62\x64\x33\x83\x27\x95", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + 
"\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + .ctext = "\x54\xdd\x65\xb6\x32\x6f\xae\xa8" + "\xfa\xd1\xa8\x3c\x63\x61\x4a\xf3" + "\x98\xbd\xdb\x68\x24\x73\x5d\xab" + "\x93\xec\x4e\x75\x73\x42\x15\xd4" + "\x63\xf6\x7d\xaf\x53\x74\x2f\xb2" + "\xa2\x84\x7d\x5f\xde\x39\x84\xf8" + "\x88\x2c\xfd\x5f\xa9\xd6\x64\x2e" + "\x1e\x87\x1c\x15\x52\x02\x44\x00" + "\x44\x25\x14\x65\x21\x16\x28\xba" + "\x86\xf8\xd2\x99\x83\x87\xa6\x85" + "\xed\xde\x23\xc0\x76\x10\xb7\x38" + "\x8a\xab\x17\xf2\x05\xaa\x5d\xad" + "\xa3\x3c\x0a\x8a\x42\x25\xbc\x11" + "\x42\x54\xc7\x96\xf8\x00\xc6\x38" + "\xe0\x16\xd1\x99\xcd\x21\xdc\x28" + "\xe9\x2d\xc2\xb8\x58\x75\x45\x50" + "\x9a\x8e\x1d\x65\x9c\x59\x6d\x3f" + "\x6c\x8c\x22\x5a\x27\xbd\xb2\xb0" + "\x2f\xe5\xa0\xc0\x18\x3a\x59\x2b" + "\x39\x6d\x32\x76\x5f\xe7\x33\xaf" + "\xb4\x38\xa6\xff\xb3\x05\xae\x13" + "\x77\xc5\x6d\x87\x2b\xad\xce\xbb" + "\xd3\x78\x12\xff\x79\xf0\x57\x1b" + "\x3f\x97\x75\x37\x57\x0a\x1f\x76" + "\xb9\xa5\x0c\x49\xab\x8d\x86\x7f" + "\xa0\x24\xea\x44\x83\xa2\x5f\x79" + "\x47\xb0\x78\x85\xbb\x83\x9e\x77" + "\x7a\xbe\x76\xaf\x11\xad\xf3\x10" + "\x8d\x11\x95\x93\x3f\x96\xb7\x94" + "\x9b\x06\x64\xbd\xb8\x9b\xeb\x3b" + "\xc4\x8f\xb5\xf5\xd2\x10\x9d\x32" + "\x33\x2f\x17\xc9\xa6\xdd\xea\x55" + "\x44\x1d\x1b\xbf\x43\x28\x0e\xc7" + "\xe7\x57\x91\xe2\x34\xd6\x51\xa0" + "\x71\x62\x09\xeb\x21\xae\x06\x06" + "\x1e\x33\xa7\x2b\x0c\x53\x0c\xb1" + "\x5f\xe0\xb5\x50\x16\xb1\x88\xda" + "\xd7\x5c\x4c\x50\x23\x2d\xce\x1f" + "\x5d\xf6\x19\x11\xc7\x9b\xee\x60" + "\x39\x7b\x64\xbb\x91\x4c\x0f\x26" + "\xef\xcf\xb6\xff\xab\x2b\xb3\x3b" + "\xdf\xd8\xdb\x98\xc4\x4d\xeb\xbd" + "\x4c\xa8\x65\xd4\x1c\xbe\x1d\x08" + "\x01\xb0\x1a\xba\x26\x03\xcb\xea" + "\x59\x9b\x32\xc8\x36\x78\x9d\xee" + "\xb9\xa3\xc1\x8f\x3c\xae\x97\x7b" + "\x42\xec\x81\xf1\xdf\xef\x6e\x09" + "\x8d\xd9\xe9\xdd\x6c\x18\x22\xbb" + "\x93\x8b\x08\x64\x1b\xb7\x24\x61" + "\xf8\xd3\x8c\x17\x24\xa4\x3a\xe1" + "\x25\x4b\x92\x23\xe2\x27\x0c\xf9" + "\xf7\xd7\x1a\x6b\xf0\x93\xdf\x20" + "\x79\xfd\x2c\xc2\xfe\x87\xe8\x46" + "\xd7\x99\xde\x30\x48\x3f\x80\x16" + "\x4c\x31\xe6\x5d\x8a\xae\x5f\x72" + "\xd6\xdc\x71\x11\x89\x32\xa0\x08" + "\xdf\x54\x7c\x71\x2b\xee\x45\xdd" + "\xeb\xcd\xce\x09\x8d\x67\x3e\xf5" + "\xed\xe9\x1e\xdf\xd4\x5d\x17\xcb" + "\x90\x96\x3d\x3e\x2e\x2e\x25\x08" + "\xa3\x76\xa7\xb1\xaf\x4d\x69\xe7" + "\x56\xea\x5d\xf5\x2a\xc4\x40\x79" + "\x1d\x57\xd5\x6b\x5e\x05\x7a\xd0" + "\x0e\x07\x7d\x2d\xf5\x00\x94\x16", + .len = 512, + }, { + .key = "\x62\x49\x77\x57\x24\x70\x93\x69" + "\x99\x59\x57\x49\x66\x96\x76\x27" + "\x02\x88\x41\x97\x16\x93\x99\x37" + "\x51\x05\x82\x09\x74\x94\x45\x92", + .klen = 32, + .iv = 
"\xff\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xf8\xf9\xfa\xfb\xfc", + .ctext = "\xa2\x9f\x9e\x4e\x71\xdb\x28\x3c" + "\x80\x0e\xf6\xb7\x8e\x57\x1c\xba" + "\x70\x75\xb9\x4d\x0b\xad\x8d\x53" + "\x18\x87\xbe\xa8\x50\x20\x8f\x52" + "\xfd\x6a\xff\xe2\x14\x38\x0d\xaf" + "\x50\xc8\x3f\x43\x3f\x3c\x15\x55" + "\x0e\x39\xff\xb4\xf2\x97\x75\x8f" + "\xa3\xd3\xa7\x3c\x9e\x0d\x1b\x5c" + "\x01\xad\xea\x56\xc3\x46\x0a\x03" + "\xa9\xe1\x94\xfa\x7e\x2f\xec\x13" + "\x2a\x7e\xa6\xca\xac\xf1\x27\x81" + "\xba\xbb\x68\xe2\xac\x80\x74\xc2" + "\xd7\x46\xa8\x28\x0b\x24\x14\x04" + "\x50\x9b\xf0\xd7\x9c\x48\x37\xed" + "\xf4\x4b\x6c\xa3\x85\x0d\x15\x20" + "\xa1\xab\xcd\x05\xf2\x12\x4b\xd6" + "\xed\xdd\x0b\xea\x42\xd8\x6c\x83" + "\x0f\x88\xb8\x1c\x96\xcd\xe4\xd1" + "\x47\xdd\x22\x85\x82\x2d\x1b\x37" + "\x96\x61\x68\x46\x4f\x74\x9b\xa2" + "\x69\x9c\xfd\x16\x10\xc0\xa4\x6a" + "\x57\x31\x05\xba\x11\x63\x86\x82" + "\x54\x91\xab\xd0\x3a\x95\x90\x2b" + "\x54\xe8\x75\xf3\xdb", + .len = 189, + }, +}; + static const struct aead_testvec sm4_gcm_tv_template[] = { { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h index 9d7eff04f22441e08f7c748333e302f83887ebe8..c546a8692ef0f2ac711953ace3f8dc08d26fdd6a 100644 --- a/include/crypto/gcm.h +++ b/include/crypto/gcm.h @@ -6,6 +6,7 @@ #define GCM_AES_IV_SIZE 12 #define GCM_RFC4106_IV_SIZE 8 #define GCM_RFC4543_IV_SIZE 8 +#define GCM_SM4_IV_SIZE 12 /* * validate authentication tag for GCM diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index b2cc6c04cbfe7b75fda77d61f626975dfceb9894..66bec69784f2d45653d8e6ea27abc54a19bb05cd 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -812,8 +812,8 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) -f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) -f9: MOVDIRI My,Gy +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) | vsm3rndb Vx,Hx,Wx (66),(v1) +f9: MOVDIRI My,Gy | vsm3msgb Vx,Hx,Wx (66),(v1) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -889,6 +889,10 @@ AVXcode: 3 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +88: vsm3randa Vx,Hx,Wx,Ib (66),(v1) +89: vsm3msga Vx,Hx,Wx,Ib (66),(v1) +8a: vsm4rand Vx,Hx,Wx,Ib (66),(v1) | sm4rand 
Vk,Wx,Ib (66) +8b: vsm4rk Vx,Hx,Wx,Ib (66),(v1) | sm4rk Vk,Wx,Ib (66) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66)
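A closing note on the GBT/T 17964 tweak convention that sm4_xts_gbt17964_tv_template exercises: because the tweak is kept big-endian, gf128_mul_by_2 byte-reverses it, performs a one-bit right shift across both quadwords (the 0x80/0xe1 masks fold the inter-quadword carry and the reduction), and reverses back. On the original byte order that is equivalent to the following C sketch; this is an illustrative reading of the asm's semantics, not taken from a separate reference:

    #include <stdint.h>

    /* Multiply the big-endian SM4-XTS tweak by x in GF(2^128). */
    void gf128_mul_by_2_be(uint8_t t[16])
    {
        uint8_t carry = t[15] & 1;      /* bit shifted out of the low end */
        int i;

        for (i = 15; i > 0; i--)
            t[i] = (t[i] >> 1) | (t[i - 1] << 7);
        t[0] >>= 1;
        if (carry)
            t[0] ^= 0xe1;               /* fold back the reduction term */
    }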